diff --git a/.gitignore b/.gitignore index 78db076..8a76b6c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ workspace/ +config_*.json +workspace_*/ emma_paper/ final_outputs/ *.wav diff --git a/README.md b/README.md index 7e3853d..b4c9522 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,39 @@ +
+
-# Toolomics +Toolomics Logo -*A suite of tools from the Holobiomics Lab for Agents, organized as a set of MCP servers.* +
+ +

Toolomics

+ +

+ A suite of MCP-based Tools from the HolobiomicsLab. Used by AI-Agents such as ***Mimosa-AI*** +

+ +

+ license + last-commit + repo-language-count +

+ +

+ + GitHub Stars + + + License: Apache 2.0 + +

+ +--- + +> ***Toolomics*** — deploys containerized tools, manages isolated instances, and enables file sharing across AI agents for bioinformatics, metabolomics, molecular docking, and beyond. + +**Use cases:** +- Deploy MCP servers for browser automation, PDF processing, and data extraction +- Run isolated, multi-instance agent workspaces with automatic resource management +- Orchestrate containerized bioinformatics pipelines (XCMS, RStudio, Redis) with zero config ## Install & deploy tools diff --git a/config_86517947.json b/config_86517947.json index 4605d5d..6a0b1f2 100644 --- a/config_86517947.json +++ b/config_86517947.json @@ -5,63 +5,73 @@ "enabled": false }, { - "path": "mcp_host/pdf/server.py", + "path": "mcp_host/memory/server.py", "port": 5001, "enabled": false }, { - "path": "mcp_host/html/server.py", + "path": "mcp_host/pdf/server.py", "port": 5002, + "enabled": true + }, + { + "path": "mcp_host/html/server.py", + "port": 5003, "enabled": false }, { "path": "mcp_host/graph_rag/server.py", - "port": 5003, + "port": 5004, "enabled": false }, { "path": "mcp_host/browser/server.py", - "port": 5004, + "port": 5005, "enabled": false }, { "path": "mcp_host/image_analysis/server.py", - "port": 5013, + "port": 5006, "enabled": true }, { "path": "mcp_host/csv/server.py", - "port": 5006, + "port": 5007, "enabled": true }, + { + "path": "mcp_host/skills/server.py", + "port": 5008, + "enabled": false + }, { "path": "mcp_host/txt_editor/server.py", - "port": 5007, + "port": 5009, "enabled": true }, { "path": "mcp_host/python_editor/server.py", - "port": 5008, + "port": 5010, "enabled": true }, { "path": "mcp_host/Rscript/docker-compose.yml", - "port": 5009, + "port": 5011, "enabled": false }, { "path": "mcp_host/shell/docker-compose.yml", - "port": 5010, + "port": 5012, "enabled": true }, { "path": "mcp_host/browser/searxng/docker-compose.yml", - "port": 5011, + "port": 5013, "enabled": false }, { "path": "mcp_host/decimer/docker-compose.yml", - "port": 5012, + 
"port": 5014, "enabled": false } ] \ No newline at end of file diff --git a/deploy.py b/deploy.py index 484a9fd..ee3cd78 100644 --- a/deploy.py +++ b/deploy.py @@ -480,22 +480,9 @@ def load_config(self) -> Dict[str, dict]: if self.config_path.stat().st_size == 0: logger.warning(f"Config file {self.config_path} is empty.") return {} - - # Check if file is corrupted by reading first character - try: - with open(self.config_path, 'rb') as f: - first_byte = f.read(1) - if first_byte and first_byte != b'[': - logger.error(f"Config file {self.config_path} is corrupted (starts with {first_byte!r} instead of b'[')") - logger.warning(f"Deleting corrupted config file to regenerate fresh") - self.config_path.unlink() - return {} - except Exception as e: - logger.error(f"Error checking config file: {e}") - return {} try: - with open(self.config_path, 'r', encoding='utf-8') as f: + with open(self.config_path, 'r', encoding='utf-8-sig') as f: # utf-8-sig handles BOM # Acquire shared lock for reading try: fcntl.flock(f.fileno(), fcntl.LOCK_SH) @@ -525,7 +512,7 @@ def load_config(self) -> Dict[str, dict]: else: raise ValueError("Can't parse config.json file") - logger.debug(f"Successfully loaded {len(config_dict)} items from config") + logger.info(f"Successfully loaded {len(config_dict)} items from config (enabled: {sum(1 for v in config_dict.values() if v.get('enabled'))})") return config_dict finally: # Release lock @@ -534,17 +521,28 @@ def load_config(self) -> Dict[str, dict]: except: pass - except (json.JSONDecodeError, KeyError, IndexError) as e: + except (json.JSONDecodeError, KeyError, IndexError, ValueError) as e: logger.error(f"Error loading config from {self.config_path}: {e}") # Try to read the file again to see what's actually there try: file_size = self.config_path.stat().st_size with open(self.config_path, 'rb') as f: raw_bytes = f.read(100) - logger.error(f"File size: {file_size}, first 100 bytes: {raw_bytes}") + logger.error(f"File size: {file_size}, first 100 
bytes: {raw_bytes!r}") except Exception as debug_e: logger.error(f"Could not read file for debugging: {debug_e}") - logger.warning(f"Returning empty config due to parse error. File will be regenerated.") + + # Backup the corrupted file instead of silently regenerating + backup_path = self.config_path.with_suffix('.json.backup') + try: + import shutil + shutil.copy2(self.config_path, backup_path) + logger.warning(f"Backed up corrupted config to: {backup_path}") + except Exception as backup_e: + logger.warning(f"Could not backup config: {backup_e}") + + logger.warning(f"Config file appears corrupted. Will regenerate fresh config.") + logger.warning(f"If you had enabled services, please re-enable them after checking {backup_path}") return {} def save_config(self, config: Dict[str, dict]) -> None: @@ -622,7 +620,7 @@ def assign_ports(self, server_files: List[Path], compose_files: List[Path] = Non config[server_str] = {'port': next_host_port, 'enabled': False} used_ports.add(next_host_port) - logger.info(f"Assigned host port {next_host_port} to {server_str} (enabled by default)") + logger.info(f"Assigned host port {next_host_port} to {server_str} (disabled - edit config to enable)") next_host_port += 1 # Assign ports to docker-compose files @@ -640,7 +638,7 @@ def assign_ports(self, server_files: List[Path], compose_files: List[Path] = Non config[compose_str] = {'port': next_host_port, 'enabled': False} used_ports.add(next_host_port) - logger.info(f"Assigned host port {next_host_port} to {compose_str} (enabled by default)") + logger.info(f"Assigned host port {next_host_port} to {compose_str} (disabled - edit config to enable)") next_host_port += 1 self.save_config(config) diff --git a/mcp_host/browser/searxng/searxng_86517947/settings.yml b/mcp_host/browser/searxng/searxng_86517947/settings.yml new file mode 100644 index 0000000..13fc06b --- /dev/null +++ b/mcp_host/browser/searxng/searxng_86517947/settings.yml @@ -0,0 +1,2816 @@ +general: + # Debug mode, only for 
development. Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: "SearXNG" + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + # expose stats in open metrics format at /metrics + # leave empty to disable (no password set) + # open_metrics: + open_metrics: '' + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "360search", "baidu", "brave", "dbpedia", "duckduckgo", "google", "yandex", + # "mwmbl", "naver", "seznam", "sogou", "startpage", "stract", "swisscows", "quark", "qwant", "wikipedia" - + # leave blank to turn it off by default. + autocomplete: "" + # minimum characters to type before autocompleter starts + autocomplete_min: 4 + # backend for the favicon near URL in search results. + # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default. 
+ favicon_resolver: "" + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: "127.0.0.1" + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_BASE_URL}. + base_url: false # "http://example.com/location" + # rate limit the number of request on the instance, block some bots. + # Is overwritten by ${SEARXNG_LIMITER} + limiter: false + # enable features designed only for public instances. + # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} + public_instance: false + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. + + secret_key: "yRRpFrzXcnyMs2u8dJvqftExtViNyvma" # Is overwritten by ${SEARXNG_SECRET} + # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: "1.0" + # POST queries are "more secure!" 
but are also the source of hard-to-locate + # annoyances, which is why GET may be better for end users and their browsers. + # see https://github.com/searxng/searxng/pull/3619 + # Is overwritten by ${SEARXNG_METHOD} + method: "POST" + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: "" + # Custom templates path - leave it blank if you didn't change + templates_path: "" + # query_in_title: When true, the result page's title contains the query; + # this decreases privacy, since the browser can record the page titles. + query_in_title: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: "" + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark, black + simple_style: auto + # Perform search immediately if a category is selected. + # Disable to select multiple categories at once and start the search manually. + search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + # URL formatting: pretty, full or host + url_formatting: pretty + +# Lock arbitrary settings on the preferences page. 
+# +# preferences: +# lock: +# - categories +# - language +# - autocomplete +# - favicon +# - safesearch +# - method +# - doi_resolver +# - locale +# - theme +# - results_on_new_tab +# - search_on_category_select +# - method +# - image_proxy +# - query_in_title + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be overridden by engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searxng_useragent, could contain information like an email address + # to the administrator + useragent_suffix: "" + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. + pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxy see: SOCKS proxies + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + + +# Plugin configuration, for more details see +# https://docs.searxng.org/admin/settings/settings_plugins.html +# +plugins: + + searx.plugins.calculator.SXNGPlugin: + active: true + + 
searx.plugins.infinite_scroll.SXNGPlugin: + active: false + + searx.plugins.hash_plugin.SXNGPlugin: + active: true + + searx.plugins.self_info.SXNGPlugin: + active: true + + searx.plugins.unit_converter.SXNGPlugin: + active: true + + searx.plugins.ahmia_filter.SXNGPlugin: + active: true + + searx.plugins.hostnames.SXNGPlugin: + active: true + + searx.plugins.time_zone.SXNGPlugin: + active: true + + searx.plugins.oa_doi_rewrite.SXNGPlugin: + active: false + + searx.plugins.tor_check.SXNGPlugin: + active: false + + searx.plugins.tracker_url_remover.SXNGPlugin: + active: true + + +# Configuration of the "Hostnames plugin": +# +# hostnames: +# replace: +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# remove: +# - '(.*\.)?facebook.com$' +# low_priority: +# - '(.*\.)?google(\..*)?$' +# high_priority: +# - '(.*\.)?wikipedia.org$' +# +# Alternatively you can use external files for configuring the "Hostnames plugin": +# +# hostnames: +# replace: 'rewrite-hosts.yml' +# +# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: {}" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + 
- not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ["linux", "new york", "bbc"] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 360search + engine: 360search + shortcut: 360so + timeout: 10.0 + disabled: true + + - name: 360search videos + engine: 360search_videos + shortcut: 360sov + disabled: true + + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: acfun + engine: acfun + shortcut: acf + disabled: true + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 + disabled: true + + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true + + - name: alpine linux packages + engine: alpinelinux + disabled: true + shortcut: alp + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + 
timeout: 5 + + - name: ansa + engine: ansa + shortcut: ans + disabled: true + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + # # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: oldest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + # Might do up to two requests to perform a search. + # Since Tor is already slow by nature, the timeout is set very high. + timeout: 20.0 + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: nixos wiki + engine: mediawiki + shortcut: nixw + base_url: https://wiki.nixos.org/ + search_type: text + disabled: true + categories: [it, software wikis] + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + + - name: ask + engine: ask + shortcut: ask + disabled: true + + - name: azure + engine: azure + shortcut: az + categories: [it, cloud] + # azure_tenant_id: "your_tenant_id" + # azure_client_id: "your_client_id" + # azure_client_secret: "your_client_secret" + inactive: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: baidu + baidu_category: general + categories: 
[general] + engine: baidu + shortcut: bd + disabled: true + + - name: baidu images + baidu_category: images + categories: [images] + engine: baidu + shortcut: bdi + disabled: true + + - name: baidu kaifa + baidu_category: it + categories: [it] + engine: baidu + shortcut: bdk + disabled: true + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitchute + engine: bitchute + shortcut: bit + disabled: true + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: media.ccc.de + engine: ccc_media + shortcut: c3tv + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ disabled: true + + - name: cachy os packages + engine: cachy_os + shortcut: cos + disabled: true + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # WARNING: links from chinaso.com violate users' privacy + # Before activating these engines it is mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + disabled: true + inactive: true + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + disabled: true + inactive: true + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + disabled: true + inactive: true + + - name: cloudflareai + engine: cloudflareai + shortcut: cfai + # get api token and account id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/ + cf_account_id: 'your_cf_accout_id' + cf_ai_api: 'your_cf_api' + # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/ + cf_ai_gateway: 'your_cf_ai_gateway_name' + # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation + cf_ai_model: 'ai_model_name' + # customize your preferences + # cf_ai_model_display_name: 'Cloudflare AI' + # cf_ai_model_assistant: 'prompts_for_assistant_role' + # cf_ai_model_system: 'prompts_for_system_role' + timeout: 30 + inactive: true + + - name: core.ac.uk + engine: core + shortcut: cor + # read https://docs.searxng.org/dev/engines/online/core.html + api_key: "" + inactive: true + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories: 
general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + title_html_to_text: true + content_html_to_text: true + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + timeout: 15 + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: currency + engine: currency_convert + shortcut: cc + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: devicons + engine: devicons + shortcut: di + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: encyclosearch + engine: json_engine + shortcut: es + categories: general + paging: true + search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15 + results_query: Results + url_query: SourceURL + title_query: Title + content_query: Description + disabled: true + about: + website: https://encyclosearch.org + official_api_documentation: https://encyclosearch.org/docs/#/rest-api + use_official_api: true + require_api_key: false + results: JSON + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + 
url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: elasticsearch + shortcut: els + engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # enable_http: true + # available options: match, simple_query_string, term, terms, custom + query_type: match + # if query_type is set to custom, provide your query here + # custom_query_json: {"query":{"match_all": {}}} + # show_metadata: false + inactive: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] + tests: *tests_infobox + categories: [general] + + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: 
https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: ebay + engine: ebay + shortcut: eb + base_url: 'https://www.ebay.com' + inactive: true + timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: findthatmeme + engine: findthatmeme + shortcut: ftm + disabled: true + + - name: flickr + categories: images + shortcut: fl + engine: flickr_noapi + + - name: flickr_api + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + engine: flickr + categories: images + shortcut: fla + # api_key: 'apikey' # required! 
+ inactive: true + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + - name: freesound + engine: freesound + shortcut: fnd + timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + inactive: true + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fynd + engine: xpath + search_url: https://fynd.bot/?search={query}&offset={pageno} + results_xpath: //div[contains(@class, "result-item")] + url_xpath: .//a/@href + title_xpath: .//div[contains(@class, "title-line")] + content_xpath: .//div[contains(@class, "description")] + thumbnail_xpath: .//img[contains(@class, "preview-img")]/@src + paging: true + first_page_num: 0 + page_size: 10 + categories: general + disabled: true + shortcut: fynd + about: + website: https://fynd.bot + use_official_api: false + require_api_key: false + results: HTML + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: geizhals + engine: geizhals + shortcut: geiz + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: mediawiki + shortcut: ge + categories: ["it", "software wikis"] + base_url: "https://wiki.gentoo.org/" + api_path: "api.php" + search_type: text + timeout: 10 + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + disabled: true + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 + + - name: github + engine: github + shortcut: gh + + - name: github code + engine: github_code + shortcut: ghc + inactive: true + ghc_auth: + # type is one of: + # * 
none + # * personal_access_token + # * bearer + # When none is passed, the token is not required. + type: "none" + token: "token" + # specify whether to highlight the matching lines to the query + ghc_highlight_matching_lines: true + ghc_strip_new_lines: true + ghc_strip_whitespace: false + timeout: 10.0 + + - name: codeberg + # https://docs.searxng.org/dev/engines/online/gitea.html + engine: gitea + base_url: https://codeberg.org + shortcut: cb + disabled: true + + - name: gitea.com + engine: gitea + base_url: https://gitea.com + shortcut: gitea + disabled: true + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: grokipedia + engine: grokipedia + shortcut: gp + disabled: true + inactive: true + + - name: material icons + engine: material_icons + shortcut: mi + disabled: true + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href + 
title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hex + engine: hex + shortcut: hex + disabled: true + # Valid values: name inserted_at updated_at total_downloads recent_downloads + sort_criteria: "recent_downloads" + page_size: 10 + + - name: crates.io + engine: crates + shortcut: crates + disabled: true + timeout: 6.0 + + - name: hoogle + engine: xpath + search_url: https://hoogle.haskell.org/?hoogle={query} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: il post + engine: il_post + shortcut: pst + disabled: true + + - name: huggingface + engine: huggingface + shortcut: hf + disabled: true + + - name: huggingface datasets + huggingface_endpoint: datasets + engine: huggingface + shortcut: hfd + disabled: true + + - name: huggingface spaces + huggingface_endpoint: spaces + engine: huggingface + shortcut: hfs + disabled: true + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + # - name: invidious + # engine: invidious + # # if you want to use invidious with SearXNG you should setup one locally + # # 
https://github.com/searxng/searxng/issues/2722#issuecomment-2884993248 + # base_url: + # - https://invidious.example1.com + # - https://invidious.example2.com + # shortcut: iv + # timeout: 3.0 + + - name: ipernity + engine: ipernity + shortcut: ip + disabled: true + + - name: iqiyi + engine: iqiyi + shortcut: iq + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + timeout: 7.0 + disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true + + - name: library of congress + engine: loc + shortcut: loc + categories: images + disabled: true + + - name: libretranslate + engine: libretranslate + # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors + 
base_url: + - https://libretranslate.com/translate + # api_key: '' + shortcut: lt + inactive: true + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: lucide + engine: lucide + shortcut: luc + timeout: 3.0 + + - name: marginalia + engine: marginalia + shortcut: mar + # To get an API key, please follow the instructions at + # - https://about.marginalia-search.com/article/api/ + # api_key: '' + disabled: true + inactive: true + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + 
engine: metacpan + shortcut: cpan + disabled: true + number_of_results: 20 + + # https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + # auth_key: Bearer XXXX + + - name: microsoft learn + engine: microsoft_learn + shortcut: msl + disabled: true + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: niconico + engine: niconico + shortcut: nico + disabled: true + + - name: npm + engine: npm + shortcut: npm + timeout: 5.0 + disabled: true + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: od + disabled: true + + - name: ollama + engine: ollama + shortcut: ollama + disabled: true + + - name: openairedatasets 
+ engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: "science" + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openalex + engine: openalex + shortcut: oa + # https://docs.searxng.org/dev/engines/online/openalex.html + # Recommended by OpenAlex: join the polite pool with an email address + # mailto: "[email protected]" + timeout: 5.0 + disabled: true + + - name: openclipart + engine: openclipart + shortcut: ocl + inactive: true + disabled: true + timeout: 30 + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + disabled: true + + - name: openmeteo + engine: open_meteo + shortcut: om + disabled: true + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 
'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. 
Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + inactive: true + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.ducks.party + - https://api.piped.private.coffee + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + inactive: true + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: pixabay images + engine: pixabay + pixabay_type: images + categories: images + shortcut: pixi + disabled: true + + - name: pixabay videos + engine: pixabay + pixabay_type: videos + categories: videos + shortcut: pixv + disabled: true + + - name: pixiv + shortcut: pv + engine: pixiv + disabled: true + inactive: true + remove_ai_images: false + pixiv_image_proxies: + - https://pximg.example.org + # A proxy is required to load the images. Hosting an image proxy server + # for Pixiv: + # --> https://pixivfe.pages.dev/hosting-image-proxy-server/ + # Proxies from public instances. Ask the public instances owners if they + # agree to receive traffic from SearXNG! 
+ # --> https://codeberg.org/VnPower/PixivFE#instances + # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 + # image proxy of https://pixiv.cat + # - https://i.pixiv.cat + # image proxy of https://www.pixiv.pics + # - https://pximg.cocomi.eu.org + # image proxy of https://pixivfe.exozy.me + # - https://pximg.exozy.me + # image proxy of https://pixivfe.ducks.party + # - https://pixiv.ducks.party + # image proxy of https://pixiv.perennialte.ch + # - https://pximg.perennialte.ch + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psycopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + 
require_api_key: false + results: HTML + + - name: public domain image archive + engine: public_domain_image_archive + shortcut: pdia + disabled: true + + - name: pubmed + engine: pubmed + shortcut: pub + + - name: pypi + shortcut: pypi + engine: pypi + + - name: quark + quark_category: general + categories: [general] + engine: quark + shortcut: qk + disabled: true + + - name: quark images + quark_category: images + categories: [images] + engine: quark + shortcut: qki + disabled: true + + - name: qwant + qwant_categ: web + engine: qwant + shortcut: qw + categories: [general, web] + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + disabled: true + + - name: reuters + engine: reuters + shortcut: reu + # https://docs.searxng.org/dev/engines/online/reuters.html + # sort_order = "relevance" + + - name: right dao + engine: xpath + paging: true + page_size: 12 + search_url: https://rightdao.com/search?q={query}&start={pageno} + results_xpath: 
//div[contains(@class, "description")] + url_xpath: ../div[contains(@class, "title")]/a/@href + title_xpath: ../div[contains(@class, "title")] + content_xpath: . + categories: general + shortcut: rd + disabled: true + about: + website: https://rightdao.com/ + use_official_api: false + require_api_key: false + results: HTML + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: valkey + # - name: myvalkey + # shortcut : rds + # engine: valkey_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: searchmysite + engine: xpath + shortcut: sms + categories: general + paging: true + search_url: https://searchmysite.net/search/?q={query}&page={pageno} + results_xpath: //div[contains(@class,'search-result')] + url_xpath: .//a[contains(@class,'result-link')]/@href + title_xpath: .//span[contains(@class,'result-title-txt')]/text() + content_xpath: ./p[@id='result-hightlight'] + disabled: true + about: + website: https://searchmysite.net + + - name: selfhst icons + engine: selfhst + shortcut: si + disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: sogou + engine: sogou + shortcut: sogou + disabled: true + + - name: sogou images + engine: sogou_images + shortcut: sogoui + disabled: true + + - name: sogou videos + engine: sogou_videos + shortcut: sogouv + disabled: true + + - name: sogou wechat + engine: sogou_wechat + shortcut: sogouw + disabled: true + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: superuser + engine: 
stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: discuss.python + engine: discourse + shortcut: dpy + base_url: 'https://discuss.python.org' + categories: [it, q&a] + disabled: true + + - name: caddy.community + engine: discourse + shortcut: caddy + base_url: 'https://caddy.community' + categories: [it, q&a] + disabled: true + + - name: pi-hole.community + engine: discourse + shortcut: pi + categories: [it, q&a] + base_url: 'https://discourse.pi-hole.net' + disabled: true + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + inactive: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + - name: springer nature + engine: springer + shortcut: springer + timeout: 5 + # read https://docs.searxng.org/dev/engines/online/springer.html + api_key: "" + inactive: true + + - name: startpage + engine: startpage + shortcut: sp + startpage_categ: web + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: startpage news + engine: startpage + startpage_categ: news + categories: [news, web] + shortcut: spn + + - name: startpage images + engine: startpage + startpage_categ: images + categories: [images, web] + shortcut: spi + + - name: steam + engine: steam + shortcut: stm + disabled: true + + - name: tokyotoshokan + engine: 
tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!mediathekview concert" + # + # - name: mediathekview + # engine: sqlite + # shortcut: mediathekview + # categories: [general, videos] + # result_type: MainResult + # database: searx/data/filmliste-v2.db + # query_str: >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: + http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # TubeArchivist is a self-hosted Youtube archivist software. 
+ # https://docs.searxng.org/dev/engines/online/tubearchivist.html + # + # - name: tubearchivist + # engine: tubearchivist + # shortcut: tuba + # base_url: + # ta_token: + # ta_link_to_mp4: false + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + - name: Torznab EZTV + engine: torznab + shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + show_magnet_links: true + show_torrent_files: false + # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + torznab_categories: # optional + - 2000 + - 5000 + inactive: true + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex + engine: yandex + categories: general + search_type: web + shortcut: yd + disabled: true + + - name: yandex images + engine: yandex + network: yandex + categories: images + search_type: images + shortcut: ydi + disabled: true + + - name: yandex music + engine: yandex_music + network: yandex + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + engine: youtube_noapi + + - name: youtube_api + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + engine: 
youtube_api + # api_key: '' # required! + shortcut: yta + inactive: true + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: "https://{language}.wikibooks.org/" + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: "https://{language}.wikinews.org/" + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: "https://{language}.wikiquote.org/" + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: "https://{language}.wikisource.org/" + search_type: text + disabled: true + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: "https://species.wikimedia.org/" + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + tests: + wikispecies: + matrix: + query: "Campbell, L.I. et al. 
2011: MicroRNAs" + lang: en + result_container: + - not_empty + - ['one_title_contains', 'Tardigrada'] + test: + - unique_results + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: "https://{language}.wiktionary.org/" + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: "https://{language}.wikiversity.org/" + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: "https://{language}.wikivoyage.org/" + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wci + categories: images + wc_search_type: image + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + wc_search_type: video + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + wc_search_type: audio + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + wc_search_type: file + + - name: wolframalpha + shortcut: wa + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: wolframalpha_api + # You can use the engine using the official stable API, but you need an API + # key. See: https://products.wolframalpha.com/api/ + engine: wolframalpha_api + # api_key: '' # required! 
+ shortcut: waa + timeout: 6.0 + categories: general + inactive: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + - name: deepl + engine: deepl + shortcut: dpl + # You can use the engine using the official stable API, but you need an API key + # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ timeout: 5.0 + inactive: true + + - name: mojeek + shortcut: mjk + engine: mojeek + categories: [general, web] + disabled: true + + - name: mojeek images + shortcut: mjkimg + engine: mojeek + categories: [images, web] + search_type: images + paging: false + disabled: true + + - name: mojeek news + shortcut: mjknews + engine: mojeek + categories: [news, web] + search_type: news + paging: false + disabled: true + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + categories: [general, web] + engine: naver + shortcut: nvr + disabled: true + + - name: naver images + naver_category: images + categories: [images] + engine: naver + shortcut: nvri + disabled: true + + - name: naver news + naver_category: news + categories: [news] + engine: naver + shortcut: nvrn + disabled: true + + - name: naver videos + naver_category: videos + categories: [videos] + engine: naver + shortcut: nvrv + disabled: true + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + # https://docs.searxng.org/dev/engines/online/yacy.html + engine: yacy + categories: general 
+ search_type: text + # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027 + base_url: + - https://yacy.searchlab.eu + shortcut: ya + disabled: true + # if you aren't using HTTPS for your local yacy instance disable https + # enable_http: false + search_mode: 'global' + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: yacy images + engine: yacy + network: yacy + categories: images + search_type: image + shortcut: yai + disabled: true + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: repology + engine: repology + shortcut: rep + disabled: true + inactive: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: wnik + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: 
svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: uxwing + engine: uxwing + shortcut: ux + disabled: true + + - name: voidlinux + engine: voidlinux + shortcut: void + disabled: true + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + inactive: true + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! 
This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: lib_rs + disabled: true + + - name: sourcehut + shortcut: srht + engine: sourcehut + # https://docs.searxng.org/dev/engines/online/sourcehut.html + # sourcehut_sort_order: longest-active + disabled: true + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: pkg_go_dev + shortcut: pgo + disabled: true + + - name: senscritique + engine: senscritique + shortcut: scr + timeout: 4.0 + disabled: true + + - name: minecraft wiki + engine: mediawiki + shortcut: mcw + categories: ["software wikis"] + base_url: https://minecraft.wiki/ + api_path: "api.php" + search_type: text + disabled: true + about: + website: https://minecraft.wiki/ + wikidata_id: Q105533483 + +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens.
+# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/mcp_host/browser/searxng/searxng_86517947/settings.yml.new b/mcp_host/browser/searxng/searxng_86517947/settings.yml.new new file mode 100644 index 0000000..6eb5895 --- /dev/null +++ b/mcp_host/browser/searxng/searxng_86517947/settings.yml.new @@ -0,0 +1,2706 @@ +general: + # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: "SearXNG" + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + # expose stats in open metrics format at /metrics + # leave empty to disable (no password set) + # open_metrics: + open_metrics: '' + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "360search", "baidu", "bing", "brave", "dbpedia", "duckduckgo", "google", + # "yandex", "mwmbl", "naver", "seznam", "sogou", "startpage", "swisscows", "quark", "qwant", "wikipedia" - + # leave blank to turn it off by default. + autocomplete: "" + # minimum characters to type before autocompleter starts + autocomplete_min: 4 + # backend for the favicon near URL in search results. + # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default.
+ favicon_resolver: "" + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 180 + # For error "CAPTCHA" + SearxEngineCaptcha: 3600 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 180 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: "127.0.0.1" + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_BASE_URL}. + base_url: false # "http://example.com/location" + # rate limit the number of request on the instance, block some bots. + # Is overwritten by ${SEARXNG_LIMITER} + limiter: false + # enable features designed only for public instances. + # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} + public_instance: false + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. + + secret_key: "ultrasecretkey" # Is overwritten by ${SEARXNG_SECRET} + # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: "1.0" + # POST queries are "more secure!" 
but are also the source of hard-to-locate + # annoyances, which is why GET may be better for end users and their browsers. + # see https://github.com/searxng/searxng/pull/3619 + # Is overwritten by ${SEARXNG_METHOD} + method: "POST" + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: "" + # Custom templates path - leave it blank if you didn't change + templates_path: "" + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: "" + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark, black + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. + search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + # URL formatting: pretty, full or host + url_formatting: pretty + +# Lock arbitrary settings on the preferences page. 
+# +# preferences: +# lock: +# - categories +# - language +# - autocomplete +# - favicon +# - safesearch +# - method +# - doi_resolver +# - locale +# - theme +# - results_on_new_tab +# - search_on_category_select +# - method +# - image_proxy +# - query_in_title + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be overridden by engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searxng_useragent, could contain information like an email address + # to the administrator + useragent_suffix: "" + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. + pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxy, see: + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # SOCKS proxies are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + + +# Plugin configuration, for more details see +# https://docs.searxng.org/admin/settings/settings_plugins.html +# +plugins: + + searx.plugins.calculator.SXNGPlugin: + active: true + +
searx.plugins.infinite_scroll.SXNGPlugin: + active: false + + searx.plugins.hash_plugin.SXNGPlugin: + active: true + + searx.plugins.self_info.SXNGPlugin: + active: true + + searx.plugins.unit_converter.SXNGPlugin: + active: true + + searx.plugins.ahmia_filter.SXNGPlugin: + active: true + + searx.plugins.hostnames.SXNGPlugin: + active: true + + searx.plugins.time_zone.SXNGPlugin: + active: true + + searx.plugins.oa_doi_rewrite.SXNGPlugin: + active: false + + searx.plugins.tor_check.SXNGPlugin: + active: false + + searx.plugins.tracker_url_remover.SXNGPlugin: + active: true + + +# Configuration of the "Hostnames plugin": +# +# hostnames: +# replace: +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# remove: +# - '(.*\.)?facebook.com$' +# low_priority: +# - '(.*\.)?google(\..*)?$' +# high_priority: +# - '(.*\.)?wikipedia.org$' +# +# Alternatively you can use external files for configuring the "Hostnames plugin": +# +# hostnames: +# replace: 'rewrite-hosts.yml' +# +# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# + + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 360search + engine: 360search + shortcut: 360so + timeout: 10.0 + disabled: true + + - name: 360search videos + engine: 360search_videos + shortcut: 360sov + disabled: true + + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: acfun + engine: acfun + shortcut: acf + disabled: true + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", 
"illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 + disabled: true + + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true + + - name: alpine linux packages + engine: alpinelinux + disabled: true + shortcut: alp + + - name: annas archive + engine: annas_archive + base_url: + - https://annas-archive.gl + - https://annas-archive.vg + - https://annas-archive.pk + - https://annas-archive.gd + disabled: true + shortcut: aa + timeout: 5 + + - name: ansa + engine: ansa + shortcut: ans + disabled: true + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + # # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: oldest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + # Might do up to two requests to perform a search. + # Since Tor is already slow by nature, the timeout is set very high. 
+ timeout: 20.0 + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: nixos wiki + engine: mediawiki + shortcut: nixw + base_url: https://wiki.nixos.org/ + search_type: text + disabled: true + categories: [it, software wikis] + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: artstation + engine: artstation + shortcut: as + categories: images + disabled: true + + - name: arxiv + engine: arxiv + shortcut: arx + + - name: ask + engine: ask + shortcut: ask + disabled: true + + - name: azure + engine: azure + shortcut: az + categories: [it, cloud] + # azure_tenant_id: "your_tenant_id" + # azure_client_id: "your_client_id" + # azure_client_secret: "your_client_secret" + inactive: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: baidu + baidu_category: general + categories: [general] + engine: baidu + shortcut: bd + disabled: true + + - name: baidu images + baidu_category: images + categories: [images] + engine: baidu + shortcut: bdi + disabled: true + + - name: baidu kaifa + baidu_category: it + categories: [it] + engine: baidu + shortcut: bdk + disabled: true + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: 
bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitchute + engine: bitchute + shortcut: bit + disabled: true + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: media.ccc.de + engine: ccc_media + shortcut: c3tv + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ disabled: true + + - name: cachy os packages + engine: cachy_os + shortcut: cos + disabled: true + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # WARNING: links from chinaso.com violate users' privacy + # Before activating these engines it is mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + disabled: true + inactive: true + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + disabled: true + inactive: true + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + disabled: true + inactive: true + + - name: cloudflareai + engine: cloudflareai + shortcut: cfai + # get api token and account id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/ + cf_account_id: 'your_cf_accout_id' + cf_ai_api: 'your_cf_api' + # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/ + cf_ai_gateway: 'your_cf_ai_gateway_name' + # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation + cf_ai_model: 'ai_model_name' + # customize your preferences + # cf_ai_model_display_name: 'Cloudflare AI' + # cf_ai_model_assistant: 'prompts_for_assistant_role' + # cf_ai_model_system: 'prompts_for_system_role' + timeout: 30 + inactive: true + + - name: core.ac.uk + engine: core + shortcut: cor + # read https://docs.searxng.org/dev/engines/online/core.html + api_key: "" + inactive: true + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories:
general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + title_html_to_text: true + content_html_to_text: true + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + timeout: 15 + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: currency + engine: currency_convert + shortcut: cc + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: devicons + engine: devicons + shortcut: di + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: encyclosearch + engine: json_engine + shortcut: es + categories: general + paging: true + search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15 + results_query: Results + url_query: SourceURL + title_query: Title + content_query: Description + disabled: true + about: + website: https://encyclosearch.org + official_api_documentation: https://encyclosearch.org/docs/#/rest-api + use_official_api: true + require_api_key: false + results: JSON + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + url_xpath: 
//dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: elasticsearch + shortcut: els + engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # enable_http: true + # available options: match, simple_query_string, term, terms, custom + query_type: match + # if query_type is set to custom, provide your query here + # custom_query_json: {"query":{"match_all": {}}} + # show_metadata: false + inactive: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images] + ddg_category: images + shortcut: ddi + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos] + ddg_category: videos + shortcut: ddv + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news] + ddg_category: news + shortcut: ddn + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, 
"word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: ebay + engine: ebay + shortcut: eb + base_url: 'https://www.ebay.com' + inactive: true + timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: findthatmeme + engine: findthatmeme + shortcut: ftm + disabled: true + + - name: flickr + categories: images + shortcut: fl + engine: flickr_noapi + + - name: flickr_api + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + engine: flickr + categories: images + shortcut: fla + # api_key: 'apikey' # required! 
+ inactive: true + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + - name: freesound + engine: freesound + shortcut: fnd + timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + inactive: true + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fynd + engine: xpath + search_url: https://fynd.bot/?search={query}&offset={pageno}{safe_search} + safesearch: true + safe_search_map: + 0: '&safe=0' + 1: '&safe=1' + 2: '&safe=1' + results_xpath: //div[contains(@class, "result-item")] + url_xpath: .//a/@href + title_xpath: .//div[contains(@class, "title-line")] + content_xpath: .//div[contains(@class, "description")] + thumbnail_xpath: .//img[contains(@class, "preview-img")]/@src + paging: true + first_page_num: 0 + page_size: 10 + categories: general + disabled: true + shortcut: fynd + about: + website: https://fynd.bot + use_official_api: false + require_api_key: false + results: HTML + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: geizhals + engine: geizhals + shortcut: geiz + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: mediawiki + shortcut: ge + categories: ["it", "software wikis"] + base_url: "https://wiki.gentoo.org/" + api_path: "api.php" + search_type: text + timeout: 10 + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + disabled: true + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 + + - name: github + engine: github + shortcut: gh + + - name: github 
code + engine: github_code + shortcut: ghc + inactive: true + ghc_auth: + # type is one of: + # * none + # * personal_access_token + # * bearer + # When none is passed, the token is not required. + type: "none" + token: "token" + # specify whether to highlight the matching lines to the query + ghc_highlight_matching_lines: true + ghc_strip_new_lines: true + ghc_strip_whitespace: false + timeout: 10.0 + + - name: codeberg + # https://docs.searxng.org/dev/engines/online/gitea.html + engine: gitea + base_url: https://codeberg.org + shortcut: cb + disabled: true + + - name: gitea.com + engine: gitea + base_url: https://gitea.com + shortcut: gitea + disabled: true + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + + - name: google images + engine: google_images + shortcut: goi + + - name: google news + engine: google_news + shortcut: gon + + - name: google videos + engine: google_videos + shortcut: gov + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: grokipedia + engine: grokipedia + shortcut: gp + disabled: true + inactive: true + + - name: material icons + engine: material_icons + shortcut: mi + disabled: true + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href + title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 +
official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hex + engine: hex + shortcut: hex + disabled: true + # Valid values: name inserted_at updated_at total_downloads recent_downloads + sort_criteria: "recent_downloads" + page_size: 10 + + - name: crates.io + engine: crates + shortcut: crates + disabled: true + timeout: 6.0 + + - name: hoogle + engine: xpath + search_url: https://hoogle.haskell.org/?hoogle={query} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: il post + engine: il_post + shortcut: pst + disabled: true + + - name: huggingface + engine: huggingface + shortcut: hf + disabled: true + + - name: huggingface datasets + huggingface_endpoint: datasets + engine: huggingface + shortcut: hfd + disabled: true + + - name: huggingface spaces + huggingface_endpoint: spaces + engine: huggingface + shortcut: hfs + disabled: true + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + # - name: invidious + # engine: invidious + # # if you want to use invidious with SearXNG you should setup one locally + # # https://github.com/searxng/searxng/issues/2722#issuecomment-2884993248 + # base_url: + # - https://invidious.example1.com + # - https://invidious.example2.com + # shortcut: iv + # timeout: 3.0 + + - name: ipernity + engine: ipernity + shortcut: ip 
+ disabled: true + + - name: iqiyi + engine: iqiyi + shortcut: iq + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + timeout: 7.0 + disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true + + - name: library of congress + engine: loc + shortcut: loc + categories: images + disabled: true + + - name: libretranslate + engine: libretranslate + # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors + base_url: + - https://libretranslate.com/translate + # api_key: '' + shortcut: lt + inactive: true + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: 
https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: lucide + engine: lucide + shortcut: luc + timeout: 3.0 + + - name: marginalia + engine: marginalia + shortcut: mar + # To get an API key, please follow the instructions at + # - https://about.marginalia-search.com/article/api/ + # api_key: '' + disabled: true + inactive: true + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + engine: metacpan + shortcut: cpan + disabled: true + number_of_results: 20 + + # https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch + # - name: meilisearch + # engine: meilisearch + # 
shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + # auth_key: Bearer XXXX + + - name: microsoft learn + engine: microsoft_learn + shortcut: msl + disabled: true + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: niconico + engine: niconico + shortcut: nico + disabled: true + + - name: npm + engine: npm + shortcut: npm + timeout: 5.0 + disabled: true + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: od + disabled: true + + - name: ollama + engine: ollama + shortcut: ollama + disabled: true + + - name: openairedatasets + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: 
metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: "science" + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openalex + engine: openalex + shortcut: oa + # https://docs.searxng.org/dev/engines/online/openalex.html + # Recommended by OpenAlex: join the polite pool with an email address + # mailto: "[email protected]" + timeout: 5.0 + disabled: true + + - name: openclipart + engine: openclipart + shortcut: ocl + inactive: true + disabled: true + timeout: 30 + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + disabled: true + + - name: openmeteo + engine: open_meteo + shortcut: om + disabled: true + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: 
https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. 
Default is not to hide obsolete structures + # hide_obsolete: false + + - name: pexels + engine: pexels + shortcut: pe + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + inactive: true + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.ducks.party + - https://api.piped.private.coffee + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + inactive: true + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: pixabay images + engine: pixabay + pixabay_type: images + categories: images + shortcut: pixi + disabled: true + + - name: pixabay videos + engine: pixabay + pixabay_type: videos + categories: videos + shortcut: pixv + disabled: true + + - name: pixiv + shortcut: pv + engine: pixiv + disabled: true + inactive: true + remove_ai_images: false + pixiv_image_proxies: + - https://pximg.example.org + # A proxy is required to load the images. Hosting an image proxy server + # for Pixiv: + # --> https://pixivfe-docs.pages.dev/hosting/image-proxy-server/ + # Proxies from public instances. Ask the public instances owners if they + # agree to receive traffic from SearXNG! 
+ # --> https://codeberg.org/VnPower/PixivFE#instances + # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 + # image proxy of https://pixiv.cat + # - https://i.pixiv.cat + # image proxy of https://www.pixiv.pics + # - https://pximg.cocomi.eu.org + # image proxy of https://pixivfe.exozy.me + # - https://pximg.exozy.me + # image proxy of https://pixivfe.ducks.party + # - https://pixiv.ducks.party + # image proxy of https://pixiv.perennialte.ch + # - https://pximg.perennialte.ch + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psycopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + 
require_api_key: false + results: HTML + + - name: public domain image archive + engine: public_domain_image_archive + shortcut: pdia + disabled: true + + - name: pubmed + engine: pubmed + shortcut: pub + + - name: pypi + shortcut: pypi + engine: pypi + + - name: quark + quark_category: general + categories: [general] + engine: quark + shortcut: qk + disabled: true + + - name: quark images + quark_category: images + categories: [images] + engine: quark + shortcut: qki + disabled: true + + - name: qwant + qwant_categ: web + engine: qwant + shortcut: qw + categories: [general, web] + disabled: true + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + disabled: true + + - name: reuters + engine: reuters + shortcut: reu + # https://docs.searxng.org/dev/engines/online/reuters.html + # sort_order = "relevance" + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: valkey + # - name: myvalkey + # shortcut : rds + # engine: valkey_server + # exact_match_only: false 
+ # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: searchmysite + engine: xpath + shortcut: sms + categories: general + paging: true + search_url: https://searchmysite.net/search/?q={query}&page={pageno} + results_xpath: //div[contains(@class,'search-result')] + url_xpath: .//a[contains(@class,'result-link')]/@href + title_xpath: .//span[contains(@class,'result-title-txt')]/text() + content_xpath: ./p[@id='result-hightlight'] + disabled: true + about: + website: https://searchmysite.net + + - name: selfhst icons + engine: selfhst + shortcut: si + disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: sogou + engine: sogou + shortcut: sogou + disabled: true + + - name: sogou images + engine: sogou_images + shortcut: sogoui + disabled: true + + - name: sogou videos + engine: sogou_videos + shortcut: sogouv + disabled: true + + - name: sogou wechat + engine: sogou_wechat + shortcut: sogouw + disabled: true + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: superuser + engine: stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: discuss.python + engine: discourse + shortcut: dpy + base_url: 'https://discuss.python.org' + categories: [it, q&a] + disabled: true + + - name: caddy.community + engine: discourse + shortcut: caddy + base_url: 'https://caddy.community' + categories: [it, q&a] + disabled: true + + - name: pi-hole.community + engine: discourse + shortcut: pi + categories: [it, q&a] + base_url: 'https://discourse.pi-hole.net' + disabled: true + + # - name: searx + # engine: 
searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + - name: springer nature + engine: springer + shortcut: springer + timeout: 5 + # read https://docs.searxng.org/dev/engines/online/springer.html + api_key: "" + inactive: true + + - name: startpage + engine: startpage + shortcut: sp + startpage_categ: web + categories: [general, web] + + - name: startpage news + engine: startpage + startpage_categ: news + categories: [news, web] + shortcut: spn + + - name: startpage images + engine: startpage + startpage_categ: images + categories: [images, web] + shortcut: spi + + - name: steam + engine: steam + shortcut: stm + disabled: true + + - name: tokyotoshokan + engine: tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!mediathekview concert" + # + # - name: mediathekview + # engine: sqlite + # shortcut: mediathekview + # categories: [general, videos] + # result_type: MainResult + # database: searx/data/filmliste-v2.db + # query_str: >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( 
NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: + http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # TubeArchivist is a self-hosted Youtube archivist software. + # https://docs.searxng.org/dev/engines/online/tubearchivist.html + # + # - name: tubearchivist + # engine: tubearchivist + # shortcut: tuba + # base_url: + # ta_token: + # ta_link_to_mp4: false + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. 
More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + - name: Torznab EZTV + engine: torznab + shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + show_magnet_links: true + show_torrent_files: false + # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + torznab_categories: # optional + - 2000 + - 5000 + inactive: true + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex + engine: yandex + categories: general + search_type: web + shortcut: yd + disabled: true + + - name: yandex images + engine: yandex + network: yandex + categories: images + search_type: images + shortcut: ydi + disabled: true + + - name: yandex music + engine: yandex_music + network: yandex + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + engine: youtube_noapi + + - name: youtube_api + # You can use the engine using the official stable API, but you need an API + # key. See: https://console.developers.google.com/project + engine: youtube_api + # api_key: '' # required! 
+ shortcut: yta + inactive: true + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: "https://{language}.wikibooks.org/" + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: "https://{language}.wikinews.org/" + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: "https://{language}.wikiquote.org/" + search_type: text + disabled: true + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: "https://{language}.wikisource.org/" + search_type: text + disabled: true + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: "https://species.wikimedia.org/" + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: "https://{language}.wiktionary.org/" + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + 
shortcut: wv + categories: [general, wikimedia] + base_url: "https://{language}.wikiversity.org/" + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: "https://{language}.wikivoyage.org/" + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wci + categories: images + wc_search_type: image + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + wc_search_type: video + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + wc_search_type: audio + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + wc_search_type: file + + - name: wolframalpha + shortcut: wa + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: wolframalpha_api + # You can use the engine using the official stable API, but you need an API + # key. See: https://products.wolframalpha.com/api/ + engine: wolframalpha_api + # api_key: '' # required! 
+ shortcut: waa + timeout: 6.0 + categories: general + inactive: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + - name: deepl + engine: deepl + shortcut: dpl + # You can use the engine using the official stable API, but you need an API key + # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ timeout: 5.0 + inactive: true + + - name: mojeek + shortcut: mjk + engine: mojeek + categories: [general, web] + disabled: true + + - name: mojeek images + shortcut: mjkimg + engine: mojeek + categories: [images, web] + search_type: images + paging: false + disabled: true + + - name: mojeek news + shortcut: mjknews + engine: mojeek + categories: [news, web] + search_type: news + paging: false + disabled: true + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: national vulnerability database + engine: nvd + shortcut: nvd + disabled: true + + - name: naver + categories: [general, web] + engine: naver + shortcut: nvr + disabled: true + + - name: naver images + naver_category: images + categories: [images] + engine: naver + shortcut: nvri + disabled: true + + - name: naver news + naver_category: news + categories: [news] + engine: naver + shortcut: nvrn + disabled: true + + - name: naver videos + naver_category: videos + categories: [videos] + engine: naver + shortcut: nvrv + disabled: true + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + # 
https://docs.searxng.org/dev/engines/online/yacy.html + engine: yacy + categories: general + search_type: text + # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027 + base_url: + - https://yacy.searchlab.eu + shortcut: ya + disabled: true + # if you aren't using HTTPS for your local yacy instance disable https + # enable_http: false + search_mode: 'global' + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: yacy images + engine: yacy + network: yacy + categories: images + search_type: image + shortcut: yai + disabled: true + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: repology + engine: repology + shortcut: rep + disabled: true + inactive: true + + - name: wordnik + engine: wordnik + shortcut: wnik + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: svgrepo + engine: svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: uxwing + engine: uxwing + shortcut: ux + disabled: true + + - name: voidlinux + engine: voidlinux + shortcut: void + disabled: true + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + inactive: true + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because 
+ # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: braveapi + engine: braveapi + # read https://docs.searxng.org/dev/engines/online/brave.html + api_key: "" + inactive: true + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! 
This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: lib_rs + disabled: true + + - name: sourcehut + shortcut: srht + engine: sourcehut + # https://docs.searxng.org/dev/engines/online/sourcehut.html + # sourcehut_sort_order: longest-active + disabled: true + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: pkg_go_dev + shortcut: pgo + disabled: true + + - name: senscritique + engine: senscritique + shortcut: scr + timeout: 4.0 + disabled: true + + - name: minecraft wiki + engine: mediawiki + shortcut: mcw + categories: ["software wikis"] + base_url: https://minecraft.wiki/ + api_path: "api.php" + search_type: text + disabled: true + about: + website: https://minecraft.wiki/ + wikidata_id: Q105533483 + +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens.
+# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/mcp_host/browser/searxng/searxng_94aa6e5c/settings.yml b/mcp_host/browser/searxng/searxng_94aa6e5c/settings.yml new file mode 100644 index 0000000..3ab7db5 --- /dev/null +++ b/mcp_host/browser/searxng/searxng_94aa6e5c/settings.yml @@ -0,0 +1,2816 @@ +general: + # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: "SearXNG" + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + # expose stats in open metrics format at /metrics + # leave empty to disable (no password set) + # open_metrics: + open_metrics: '' + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "360search", "baidu", "brave", "dbpedia", "duckduckgo", "google", "yandex", + # "mwmbl", "naver", "seznam", "sogou", "startpage", "stract", "swisscows", "quark", "qwant", "wikipedia" - + # leave blank to turn it off by default. + autocomplete: "" + # minimum number of characters to type before the autocompleter starts + autocomplete_min: 4 + # backend for the favicon near URL in search results. + # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default.
+ favicon_resolver: "" + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: "127.0.0.1" + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_BASE_URL}. + base_url: false # "http://example.com/location" + # rate limit the number of request on the instance, block some bots. + # Is overwritten by ${SEARXNG_LIMITER} + limiter: false + # enable features designed only for public instances. + # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} + public_instance: false + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. + + secret_key: "Uv0JgpD44aMj24DRNbqC1eHUBpHNzx" # Is overwritten by ${SEARXNG_SECRET} + # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: "1.0" + # POST queries are "more secure!" 
but are also the source of hard-to-locate + # annoyances, which is why GET may be better for end users and their browsers. + # see https://github.com/searxng/searxng/pull/3619 + # Is overwritten by ${SEARXNG_METHOD} + method: "POST" + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: "" + # Custom templates path - leave it blank if you didn't change + templates_path: "" + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: "" + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark, black + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. + search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + # URL formatting: pretty, full or host + url_formatting: pretty + +# Lock arbitrary settings on the preferences page. 
+# +# preferences: +# lock: +# - categories +# - language +# - autocomplete +# - favicon +# - safesearch +# - method +# - doi_resolver +# - locale +# - theme +# - results_on_new_tab +# - search_on_category_select +# - method +# - image_proxy +# - query_in_title + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be overridden per engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searxng_useragent, could contain information like an email address + # to the administrator + useragent_suffix: "" + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. + pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxy, see: SOCKS proxies + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # are also supported: see + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + + +# Plugin configuration, for more details see +# https://docs.searxng.org/admin/settings/settings_plugins.html +# +plugins: + + searx.plugins.calculator.SXNGPlugin: + active: true + +
searx.plugins.infinite_scroll.SXNGPlugin: + active: false + + searx.plugins.hash_plugin.SXNGPlugin: + active: true + + searx.plugins.self_info.SXNGPlugin: + active: true + + searx.plugins.unit_converter.SXNGPlugin: + active: true + + searx.plugins.ahmia_filter.SXNGPlugin: + active: true + + searx.plugins.hostnames.SXNGPlugin: + active: true + + searx.plugins.time_zone.SXNGPlugin: + active: true + + searx.plugins.oa_doi_rewrite.SXNGPlugin: + active: false + + searx.plugins.tor_check.SXNGPlugin: + active: false + + searx.plugins.tracker_url_remover.SXNGPlugin: + active: true + + +# Configuration of the "Hostnames plugin": +# +# hostnames: +# replace: +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# remove: +# - '(.*\.)?facebook.com$' +# low_priority: +# - '(.*\.)?google(\..*)?$' +# high_priority: +# - '(.*\.)?wikipedia.org$' +# +# Alternatively you can use external files for configuring the "Hostnames plugin": +# +# hostnames: +# replace: 'rewrite-hosts.yml' +# +# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: {}" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + 
- not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ["linux", "new york", "bbc"] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 360search + engine: 360search + shortcut: 360so + timeout: 10.0 + disabled: true + + - name: 360search videos + engine: 360search_videos + shortcut: 360sov + disabled: true + + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: acfun + engine: acfun + shortcut: acf + disabled: true + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 + disabled: true + + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true + + - name: alpine linux packages + engine: alpinelinux + disabled: true + shortcut: alp + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + 
timeout: 5 + + - name: ansa + engine: ansa + shortcut: ans + disabled: true + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + # # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: oldest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + # Might do up to two requests to perform a search. + # Since Tor is already slow by nature, the timeout is set very high. + timeout: 20.0 + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: nixos wiki + engine: mediawiki + shortcut: nixw + base_url: https://wiki.nixos.org/ + search_type: text + disabled: true + categories: [it, software wikis] + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + + - name: ask + engine: ask + shortcut: ask + disabled: true + + - name: azure + engine: azure + shortcut: az + categories: [it, cloud] + # azure_tenant_id: "your_tenant_id" + # azure_client_id: "your_client_id" + # azure_client_secret: "your_client_secret" + inactive: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: baidu + baidu_category: general + categories: 
[general] + engine: baidu + shortcut: bd + disabled: true + + - name: baidu images + baidu_category: images + categories: [images] + engine: baidu + shortcut: bdi + disabled: true + + - name: baidu kaifa + baidu_category: it + categories: [it] + engine: baidu + shortcut: bdk + disabled: true + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitchute + engine: bitchute + shortcut: bit + disabled: true + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: media.ccc.de + engine: ccc_media + shortcut: c3tv + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ disabled: true + + - name: cachy os packages + engine: cachy_os + shortcut: cos + disabled: true + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # WARNING: links from chinaso.com violate users' privacy + # Before activating these engines it is mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + disabled: true + inactive: true + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + disabled: true + inactive: true + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + disabled: true + inactive: true + + - name: cloudflareai + engine: cloudflareai + shortcut: cfai + # get api token and account id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/ + cf_account_id: 'your_cf_accout_id' + cf_ai_api: 'your_cf_api' + # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/ + cf_ai_gateway: 'your_cf_ai_gateway_name' + # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation + cf_ai_model: 'ai_model_name' + # customize your preferences + # cf_ai_model_display_name: 'Cloudflare AI' + # cf_ai_model_assistant: 'prompts_for_assistant_role' + # cf_ai_model_system: 'prompts_for_system_role' + timeout: 30 + inactive: true + + - name: core.ac.uk + engine: core + shortcut: cor + # read https://docs.searxng.org/dev/engines/online/core.html + api_key: "" + inactive: true + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories:
general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + title_html_to_text: true + content_html_to_text: true + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + timeout: 15 + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: currency + engine: currency_convert + shortcut: cc + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: devicons + engine: devicons + shortcut: di + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: encyclosearch + engine: json_engine + shortcut: es + categories: general + paging: true + search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15 + results_query: Results + url_query: SourceURL + title_query: Title + content_query: Description + disabled: true + about: + website: https://encyclosearch.org + official_api_documentation: https://encyclosearch.org/docs/#/rest-api + use_official_api: true + require_api_key: false + results: JSON + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + 
url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: elasticsearch + shortcut: els + engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # enable_http: true + # available options: match, simple_query_string, term, terms, custom + query_type: match + # if query_type is set to custom, provide your query here + # custom_query_json: {"query":{"match_all": {}}} + # show_metadata: false + inactive: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] + tests: *tests_infobox + categories: [general] + + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: 
https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: ebay + engine: ebay + shortcut: eb + base_url: 'https://www.ebay.com' + inactive: true + timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: findthatmeme + engine: findthatmeme + shortcut: ftm + disabled: true + + - name: flickr + categories: images + shortcut: fl + engine: flickr_noapi + + - name: flickr_api + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + engine: flickr + categories: images + shortcut: fla + # api_key: 'apikey' # required! 
+ inactive: true + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + - name: freesound + engine: freesound + shortcut: fnd + timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + inactive: true + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fynd + engine: xpath + search_url: https://fynd.bot/?search={query}&offset={pageno} + results_xpath: //div[contains(@class, "result-item")] + url_xpath: .//a/@href + title_xpath: .//div[contains(@class, "title-line")] + content_xpath: .//div[contains(@class, "description")] + thumbnail_xpath: .//img[contains(@class, "preview-img")]/@src + paging: true + first_page_num: 0 + page_size: 10 + categories: general + disabled: true + shortcut: fynd + about: + website: https://fynd.bot + use_official_api: false + require_api_key: false + results: HTML + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: geizhals + engine: geizhals + shortcut: geiz + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: mediawiki + shortcut: ge + categories: ["it", "software wikis"] + base_url: "https://wiki.gentoo.org/" + api_path: "api.php" + search_type: text + timeout: 10 + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + disabled: true + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 + + - name: github + engine: github + shortcut: gh + + - name: github code + engine: github_code + shortcut: ghc + inactive: true + ghc_auth: + # type is one of: + # * 
none + # * personal_access_token + # * bearer + # When none is passed, the token is not required. + type: "none" + token: "token" + # specify whether to highlight the matching lines to the query + ghc_highlight_matching_lines: true + ghc_strip_new_lines: true + ghc_strip_whitespace: false + timeout: 10.0 + + - name: codeberg + # https://docs.searxng.org/dev/engines/online/gitea.html + engine: gitea + base_url: https://codeberg.org + shortcut: cb + disabled: true + + - name: gitea.com + engine: gitea + base_url: https://gitea.com + shortcut: gitea + disabled: true + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: grokipedia + engine: grokipedia + shortcut: gp + disabled: true + inactive: true + + - name: material icons + engine: material_icons + shortcut: mi + disabled: true + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href
title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hex + engine: hex + shortcut: hex + disabled: true + # Valid values: name inserted_at updated_at total_downloads recent_downloads + sort_criteria: "recent_downloads" + page_size: 10 + + - name: crates.io + engine: crates + shortcut: crates + disabled: true + timeout: 6.0 + + - name: hoogle + engine: xpath + search_url: https://hoogle.haskell.org/?hoogle={query} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: il post + engine: il_post + shortcut: pst + disabled: true + + - name: huggingface + engine: huggingface + shortcut: hf + disabled: true + + - name: huggingface datasets + huggingface_endpoint: datasets + engine: huggingface + shortcut: hfd + disabled: true + + - name: huggingface spaces + huggingface_endpoint: spaces + engine: huggingface + shortcut: hfs + disabled: true + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + # - name: invidious + # engine: invidious + # # if you want to use invidious with SearXNG you should setup one locally + # # 
https://github.com/searxng/searxng/issues/2722#issuecomment-2884993248 + # base_url: + # - https://invidious.example1.com + # - https://invidious.example2.com + # shortcut: iv + # timeout: 3.0 + + - name: ipernity + engine: ipernity + shortcut: ip + disabled: true + + - name: iqiyi + engine: iqiyi + shortcut: iq + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + timeout: 7.0 + disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true + + - name: library of congress + engine: loc + shortcut: loc + categories: images + disabled: true + + - name: libretranslate + engine: libretranslate + # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors + 
base_url: + - https://libretranslate.com/translate + # api_key: '' + shortcut: lt + inactive: true + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: lucide + engine: lucide + shortcut: luc + timeout: 3.0 + + - name: marginalia + engine: marginalia + shortcut: mar + # To get an API key, please follow the instructions at + # - https://about.marginalia-search.com/article/api/ + # api_key: '' + disabled: true + inactive: true + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + 
engine: metacpan + shortcut: cpan + disabled: true + number_of_results: 20 + + # https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + # auth_key: Bearer XXXX + + - name: microsoft learn + engine: microsoft_learn + shortcut: msl + disabled: true + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: niconico + engine: niconico + shortcut: nico + disabled: true + + - name: npm + engine: npm + shortcut: npm + timeout: 5.0 + disabled: true + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: od + disabled: true + + - name: ollama + engine: ollama + shortcut: ollama + disabled: true + + - name: openairedatasets 
+ engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: "science" + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openalex + engine: openalex + shortcut: oa + # https://docs.searxng.org/dev/engines/online/openalex.html + # Recommended by OpenAlex: join the polite pool with an email address + # mailto: "[email protected]" + timeout: 5.0 + disabled: true + + - name: openclipart + engine: openclipart + shortcut: ocl + inactive: true + disabled: true + timeout: 30 + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + disabled: true + + - name: openmeteo + engine: open_meteo + shortcut: om + disabled: true + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 
'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. 
Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + inactive: true + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.ducks.party + - https://api.piped.private.coffee + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + inactive: true + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: pixabay images + engine: pixabay + pixabay_type: images + categories: images + shortcut: pixi + disabled: true + + - name: pixabay videos + engine: pixabay + pixabay_type: videos + categories: videos + shortcut: pixv + disabled: true + + - name: pixiv + shortcut: pv + engine: pixiv + disabled: true + inactive: true + remove_ai_images: false + pixiv_image_proxies: + - https://pximg.example.org + # A proxy is required to load the images. Hosting an image proxy server + # for Pixiv: + # --> https://pixivfe.pages.dev/hosting-image-proxy-server/ + # Proxies from public instances. Ask the public instances owners if they + # agree to receive traffic from SearXNG! 
+ # --> https://codeberg.org/VnPower/PixivFE#instances + # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 + # image proxy of https://pixiv.cat + # - https://i.pixiv.cat + # image proxy of https://www.pixiv.pics + # - https://pximg.cocomi.eu.org + # image proxy of https://pixivfe.exozy.me + # - https://pximg.exozy.me + # image proxy of https://pixivfe.ducks.party + # - https://pixiv.ducks.party + # image proxy of https://pixiv.perennialte.ch + # - https://pximg.perennialte.ch + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psychopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + 
require_api_key: false + results: HTML + + - name: public domain image archive + engine: public_domain_image_archive + shortcut: pdia + disabled: true + + - name: pubmed + engine: pubmed + shortcut: pub + + - name: pypi + shortcut: pypi + engine: pypi + + - name: quark + quark_category: general + categories: [general] + engine: quark + shortcut: qk + disabled: true + + - name: quark images + quark_category: images + categories: [images] + engine: quark + shortcut: qki + disabled: true + + - name: qwant + qwant_categ: web + engine: qwant + shortcut: qw + categories: [general, web] + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + disabled: true + + - name: reuters + engine: reuters + shortcut: reu + # https://docs.searxng.org/dev/engines/online/reuters.html + # sort_order = "relevance" + + - name: right dao + engine: xpath + paging: true + page_size: 12 + search_url: https://rightdao.com/search?q={query}&start={pageno} + results_xpath: 
//div[contains(@class, "description")] + url_xpath: ../div[contains(@class, "title")]/a/@href + title_xpath: ../div[contains(@class, "title")] + content_xpath: . + categories: general + shortcut: rd + disabled: true + about: + website: https://rightdao.com/ + use_official_api: false + require_api_key: false + results: HTML + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: valkey + # - name: myvalkey + # shortcut : rds + # engine: valkey_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: searchmysite + engine: xpath + shortcut: sms + categories: general + paging: true + search_url: https://searchmysite.net/search/?q={query}&page={pageno} + results_xpath: //div[contains(@class,'search-result')] + url_xpath: .//a[contains(@class,'result-link')]/@href + title_xpath: .//span[contains(@class,'result-title-txt')]/text() + content_xpath: ./p[@id='result-hightlight'] + disabled: true + about: + website: https://searchmysite.net + + - name: selfhst icons + engine: selfhst + shortcut: si + disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: sogou + engine: sogou + shortcut: sogou + disabled: true + + - name: sogou images + engine: sogou_images + shortcut: sogoui + disabled: true + + - name: sogou videos + engine: sogou_videos + shortcut: sogouv + disabled: true + + - name: sogou wechat + engine: sogou_wechat + shortcut: sogouw + disabled: true + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: superuser + engine: 
stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: discuss.python + engine: discourse + shortcut: dpy + base_url: 'https://discuss.python.org' + categories: [it, q&a] + disabled: true + + - name: caddy.community + engine: discourse + shortcut: caddy + base_url: 'https://caddy.community' + categories: [it, q&a] + disabled: true + + - name: pi-hole.community + engine: discourse + shortcut: pi + categories: [it, q&a] + base_url: 'https://discourse.pi-hole.net' + disabled: true + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + inactive: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + - name: springer nature + engine: springer + shortcut: springer + timeout: 5 + # read https://docs.searxng.org/dev/engines/online/springer.html + api_key: "" + inactive: true + + - name: startpage + engine: startpage + shortcut: sp + startpage_categ: web + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: startpage news + engine: startpage + startpage_categ: news + categories: [news, web] + shortcut: spn + + - name: startpage images + engine: startpage + startpage_categ: images + categories: [images, web] + shortcut: spi + + - name: steam + engine: steam + shortcut: stm + disabled: true + + - name: tokyotoshokan + engine: 
tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!mediathekview concert" + # + # - name: mediathekview + # engine: sqlite + # shortcut: mediathekview + # categories: [general, videos] + # result_type: MainResult + # database: searx/data/filmliste-v2.db + # query_str: >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: + http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # TubeArchivist is a self-hosted Youtube archivist software. 
+ # https://docs.searxng.org/dev/engines/online/tubearchivist.html + # + # - name: tubearchivist + # engine: tubearchivist + # shortcut: tuba + # base_url: + # ta_token: + # ta_link_to_mp4: false + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + - name: Torznab EZTV + engine: torznab + shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + show_magnet_links: true + show_torrent_files: false + # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + torznab_categories: # optional + - 2000 + - 5000 + inactive: true + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex + engine: yandex + categories: general + search_type: web + shortcut: yd + disabled: true + + - name: yandex images + engine: yandex + network: yandex + categories: images + search_type: images + shortcut: ydi + disabled: true + + - name: yandex music + engine: yandex_music + network: yandex + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + engine: youtube_noapi + + - name: youtube_api + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + engine: 
youtube_api + # api_key: '' # required! + shortcut: yta + inactive: true + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: "https://{language}.wikibooks.org/" + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: "https://{language}.wikinews.org/" + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: "https://{language}.wikiquote.org/" + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: "https://{language}.wikisource.org/" + search_type: text + disabled: true + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: "https://species.wikimedia.org/" + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + tests: + wikispecies: + matrix: + query: "Campbell, L.I. et al. 
2011: MicroRNAs" + lang: en + result_container: + - not_empty + - ['one_title_contains', 'Tardigrada'] + test: + - unique_results + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: "https://{language}.wiktionary.org/" + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: "https://{language}.wikiversity.org/" + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: "https://{language}.wikivoyage.org/" + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wci + categories: images + wc_search_type: image + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + wc_search_type: video + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + wc_search_type: audio + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + wc_search_type: file + + - name: wolframalpha + shortcut: wa + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: wolframalpha_api + # You can use the engine using the official stable API, but you need an API + # key. See: https://products.wolframalpha.com/api/ + engine: wolframalpha_api + # api_key: '' # required! 
+ shortcut: waa + timeout: 6.0 + categories: general + inactive: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + - name: deepl + engine: deepl + shortcut: dpl + # You can use the engine using the official stable API, but you need an API key + # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ timeout: 5.0 + inactive: true + + - name: mojeek + shortcut: mjk + engine: mojeek + categories: [general, web] + disabled: true + + - name: mojeek images + shortcut: mjkimg + engine: mojeek + categories: [images, web] + search_type: images + paging: false + disabled: true + + - name: mojeek news + shortcut: mjknews + engine: mojeek + categories: [news, web] + search_type: news + paging: false + disabled: true + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + categories: [general, web] + engine: naver + shortcut: nvr + disabled: true + + - name: naver images + naver_category: images + categories: [images] + engine: naver + shortcut: nvri + disabled: true + + - name: naver news + naver_category: news + categories: [news] + engine: naver + shortcut: nvrn + disabled: true + + - name: naver videos + naver_category: videos + categories: [videos] + engine: naver + shortcut: nvrv + disabled: true + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + # https://docs.searxng.org/dev/engines/online/yacy.html + engine: yacy + categories: general 
+ search_type: text + # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027 + base_url: + - https://yacy.searchlab.eu + shortcut: ya + disabled: true + # if you aren't using HTTPS for your local yacy instance disable https + # enable_http: false + search_mode: 'global' + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: yacy images + engine: yacy + network: yacy + categories: images + search_type: image + shortcut: yai + disabled: true + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: repology + engine: repology + shortcut: rep + disabled: true + inactive: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: wnik + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: 
svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: uxwing + engine: uxwing + shortcut: ux + disabled: true + + - name: voidlinux + engine: voidlinux + shortcut: void + disabled: true + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + inactive: true + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! 
This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: lib_rs + disabled: true + + - name: sourcehut + shortcut: srht + engine: sourcehut + # https://docs.searxng.org/dev/engines/online/sourcehut.html + # sourcehut_sort_order: longest-active + disabled: true + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: pkg_go_dev + shortcut: pgo + disabled: true + + - name: senscritique + engine: senscritique + shortcut: scr + timeout: 4.0 + disabled: true + + - name: minecraft wiki + engine: mediawiki + shortcut: mcw + categories: ["software wikis"] + base_url: https://minecraft.wiki/ + api_path: "api.php" + search_type: text + disabled: true + about: + website: https://minecraft.wiki/ + wikidata_id: Q105533483 + +# Doku engine lets you access to any Doku wiki instance: +# A public one or a privete/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. 
+# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/mcp_host/browser/searxng/searxng_be7930fe/settings.yml b/mcp_host/browser/searxng/searxng_be7930fe/settings.yml new file mode 100644 index 0000000..78dc810 --- /dev/null +++ b/mcp_host/browser/searxng/searxng_be7930fe/settings.yml @@ -0,0 +1,2816 @@ +general: + # Debug mode, only for development. 
Is overwritten by ${SEARXNG_DEBUG} + debug: false + # displayed name + instance_name: "SearXNG" + # For example: https://example.com/privacy + privacypolicy_url: false + # use true to use your own donation page written in searx/info/en/donate.md + # use false to disable the donation link + donation_url: false + # mailto:contact@example.com + contact_url: false + # record stats + enable_metrics: true + # expose stats in open metrics format at /metrics + # leave empty to disable (no password set) + # open_metrics: + open_metrics: '' + +brand: + new_issue_url: https://github.com/searxng/searxng/issues/new + docs_url: https://docs.searxng.org/ + public_instances: https://searx.space + wiki_url: https://github.com/searxng/searxng/wiki + issue_url: https://github.com/searxng/searxng/issues + # custom: + # # Custom entries in the footer: [title]: [link] + # links: + # Uptime: https://uptime.searxng.org/history/darmarit-org + # About: "https://searxng.org" + +search: + # Filter results. 0: None, 1: Moderate, 2: Strict + safe_search: 0 + # Existing autocomplete backends: "360search", "baidu", "brave", "dbpedia", "duckduckgo", "google", "yandex", + # "mwmbl", "naver", "seznam", "sogou", "startpage", "stract", "swisscows", "quark", "qwant", "wikipedia" - + # leave blank to turn it off by default. + autocomplete: "" + # minimum number of characters to type before the autocompleter starts + autocomplete_min: 4 + # backend for the favicon near URL in search results. + # Available resolvers: "allesedv", "duckduckgo", "google", "yandex" - leave blank to turn it off by default.
+ favicon_resolver: "" + # Default search language - leave blank to detect from browser information or + # use codes from 'languages.py' + default_lang: "auto" + # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages + # Available languages + # languages: + # - all + # - en + # - en-US + # - de + # - it-IT + # - fr + # - fr-BE + # ban time in seconds after engine errors + ban_time_on_fail: 5 + # max ban time in seconds after engine errors + max_ban_time_on_fail: 120 + suspended_times: + # Engine suspension time after error (in seconds; set to 0 to disable) + # For error "Access denied" and "HTTP error [402, 403]" + SearxEngineAccessDenied: 86400 + # For error "CAPTCHA" + SearxEngineCaptcha: 86400 + # For error "Too many request" and "HTTP error 429" + SearxEngineTooManyRequests: 3600 + # Cloudflare CAPTCHA + cf_SearxEngineCaptcha: 1296000 + cf_SearxEngineAccessDenied: 86400 + # ReCAPTCHA + recaptcha_SearxEngineCaptcha: 604800 + + # remove format to deny access, use lower case. + # formats: [html, csv, json, rss] + formats: + - html + +server: + # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} + port: 8888 + bind_address: "127.0.0.1" + # public URL of the instance, to ensure correct inbound links. Is overwritten + # by ${SEARXNG_BASE_URL}. + base_url: false # "http://example.com/location" + # rate limit the number of request on the instance, block some bots. + # Is overwritten by ${SEARXNG_LIMITER} + limiter: false + # enable features designed only for public instances. + # Is overwritten by ${SEARXNG_PUBLIC_INSTANCE} + public_instance: false + + # If your instance owns a /etc/searxng/settings.yml file, then set the following + # values there. + + secret_key: "mahNOkdR6sBLa59lwBp0wA503U7NR" # Is overwritten by ${SEARXNG_SECRET} + # Proxy image results through SearXNG. Is overwritten by ${SEARXNG_IMAGE_PROXY} + image_proxy: false + # 1.0 and 1.1 are supported + http_protocol_version: "1.0" + # POST queries are "more secure!" 
but are also the source of hard-to-locate + # annoyances, which is why GET may be better for end users and their browsers. + # see https://github.com/searxng/searxng/pull/3619 + # Is overwritten by ${SEARXNG_METHOD} + method: "POST" + default_http_headers: + X-Content-Type-Options: nosniff + X-Download-Options: noopen + X-Robots-Tag: noindex, nofollow + Referrer-Policy: no-referrer + +valkey: + # URL to connect valkey database. Is overwritten by ${SEARXNG_VALKEY_URL}. + # https://docs.searxng.org/admin/settings/settings_valkey.html#settings-valkey + # url: valkey://localhost:6379/0 + url: false + +ui: + # Custom static path - leave it blank if you didn't change + static_path: "" + # Custom templates path - leave it blank if you didn't change + templates_path: "" + # query_in_title: When true, the result page's titles contains the query + # it decreases the privacy, since the browser can records the page titles. + query_in_title: false + # ui theme + default_theme: simple + # center the results ? + center_alignment: false + # URL prefix of the internet archive, don't forget trailing slash (if needed). + # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" + # Default interface locale - leave blank to detect from browser information or + # use codes from the 'locales' config section + default_locale: "" + # Open result links in a new tab by default + # results_on_new_tab: false + theme_args: + # style of simple theme: auto, light, dark, black + simple_style: auto + # Perform search immediately if a category selected. + # Disable to select multiple categories at once and start the search manually. + search_on_category_select: true + # Hotkeys: default or vim + hotkeys: default + # URL formatting: pretty, full or host + url_formatting: pretty + +# Lock arbitrary settings on the preferences page. 
+# +# preferences: +# lock: +# - categories +# - language +# - autocomplete +# - favicon +# - safesearch +# - method +# - doi_resolver +# - locale +# - theme +# - results_on_new_tab +# - search_on_category_select +# - method +# - image_proxy +# - query_in_title + +# communication with search engines +# +outgoing: + # default timeout in seconds, can be overridden by engine + request_timeout: 3.0 + # the maximum timeout in seconds + # max_request_timeout: 10.0 + # suffix of searxng_useragent, could contain information like an email address + # to the administrator + useragent_suffix: "" + # The maximum number of concurrent connections that may be established. + pool_connections: 100 + # Allow the connection pool to maintain keep-alive connections below this + # point. + pool_maxsize: 20 + # See https://www.python-httpx.org/http2/ + enable_http2: true + # uncomment below section if you want to use a custom server certificate + # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults + # and https://www.python-httpx.org/compatibility/#ssl-configuration + # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer + # + # uncomment below section if you want to use a proxy, see: + # https://2.python-requests.org/en/latest/user/advanced/#proxies + # SOCKS proxies are also supported, see: + # https://2.python-requests.org/en/latest/user/advanced/#socks + # + # proxies: + # all://: + # - http://proxy1:8080 + # - http://proxy2:8080 + # + # using_tor_proxy: true + # + # Extra seconds to add in order to account for the time taken by the proxy + # + # extra_proxy_timeout: 10 + # + # uncomment below section only if you have more than one network interface + # which can be the source of outgoing search requests + # + # source_ips: + # - 1.1.1.1 + # - 1.1.1.2 + # - fe80::/126 + + +# Plugin configuration, for more details see +# https://docs.searxng.org/admin/settings/settings_plugins.html +# +plugins: + + searx.plugins.calculator.SXNGPlugin: + active: true + +
searx.plugins.infinite_scroll.SXNGPlugin: + active: false + + searx.plugins.hash_plugin.SXNGPlugin: + active: true + + searx.plugins.self_info.SXNGPlugin: + active: true + + searx.plugins.unit_converter.SXNGPlugin: + active: true + + searx.plugins.ahmia_filter.SXNGPlugin: + active: true + + searx.plugins.hostnames.SXNGPlugin: + active: true + + searx.plugins.time_zone.SXNGPlugin: + active: true + + searx.plugins.oa_doi_rewrite.SXNGPlugin: + active: false + + searx.plugins.tor_check.SXNGPlugin: + active: false + + searx.plugins.tracker_url_remover.SXNGPlugin: + active: true + + +# Configuration of the "Hostnames plugin": +# +# hostnames: +# replace: +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# '(.*\.)?reddit\.com$': 'teddit.example.com' +# '(.*\.)?redd\.it$': 'teddit.example.com' +# '(www\.)?twitter\.com$': 'nitter.example.com' +# remove: +# - '(.*\.)?facebook.com$' +# low_priority: +# - '(.*\.)?google(\..*)?$' +# high_priority: +# - '(.*\.)?wikipedia.org$' +# +# Alternatively you can use external files for configuring the "Hostnames plugin": +# +# hostnames: +# replace: 'rewrite-hosts.yml' +# +# Content of 'rewrite-hosts.yml' (place the file in the same directory as 'settings.yml'): +# '(.*\.)?youtube\.com$': 'yt.example.com' +# '(.*\.)?youtu\.be$': 'yt.example.com' +# + +checker: + # disable checker when in debug mode + off_when_debug: true + + # use "scheduling: {}" to disable scheduling + # scheduling: interval or int + + # to activate the scheduler: + # * uncomment "scheduling" section + # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" + # to your uwsgi.ini + + # scheduling: + # start_after: [300, 1800] # delay to start the first run of the checker + # every: [86400, 90000] # how often the checker runs + + # additional tests: only for the YAML anchors (see the engines section) + # + additional_tests: + rosebud: &test_rosebud + matrix: + query: rosebud + lang: en + result_container: + 
- not_empty + - ['one_title_contains', 'citizen kane'] + test: + - unique_results + + android: &test_android + matrix: + query: ['android'] + lang: ['en', 'de', 'fr', 'zh-CN'] + result_container: + - not_empty + - ['one_title_contains', 'google'] + test: + - unique_results + + # tests: only for the YAML anchors (see the engines section) + tests: + infobox: &tests_infobox + infobox: + matrix: + query: ["linux", "new york", "bbc"] + result_container: + - has_infobox + +categories_as_tabs: + general: + images: + videos: + news: + map: + music: + it: + science: + files: + social media: + +engines: + - name: 360search + engine: 360search + shortcut: 360so + timeout: 10.0 + disabled: true + + - name: 360search videos + engine: 360search_videos + shortcut: 360sov + disabled: true + + - name: 9gag + engine: 9gag + shortcut: 9g + disabled: true + + - name: acfun + engine: acfun + shortcut: acf + disabled: true + + - name: adobe stock + engine: adobe_stock + shortcut: asi + categories: ["images"] + # https://docs.searxng.org/dev/engines/online/adobe_stock.html + adobe_order: relevance + adobe_content_types: ["photo", "illustration", "zip_vector", "template", "3d", "image"] + timeout: 6 + disabled: true + + - name: adobe stock video + engine: adobe_stock + shortcut: asv + network: adobe stock + categories: ["videos"] + adobe_order: relevance + adobe_content_types: ["video"] + timeout: 6 + disabled: true + + - name: adobe stock audio + engine: adobe_stock + shortcut: asa + network: adobe stock + categories: ["music"] + adobe_order: relevance + adobe_content_types: ["audio"] + timeout: 6 + disabled: true + + - name: astrophysics data system + engine: astrophysics_data_system + shortcut: ads + # read https://docs.searxng.org/dev/engines/online/astrophysics_data_system.html + api_key: "" + inactive: true + + - name: alpine linux packages + engine: alpinelinux + disabled: true + shortcut: alp + + - name: annas archive + engine: annas_archive + disabled: true + shortcut: aa + 
timeout: 5 + + - name: ansa + engine: ansa + shortcut: ans + disabled: true + + # - name: annas articles + # engine: annas_archive + # shortcut: aaa + # # https://docs.searxng.org/dev/engines/online/annas_archive.html + # aa_content: 'magazine' # book_fiction, book_unknown, book_nonfiction, book_comic + # aa_ext: 'pdf' # pdf, epub, .. + # aa_sort: 'oldest' # newest, oldest, largest, smallest + + - name: apk mirror + engine: apkmirror + timeout: 4.0 + shortcut: apkm + disabled: true + + - name: apple app store + engine: apple_app_store + shortcut: aps + disabled: true + + # Requires Tor + - name: ahmia + engine: ahmia + # Might do up to two requests to perform a search. + # Since Tor is already slow by nature, the timeout is set very high. + timeout: 20.0 + categories: onions + enable_http: true + shortcut: ah + + - name: anaconda + engine: xpath + paging: true + first_page_num: 0 + search_url: https://anaconda.org/search?q={query}&page={pageno} + results_xpath: //tbody/tr + url_xpath: ./td/h5/a[last()]/@href + title_xpath: ./td/h5 + content_xpath: ./td[h5]/text() + categories: it + timeout: 6.0 + shortcut: conda + disabled: true + + - name: arch linux wiki + engine: archlinux + shortcut: al + + - name: nixos wiki + engine: mediawiki + shortcut: nixw + base_url: https://wiki.nixos.org/ + search_type: text + disabled: true + categories: [it, software wikis] + + - name: artic + engine: artic + shortcut: arc + timeout: 4.0 + + - name: arxiv + engine: arxiv + shortcut: arx + + - name: ask + engine: ask + shortcut: ask + disabled: true + + - name: azure + engine: azure + shortcut: az + categories: [it, cloud] + # azure_tenant_id: "your_tenant_id" + # azure_client_id: "your_client_id" + # azure_client_secret: "your_client_secret" + inactive: true + + # tmp suspended: dh key too small + # - name: base + # engine: base + # shortcut: bs + + - name: bandcamp + engine: bandcamp + shortcut: bc + categories: music + + - name: baidu + baidu_category: general + categories:
[general] + engine: baidu + shortcut: bd + disabled: true + + - name: baidu images + baidu_category: images + categories: [images] + engine: baidu + shortcut: bdi + disabled: true + + - name: baidu kaifa + baidu_category: it + categories: [it] + engine: baidu + shortcut: bdk + disabled: true + + - name: wikipedia + engine: wikipedia + shortcut: wp + # add "list" to the array to get results in the results list + display_type: ["infobox"] + categories: [general] + + - name: bilibili + engine: bilibili + shortcut: bil + disabled: true + + - name: bing + engine: bing + shortcut: bi + disabled: true + + - name: bing images + engine: bing_images + shortcut: bii + + - name: bing news + engine: bing_news + shortcut: bin + + - name: bing videos + engine: bing_videos + shortcut: biv + + - name: bitchute + engine: bitchute + shortcut: bit + disabled: true + + - name: bitbucket + engine: xpath + paging: true + search_url: https://bitbucket.org/repo/all/{pageno}?name={query} + url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href + title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] + content_xpath: //article[@class="repo-summary"]/p + categories: [it, repos] + timeout: 4.0 + disabled: true + shortcut: bb + about: + website: https://bitbucket.org/ + wikidata_id: Q2493781 + official_api_documentation: https://developer.atlassian.com/bitbucket + use_official_api: false + require_api_key: false + results: HTML + + - name: bpb + engine: bpb + shortcut: bpb + disabled: true + + - name: btdigg + engine: btdigg + shortcut: bt + disabled: true + + - name: openverse + engine: openverse + categories: images + shortcut: opv + + - name: media.ccc.de + engine: ccc_media + shortcut: c3tv + # We don't set language: de here because media.ccc.de is not just + # for a German audience. It contains many English videos and many + # German videos have English subtitles. 
+ disabled: true + + - name: cachy os packages + engine: cachy_os + shortcut: cos + disabled: true + + - name: chefkoch + engine: chefkoch + shortcut: chef + # to show premium or plus results too: + # skip_premium: false + + # WARNING: links from chinaso.com violate users' privacy + # Before activating these engines it is mandatory to read + # - https://github.com/searxng/searxng/issues/4694 + # - https://docs.searxng.org/dev/engines/online/chinaso.html + + - name: chinaso news + engine: chinaso + shortcut: chinaso + categories: [news] + chinaso_category: news + chinaso_news_source: all + disabled: true + inactive: true + + - name: chinaso images + engine: chinaso + network: chinaso news + shortcut: chinasoi + categories: [images] + chinaso_category: images + disabled: true + inactive: true + + - name: chinaso videos + engine: chinaso + network: chinaso news + shortcut: chinasov + categories: [videos] + chinaso_category: videos + disabled: true + inactive: true + + - name: cloudflareai + engine: cloudflareai + shortcut: cfai + # get api token and account id from https://developers.cloudflare.com/workers-ai/get-started/rest-api/ + cf_account_id: 'your_cf_accout_id' + cf_ai_api: 'your_cf_api' + # create your ai gateway by https://developers.cloudflare.com/ai-gateway/get-started/creating-gateway/ + cf_ai_gateway: 'your_cf_ai_gateway_name' + # find the model name from https://developers.cloudflare.com/workers-ai/models/#text-generation + cf_ai_model: 'ai_model_name' + # customize your preferences + # cf_ai_model_display_name: 'Cloudflare AI' + # cf_ai_model_assistant: 'prompts_for_assistant_role' + # cf_ai_model_system: 'prompts_for_system_role' + timeout: 30 + inactive: true + + - name: core.ac.uk + engine: core + shortcut: cor + # read https://docs.searxng.org/dev/engines/online/core.html + api_key: "" + inactive: true + + - name: crossref + engine: crossref + shortcut: cr + timeout: 30 + disabled: true + + - name: crowdview + engine: json_engine + shortcut: cv + categories:
general + paging: false + search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} + results_query: results + url_query: link + title_query: title + content_query: snippet + title_html_to_text: true + content_html_to_text: true + disabled: true + about: + website: https://crowdview.ai/ + + - name: yep + engine: yep + shortcut: yep + categories: general + search_type: web + timeout: 15 + disabled: true + + - name: yep images + engine: yep + shortcut: yepi + categories: images + search_type: images + disabled: true + + - name: yep news + engine: yep + shortcut: yepn + categories: news + search_type: news + disabled: true + + - name: currency + engine: currency_convert + shortcut: cc + + - name: deezer + engine: deezer + shortcut: dz + disabled: true + + - name: destatis + engine: destatis + shortcut: destat + disabled: true + + - name: deviantart + engine: deviantart + shortcut: da + timeout: 3.0 + + - name: devicons + engine: devicons + shortcut: di + timeout: 3.0 + + - name: ddg definitions + engine: duckduckgo_definitions + shortcut: ddd + weight: 2 + disabled: true + tests: *tests_infobox + + # cloudflare protected + # - name: digbt + # engine: digbt + # shortcut: dbt + # timeout: 6.0 + # disabled: true + + - name: docker hub + engine: docker_hub + shortcut: dh + categories: [it, packages] + + - name: encyclosearch + engine: json_engine + shortcut: es + categories: general + paging: true + search_url: https://encyclosearch.org/encyclosphere/search?q={query}&page={pageno}&resultsPerPage=15 + results_query: Results + url_query: SourceURL + title_query: Title + content_query: Description + disabled: true + about: + website: https://encyclosearch.org + official_api_documentation: https://encyclosearch.org/docs/#/rest-api + use_official_api: true + require_api_key: false + results: JSON + + - name: erowid + engine: xpath + paging: true + first_page_num: 0 + page_size: 30 + search_url: https://www.erowid.org/search.php?q={query}&s={pageno} + 
url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href + title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() + content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] + categories: [] + shortcut: ew + disabled: true + about: + website: https://www.erowid.org/ + wikidata_id: Q1430691 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: elasticsearch + shortcut: els + engine: elasticsearch + # base_url: http://localhost:9200 + # username: elastic + # password: changeme + # index: my-index + # enable_http: true + # available options: match, simple_query_string, term, terms, custom + query_type: match + # if query_type is set to custom, provide your query here + # custom_query_json: {"query":{"match_all": {}}} + # show_metadata: false + inactive: true + + - name: wikidata + engine: wikidata + shortcut: wd + timeout: 3.0 + weight: 2 + # add "list" to the array to get results in the results list + display_type: ["infobox"] + tests: *tests_infobox + categories: [general] + + - name: duckduckgo + engine: duckduckgo + shortcut: ddg + + - name: duckduckgo images + engine: duckduckgo_extra + categories: [images, web] + ddg_category: images + shortcut: ddi + disabled: true + + - name: duckduckgo videos + engine: duckduckgo_extra + categories: [videos, web] + ddg_category: videos + shortcut: ddv + disabled: true + + - name: duckduckgo news + engine: duckduckgo_extra + categories: [news, web] + ddg_category: news + shortcut: ddn + disabled: true + + - name: duckduckgo weather + engine: duckduckgo_weather + shortcut: ddw + disabled: true + + - name: apple maps + engine: apple_maps + shortcut: apm + disabled: true + timeout: 5.0 + + - name: emojipedia + engine: emojipedia + timeout: 4.0 + shortcut: em + disabled: true + + - name: tineye + engine: tineye + shortcut: tin + timeout: 9.0 + disabled: true + + - name: etymonline + engine: xpath + paging: true + search_url: 
https://etymonline.com/search?page={pageno}&q={query} + url_xpath: //a[contains(@class, "word__name--")]/@href + title_xpath: //a[contains(@class, "word__name--")] + content_xpath: //section[contains(@class, "word__defination")] + first_page_num: 1 + shortcut: et + categories: [dictionaries] + about: + website: https://www.etymonline.com/ + wikidata_id: Q1188617 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: ebay + engine: ebay + shortcut: eb + base_url: 'https://www.ebay.com' + inactive: true + timeout: 5 + + - name: 1x + engine: www1x + shortcut: 1x + timeout: 3.0 + disabled: true + + - name: fdroid + engine: fdroid + shortcut: fd + disabled: true + + - name: findthatmeme + engine: findthatmeme + shortcut: ftm + disabled: true + + - name: flickr + categories: images + shortcut: fl + engine: flickr_noapi + + - name: flickr_api + # You can use the engine using the official stable API, but you need an API + # key, see: https://www.flickr.com/services/apps/create/ + engine: flickr + categories: images + shortcut: fla + # api_key: 'apikey' # required! 
+ inactive: true + + - name: free software directory + engine: mediawiki + shortcut: fsd + categories: [it, software wikis] + base_url: https://directory.fsf.org/ + search_type: title + timeout: 5.0 + disabled: true + about: + website: https://directory.fsf.org/ + wikidata_id: Q2470288 + + - name: freesound + engine: freesound + shortcut: fnd + timeout: 15.0 + # API key required, see: https://freesound.org/docs/api/overview.html + # api_key: MyAPIkey + inactive: true + + - name: frinkiac + engine: frinkiac + shortcut: frk + disabled: true + + - name: fynd + engine: xpath + search_url: https://fynd.bot/?search={query}&offset={pageno} + results_xpath: //div[contains(@class, "result-item")] + url_xpath: .//a/@href + title_xpath: .//div[contains(@class, "title-line")] + content_xpath: .//div[contains(@class, "description")] + thumbnail_xpath: .//img[contains(@class, "preview-img")]/@src + paging: true + first_page_num: 0 + page_size: 10 + categories: general + disabled: true + shortcut: fynd + about: + website: https://fynd.bot + use_official_api: false + require_api_key: false + results: HTML + + - name: fyyd + engine: fyyd + shortcut: fy + timeout: 8.0 + disabled: true + + - name: geizhals + engine: geizhals + shortcut: geiz + disabled: true + + - name: genius + engine: genius + shortcut: gen + + - name: gentoo + engine: mediawiki + shortcut: ge + categories: ["it", "software wikis"] + base_url: "https://wiki.gentoo.org/" + api_path: "api.php" + search_type: text + timeout: 10 + + - name: gitlab + engine: gitlab + base_url: https://gitlab.com + shortcut: gl + disabled: true + about: + website: https://gitlab.com/ + wikidata_id: Q16639197 + + # - name: gnome + # engine: gitlab + # base_url: https://gitlab.gnome.org + # shortcut: gn + # about: + # website: https://gitlab.gnome.org + # wikidata_id: Q44316 + + - name: github + engine: github + shortcut: gh + + - name: github code + engine: github_code + shortcut: ghc + inactive: true + ghc_auth: + # type is one of: + # * 
none + # * personal_access_token + # * bearer + # When none is passed, the token is not required. + type: "none" + token: "token" + # specify whether to highlight the matching lines to the query + ghc_highlight_matching_lines: true + ghc_strip_new_lines: true + ghc_strip_whitespace: false + timeout: 10.0 + + - name: codeberg + # https://docs.searxng.org/dev/engines/online/gitea.html + engine: gitea + base_url: https://codeberg.org + shortcut: cb + disabled: true + + - name: gitea.com + engine: gitea + base_url: https://gitea.com + shortcut: gitea + disabled: true + + - name: goodreads + engine: goodreads + shortcut: good + timeout: 4.0 + disabled: true + + - name: google + engine: google + shortcut: go + # additional_tests: + # android: *test_android + + - name: google images + engine: google_images + shortcut: goi + # additional_tests: + # android: *test_android + # dali: + # matrix: + # query: ['Dali Christ'] + # lang: ['en', 'de', 'fr', 'zh-CN'] + # result_container: + # - ['one_title_contains', 'Salvador'] + + - name: google news + engine: google_news + shortcut: gon + # additional_tests: + # android: *test_android + + - name: google videos + engine: google_videos + shortcut: gov + # additional_tests: + # android: *test_android + + - name: google scholar + engine: google_scholar + shortcut: gos + + - name: google play apps + engine: google_play + categories: [files, apps] + shortcut: gpa + play_categ: apps + disabled: true + + - name: google play movies + engine: google_play + categories: videos + shortcut: gpm + play_categ: movies + disabled: true + + - name: grokipedia + engine: grokipedia + shortcut: gp + disabled: true + inactive: true + + - name: material icons + engine: material_icons + shortcut: mi + disabled: true + + - name: habrahabr + engine: xpath + paging: true + search_url: https://habr.com/en/search/page{pageno}/?q={query} + results_xpath: //article[contains(@class, "tm-articles-list__item")] + url_xpath: .//a[@class="tm-title__link"]/@href +
title_xpath: .//a[@class="tm-title__link"] + content_xpath: .//div[contains(@class, "article-formatted-body")] + categories: it + timeout: 4.0 + disabled: true + shortcut: habr + about: + website: https://habr.com/ + wikidata_id: Q4494434 + official_api_documentation: https://habr.com/en/docs/help/api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: hackernews + engine: hackernews + shortcut: hn + disabled: true + + - name: hex + engine: hex + shortcut: hex + disabled: true + # Valid values: name inserted_at updated_at total_downloads recent_downloads + sort_criteria: "recent_downloads" + page_size: 10 + + - name: crates.io + engine: crates + shortcut: crates + disabled: true + timeout: 6.0 + + - name: hoogle + engine: xpath + search_url: https://hoogle.haskell.org/?hoogle={query} + results_xpath: '//div[@class="result"]' + title_xpath: './/div[@class="ans"]//a' + url_xpath: './/div[@class="ans"]//a/@href' + content_xpath: './/div[@class="from"]' + page_size: 20 + categories: [it, packages] + shortcut: ho + about: + website: https://hoogle.haskell.org/ + wikidata_id: Q34010 + official_api_documentation: https://hackage.haskell.org/api + use_official_api: false + require_api_key: false + results: JSON + + - name: il post + engine: il_post + shortcut: pst + disabled: true + + - name: huggingface + engine: huggingface + shortcut: hf + disabled: true + + - name: huggingface datasets + huggingface_endpoint: datasets + engine: huggingface + shortcut: hfd + disabled: true + + - name: huggingface spaces + huggingface_endpoint: spaces + engine: huggingface + shortcut: hfs + disabled: true + + - name: imdb + engine: imdb + shortcut: imdb + timeout: 6.0 + disabled: true + + - name: imgur + engine: imgur + shortcut: img + disabled: true + + - name: ina + engine: ina + shortcut: in + timeout: 6.0 + disabled: true + + # - name: invidious + # engine: invidious + # # if you want to use invidious with SearXNG you should setup one locally + # # 
https://github.com/searxng/searxng/issues/2722#issuecomment-2884993248 + # base_url: + # - https://invidious.example1.com + # - https://invidious.example2.com + # shortcut: iv + # timeout: 3.0 + + - name: ipernity + engine: ipernity + shortcut: ip + disabled: true + + - name: iqiyi + engine: iqiyi + shortcut: iq + disabled: true + + - name: jisho + engine: jisho + shortcut: js + timeout: 3.0 + disabled: true + + - name: kickass + engine: kickass + base_url: + - https://kickasstorrents.to + - https://kickasstorrents.cr + - https://kickasstorrent.cr + - https://kickass.sx + - https://kat.am + shortcut: kc + timeout: 4.0 + + - name: lemmy communities + engine: lemmy + lemmy_type: Communities + shortcut: leco + + - name: lemmy users + engine: lemmy + network: lemmy communities + lemmy_type: Users + shortcut: leus + + - name: lemmy posts + engine: lemmy + network: lemmy communities + lemmy_type: Posts + shortcut: lepo + + - name: lemmy comments + engine: lemmy + network: lemmy communities + lemmy_type: Comments + shortcut: lecom + + - name: library genesis + engine: xpath + # search_url: https://libgen.is/search.php?req={query} + search_url: https://libgen.rs/search.php?req={query} + url_xpath: //a[contains(@href,"book/index.php?md5")]/@href + title_xpath: //a[contains(@href,"book/")]/text()[1] + content_xpath: //td/a[1][contains(@href,"=author")]/text() + categories: files + timeout: 7.0 + disabled: true + shortcut: lg + about: + website: https://libgen.fun/ + wikidata_id: Q22017206 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: z-library + engine: zlibrary + shortcut: zlib + timeout: 7.0 + disabled: true + # https://github.com/searxng/searxng/issues/3610 + inactive: true + + - name: library of congress + engine: loc + shortcut: loc + categories: images + disabled: true + + - name: libretranslate + engine: libretranslate + # https://github.com/LibreTranslate/LibreTranslate?tab=readme-ov-file#mirrors + 
base_url: + - https://libretranslate.com/translate + # api_key: '' + shortcut: lt + inactive: true + + - name: lingva + engine: lingva + shortcut: lv + # set lingva instance in url, by default it will use the official instance + # url: https://lingva.thedaviddelta.com + + - name: lobste.rs + engine: xpath + search_url: https://lobste.rs/search?q={query}&what=stories&order=relevance + results_xpath: //li[contains(@class, "story")] + url_xpath: .//a[@class="u-url"]/@href + title_xpath: .//a[@class="u-url"] + content_xpath: .//a[@class="domain"] + categories: it + shortcut: lo + timeout: 5.0 + disabled: true + about: + website: https://lobste.rs/ + wikidata_id: Q60762874 + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: lucide + engine: lucide + shortcut: luc + timeout: 3.0 + + - name: marginalia + engine: marginalia + shortcut: mar + # To get an API key, please follow the instructions at + # - https://about.marginalia-search.com/article/api/ + # api_key: '' + disabled: true + inactive: true + + - name: mastodon users + engine: mastodon + mastodon_type: accounts + base_url: https://mastodon.social + shortcut: mau + + - name: mastodon hashtags + engine: mastodon + mastodon_type: hashtags + base_url: https://mastodon.social + shortcut: mah + + # - name: matrixrooms + # engine: mrs + # # https://docs.searxng.org/dev/engines/online/mrs.html + # # base_url: https://mrs-api-host + # shortcut: mtrx + # disabled: true + + - name: mdn + shortcut: mdn + engine: json_engine + categories: [it] + paging: true + search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} + results_query: documents + url_query: mdn_url + url_prefix: https://developer.mozilla.org + title_query: title + content_query: summary + about: + website: https://developer.mozilla.org + wikidata_id: Q3273508 + official_api_documentation: null + use_official_api: false + require_api_key: false + results: JSON + + - name: metacpan + 
engine: metacpan + shortcut: cpan + disabled: true + number_of_results: 20 + + # https://docs.searxng.org/dev/engines/offline/search-indexer-engines.html#module-searx.engines.meilisearch + # - name: meilisearch + # engine: meilisearch + # shortcut: mes + # enable_http: true + # base_url: http://localhost:7700 + # index: my-index + # auth_key: Bearer XXXX + + - name: microsoft learn + engine: microsoft_learn + shortcut: msl + disabled: true + + - name: mixcloud + engine: mixcloud + shortcut: mc + + # MongoDB engine + # Required dependency: pymongo + # - name: mymongo + # engine: mongodb + # shortcut: md + # exact_match_only: false + # host: '127.0.0.1' + # port: 27017 + # enable_http: true + # results_per_page: 20 + # database: 'business' + # collection: 'reviews' # name of the db collection + # key: 'name' # key in the collection to search for + + - name: mozhi + engine: mozhi + base_url: + - https://mozhi.aryak.me + - https://translate.bus-hit.me + - https://nyc1.mz.ggtyler.dev + # mozhi_engine: google - see https://mozhi.aryak.me for supported engines + timeout: 4.0 + shortcut: mz + disabled: true + + - name: mwmbl + engine: mwmbl + # api_url: https://api.mwmbl.org + shortcut: mwm + disabled: true + + - name: niconico + engine: niconico + shortcut: nico + disabled: true + + - name: npm + engine: npm + shortcut: npm + timeout: 5.0 + disabled: true + + - name: nyaa + engine: nyaa + shortcut: nt + disabled: true + + - name: mankier + engine: json_engine + search_url: https://www.mankier.com/api/v2/mans/?q={query} + results_query: results + url_query: url + title_query: name + content_query: description + categories: it + shortcut: man + about: + website: https://www.mankier.com/ + official_api_documentation: https://www.mankier.com/api + use_official_api: true + require_api_key: false + results: JSON + + - name: odysee + engine: odysee + shortcut: od + disabled: true + + - name: ollama + engine: ollama + shortcut: ollama + disabled: true + + - name: openairedatasets 
+ engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: "science" + shortcut: oad + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openairepublications + engine: json_engine + paging: true + search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} + results_query: response/results/result + url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ + title_query: metadata/oaf:entity/oaf:result/title/$ + content_query: metadata/oaf:entity/oaf:result/description/$ + content_html_to_text: true + categories: science + shortcut: oap + timeout: 5.0 + about: + website: https://www.openaire.eu/ + wikidata_id: Q25106053 + official_api_documentation: https://api.openaire.eu/ + use_official_api: false + require_api_key: false + results: JSON + + - name: openalex + engine: openalex + shortcut: oa + # https://docs.searxng.org/dev/engines/online/openalex.html + # Recommended by OpenAlex: join the polite pool with an email address + # mailto: "[email protected]" + timeout: 5.0 + disabled: true + + - name: openclipart + engine: openclipart + shortcut: ocl + inactive: true + disabled: true + timeout: 30 + + - name: openlibrary + engine: openlibrary + shortcut: ol + timeout: 10 + disabled: true + + - name: openmeteo + engine: open_meteo + shortcut: om + disabled: true + + # - name: opensemanticsearch + # engine: opensemantic + # shortcut: oss + # base_url: 
'http://localhost:8983/solr/opensemanticsearch/' + + - name: openstreetmap + engine: openstreetmap + shortcut: osm + + - name: openrepos + engine: xpath + paging: true + search_url: https://openrepos.net/search/node/{query}?page={pageno} + url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href + title_xpath: //li[@class="search-result"]//h3[@class="title"]/a + content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] + categories: files + timeout: 4.0 + disabled: true + shortcut: or + about: + website: https://openrepos.net/ + wikidata_id: + official_api_documentation: + use_official_api: false + require_api_key: false + results: HTML + + - name: packagist + engine: json_engine + paging: true + search_url: https://packagist.org/search.json?q={query}&page={pageno} + results_query: results + url_query: url + title_query: name + content_query: description + categories: [it, packages] + disabled: true + timeout: 5.0 + shortcut: pack + about: + website: https://packagist.org + wikidata_id: Q108311377 + official_api_documentation: https://packagist.org/apidoc + use_official_api: true + require_api_key: false + results: JSON + + - name: pdbe + engine: pdbe + shortcut: pdb + # Hide obsolete PDB entries. 
Default is not to hide obsolete structures + # hide_obsolete: false + + - name: photon + engine: photon + shortcut: ph + + - name: pinterest + engine: pinterest + shortcut: pin + + - name: piped + engine: piped + shortcut: ppd + categories: videos + piped_filter: videos + timeout: 3.0 + inactive: true + + # URL to use as link and for embeds + frontend_url: https://srv.piped.video + # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ + backend_url: + - https://pipedapi.ducks.party + - https://api.piped.private.coffee + + - name: piped.music + engine: piped + network: piped + shortcut: ppdm + categories: music + piped_filter: music_songs + timeout: 3.0 + inactive: true + + - name: piratebay + engine: piratebay + shortcut: tpb + # You may need to change this URL to a proxy if piratebay is blocked in your + # country + url: https://thepiratebay.org/ + timeout: 3.0 + + - name: pixabay images + engine: pixabay + pixabay_type: images + categories: images + shortcut: pixi + disabled: true + + - name: pixabay videos + engine: pixabay + pixabay_type: videos + categories: videos + shortcut: pixv + disabled: true + + - name: pixiv + shortcut: pv + engine: pixiv + disabled: true + inactive: true + remove_ai_images: false + pixiv_image_proxies: + - https://pximg.example.org + # A proxy is required to load the images. Hosting an image proxy server + # for Pixiv: + # --> https://pixivfe.pages.dev/hosting-image-proxy-server/ + # Proxies from public instances. Ask the public instances owners if they + # agree to receive traffic from SearXNG! 
+ # --> https://codeberg.org/VnPower/PixivFE#instances + # --> https://github.com/searxng/searxng/pull/3192#issuecomment-1941095047 + # image proxy of https://pixiv.cat + # - https://i.pixiv.cat + # image proxy of https://www.pixiv.pics + # - https://pximg.cocomi.eu.org + # image proxy of https://pixivfe.exozy.me + # - https://pximg.exozy.me + # image proxy of https://pixivfe.ducks.party + # - https://pixiv.ducks.party + # image proxy of https://pixiv.perennialte.ch + # - https://pximg.perennialte.ch + + - name: podcastindex + engine: podcastindex + shortcut: podcast + + # Required dependency: psycopg2 + # - name: postgresql + # engine: postgresql + # database: postgres + # username: postgres + # password: postgres + # limit: 10 + # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' + # shortcut : psql + + - name: presearch + engine: presearch + search_type: search + categories: [general, web] + shortcut: ps + timeout: 4.0 + disabled: true + + - name: presearch images + engine: presearch + network: presearch + search_type: images + categories: [images, web] + timeout: 4.0 + shortcut: psimg + disabled: true + + - name: presearch videos + engine: presearch + network: presearch + search_type: videos + categories: [general, web] + timeout: 4.0 + shortcut: psvid + disabled: true + + - name: presearch news + engine: presearch + network: presearch + search_type: news + categories: [news, web] + timeout: 4.0 + shortcut: psnews + disabled: true + + - name: pub.dev + engine: xpath + shortcut: pd + search_url: https://pub.dev/packages?q={query}&page={pageno} + paging: true + results_xpath: //div[contains(@class,"packages-item")] + url_xpath: ./div/h3/a/@href + title_xpath: ./div/h3/a + content_xpath: ./div/div/div[contains(@class,"packages-description")]/span + categories: [packages, it] + timeout: 3.0 + disabled: true + first_page_num: 1 + about: + website: https://pub.dev/ + official_api_documentation: https://pub.dev/help/api + use_official_api: false + 
require_api_key: false + results: HTML + + - name: public domain image archive + engine: public_domain_image_archive + shortcut: pdia + disabled: true + + - name: pubmed + engine: pubmed + shortcut: pub + + - name: pypi + shortcut: pypi + engine: pypi + + - name: quark + quark_category: general + categories: [general] + engine: quark + shortcut: qk + disabled: true + + - name: quark images + quark_category: images + categories: [images] + engine: quark + shortcut: qki + disabled: true + + - name: qwant + qwant_categ: web + engine: qwant + shortcut: qw + categories: [general, web] + disabled: true + additional_tests: + rosebud: *test_rosebud + + - name: qwant news + qwant_categ: news + engine: qwant + shortcut: qwn + categories: news + network: qwant + + - name: qwant images + qwant_categ: images + engine: qwant + shortcut: qwi + categories: [images, web] + network: qwant + + - name: qwant videos + qwant_categ: videos + engine: qwant + shortcut: qwv + categories: [videos, web] + network: qwant + + # - name: library + # engine: recoll + # shortcut: lib + # base_url: 'https://recoll.example.org/' + # search_dir: '' + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # timeout: 30.0 + # categories: files + # disabled: true + + # - name: recoll library reference + # engine: recoll + # base_url: 'https://recoll.example.org/' + # search_dir: reference + # mount_prefix: /export + # dl_prefix: 'https://download.example.org' + # shortcut: libr + # timeout: 30.0 + # categories: files + # disabled: true + + - name: radio browser + engine: radio_browser + shortcut: rb + + - name: reddit + engine: reddit + shortcut: re + page_size: 25 + disabled: true + + - name: reuters + engine: reuters + shortcut: reu + # https://docs.searxng.org/dev/engines/online/reuters.html + # sort_order = "relevance" + + - name: right dao + engine: xpath + paging: true + page_size: 12 + search_url: https://rightdao.com/search?q={query}&start={pageno} + results_xpath: 
//div[contains(@class, "description")] + url_xpath: ../div[contains(@class, "title")]/a/@href + title_xpath: ../div[contains(@class, "title")] + content_xpath: . + categories: general + shortcut: rd + disabled: true + about: + website: https://rightdao.com/ + use_official_api: false + require_api_key: false + results: HTML + + - name: rottentomatoes + engine: rottentomatoes + shortcut: rt + disabled: true + + # Required dependency: valkey + # - name: myvalkey + # shortcut : rds + # engine: valkey_server + # exact_match_only: false + # host: '127.0.0.1' + # port: 6379 + # enable_http: true + # password: '' + # db: 0 + + # tmp suspended: bad certificate + # - name: scanr structures + # shortcut: scs + # engine: scanr_structures + # disabled: true + + - name: searchmysite + engine: xpath + shortcut: sms + categories: general + paging: true + search_url: https://searchmysite.net/search/?q={query}&page={pageno} + results_xpath: //div[contains(@class,'search-result')] + url_xpath: .//a[contains(@class,'result-link')]/@href + title_xpath: .//span[contains(@class,'result-title-txt')]/text() + content_xpath: ./p[@id='result-hightlight'] + disabled: true + about: + website: https://searchmysite.net + + - name: selfhst icons + engine: selfhst + shortcut: si + disabled: true + + - name: sepiasearch + engine: sepiasearch + shortcut: sep + + - name: sogou + engine: sogou + shortcut: sogou + disabled: true + + - name: sogou images + engine: sogou_images + shortcut: sogoui + disabled: true + + - name: sogou videos + engine: sogou_videos + shortcut: sogouv + disabled: true + + - name: sogou wechat + engine: sogou_wechat + shortcut: sogouw + disabled: true + + - name: soundcloud + engine: soundcloud + shortcut: sc + + - name: stackoverflow + engine: stackexchange + shortcut: st + api_site: 'stackoverflow' + categories: [it, q&a] + + - name: askubuntu + engine: stackexchange + shortcut: ubuntu + api_site: 'askubuntu' + categories: [it, q&a] + + - name: superuser + engine: 
stackexchange + shortcut: su + api_site: 'superuser' + categories: [it, q&a] + + - name: discuss.python + engine: discourse + shortcut: dpy + base_url: 'https://discuss.python.org' + categories: [it, q&a] + disabled: true + + - name: caddy.community + engine: discourse + shortcut: caddy + base_url: 'https://caddy.community' + categories: [it, q&a] + disabled: true + + - name: pi-hole.community + engine: discourse + shortcut: pi + categories: [it, q&a] + base_url: 'https://discourse.pi-hole.net' + disabled: true + + - name: searchcode code + engine: searchcode_code + shortcut: scc + disabled: true + inactive: true + + # - name: searx + # engine: searx_engine + # shortcut: se + # instance_urls : + # - http://127.0.0.1:8888/ + # - ... + # disabled: true + + - name: semantic scholar + engine: semantic_scholar + shortcut: se + + # Spotify needs API credentials + # - name: spotify + # engine: spotify + # shortcut: stf + # api_client_id: ******* + # api_client_secret: ******* + + # - name: solr + # engine: solr + # shortcut: slr + # base_url: http://localhost:8983 + # collection: collection_name + # sort: '' # sorting: asc or desc + # field_list: '' # comma separated list of field names to display on the UI + # default_fields: '' # default field to query + # query_fields: '' # query fields + # enable_http: true + + - name: springer nature + engine: springer + shortcut: springer + timeout: 5 + # read https://docs.searxng.org/dev/engines/online/springer.html + api_key: "" + inactive: true + + - name: startpage + engine: startpage + shortcut: sp + startpage_categ: web + categories: [general, web] + additional_tests: + rosebud: *test_rosebud + + - name: startpage news + engine: startpage + startpage_categ: news + categories: [news, web] + shortcut: spn + + - name: startpage images + engine: startpage + startpage_categ: images + categories: [images, web] + shortcut: spi + + - name: steam + engine: steam + shortcut: stm + disabled: true + + - name: tokyotoshokan + engine: 
tokyotoshokan + shortcut: tt + timeout: 6.0 + disabled: true + + - name: solidtorrents + engine: solidtorrents + shortcut: solid + timeout: 4.0 + base_url: + - https://solidtorrents.to + - https://bitsearch.to + + # For this demo of the sqlite engine download: + # https://liste.mediathekview.de/filmliste-v2.db.bz2 + # and unpack into searx/data/filmliste-v2.db + # Query to test: "!mediathekview concert" + # + # - name: mediathekview + # engine: sqlite + # shortcut: mediathekview + # categories: [general, videos] + # result_type: MainResult + # database: searx/data/filmliste-v2.db + # query_str: >- + # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, + # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, + # description AS content + # FROM film + # WHERE title LIKE :wildcard OR description LIKE :wildcard + # ORDER BY duration DESC + + - name: tagesschau + engine: tagesschau + # when set to false, display URLs from Tagesschau, and not the actual source + # (e.g. NDR, WDR, SWR, HR, ...) + use_source_url: true + shortcut: ts + disabled: true + + - name: tmdb + engine: xpath + paging: true + categories: movies + search_url: https://www.themoviedb.org/search?page={pageno}&query={query} + results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] + url_xpath: .//div[contains(@class,"poster")]/a/@href + thumbnail_xpath: .//img/@src + title_xpath: .//div[contains(@class,"title")]//h2 + content_xpath: .//div[contains(@class,"overview")] + shortcut: tm + disabled: true + + # Requires Tor + - name: torch + engine: xpath + paging: true + search_url: + http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and + results_xpath: //table//tr + url_xpath: ./td[2]/a + title_xpath: ./td[2]/b + content_xpath: ./td[2]/small + categories: onions + enable_http: true + shortcut: tch + + # TubeArchivist is a self-hosted Youtube archivist software. 
+ # https://docs.searxng.org/dev/engines/online/tubearchivist.html + # + # - name: tubearchivist + # engine: tubearchivist + # shortcut: tuba + # base_url: + # ta_token: + # ta_link_to_mp4: false + + # torznab engine lets you query any torznab compatible indexer. Using this + # engine in combination with Jackett opens the possibility to query a lot of + # public and private indexers directly from SearXNG. More details at: + # https://docs.searxng.org/dev/engines/online/torznab.html + - name: Torznab EZTV + engine: torznab + shortcut: eztv + # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab + # enable_http: true # if using localhost + # api_key: xxxxxxxxxxxxxxx + show_magnet_links: true + show_torrent_files: false + # https://github.com/Jackett/Jackett/wiki/Jackett-Categories + torznab_categories: # optional + - 2000 + - 5000 + inactive: true + + # tmp suspended - too slow, too many errors + # - name: urbandictionary + # engine : xpath + # search_url : https://www.urbandictionary.com/define.php?term={query} + # url_xpath : //*[@class="word"]/@href + # title_xpath : //*[@class="def-header"] + # content_xpath: //*[@class="meaning"] + # shortcut: ud + + - name: unsplash + engine: unsplash + shortcut: us + + - name: yandex + engine: yandex + categories: general + search_type: web + shortcut: yd + disabled: true + + - name: yandex images + engine: yandex + network: yandex + categories: images + search_type: images + shortcut: ydi + disabled: true + + - name: yandex music + engine: yandex_music + network: yandex + shortcut: ydm + disabled: true + # https://yandex.com/support/music/access.html + + - name: yahoo + engine: yahoo + shortcut: yh + disabled: true + + - name: yahoo news + engine: yahoo_news + shortcut: yhn + + - name: youtube + shortcut: yt + engine: youtube_noapi + + - name: youtube_api + # You can use the engine using the official stable API, but you need an API + # key See: https://console.developers.google.com/project + engine: 
youtube_api + # api_key: '' # required! + shortcut: yta + inactive: true + + - name: dailymotion + engine: dailymotion + shortcut: dm + + - name: vimeo + engine: vimeo + shortcut: vm + + - name: wiby + engine: json_engine + paging: true + search_url: https://wiby.me/json/?q={query}&p={pageno} + url_query: URL + title_query: Title + content_query: Snippet + categories: [general, web] + shortcut: wib + disabled: true + about: + website: https://wiby.me/ + + - name: wikibooks + engine: mediawiki + weight: 0.5 + shortcut: wb + categories: [general, wikimedia] + base_url: "https://{language}.wikibooks.org/" + search_type: text + disabled: true + about: + website: https://www.wikibooks.org/ + wikidata_id: Q367 + + - name: wikinews + engine: mediawiki + shortcut: wn + categories: [news, wikimedia] + base_url: "https://{language}.wikinews.org/" + search_type: text + srsort: create_timestamp_desc + about: + website: https://www.wikinews.org/ + wikidata_id: Q964 + + - name: wikiquote + engine: mediawiki + weight: 0.5 + shortcut: wq + categories: [general, wikimedia] + base_url: "https://{language}.wikiquote.org/" + search_type: text + disabled: true + additional_tests: + rosebud: *test_rosebud + about: + website: https://www.wikiquote.org/ + wikidata_id: Q369 + + - name: wikisource + engine: mediawiki + weight: 0.5 + shortcut: ws + categories: [general, wikimedia] + base_url: "https://{language}.wikisource.org/" + search_type: text + disabled: true + about: + website: https://www.wikisource.org/ + wikidata_id: Q263 + + - name: wikispecies + engine: mediawiki + shortcut: wsp + categories: [general, science, wikimedia] + base_url: "https://species.wikimedia.org/" + search_type: text + disabled: true + about: + website: https://species.wikimedia.org/ + wikidata_id: Q13679 + tests: + wikispecies: + matrix: + query: "Campbell, L.I. et al. 
2011: MicroRNAs" + lang: en + result_container: + - not_empty + - ['one_title_contains', 'Tardigrada'] + test: + - unique_results + + - name: wiktionary + engine: mediawiki + shortcut: wt + categories: [dictionaries, wikimedia] + base_url: "https://{language}.wiktionary.org/" + search_type: text + about: + website: https://www.wiktionary.org/ + wikidata_id: Q151 + + - name: wikiversity + engine: mediawiki + weight: 0.5 + shortcut: wv + categories: [general, wikimedia] + base_url: "https://{language}.wikiversity.org/" + search_type: text + disabled: true + about: + website: https://www.wikiversity.org/ + wikidata_id: Q370 + + - name: wikivoyage + engine: mediawiki + weight: 0.5 + shortcut: wy + categories: [general, wikimedia] + base_url: "https://{language}.wikivoyage.org/" + search_type: text + disabled: true + about: + website: https://www.wikivoyage.org/ + wikidata_id: Q373 + + - name: wikicommons.images + engine: wikicommons + shortcut: wci + categories: images + wc_search_type: image + + - name: wikicommons.videos + engine: wikicommons + shortcut: wcv + categories: videos + wc_search_type: video + + - name: wikicommons.audio + engine: wikicommons + shortcut: wca + categories: music + wc_search_type: audio + + - name: wikicommons.files + engine: wikicommons + shortcut: wcf + categories: files + wc_search_type: file + + - name: wolframalpha + shortcut: wa + engine: wolframalpha_noapi + timeout: 6.0 + categories: general + disabled: true + + - name: wolframalpha_api + # You can use the engine using the official stable API, but you need an API + # key. See: https://products.wolframalpha.com/api/ + engine: wolframalpha_api + # api_key: '' # required! 
+ shortcut: waa + timeout: 6.0 + categories: general + inactive: true + + - name: dictzone + engine: dictzone + shortcut: dc + + - name: mymemory translated + engine: translated + shortcut: tl + timeout: 5.0 + # You can use without an API key, but you are limited to 1000 words/day + # See: https://mymemory.translated.net/doc/usagelimits.php + # api_key: '' + + # Required dependency: mysql-connector-python + # - name: mysql + # engine: mysql_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mysql + + # Required dependency: mariadb + # - name: mariadb + # engine: mariadb_server + # database: mydatabase + # username: user + # password: pass + # limit: 10 + # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' + # shortcut: mdb + + - name: 1337x + engine: 1337x + shortcut: 1337x + disabled: true + + - name: duden + engine: duden + shortcut: du + disabled: true + + - name: seznam + shortcut: szn + engine: seznam + disabled: true + + - name: deepl + engine: deepl + shortcut: dpl + # You can use the engine using the official stable API, but you need an API key + # See: https://www.deepl.com/pro-api?cta=header-pro-api + # api_key: '' # required! 
+ timeout: 5.0 + inactive: true + + - name: mojeek + shortcut: mjk + engine: mojeek + categories: [general, web] + disabled: true + + - name: mojeek images + shortcut: mjkimg + engine: mojeek + categories: [images, web] + search_type: images + paging: false + disabled: true + + - name: mojeek news + shortcut: mjknews + engine: mojeek + categories: [news, web] + search_type: news + paging: false + disabled: true + + - name: moviepilot + engine: moviepilot + shortcut: mp + disabled: true + + - name: naver + categories: [general, web] + engine: naver + shortcut: nvr + disabled: true + + - name: naver images + naver_category: images + categories: [images] + engine: naver + shortcut: nvri + disabled: true + + - name: naver news + naver_category: news + categories: [news] + engine: naver + shortcut: nvrn + disabled: true + + - name: naver videos + naver_category: videos + categories: [videos] + engine: naver + shortcut: nvrv + disabled: true + + - name: rubygems + shortcut: rbg + engine: xpath + paging: true + search_url: https://rubygems.org/search?page={pageno}&query={query} + results_xpath: /html/body/main/div/a[@class="gems__gem"] + url_xpath: ./@href + title_xpath: ./span/h2 + content_xpath: ./span/p + suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a + first_page_num: 1 + categories: [it, packages] + disabled: true + about: + website: https://rubygems.org/ + wikidata_id: Q1853420 + official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ + use_official_api: false + require_api_key: false + results: HTML + + - name: peertube + engine: peertube + shortcut: ptb + paging: true + # alternatives see: https://instances.joinpeertube.org/instances + # base_url: https://tube.4aem.com + categories: videos + disabled: true + timeout: 6.0 + + - name: mediathekviewweb + engine: mediathekviewweb + shortcut: mvw + disabled: true + + - name: yacy + # https://docs.searxng.org/dev/engines/online/yacy.html + engine: yacy + categories: general 
+ search_type: text + # see https://github.com/searxng/searxng/pull/3631#issuecomment-2240903027 + base_url: + - https://yacy.searchlab.eu + shortcut: ya + disabled: true + # if you aren't using HTTPS for your local yacy instance disable https + # enable_http: false + search_mode: 'global' + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: yacy images + engine: yacy + network: yacy + categories: images + search_type: image + shortcut: yai + disabled: true + # timeout can be reduced in 'local' search mode + timeout: 5.0 + + - name: rumble + engine: rumble + shortcut: ru + base_url: https://rumble.com/ + paging: true + categories: videos + disabled: true + + - name: repology + engine: repology + shortcut: rep + disabled: true + inactive: true + + - name: livespace + engine: livespace + shortcut: ls + categories: videos + disabled: true + timeout: 5.0 + + - name: wordnik + engine: wordnik + shortcut: wnik + timeout: 5.0 + + - name: woxikon.de synonyme + engine: xpath + shortcut: woxi + categories: [dictionaries] + timeout: 5.0 + disabled: true + search_url: https://synonyme.woxikon.de/synonyme/{query}.php + url_xpath: //div[@class="upper-synonyms"]/a/@href + content_xpath: //div[@class="synonyms-list-group"] + title_xpath: //div[@class="upper-synonyms"]/a + no_result_for_http_status: [404] + about: + website: https://www.woxikon.de/ + wikidata_id: # No Wikidata ID + use_official_api: false + require_api_key: false + results: HTML + language: de + + - name: seekr news + engine: seekr + shortcut: senews + categories: news + seekr_category: news + disabled: true + + - name: seekr images + engine: seekr + network: seekr news + shortcut: seimg + categories: images + seekr_category: images + disabled: true + + - name: seekr videos + engine: seekr + network: seekr news + shortcut: sevid + categories: videos + seekr_category: videos + disabled: true + + - name: stract + engine: stract + shortcut: str + disabled: true + + - name: svgrepo + engine: 
svgrepo + shortcut: svg + timeout: 10.0 + disabled: true + + - name: tootfinder + engine: tootfinder + shortcut: toot + + - name: uxwing + engine: uxwing + shortcut: ux + disabled: true + + - name: voidlinux + engine: voidlinux + shortcut: void + disabled: true + + - name: wallhaven + engine: wallhaven + # api_key: abcdefghijklmnopqrstuvwxyz + shortcut: wh + inactive: true + + # wikimini: online encyclopedia for children + # The fulltext and title parameter is necessary for Wikimini because + # sometimes it will not show the results and redirect instead + - name: wikimini + engine: xpath + shortcut: wkmn + search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search + url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href + title_xpath: //li//div[@class="mw-search-result-heading"]/a + content_xpath: //li/div[@class="searchresult"] + categories: general + disabled: true + about: + website: https://wikimini.org/ + wikidata_id: Q3568032 + use_official_api: false + require_api_key: false + results: HTML + language: fr + + - name: wttr.in + engine: wttr + shortcut: wttr + timeout: 9.0 + + - name: brave + engine: brave + shortcut: br + time_range_support: true + paging: true + categories: [general, web] + brave_category: search + # brave_spellcheck: true + + - name: brave.images + engine: brave + network: brave + shortcut: brimg + categories: [images, web] + brave_category: images + + - name: brave.videos + engine: brave + network: brave + shortcut: brvid + categories: [videos, web] + brave_category: videos + + - name: brave.news + engine: brave + network: brave + shortcut: brnews + categories: news + brave_category: news + + # - name: brave.goggles + # engine: brave + # network: brave + # shortcut: brgog + # time_range_support: true + # paging: true + # categories: [general, web] + # brave_category: goggles + # Goggles: # required! 
This should be a URL ending in .goggle + + - name: lib.rs + shortcut: lrs + engine: lib_rs + disabled: true + + - name: sourcehut + shortcut: srht + engine: sourcehut + # https://docs.searxng.org/dev/engines/online/sourcehut.html + # sourcehut_sort_order: longest-active + disabled: true + + - name: bt4g + engine: bt4g + shortcut: bt4g + + - name: pkg.go.dev + engine: pkg_go_dev + shortcut: pgo + disabled: true + + - name: senscritique + engine: senscritique + shortcut: scr + timeout: 4.0 + disabled: true + + - name: minecraft wiki + engine: mediawiki + shortcut: mcw + categories: ["software wikis"] + base_url: https://minecraft.wiki/ + api_path: "api.php" + search_type: text + disabled: true + about: + website: https://minecraft.wiki/ + wikidata_id: Q105533483 + +# Doku engine lets you access any Doku wiki instance: +# A public one or a private/corporate one. +# - name: ubuntuwiki +# engine: doku +# shortcut: uw +# base_url: 'https://doc.ubuntu-fr.org' + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: git grep +# engine: command +# command: ['git', 'grep', '{{QUERY}}'] +# shortcut: gg +# tokens: [] +# disabled: true +# delimiter: +# chars: ':' +# keys: ['filepath', 'code'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: locate +# engine: command +# command: ['locate', '{{QUERY}}'] +# shortcut: loc +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. 
+# - name: find +# engine: command +# command: ['find', '.', '-name', '{{QUERY}}'] +# query_type: path +# shortcut: fnd +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: pattern search in files +# engine: command +# command: ['fgrep', '{{QUERY}}'] +# shortcut: fgr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +# Be careful when enabling this engine if you are +# running a public instance. Do not expose any sensitive +# information. You can restrict access by configuring a list +# of access tokens under tokens. +# - name: regex search in files +# engine: command +# command: ['grep', '{{QUERY}}'] +# shortcut: gr +# tokens: [] +# disabled: true +# delimiter: +# chars: ' ' +# keys: ['line'] + +doi_resolvers: + oadoi.org: 'https://oadoi.org/' + doi.org: 'https://doi.org/' + sci-hub.se: 'https://sci-hub.se/' + sci-hub.st: 'https://sci-hub.st/' + sci-hub.ru: 'https://sci-hub.ru/' + +default_doi_resolver: 'oadoi.org' diff --git a/mcp_host/memory/server.py b/mcp_host/memory/server.py new file mode 100755 index 0000000..f42c528 --- /dev/null +++ b/mcp_host/memory/server.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 + +""" +Memory MCP Server +Provides tools for storing and retrieving memories using RAG (Retrieval-Augmented Generation). 
+Author: HolobiomicsLab +""" + +import os +import sys +from pathlib import Path +import json +import re +from typing import Dict, List, Optional, Any +from fastmcp import FastMCP + +# Add parent directory to path for imports +project_root = Path(__file__).resolve().parent.parent.parent +sys.path.append(str(project_root)) + +from shared import return_as_dict + +# Memory database - stored as an in-memory list initially, could be extended to use +MEMORY_DB_PATH = Path("memory_db.json") +memories = [] + +def load_memories(): + """Load memories from file if it exists""" + global memories + try: + if MEMORY_DB_PATH.exists(): + with open(MEMORY_DB_PATH, "r") as f: + memories = json.load(f) + print(f"Loaded {len(memories)} memories from {MEMORY_DB_PATH}") + else: + print(f"Memory database file {MEMORY_DB_PATH} does not exist. Starting with empty memory.") + memories = [] + except Exception as e: + print(f"Error loading memories: {e}") + memories = [] + +def save_memories(): + """Save memories to file""" + try: + with open(MEMORY_DB_PATH, "w") as f: + json.dump(memories, f, indent=2) + print(f"Saved {len(memories)} memories to {MEMORY_DB_PATH}") + except Exception as e: + print(f"Error saving memories: {e}") + +# Simple text-based similarity for now (could be replaced with embeddings) +def similarity_score(query: str, memory: str) -> float: + """ + Calculate simple similarity score between query and memory. 
+ + Args: + query: The search query string + memory: The memory string to compare against + + Returns: + float: Similarity score between 0 and 1 + """ + # Normalize text (lowercase, remove punctuation) + query = re.sub(r'[^\w\s]', '', query.lower()) + memory = re.sub(r'[^\w\s]', '', memory.lower()) + + # Split into words + query_words = set(query.split()) + memory_words = set(memory.split()) + + # Calculate Jaccard similarity + if not query_words or not memory_words: + return 0.0 + + intersection = len(query_words.intersection(memory_words)) + union = len(query_words.union(memory_words)) + + return intersection / union + +# Initialize FastMCP +description = """ +Memory MCP Server provides tools for storing and retrieving memories using RAG. +It allows an agent to save learned information and later retrieve the most relevant memories for a given query. +""" + +mcp = FastMCP( + name="Memory MCP", + instructions=description, +) + +@mcp.tool +def lookup_memory(query: str) -> List[str]: + """ + Look up memories that are most relevant to the provided query. + This tool searches through all stored memories and returns the top 5 most relevant results based on + their similarity to your query string. It's particularly useful for retrieving previously encountered + errors solutions. + + Args: + query (str): The search query string. Be as specific as possible for better results. + For example: "failure of download_file tool with non pdf download link" + + Returns: + List[str]: List of the top 5 most relevant memory strings, ranked by relevance. + An empty list is returned if no memories match the query or if the memory database is empty. + + Examples: + >>> lookup_memory("how to fix permission denied errors") + [ + "When facing 'permission denied' errors with file operations, try using chmod +x to make the file executable or sudo for admin privileges.", + "Permission denied errors often happen when trying to access protected directories. 
Use sudo or run as administrator.", + ] + """ + if not memories: + return [] + # Calculate similarity scores + scored_memories = [(memory, similarity_score(query, memory)) for memory in memories] + # Sort by score (descending) and take top 5 + scored_memories.sort(key=lambda x: x[1], reverse=True) + # Return only memories with non-zero scores, up to 5 + return [memory for memory, score in scored_memories[:5] if score > 0] + +@mcp.tool +def save_learned(memory: str) -> Dict[str, Any]: + """ + Save a new piece of knowledge or insight to the memory database. + This tool allows you to store important information, patterns, solutions, or knowledge + that might be useful to retrieve later. Please use extensively every time you learn something / fix an error after many attempt. + + Args: + memory (str): The memory text to store. Should be a complete, self-contained piece of information. + For example: "It appears that for download_file to work with non-PDF links, + the URL must point directly to a downloadable file rather than a webpage that contains a download button." 
+ + Returns: + Dict[str, Any]: Dictionary containing: + - status: "success" or "error" + - message: Confirmation message or error details + - total_memories: Total number of memories now stored + + Examples: + + >>> save_learned("When working with pandas DataFrames, use df.loc[] for label-based indexing and df.iloc[] for position-based indexing.") + { + "status": "success", + "message": "Memory successfully saved", + "total_memories": 43 + } + Notes: + - Each memory should be a complete, standalone piece of information + - Providing clear, concise, and well-structured memories improves retrievability + """ + if not memory or not memory.strip(): + return { + "status": "error", + "message": "Cannot save empty memory", + "total_memories": len(memories) + } + + try: + # Add timestamp to the memory (could be used for recency scoring in the future) + memories.append(memory.strip()) + # Save to disk for persistence + save_memories() + return { + "status": "success", + "message": "Memory successfully saved", + "total_memories": len(memories) + } + except Exception as e: + return { + "status": "error", + "message": f"Failed to save memory: {str(e)}", + "total_memories": len(memories) + } + +@mcp.tool +def list_all_memories() -> Dict[str, Any]: + """ + List all stored memories with their index numbers. + + This tool is helpful for reviewing all stored memories, understanding what information + is available, or identifying memories that might need to be removed. + + Returns: + Dict[str, Any]: Dictionary containing: + - status: "success" or "error" + - count: Total number of memories + - memories: List of all memories with index numbers + + Example: + >>> list_all_memories() + { + "status": "success", + "count": 3, + "memories": [ + "1. Python dict.get(key, default) method returns the value for key if key is in the dictionary, else default.", + "2. When working with pandas DataFrames, use df.loc[] for label-based indexing and df.iloc[] for position-based indexing.", + "3. 
Git commit --amend can be used to modify the most recent commit message." + ] + } + + Notes: + - Returns an empty list if no memories are stored + - Memories are listed with their index number for reference + """ + try: + numbered_memories = [f"{i+1}. {memory}" for i, memory in enumerate(memories)] + + return { + "status": "success", + "count": len(memories), + "memories": numbered_memories + } + except Exception as e: + return { + "status": "error", + "message": f"Failed to list memories: {str(e)}", + "count": 0, + "memories": [] + } + +# Load memories on startup +load_memories() + +print("Starting Memory MCP server with streamable-http transport...") + +if __name__ == "__main__": + # Get port from environment variable or command line argument as fallback + port = None + if "MCP_PORT" in os.environ: + port = int(os.environ["MCP_PORT"]) + print(f"Using port from MCP_PORT environment variable: {port}") + elif "FASTMCP_PORT" in os.environ: + port = int(os.environ["FASTMCP_PORT"]) + print(f"Using port from FASTMCP_PORT environment variable: {port}") + elif len(sys.argv) == 2: + port = int(sys.argv[1]) + print(f"Using port from command line argument: {port}") + else: + print("Usage: python server.py ") + print("Or set MCP_PORT/FASTMCP_PORT environment variable") + sys.exit(1) + + print(f"Starting server on port {port}") + mcp.run(transport="streamable-http", port=port, host="0.0.0.0") \ No newline at end of file diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/SKILL.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/SKILL.md new file mode 100644 index 0000000..26c732a --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/SKILL.md @@ -0,0 +1,455 @@ +--- +name: covalent-docking +description: Covalent molecular docking for irreversible inhibitor design. Dock ligands with warheads to protein reactive residues (Cys, SeCys, Lys, Ser, Tyr). 
Multi-seed reproducibility assessment, contact analysis, de novo design workflows. +license: MIT license +metadata: + skill-author: HolobiomicsLab +--- + +# Covalent Docking: Design of Irreversible Inhibitors + +## Overview + +Covalent docking predicts the binding poses of irreversible inhibitors that form covalent bonds with protein nucleophiles. Unlike non-covalent docking, it requires modeling the covalent attachment chemistry, bond geometry validation, and specialized scoring considerations. + +**Core Capabilities:** +- Prepare ligands with covalent warheads (alkyne, acrylamide, epoxide, haloacetamide) +- Dock to reactive residues (Cys, SeCys, Lys, Ser, Tyr) +- Validate covalent bond geometry (bond lengths, angles) +- Analyze protein-ligand contacts (hydrophobic, H-bond, ionic) +- Assess docking reproducibility with multi-seed analysis +- Generate analogs via rule-based mutations and genetic algorithms + +**Key Distinction:** Covalent docking predicts **covalent complex structures** with **bond formation**, NOT just binding poses. Requires validation of chemical feasibility alongside structural complementarity. + +## When to Use This Skill + +This skill should be used when: + +- "Design covalent inhibitors" or "irreversible inhibitors" +- "Dock alkyne/acrylamide warhead to cysteine" +- Targeting specific reactive residues in kinases, proteases, or other enzymes +- Optimizing covalent ligands for affinity and selectivity +- "Generate analogs of covalent inhibitor" or "SAR exploration" +- Analyzing protein-ligand contacts for covalent complexes +- Tasks involving PDB files + SMILES with warhead functional groups + +**Not for:** Non-covalent docking (use DiffDock or GNINA standard mode), reversible covalent inhibitors (requires different approach), metalloprotein covalent docking. 
+ +## Prerequisites + +### Required Software + +```bash +# GNINA (covalent docking support) +gnina --version # Requires covalent docking build + +# XTB (geometry optimization) +xtb --version + +# RDKit (ligand preparation) +python -c "import rdkit; print(rdkit.__version__)" + +# OpenFF (force field parameterization) +python -c "import openff; print(openff.__version__)" +``` + +### Receptor Preparation + +Prepare receptor with reactive residue properly labeled: + +```bash +# For selenocysteine (SeCys) - use SEG as residue name +# PDB format: HETATM records with Se instead of S + +# Example reactive atom specification: +# Chain A, Residue 46, Atom SEG → "A:46:SEG" +``` + +## Core Workflows + +### Workflow 1: Single Covalent Docking (8-Step Protocol) + +**Standard workflow for docking one ligand to one receptor:** + +```python +import subprocess +import json + +# Step 1: Prepare ligand (SMILES → 3D → optimized → final-form) +subprocess.run([ + "python", "scripts/prepare_ligand.py", + "--smiles", "CC#CCOc1ccc(C(=O)NCC(N)=O)nn1", # Full molecule with leaving group + "--warhead", "alkyne_se", + "--output", "ligand_prepared.sdf" +]) + +# Step 2: Identify reactive residue +# Manual or automated detection of Cys/SeCys/Lys/Ser/Tyr +receptor_atom = "A:46:SEG" # Format: chain:resnum:atom_name + +# Step 3: Define attachment SMARTS +# For terminal alkyne: [CD1] (terminal carbon) +# For acrylamide: [C;$(C=C-C(=O)N)] (beta carbon) +attachment_smarts = "[CD1]" + +# Step 4: Run covalent docking with multi-seed reproducibility +subprocess.run([ + "gnina", + "--receptor", "protein.pdb", + "--ligand", "ligand_prepared.sdf", + "--covalent", receptor_atom, + "--cov_atom", attachment_smarts, + "--covalent_optimize", + "--seed", "0", "--seed", "42", "--seed", "123", # Multi-seed + "--num_modes", "5", + "--out", "docked_poses.sdf" +]) + +# Step 5: Validate covalent bond geometry +subprocess.run([ + "python", "scripts/validate_geometry.py", + "--docked", "docked_poses.sdf", + "--receptor", 
"protein.pdb", + "--receptor_atom", receptor_atom, + "--bond_type", "C-Se", # or C-S, C-N, C-O + "--tolerance", "0.1" +]) + +# Step 6: Analyze contacts +subprocess.run([ + "python", "scripts/analyze_contacts.py", + "--docked", "docked_poses.sdf", + "--receptor", "protein.pdb", + "--cutoff", "5.0" +]) + +# Step 7: Assess reproducibility across seeds +subprocess.run([ + "python", "scripts/analyze_reproducibility.py", + "--docking_results", "docked_poses.sdf", + "--seeds", "0,42,123" +]) + +# Step 8: Generate PyMOL visualization +subprocess.run([ + "python", "scripts/generate_pymol_script.py", + "--docked", "docked_poses.sdf", + "--receptor", "protein.pdb", + "--receptor_atom", receptor_atom, + "--output", "visualize.pml" +]) +``` + +### Workflow 2: SAR Exploration and Analog Generation + +**Generate and dock analogs to explore SAR:** + +```python +# Step 1: Generate analogs from parent molecule +subprocess.run([ + "python", "scripts/generate_analogs.py", + "--parent_smiles", "CC#CCOc1ccc(C(=O)NCC(N)=O)nn1", + "--num_designs", "20", + "--mutation_rules", "extend_chain,add_branch,swap_functional_group", + "--scaffold_preservation", "True", + "--output", "analogs.csv" +]) + +# Step 2: Batch prepare all analogs +import pandas as pd +df = pd.read_csv("analogs.csv") +for idx, row in df.iterrows(): + subprocess.run([ + "python", "scripts/prepare_ligand.py", + "--smiles", row['smiles'], + "--ligand_id", f"analog_{idx}", + "--output", f"analogs_prepared/analog_{idx}.sdf" + ]) + +# Step 3: Batch dock all analogs +subprocess.run([ + "python", "scripts/batch_dock.py", + "--ligands", "analogs_prepared/*.sdf", + "--receptor", "protein.pdb", + "--receptor_atom", "A:46:SEG", + "--attachment_smarts", "[CD1]", + "--output", "batch_results.json" +]) + +# Step 4: Rank by affinity and analyze contacts +subprocess.run([ + "python", "scripts/rank_analogs.py", + "--results", "batch_results.json", + "--sort_by", "affinity", + "--output", "ranked_analogs.csv" +]) +``` + +### Workflow 3: 
Genetic Algorithm Optimization + +**Iterative optimization using genetic algorithm:** + +```python +# Run GA optimization starting from champion +subprocess.run([ + "python", "scripts/genetic_algorithm.py", + "--champion_smiles", "CC#CCOc1ccc(C(=O)NCC(N)=O)nn1", + "--generations", "5", + "--population_size", "20", + "--receptor", "protein.pdb", + "--receptor_atom", "A:46:SEG", + "--output", "ga_results/" +]) + +# Analyze GA trajectory +subprocess.run([ + "python", "scripts/analyze_ga_trajectory.py", + "--ga_dir", "ga_results/", + "--plot", "affinity_vs_generation.png" +]) +``` + +## Warhead Types and Chemistry + +### Supported Warheads + +| Warhead | Reactive Group | Typical Target | Bond Formed | Notes | +|---------|---------------|----------------|-------------|-------| +| **Alkyne** | Terminal alkyne | SeCys, Cys | C≡C → C-Se/C-S | Click chemistry, irreversible | +| **Acrylamide** | α,β-unsaturated amide | Cys | Michael addition | Most common in drugs (ibrutinib) | +| **Epoxide** | Three-membered ring | Cys, Asp, Glu | Ring opening | Less common, reversible possible | +| **Haloacetamide** | Chloroacetamide | Cys | SN2 displacement | Fast kinetics | +| **Vinyl sulfone** | -SO2-CH=CH2 | Cys | Michael addition | Irreversible | + +### Attachment SMARTS Patterns + +```python +# Alkyne terminal carbon (for Se-C bond formation) +"[CD1]" # Terminal alkyne carbon + +# Acrylamide beta carbon (Michael acceptor) +"[C;$(C=C-C(=O)N)]" # Beta carbon of acrylamide + +# Epoxide ring carbon +"[C;r3]" # Ring carbon in 3-membered ring (r3 = ring size 3; R1 only means "in one ring") + +# Haloacetamide alpha carbon +"[C;$(C-[Cl,Br,I])]" # Carbon attached to halogen +``` + +## Validation Criteria + +### Covalent Bond Geometry + +| Bond Type | Expected Length (Å) | Tolerance | Assessment | +|-----------|---------------------|-----------|------------| +| C-Se | 1.85 - 2.05 | ±0.1 | Optimal covalent bond | +| C-S | 1.75 - 1.95 | ±0.1 | Optimal covalent bond | +| C-N | 1.40 - 1.55 | ±0.1 | Optimal covalent bond | +| C-O | 1.40 - 1.50 | 
±0.1 | Optimal covalent bond | + +### Docking Quality Metrics + +**Reproducibility (Multi-seed):** +- **Excellent**: σ < 0.5 kcal/mol across seeds +- **Good**: σ < 1.0 kcal/mol +- **Poor**: σ > 1.5 kcal/mol (high variance, uncertain prediction) + +**Contact Analysis:** +- Hydrophobic contacts: >10 optimal +- H-bonds: 2-5 (not too many - desolvation penalty) +- Aromatic interactions: Valuable for specificity + +**Affinity Benchmarks:** +- -3 to -5 kcal/mol: Weak binder +- -5 to -7 kcal/mol: Moderate binder +- -7+ kcal/mol: Strong binder + +## Advanced Techniques + +### Multi-Seed Reproducibility Assessment + +Run docking with multiple random seeds to assess pose stability: + +```bash +# Run with 3+ seeds +for seed in 0 42 123 456 789; do + gnina --receptor protein.pdb --ligand ligand.sdf \ + --covalent A:46:SEG --cov_atom "[CD1]" \ + --seed $seed --out docked_seed_${seed}.sdf +done + +# Compare poses across seeds +python scripts/compare_seeds.py --pattern "docked_seed_*.sdf" +``` + +### Contact-Driven Design + +Analyze contacts to guide analog design: + +```python +# Key insight from 6ELW-Se campaign: +# TRP 136 forms 24 hydrophobic contacts in best binder +# → Design branched alkyl to fill this pocket + +# If contacts show H-bond donors near ligand: +# → Add carbonyl or ether to accept H-bonds + +# If contacts show charged residues: +# → Avoid charged groups (desolvation penalty) +``` + +### Custom Warhead Definition + +Define new warhead types via configuration: + +```json +{ + "warhead_name": "cyanoacrylamide", + "leaving_group_smarts": "[N;$(NC=O)]", + "attachment_smarts": "[C;$(C=C(C#N))]", + "bond_formed": "C-S", + "expected_bond_length": [1.75, 1.95] +} +``` + +## Limitations and Scope + +**This Skill IS Designed For:** +- Irreversible covalent inhibitors +- Well-defined reactive residues (Cys, SeCys, Lys, Ser, Tyr) +- Standard warhead chemistries (alkyne, acrylamide, epoxide, haloacetamide) +- Medium-sized ligands (100-600 Da) +- Single reactive site per 
ligand + +**This Skill IS NOT Designed For:** +- Reversible covalent inhibitors (boronic acids, etc.) +- Multi-site covalent attachment +- Metalloprotein covalent docking +- Covalent protein-protein interactions +- Very large ligands (PROTACs, peptides >20 residues) +- Membrane proteins (insufficient validation) + +## Troubleshooting + +### Common Issues + +**Issue: No poses generated** +- Check receptor atom specification format (chain:resnum:atom_name) +- Verify warhead SMARTS matches ligand structure +- Ensure reactive residue is properly labeled in PDB + +**Issue: Poor geometry (bond length outside range)** +- Try different attachment SMARTS +- Increase `--covalent_optimize` iterations +- Check if warhead is compatible with target residue + +**Issue: High reproducibility variance (σ > 1.5)** +- Indicates flexible pocket or poor convergence +- Increase `--num_modes` +- Try ensemble docking with multiple receptor conformations + +**Issue: All analogs have similar (poor) affinity** +- Scaffold may be suboptimal +- Try scaffold hopping using `scripts/scaffold_hop.py` +- Check if contacts with key residues are formed + +### Best Practices from Real Campaigns + +**From 6ELW-Se Campaign (-5.62 kcal/mol champion):** +1. **Hydrophobic contacts matter most** - 48 optimal > 204 suboptimal +2. **Branched alkyl wins** - isopropyl > ethyl > methyl for TRP 136 pocket +3. **Ether beats alcohol** - O-CH3 better than OH (H-bond acceptor + lipophilic) +4. **Avoid over-functionalization** - Two CONH2 optimal, third causes clash +5. **Reproducibility precedes affinity** - Ensure σ < 1.0 before trusting rank + +## Resources + +### Reference Documentation + +**`references/workflow_guide.md`**: Complete 8-step workflow with detailed commands, parameter explanations, and decision trees for each step. + +**`references/alkyne_se_case_study.md`**: Full case study from 6ELW-Se campaign including SAR learning, GA optimization, and design rules validation. 
+ +**`references/warhead_chemistry.md`**: Comprehensive guide to warhead types, chemistry mechanisms, SMARTS patterns, and target residue compatibility. + +**`references/validation_criteria.md`**: Detailed validation metrics including bond geometry ranges, contact analysis methods, and reproducibility assessment. + +**`references/de_novo_strategies.md`**: Strategies for analog generation, genetic algorithms, Bayesian optimization, and scaffold hopping. + +### Helper Scripts (`scripts/`) + +**`prepare_ligand.py`**: Complete ligand preparation pipeline (SMILES → 3D → xtb opt → tail removal) + +**`batch_dock.py`**: Parallel batch docking with progress tracking + +**`generate_analogs.py`**: Rule-based analog generation with configurable mutations + +**`genetic_algorithm.py`**: Full GA implementation for ligand optimization + +**`validate_geometry.py`**: Covalent bond geometry validation + +**`analyze_contacts.py`**: Contact analysis (hydrophobic, H-bond, ionic, aromatic) + +**`analyze_reproducibility.py`**: Multi-seed reproducibility assessment + +**`generate_pymol_script.py`**: PyMOL visualization generation + +## Example: Complete Campaign Workflow + +```bash +# 1. Setup +mkdir campaign && cd campaign +ln -s /path/to/6ELW-Se.pdb receptor.pdb + +# 2. Initial docking of scaffold +python scripts/prepare_ligand.py --smiles "CC#CCOc1ccc(C(=O)NN)nn1" --output scaffold.sdf +python scripts/dock_and_validate.py --ligand scaffold.sdf --receptor receptor.pdb \ + --receptor_atom "A:46:SEG" --attachment "[CD1]" + +# 3. Generate analogs +python scripts/generate_analogs.py --parent scaffold.sdf --num 50 --output analogs/ + +# 4. Batch dock +python scripts/batch_dock.py --ligands "analogs/*.sdf" --receptor receptor.pdb \ + --config batch_config.json --output results/ + +# 5. Analyze and select champion +python scripts/rank_by_affinity_and_contacts.py --results results/ --output champions.csv + +# 6. 
GA optimization of champion +python scripts/genetic_algorithm.py --champion $(head -1 champions.csv) \ + --generations 5 --output ga_results/ + +# 7. Final validation +python scripts/validate_champion.py --ga_dir ga_results/ --receptor receptor.pdb +``` + +## Citations + +When using covalent docking in publications: + +**GNINA Covalent Docking:** +``` +McNutt et al. (2021) "GNINA 1.0: Molecular docking with deep learning" +Journal of Cheminformatics 13, 43 +``` + +**XTB Geometry Optimization:** +``` +Grimme et al. (2017) "A robust and accurate tight-binding quantum chemical +method for structures, vibrational frequencies, and noncovalent interactions +of large molecular systems" +J. Chem. Theory Comput. 13, 1989-2009 +``` + +**OpenFF Force Field:** +``` +Eastman et al. (2023) "Open Force Field BespokeFit" +J. Chem. Theory Comput. 19, 4524-4535 +``` + +--- + +**Next Steps:** Read `references/workflow_guide.md` for the complete 8-step protocol, or `references/alkyne_se_case_study.md` for the validated case study. diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/alkyne_se_case_study.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/alkyne_se_case_study.md new file mode 100644 index 0000000..6ee7e06 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/alkyne_se_case_study.md @@ -0,0 +1,368 @@ +# Case Study: 6ELW-Se Covalent Docking Campaign + +## Overview + +This document describes a complete covalent docking campaign on the 6ELW-Se system (selenocysteine-containing protein), resulting in a **-5.62 kcal/mol** champion compound - a **41% improvement** over the initial scaffold. 
+ +**System:** 6ELW-Se (PDB: 6ELW with selenocysteine at position 46) +**Warhead:** Alkyne (terminal alkyne → selenocysteine) +**Campaign Duration:** Multi-stage optimization over several iterations +**Total Improvement:** -1.65 kcal/mol (from -3.97 to -5.62 kcal/mol) + +--- + +## Campaign Stages + +### Stage 1: Initial Scaffold (-3.97 kcal/mol) + +**Ligand 10 (Baseline)** +- Structure: Triazole core + sugar + two CONH₂ groups +- Affinity: **-3.97 kcal/mol** +- Key features: Established covalent attachment, but limited hydrophobic contacts + +**Contacts Analysis:** +- TRP 136: 8 hydrophobic contacts (suboptimal) +- Limited tail filling in hydrophobic pocket +- Two CONH₂ groups present (optimal number) + +**Learnings:** +- Core scaffold is viable +- Need to improve hydrophobic complementarity +- Tail region has room for optimization + +--- + +### Stage 2: "Beat the Best" Campaign (-4.64 kcal/mol) + +**Strategy:** Systematic tail extension and modification + +**Ultra 3 (Champion)** +- Modification: Extended CH₂OH → CH₂CH₂OH +- Affinity: **-4.64 kcal/mol** +- Improvement: -0.67 kcal/mol vs Ligand 10 + +**Key Insight:** +Longer alkyl chain in tail improves TRP 136 contacts. + +**SAR Learned:** +- TRP 136 pocket can accommodate longer chains +- Ethyl > methyl for hydrophobic contact count +- No penalty from additional CH₂ + +--- + +### Stage 3: Genetic Algorithm Optimization (-5.13 kcal/mol) + +**Strategy:** Automated exploration with genetic algorithm + +**GA_5 (Champion)** +- Modification: Added ether linkage - CH₂CH₂OCH₃ +- Affinity: **-5.13 kcal/mol** +- Improvement: -0.49 kcal/mol vs Ultra 3 + +**Key Insight:** +Ether oxygen provides optimal geometry and H-bond acceptor capability while maintaining lipophilicity. 
+ +**GA Exploration Results:** + +| Generation | Best Affinity | Key Feature | +|------------|---------------|-------------| +| Gen 1 | -4.71 | Chain extension | +| Gen 2 | -4.89 | Amide modifications | +| Gen 3 | -5.02 | Ether introduction | +| Gen 4 | -5.08 | Branching exploration | +| Gen 5 | **-5.13** | Optimized ether position | + +**SAR Learned:** +- Ether O-CH₃ > alcohol OH (H-bond acceptor + lipophilic contact) +- Position of oxygen matters (2-3 carbons from core optimal) +- GA effective for exploring chemical space + +--- + +### Stage 4: Wild Design / Champion Optimization (-5.62 kcal/mol) + +**Strategy:** Intuitive design based on learned SAR + structural analysis + +**Champion (-5.62 kcal/mol)** +- Modification: Branched isopropyl - CH₂CH₂OCH(CH₃)₂ +- Affinity: **-5.62 kcal/mol** +- Improvement: -0.49 kcal/mol vs GA_5 + +**Structural Analysis:** +- TRP 136: **24 hydrophobic contacts** (optimal filling) +- Isopropyl branch perfectly fits TRP 136 pocket +- Ether oxygen maintains H-bond acceptor capability +- Total contacts: 48 high-quality (vs 204 suboptimal in poor designs) + +**Key Insight:** +Optimal contact count > total contact count. Branched alkyl fits TRP 136 pocket better than linear chains. 
+ +--- + +## Design Rules Validated + +### ✅ Good Features (From Champion) + +| Feature | Structure | Rationale | +|---------|-----------|-----------| +| **Tail** | -CH₂CH₂OCH(CH₃)₂ | Branched isopropyl fills TRP 136 pocket | +| **Core** | Triazole + two CONH₂ | Established scaffold, optimal H-bonding | +| **Warhead** | Terminal alkyne | Forms optimal C-Se bond (1.94 Å) | +| **Contacts** | 24 TRP 136 contacts | Hydrophobic contacts drive affinity | + +### ❌ Bad Features (Learned from Failures) + +| Feature | Structure | Problem | +|---------|-----------|---------| +| **Charged tail** | -CH₂NH₂ | Desolvation penalty, poor complementarity | +| **Wrong core** | Pyrimidine | Lost established H-bond network | +| **Third amide** | Three CONH₂ | Steric clash with protein backbone | +| **Excessive H-bonds** | Many OH/NH | Desolvation penalty outweighs benefit | + +--- + +## Quantitative SAR Summary + +### Tail Structure Ranking + +For TRP 136 pocket filling (best to worst): + +1. **-CH₂CH₂OCH(CH₃)₂** (isopropyl ether): -5.62 kcal/mol ⭐ Champion +2. **-CH₂CH₂OCH₃** (methyl ether): -5.13 kcal/mol +3. **-CH₂CH₂OH** (ethanol): -4.64 kcal/mol +4. **-CH₂OH** (methanol): -4.21 kcal/mol +5. 
**-CH₂NH₂** (amine): -3.85 kcal/mol ❌ Poor + +### Contact Count vs Affinity + +| Compound | TRP 136 Contacts | Total Contacts | Affinity | +|----------|------------------|----------------|----------| +| Champion | 24 | 48 optimal | **-5.62** | +| GA_5 | 18 | 52 optimal | -5.13 | +| Ultra 3 | 14 | 38 optimal | -4.64 | +| Ligand 10 | 8 | 45 mixed | -3.97 | +| Poor Design | 6 | 204 suboptimal | -3.45 | + +**Key Finding:** 48 optimal contacts >> 204 suboptimal contacts + +### Reproducibility Analysis + +All champion compounds showed: +- σ < 0.5 kcal/mol across 3 seeds (excellent reproducibility) +- Consistent pose geometry (RMSD < 1.0 Å) +- Stable covalent bond distance (1.94 ± 0.05 Å) + +--- + +## Atomic-Level Insights + +### Covalent Bond Geometry + +**Optimal C-Se Bond:** +- Distance: 1.94 Å (expected: 1.85-2.05 Å) +- Angle at Se: 96.5° (bent geometry typical of divalent selenium, not the 109.5° tetrahedral angle) +- Torsion: Alkyne C≡C-Se-Cα allows rotation + +### Key Residue Interactions + +**TRP 136 (Hydrophobic Pocket):** +- Indole ring forms ceiling of pocket +- Isopropyl fills pocket optimally +- 24 van der Waals contacts in champion + +**PHE 140:** +- π-stacking with triazole core +- Edge-on interaction with isopropyl + +**HIS 143:** +- H-bond acceptor from amide NH +- Distance: 2.8 Å (optimal) + +**ALA 145:** +- Methyl group adds hydrophobic contact +- Part of pocket floor + +### Solvent Accessibility + +Champion compound: +- Buried surface area: 85% (good complementarity) +- Exposed polar groups: 2 (amide oxygens) +- No charged groups exposed (avoids desolvation) + +--- + +## Campaign Workflow Applied + +### Tools and Parameters Used + +```python +# Ligand preparation +prepare_ligand( + smiles="CC#CCOc1ccc(C(=O)NCC(N)=O)nn1", + warhead_type="alkyne_se", + optimize=True, # XTB optimization enabled + output="champion.sdf" +) + +# Docking parameters +dock_covalent( + receptor="6ELW-Se.pdb", + receptor_atom="A:46:SEG", + attachment_smarts="[CD1]", + seeds=[0, 42, 123], + num_modes=5, + covalent_optimize=True +) + +# 
Validation criteria +validate_geometry( + expected_bond_type="C-Se", + tolerance=0.1 +) + +# Contact analysis key metrics +analyze_contacts( + residues_of_interest=["TRP 136", "PHE 140", "HIS 143", "ALA 145"], + cutoff=5.0 +) +``` + +### Decision Points + +**When to Stop:** +1. Affinity plateaus (new designs don't improve) +2. SAR saturation (all reasonable modifications tried) +3. Synthetic accessibility concerns +4. Contact quality > quantity achieved + +**When to Continue:** +1. Clear SAR trends emerging +2. Unexplored chemical space nearby +3. Specific contacts missing (e.g., target TRP 136 but only 10 contacts) +4. Reproducibility issues (need more sampling) + +--- + +## Transferable Lessons + +### For Other Covalent Systems + +1. **Start with scaffold** that has: + - Established warhead attachment + - Core H-bonding network + - Room for optimization (tail/extension points) + +2. **Identify key pocket** to fill: + - In 6ELW-Se: TRP 136 + - In other systems: analyze contacts from initial dock + - Target: ≥20 hydrophobic contacts for optimal binder + +3. **Explore systematically:** + - Linear chain extension first + - Then functional groups (ether > alcohol) + - Finally branching (isopropyl > ethyl > methyl) + +4. 
**Validate with multi-seed:** + - σ < 0.5: High confidence + - σ > 1.5: System unstable, reconsider + +### For Alkyne-Se Systems Specifically + +- C-Se bond: 1.85-2.05 Å optimal +- Alkyne allows rotation (torsion not critical) +- SeCys more nucleophilic than Cys (faster kinetics) +- Consider reversible alternatives if needed + +--- + +## Files and Data + +### Campaign Artifacts + +``` +campaign/ +├── initial_screening/ +│ ├── ligand_10.sdf +│ └── ligand_10_docked.sdf +├── beat_the_best/ +│ ├── ultra_1_to_5.sdf +│ └── ultra_3_champion.sdf +├── ga_optimization/ +│ ├── ga_generation_1_to_5/ +│ └── ga_5_champion.sdf +├── wild_design/ +│ ├── champion_minus_5.62.sdf +│ └── champion_contacts.json +└── analysis/ + ├── sar_summary.csv + ├── contact_comparison.png + └── affinity_trajectory.png +``` + +### Key Data Files + +**sar_summary.csv:** +```csv +compound,affinity,trp_136_contacts,total_optimal_contacts,tail_structure +Ligand_10,-3.97,8,45,-CH2OH +Ultra_3,-4.64,14,38,-CH2CH2OH +GA_5,-5.13,18,52,-CH2CH2OCH3 +Champion,-5.62,24,48,-CH2CH2OCH(CH3)2 +``` + +--- + +## Reproducing This Campaign + +### Step-by-Step + +```bash +# 1. Setup +mkdir 6elw_se_campaign && cd 6elw_se_campaign +ln -s /path/to/6ELW-Se.pdb receptor.pdb + +# 2. Dock initial scaffold +python -c " +from covalent_docking import prepare_ligand, dock_covalent +prepare_ligand('CC#CCOc1ccc(C(=O)NCC(N)=O)nn1', 'alkyne_se', 'ligand_10.sdf') +" + +gnina -r receptor.pdb -l ligand_10.sdf --covalent A:46:SEG --cov_atom "[CD1]" \ + --covalent_optimize --seed 0 --seed 42 --seed 123 -o ligand_10_docked.sdf + +# 3. Analyze contacts, identify TRP 136 as key pocket +python scripts/analyze_contacts.py --docked ligand_10_docked.sdf --receptor receptor.pdb + +# 4. Generate analogs focusing on tail +python scripts/generate_analogs.py --parent ligand_10.sdf \ + --rules extend_chain,add_branch,add_ether --output analogs/ + +# 5. 
Batch dock +python scripts/batch_dock.py --ligands "analogs/*.sdf" --receptor receptor.pdb \ + --config config.json --output results/ + +# 6. Rank and select +python scripts/rank_by_contacts.py --results results/ --target TRP_136 --min_contacts 20 + +# 7. GA optimization +python scripts/genetic_algorithm.py --champion selected.sdf --generations 5 + +# 8. Final validation +python scripts/validate_champion.py --ga_results ga_output/ --receptor receptor.pdb +``` + +--- + +## Conclusion + +This campaign demonstrates that systematic covalent docking, guided by contact analysis and multi-seed reproducibility, can achieve significant affinity improvements (41% in this case). The key was identifying TRP 136 as the critical pocket and designing the optimal tail to fill it. + +**The champion compound (-5.62 kcal/mol) is characterized by:** +- Triazole core with two CONH₂ (established H-bonds) +- Terminal alkyne warhead (optimal C-Se bond) +- Isopropyl ether tail (24 TRP 136 contacts) +- σ < 0.3 kcal/mol reproducibility +- 48 optimal quality contacts + +**For future campaigns:** Use this case study as a template. Identify your TRP 136 equivalent, explore systematically, and prioritize contact quality over quantity. diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/de_novo_strategies.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/de_novo_strategies.md new file mode 100644 index 0000000..00f1f29 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/de_novo_strategies.md @@ -0,0 +1,722 @@ +# De Novo Design Strategies for Covalent Docking + +## Overview + +This document describes strategies for generating and optimizing covalent ligands de novo, including rule-based mutation, genetic algorithms, and other computational design approaches. + +--- + +## 1. 
Rule-Based Analog Generation + +### Mutation Rules + +```python +MUTATION_RULES = { + "extend_chain": { + "description": "Add CH2 to extend carbon chain", + "smarts": "[*]~[*]>>[*]~[C]~[*]", + "probability": 0.3, + "applies_to": ["alkyl_chains"] + }, + "add_branch": { + "description": "Add methyl branch", + "smarts": "[CH2]>>[C](C)", + "probability": 0.25, + "applies_to": ["terminal_alkyl"] + }, + "add_ether": { + "description": "Convert CH2 to ether oxygen", + "smarts": "[CH2]~[CH2]>>[CH2]~[O]~[CH2]", + "probability": 0.2, + "applies_to": ["alkyl_linkers"] + }, + "swap_functional_group": { + "description": "Replace functional group", + "transformations": { + "OH": "OCH3", # Alcohol to ether + "CH3": "CH2OH", # Methyl to hydroxymethyl + "NH2": "CONH2", # Amine to amide + "F": "Cl", # Halogen swap + }, + "probability": 0.15 + }, + "add_hbond_group": { + "description": "Add H-bond donor/acceptor", + "additions": ["OH", "NH2", "C(=O)", "O"], + "probability": 0.1, + "constraints": "not_too_many_hba" + } +} +``` + +### Implementation + +```python +def generate_analogs_rule_based( + parent_smiles: str, + num_designs: int = 10, + mutation_rules: list = None, + scaffold_preservation: bool = True +) -> list: + """ + Generate analogs using rule-based mutations. 
+ + Args: + parent_smiles: Starting molecule + num_designs: Number of designs to generate + mutation_rules: List of rule names to apply + scaffold_preservation: Keep core scaffold intact + + Returns: + List of dicts with smiles and mutation info + """ + from rdkit import Chem + from rdkit.Chem import AllChem + + parent = Chem.MolFromSmiles(parent_smiles) + designs = [] + + for i in range(num_designs): + # Select mutation rule + rule = select_weighted_rule(mutation_rules or list(MUTATION_RULES.keys())) + + # Apply mutation + mutated = apply_mutation(parent, rule, scaffold_preservation) + + if mutated and is_valid_molecule(mutated): + designs.append({ + "smiles": Chem.MolToSmiles(mutated), + "parent": parent_smiles, + "mutation_type": rule, + "generation": 0 + }) + + return designs +``` + +### SMARTS-Based Transformations + +```python +# Predefined transformations +TRANSFORMATIONS = { + "methyl_to_ethyl": { + "pattern": Chem.MolFromSmarts("[CH3]"), + "replacement": Chem.MolFromSmiles("CC") + }, + "add_methoxy": { + "pattern": Chem.MolFromSmarts("[OH]"), + "replacement": Chem.MolFromSmiles("OC") + }, + "chain_extension": { + "pattern": Chem.MolFromSmarts("[CH2]~[*]"), + "replacement": Chem.MolFromSmiles("CC") + } +} + +def apply_transformation(mol, transformation_name: str): + """Apply a predefined transformation to molecule.""" + trans = TRANSFORMATIONS[transformation_name] + + # Find matches + matches = mol.GetSubstructMatches(trans["pattern"]) + + if not matches: + return None + + # Apply to first match (or random) + # RDKit EditMolecule operations... + + return modified_mol +``` + +--- + +## 2. 
Genetic Algorithm (GA) + +### Algorithm Overview + +``` +Generation 0: Initial population (random or from champion) + ↓ +Evaluate: Dock all members, calculate fitness + ↓ +Selection: Choose parents based on fitness + ↓ +Crossover: Combine parent features + ↓ +Mutation: Random modifications + ↓ +Generation N+1: New population + ↓ +Repeat until convergence or max generations +``` + +### Implementation + +```python +class GeneticAlgorithm: + def __init__( + self, + population_size: int = 20, + generations: int = 5, + mutation_rate: float = 0.3, + crossover_rate: float = 0.5, + elitism: int = 2 + ): + self.population_size = population_size + self.generations = generations + self.mutation_rate = mutation_rate + self.crossover_rate = crossover_rate + self.elitism = elitism + + def run( + self, + champion_smiles: str, + receptor_pdb: str, + receptor_atom: str, + attachment_smarts: str + ) -> dict: + """ + Run GA optimization. + + Returns trajectory and final champion. + """ + # Initialize population + population = self.initialize_population(champion_smiles) + + trajectory = [] + + for gen in range(self.generations): + print(f"Generation {gen + 1}/{self.generations}") + + # Evaluate fitness (docking) + fitness_scores = self.evaluate_population( + population, receptor_pdb, receptor_atom, attachment_smarts + ) + + # Record statistics + best_idx = np.argmin(fitness_scores) + trajectory.append({ + "generation": gen, + "best_affinity": fitness_scores[best_idx], + "best_smiles": population[best_idx], + "mean_affinity": np.mean(fitness_scores), + "std_affinity": np.std(fitness_scores) + }) + + # Selection + parents = self.select_parents(population, fitness_scores) + + # Crossover and mutation + offspring = self.create_offspring(parents) + + # Elitism - carry the top `elitism` members forward unchanged (best first) + new_population = [population[i] for i in np.argsort(fitness_scores)[:self.elitism]] + new_population.extend(offspring[:self.population_size - self.elitism]) + + population = new_population + + return { + "champion": 
population[0], + "trajectory": trajectory, + "generations": self.generations + } + + def evaluate_population( + self, + population: list, + receptor_pdb: str, + receptor_atom: str, + attachment_smarts: str + ) -> list: + """Dock all population members and return affinities.""" + affinities = [] + + for smiles in population: + # Dock molecule + affinity = dock_and_score( + smiles, receptor_pdb, receptor_atom, attachment_smarts + ) + affinities.append(affinity) + + return affinities + + def select_parents(self, population: list, fitness_scores: list) -> list: + """Tournament selection.""" + parents = [] + + for _ in range(len(population)): + # Tournament + contestants = random.sample(range(len(population)), 3) + winner = min(contestants, key=lambda i: fitness_scores[i]) + parents.append(population[winner]) + + return parents + + def create_offspring(self, parents: list) -> list: + """Crossover and mutation.""" + offspring = [] + + for i in range(0, len(parents), 2): + parent1 = parents[i] + parent2 = parents[i+1] if i+1 < len(parents) else parents[0] + + # Crossover + if random.random() < self.crossover_rate: + child1, child2 = self.crossover(parent1, parent2) + else: + child1, child2 = parent1, parent2 + + # Mutation + if random.random() < self.mutation_rate: + child1 = self.mutate(child1) + if random.random() < self.mutation_rate: + child2 = self.mutate(child2) + + offspring.extend([child1, child2]) + + return offspring + + def crossover(self, parent1: str, parent2: str) -> tuple: + """ + Molecular crossover - exchange fragments. + + Strategy: Identify common scaffold, exchange substituents. 
+ """ + mol1 = Chem.MolFromSmiles(parent1) + mol2 = Chem.MolFromSmiles(parent2) + + # Find common core + core = find_common_scaffold([mol1, mol2]) + + # Exchange R-groups + child1 = replace_r_groups(mol1, mol2, core) + child2 = replace_r_groups(mol2, mol1, core) + + return ( + Chem.MolToSmiles(child1) if child1 else parent1, + Chem.MolToSmiles(child2) if child2 else parent2 + ) + + def mutate(self, smiles: str) -> str: + """Apply random mutation.""" + mol = Chem.MolFromSmiles(smiles) + + # Select random mutation + mutation = random.choice(list(MUTATION_RULES.keys())) + + # Apply + mutated = apply_mutation(mol, mutation) + + return Chem.MolToSmiles(mutated) if mutated else smiles +``` + +### GA Parameters from 6ELW-Se Campaign + +```python +# Optimal parameters found during campaign +OPTIMAL_GA_PARAMS = { + "population_size": 20, # Balance diversity vs computation + "generations": 5, # Diminishing returns after 5 + "mutation_rate": 0.3, # 30% mutation rate + "crossover_rate": 0.5, # 50% crossover + "elitism": 2, # Keep top 2 from each generation + "early_stopping": { + "enabled": True, + "patience": 2, # Stop if no improvement for 2 gens + "min_improvement": 0.2 # kcal/mol + } +} +``` + +--- + +## 3. Scaffold Hopping + +### Purpose + +Replace core scaffold while preserving key interactions. + +### Implementation + +```python +def scaffold_hop( + original_smiles: str, + scaffold_smarts: str, + replacement_scaffolds: list, + preserve_warhead: bool = True +) -> list: + """ + Perform scaffold hopping. 
+ + Args: + original_smiles: Starting molecule + scaffold_smarts: SMARTS of scaffold to replace + replacement_scaffolds: List of replacement scaffold SMILES + preserve_warhead: Keep warhead group intact + + Returns: + List of scaffold-hopped molecules + """ + from rdkit import Chem + + original = Chem.MolFromSmiles(original_smiles) + scaffold = Chem.MolFromSmarts(scaffold_smarts) + + # Identify R-groups attached to scaffold + r_groups = extract_r_groups(original, scaffold) + + # If preserving warhead, identify it + if preserve_warhead: + warhead = extract_warhead(original) + + results = [] + for replacement_smiles in replacement_scaffolds: + replacement = Chem.MolFromSmiles(replacement_smiles) + + # Attach R-groups to new scaffold + new_mol = attach_r_groups(replacement, r_groups) + + # Re-attach warhead if needed + if preserve_warhead: + new_mol = attach_warhead(new_mol, warhead) + + if new_mol and is_valid_molecule(new_mol): + results.append({ + "smiles": Chem.MolToSmiles(new_mol), + "original_scaffold": scaffold_smarts, + "new_scaffold": replacement_smiles + }) + + return results + +# Example replacement scaffolds for triazole +TRIAZOLE_REPLACEMENTS = [ + "c1ccncc1", # Pyridine + "c1cncnc1", # Pyrimidine + "c1cc[nH]c1", # Pyrrole + "c1cocn1", # Oxazole + "c1cscn1", # Thiazole + "c1ccc2[nH]ccc2c1" # Indole +] +``` + +--- + +## 4. Contact-Driven Design + +### Strategy + +Design ligands to maximize contacts with key residues. + +```python +def contact_driven_design( + parent_smiles: str, + target_residue: str, + current_contacts: int, + target_contacts: int = 20 +) -> list: + """ + Generate designs to improve contacts with specific residue. 
+ + Args: + parent_smiles: Starting molecule + target_residue: Residue to target (e.g., "TRP 136") + current_contacts: Current contact count + target_contacts: Desired contact count + + Returns: + List of design suggestions + """ + from rdkit import Chem + + mol = Chem.MolFromSmiles(parent_smiles) + + gap = target_contacts - current_contacts + + designs = [] + + if gap > 10: + # Need significant extension + designs.extend([ + {"type": "extend_chain", "addition": "CH2CH2"}, + {"type": "add_branch", "addition": "isopropyl"}, + {"type": "add_aromatic", "addition": "phenyl"} + ]) + elif gap > 5: + # Moderate extension + designs.extend([ + {"type": "extend_chain", "addition": "CH2"}, + {"type": "add_branch", "addition": "methyl"} + ]) + else: + # Fine-tuning + designs.extend([ + {"type": "adjust_position", "strategy": "optimize_torsion"}, + {"type": "add_methyl", "position": "terminal"} + ]) + + return designs + +# Residue-specific design rules +RESIDUE_DESIGN_RULES = { + "TRP": { + "preferred_groups": ["isopropyl", "tert-butyl", "phenyl"], + "avoid": ["charged", "very_polar"], + "strategy": "fill_hydrophobic_pocket" + }, + "PHE": { + "preferred_groups": ["phenyl", "cyclohexyl"], + "avoid": ["large_branched"], + "strategy": "pi_stacking" + }, + "ASP": { + "preferred_groups": ["NH2", "guanidinium"], + "avoid": ["negative"], + "strategy": "salt_bridge" + }, + "HIS": { + "preferred_groups": ["OH", "C=O", "NH"], + "avoid": ["very_large"], + "strategy": "h_bond" + } +} +``` + +--- + +## 5. Property-Based Filtering + +### Drug-Likeness Filters + +```python +PROPERTY_RANGES = { + "molecular_weight": (200, 600), + "logp": (-1, 5), + "hbd": (0, 5), + "hba": (1, 10), + "rotatable_bonds": (0, 10), + "tpsa": (40, 140), + "formal_charge": (-1, 1) +} + +def filter_by_properties(molecules: list) -> tuple: + """ + Filter molecules by drug-like properties. + + Returns (passed, failed) lists. 
+ """ + from rdkit.Chem import Descriptors + + passed = [] + failed = [] + + for mol_data in molecules: + mol = Chem.MolFromSmiles(mol_data["smiles"]) + + violations = [] + + mw = Descriptors.MolWt(mol) + if not (PROPERTY_RANGES["molecular_weight"][0] <= mw <= PROPERTY_RANGES["molecular_weight"][1]): + violations.append(f"MW: {mw:.1f}") + + logp = Descriptors.MolLogP(mol) + if not (PROPERTY_RANGES["logp"][0] <= logp <= PROPERTY_RANGES["logp"][1]): + violations.append(f"LogP: {logp:.2f}") + + hbd = Descriptors.NumHDonors(mol) + if not (PROPERTY_RANGES["hbd"][0] <= hbd <= PROPERTY_RANGES["hbd"][1]): + violations.append(f"HBD: {hbd}") + + # ... more properties + + if violations: + mol_data["violations"] = violations + failed.append(mol_data) + else: + passed.append(mol_data) + + return passed, failed +``` + +### Reactive Group Filters + +```python +REACTIVE_PATTERNS = { + "michael_acceptor": "C=CC(=O)", # Michael acceptor + "epoxide": "C1OC1", + "acyl_halide": "C(=O)[Cl,Br,I]", + "sulfonyl_halide": "S(=O)(=O)[Cl,Br]" +} + +def filter_reactive_groups(molecules: list, allowed_warhead: str) -> list: + """ + Filter out molecules with unwanted reactive groups. + + Keeps only the intended warhead chemistry. + """ + filtered = [] + + for mol_data in molecules: + mol = Chem.MolFromSmiles(mol_data["smiles"]) + + has_unwanted = False + for name, smarts in REACTIVE_PATTERNS.items(): + if name != allowed_warhead: + pattern = Chem.MolFromSmarts(smarts) + if mol.HasSubstructMatch(pattern): + has_unwanted = True + break + + if not has_unwanted: + filtered.append(mol_data) + + return filtered +``` + +--- + +## 6. Ensemble Strategy + +### Combining Multiple Methods + +```python +def ensemble_design( + champion_smiles: str, + receptor_pdb: str, + receptor_atom: str, + num_designs_per_method: int = 10 +) -> dict: + """ + Run multiple de novo methods and combine results. 
+ """ + all_designs = [] + + # Method 1: Rule-based generation + print("Running rule-based generation...") + rule_based = generate_analogs_rule_based( + champion_smiles, + num_designs=num_designs_per_method + ) + all_designs.extend(rule_based) + + # Method 2: Genetic algorithm + print("Running genetic algorithm...") + ga = GeneticAlgorithm(population_size=20, generations=5) + ga_results = ga.run(champion_smiles, receptor_pdb, receptor_atom, "[CD1]") + all_designs.extend({"smiles": step["best_smiles"]} for step in ga_results["trajectory"]) + + # Method 3: Scaffold hopping (if appropriate) + print("Running scaffold hopping...") + scaffolds = scaffold_hop( + champion_smiles, + "c1n[nH]c(C)n1", # Triazole + TRIAZOLE_REPLACEMENTS + ) + all_designs.extend(scaffolds) + + # Remove duplicates + unique_designs = deduplicate_by_smiles(all_designs) + + # Filter by properties + passed, failed = filter_by_properties(unique_designs) + + # Dock and rank + print(f"Docking {len(passed)} unique designs...") + ranked = batch_dock_and_rank(passed, receptor_pdb, receptor_atom) + + return { + "designs": ranked[:20], # Top 20 + "statistics": { + "total_generated": len(all_designs), + "unique": len(unique_designs), + "property_pass": len(passed), + "top_affinity": ranked[0]["affinity"] if ranked else None + } + } +``` + +--- + +## 7. Campaign Workflow Integration + +### Complete Design-Dock-Analyze Loop + +```python +def iterative_optimization_campaign( + initial_smiles: str, + receptor_pdb: str, + receptor_atom: str, + max_iterations: int = 5, + convergence_threshold: float = 0.1 +): + """ + Run complete iterative optimization campaign. 
+ """ + current_champion = initial_smiles + current_affinity = dock_and_score(initial_smiles, receptor_pdb, receptor_atom) + + history = [{ + "iteration": 0, + "smiles": initial_smiles, + "affinity": current_affinity + }] + + for iteration in range(1, max_iterations + 1): + print(f"\n{'='*60}") + print(f"Iteration {iteration}/{max_iterations}") + print(f"Current champion: {current_affinity:.2f} kcal/mol") + print(f"{'='*60}") + + # Generate designs + designs = ensemble_design( + current_champion, + receptor_pdb, + receptor_atom + ) + + # Find best + best_design = designs["designs"][0] + best_affinity = best_design["affinity"] + + print(f"Best new design: {best_affinity:.2f} kcal/mol") + + # Check improvement + improvement = current_affinity - best_affinity + + if improvement > convergence_threshold: + print(f"Improvement: {improvement:.2f} kcal/mol ✓") + current_champion = best_design["smiles"] + current_affinity = best_affinity + else: + print(f"Improvement: {improvement:.2f} kcal/mol (below threshold)") + print("Convergence reached.") + break + + history.append({ + "iteration": iteration, + "smiles": current_champion, + "affinity": current_affinity + }) + + return { + "final_champion": current_champion, + "final_affinity": current_affinity, + "iterations": len(history) - 1, + "history": history + } +``` + +--- + +## Summary + +| Strategy | Best For | Computation | Success Rate | +|----------|----------|-------------|--------------| +| **Rule-based** | Quick exploration, SAR validation | Low | Moderate | +| **Genetic Algorithm** | Deep optimization, novel structures | Medium-High | Good | +| **Scaffold Hopping** | Escape patents, new IP | Medium | Variable | +| **Contact-driven** | Specific residue targeting | Medium | Good | +| **Ensemble** | Comprehensive exploration | High | Best | + +**Recommendation:** Start with rule-based, progress to GA, use ensemble for final optimization. 
diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/validation_criteria.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/validation_criteria.md new file mode 100644 index 0000000..6fdf24e --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/validation_criteria.md @@ -0,0 +1,461 @@ +# Validation Criteria for Covalent Docking + +## Overview + +This document defines quantitative validation criteria for covalent docking results. Use these metrics to assess pose quality, chemical feasibility, and reproducibility. + +--- + +## 1. Covalent Bond Geometry + +### Bond Length Criteria + +| Bond Type | Optimal Range (Å) | Acceptable (Å) | Poor (Å) | Assessment | +|-----------|-------------------|----------------|----------|------------| +| **C-Se** | 1.85 - 2.05 | 1.75 - 2.15 | <1.75 or >2.15 | Geometry valid? | +| **C-S** | 1.75 - 1.95 | 1.65 - 2.05 | <1.65 or >2.05 | Geometry valid? | +| **C-N** | 1.40 - 1.55 | 1.35 - 1.65 | <1.35 or >1.65 | Geometry valid? | +| **C-O** | 1.40 - 1.50 | 1.35 - 1.60 | <1.35 or >1.60 | Geometry valid? | + +### Tolerance Settings + +```python +BOND_LENGTH_TOLERANCE = 0.1 # Å beyond expected range + +def assess_bond_length(bond_type: str, distance: float) -> dict: + """ + Assess covalent bond length. + + Returns assessment and quality score. 
+ """ + RANGES = { + "C-Se": (1.85, 2.05), + "C-S": (1.75, 1.95), + "C-N": (1.40, 1.55), + "C-O": (1.40, 1.50) + } + + opt_min, opt_max = RANGES[bond_type] + acc_min, acc_max = opt_min - 0.1, opt_max + 0.1 + + if opt_min <= distance <= opt_max: + return {"assessment": "optimal", "score": 1.0} + elif acc_min <= distance <= acc_max: + return {"assessment": "acceptable", "score": 0.7} + else: + return {"assessment": "poor", "score": 0.0} +``` + +### Bond Angle Criteria + +| Angle | Optimal | Acceptable Range | Notes | +|-------|---------|------------------|-------| +| C-Se-Cα | 96° | 90-110° | Tetrahedral geometry at Se | +| C-S-Cα | 100° | 95-115° | Tetrahedral geometry at S | +| C-N-Cα | 109° | 100-120° | Varies with hybridization | +| C-O-Cα | 110° | 100-120° | Varies with environment | + +--- + +## 2. Docking Pose Quality + +### Affinity Score Interpretation + +| Affinity (kcal/mol) | Binding Strength | Action | +|---------------------|------------------|--------| +| > -3.0 | Very weak | Likely not viable | +| -3.0 to -5.0 | Weak | Marginal, needs optimization | +| -5.0 to -7.0 | Moderate | Good starting point | +| -7.0 to -9.0 | Strong | Excellent binder | +| < -9.0 | Very strong | Exceptional (verify) | + +### Pose Rank Criteria + +For multi-mode docking, analyze pose distribution: + +```python +def assess_pose_distribution(poses: list) -> dict: + """ + Assess quality of pose ensemble. 
+ + Args: + poses: List of pose dicts with affinity and coordinates + """ + affinities = [p["affinity"] for p in poses] + + # Affinity spread + affinity_range = max(affinities) - min(affinities) + + # RMSD of top poses + top_poses = poses[:3] + rmsds = [] + for i in range(len(top_poses)): + for j in range(i+1, len(top_poses)): + rmsd = calculate_rmsd(top_poses[i], top_poses[j]) + rmsds.append(rmsd) + avg_rmsd = sum(rmsds) / len(rmsds) if rmsds else 0 + + assessment = { + "affinity_range": affinity_range, + "top3_avg_rmsd": avg_rmsd, + "diversity": "high" if avg_rmsd > 2.0 else "moderate" if avg_rmsd > 1.0 else "low" + } + + # Quality indicators + if affinity_range < 1.0 and avg_rmsd < 1.0: + assessment["confidence"] = "high" + elif affinity_range < 2.0 and avg_rmsd < 2.0: + assessment["confidence"] = "moderate" + else: + assessment["confidence"] = "low" + + return assessment +``` + +--- + +## 3. Reproducibility Metrics + +### Multi-Seed Variance + +Run docking with 3+ seeds and analyze variance: + +```python +def calculate_reproducibility_metrics(affinities: list) -> dict: + """ + Calculate reproducibility statistics. 
+ + Args: + affinities: List of best affinity from each seed + """ + import statistics + + mean = statistics.mean(affinities) + std = statistics.stdev(affinities) if len(affinities) > 1 else 0 + + # Coefficient of variation + cv = abs(std / mean) if mean != 0 else 0 + + # Range + range_val = max(affinities) - min(affinities) + + return { + "mean": mean, + "std": std, + "cv": cv, + "range": range_val, + "min": min(affinities), + "max": max(affinities) + } +``` + +### Reproducibility Grading + +| σ (kcal/mol) | CV | Grade | Interpretation | +|--------------|-----|-------|----------------| +| < 0.3 | < 0.05 | A+ | Excellent, highly reliable | +| 0.3 - 0.5 | 0.05 - 0.10 | A | Very good, reliable | +| 0.5 - 1.0 | 0.10 - 0.20 | B | Good, acceptable | +| 1.0 - 1.5 | 0.20 - 0.30 | C | Moderate, caution advised | +| > 1.5 | > 0.30 | D | Poor, results uncertain | + +### Decision Matrix + +```python +def reproducibility_recommendation(grade: str) -> str: + """Get recommendation based on reproducibility grade.""" + recommendations = { + "A+": "Results highly reliable. Proceed with confidence.", + "A": "Results reliable. Standard confidence appropriate.", + "B": "Results acceptable. Consider additional sampling if possible.", + "C": "Moderate reliability. Increase num_modes or investigate system.", + "D": "Poor reliability. System may be too flexible. Check preparation." + } + return recommendations.get(grade, "Unknown grade") +``` + +--- + +## 4. 
Contact Analysis + +### Contact Type Definitions + +```python +CONTACT_DEFINITIONS = { + "hydrophobic": { + "atom_types": ["C"], + "max_distance": 4.5, + "description": "Carbon-carbon contact (van der Waals)" + }, + "hbond_donor": { + "atom_types": ["N", "O"], + "max_distance": 3.5, + "description": "H-bond donor to acceptor" + }, + "hbond_acceptor": { + "atom_types": ["N", "O"], + "max_distance": 3.5, + "description": "H-bond acceptor to donor" + }, + "aromatic": { + "atom_types": ["aromatic_C"], + "max_distance": 5.0, + "centroid": True, + "description": "Aromatic ring interaction" + }, + "ionic": { + "atom_types": ["N+", "O-"], + "max_distance": 4.0, + "description": "Salt bridge or ionic interaction" + }, + "repulsive": { + "atom_types": ["any"], + "max_distance": 2.5, + "description": "Too close - potential clash" + } +} +``` + +### Contact Quality Metrics + +| Contact Type | Optimal Count | Acceptable Range | Too Many | Notes | +|--------------|---------------|------------------|----------|-------| +| Hydrophobic | 15-30 | 10-40 | >50 | Quality > quantity | +| H-bonds | 2-5 | 1-7 | >10 | Too many = desolvation penalty | +| Aromatic | 1-3 | 0-5 | >8 | Specific interactions valuable | +| Ionic | 0-2 | 0-3 | >5 | Charge complementarity | +| Repulsive | 0 | 0 | >0 | Indicates clashes | + +### Key Residue Contacts + +From the 6ELW-Se campaign, monitor these residue types: + +| Residue Type | Role | Optimal Contacts | Action if Missing | +|--------------|------|------------------|-------------------| +| TRP | Hydrophobic anchor | ≥15 | Extend tail to fill pocket | +| PHE/TYR | π-stacking | 2-5 | Add aromatic rings | +| HIS | H-bond partner | 1-2 | Add H-bond donors/acceptors | +| ASP/GLU | Salt bridge | 0-1 | Consider charged groups | +| SER/THR | H-bond network | 1-3 | Add OH groups carefully | + +### Contact Score + +```python +def calculate_contact_score(contacts: dict) -> float: + """ + Calculate overall contact quality score. 
+ + Returns score 0-1 based on contact quality and distribution. + """ + scores = { + "hydrophobic": min(len(contacts["hydrophobic"]) / 20, 1.0), + "hbond": min(len(contacts["hydrogen_bonds"]) / 4, 1.0), + "aromatic": min(len(contacts["aromatic"]) / 2, 1.0), + "no_clash": 0.0 if contacts["repulsive"] else 1.0 + } + + # Weighted average + weights = {"hydrophobic": 0.4, "hbond": 0.3, "aromatic": 0.2, "no_clash": 0.1} + + total = sum(scores[k] * weights[k] for k in scores) + return total +``` + +--- + +## 5. Composite Quality Score + +### Overall Docking Quality + +Combine all metrics into single quality score: + +```python +def calculate_overall_quality(docking_result: dict) -> dict: + """ + Calculate composite quality score. + + Combines geometry, affinity, reproducibility, and contacts. + """ + # Individual scores (0-1) + geometry_score = docking_result["geometry_validation"]["score"] + affinity_score = min(abs(docking_result["affinity"]) / 7.0, 1.0) + repro_score = 1.0 - min(docking_result["reproducibility"]["std"] / 2.0, 1.0) + contact_score = calculate_contact_score(docking_result["contacts"]) + + # Weights + weights = { + "geometry": 0.25, # Must have valid geometry + "affinity": 0.25, # Affinity matters + "reproducibility": 0.25, # Pose stability + "contacts": 0.25 # Interaction quality + } + + overall = ( + geometry_score * weights["geometry"] + + affinity_score * weights["affinity"] + + repro_score * weights["reproducibility"] + + contact_score * weights["contacts"] + ) + + return { + "overall_score": overall, + "component_scores": { + "geometry": geometry_score, + "affinity": affinity_score, + "reproducibility": repro_score, + "contacts": contact_score + }, + "grade": score_to_grade(overall) + } + +def score_to_grade(score: float) -> str: + """Convert score to letter grade.""" + if score >= 0.9: return "A+" + if score >= 0.8: return "A" + if score >= 0.7: return "B" + if score >= 0.6: return "C" + return "D" +``` + +### Quality Grade Interpretation + +| 
Grade | Score Range | Interpretation | +|-------|-------------|----------------| +| A+ | 0.90 - 1.00 | Exceptional - publishable quality | +| A | 0.80 - 0.89 | Excellent - reliable for design | +| B | 0.70 - 0.79 | Good - viable with minor issues | +| C | 0.60 - 0.69 | Fair - significant concerns | +| D | < 0.60 | Poor - not suitable | + +--- + +## 6. Pass/Fail Checklist + +### Minimum Criteria + +For a docking result to be considered valid, it MUST pass: + +- [ ] **Geometry**: Bond length within acceptable range for bond type +- [ ] **No Clashes**: No repulsive contacts (< 2.5 Å) +- [ ] **Reproducibility**: σ < 1.5 kcal/mol across seeds +- [ ] **Affinity**: Better than -3.0 kcal/mol + +### Recommended Criteria + +For a high-quality result, SHOULD meet: + +- [ ] **Geometry**: Bond length in optimal range +- [ ] **Contacts**: ≥10 hydrophobic + ≥1 H-bond +- [ ] **Reproducibility**: σ < 0.5 kcal/mol +- [ ] **Affinity**: Better than -5.0 kcal/mol +- [ ] **Key Residue**: Contacts with target pocket residue + +--- + +## 7. Comparison Metrics + +### Comparative Analysis + +When comparing multiple ligands: + +```python +def compare_ligands(ligand_results: list) -> dict: + """ + Compare multiple ligand docking results. + + Returns ranking with statistical significance. 
+ """ + # Sort by affinity + sorted_ligands = sorted(ligand_results, + key=lambda x: x["affinity"]) + + # Calculate statistical differences + comparisons = [] + for i in range(len(sorted_ligands)): + for j in range(i+1, len(sorted_ligands)): + l1, l2 = sorted_ligands[i], sorted_ligands[j] + + # Check if difference is significant (>1 kcal/mol) + diff = abs(l1["affinity"] - l2["affinity"]) + significant = diff > 1.0 + + comparisons.append({ + "better": l1["name"], + "worse": l2["name"], + "difference": diff, + "significant": significant + }) + + return { + "ranking": [l["name"] for l in sorted_ligands], + "comparisons": comparisons, + "champion": sorted_ligands[0]["name"] + } +``` + +### Significance Thresholds + +| ΔAffinity (kcal/mol) | Significance | Action | +|----------------------|--------------|--------| +| < 0.5 | Not significant | Consider equivalent | +| 0.5 - 1.0 | Marginally significant | Prefer the better (more negative) score | +| 1.0 - 2.0 | Significant | Clear winner | +| > 2.0 | Highly significant | Strong preference | + +--- + +## 8. Validation Report Template + +```yaml +docking_validation_report: + ligand: "compound_name" + receptor: "6ELW-Se" + receptor_atom: "A:46:SEG" + + geometry: + bond_type: "C-Se" + measured_distance: 1.94 + expected_range: [1.85, 2.05] + assessment: "optimal" + score: 1.0 + + affinity: + best_score: -5.62 + units: "kcal/mol" + interpretation: "moderate_binder" + + reproducibility: + seeds: [0, 42, 123] + affinities: [-5.55, -5.62, -5.58] + mean: -5.58 + std: 0.035 + grade: "A+" + + contacts: + hydrophobic: 24 + hydrogen_bonds: 3 + aromatic: 2 + repulsive: 0 + key_residues: + TRP_136: 24 + PHE_140: 3 + HIS_143: 2 + + overall: + composite_score: 0.94 + grade: "A+" + recommendation: "Proceed with design" +``` + +--- + +## Summary + +Use these validation criteria to: + +1. **Filter** poor docking results automatically +2. **Rank** compounds by quality, not just affinity +3. **Compare** results across different ligands +4. 
**Report** confidence levels for predictions + +Remember: **No single metric tells the whole story.** Always consider geometry, affinity, reproducibility, and contacts together. diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/warhead_chemistry.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/warhead_chemistry.md new file mode 100644 index 0000000..23c1b79 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/warhead_chemistry.md @@ -0,0 +1,402 @@ +# Warhead Chemistry Guide + +## Overview + +Covalent warheads are electrophilic functional groups that form covalent bonds with nucleophilic residues in proteins. This guide covers chemistry, SMARTS patterns, and compatibility for common warhead types. + +--- + +## Warhead Types + +### 1. Alkyne (Terminal Alkyne) + +**Chemistry:** +- Forms C≡C → C-S or C≡C → C-Se bond +- Click chemistry with thiols/selenols +- Irreversible attachment + +**SMARTS Patterns:** +``` +Attachment: [CD1] # Terminal alkyne carbon +Full warhead: [C;$(C#C)] # Any alkyne carbon +Terminal: [C;$(C#C-H)] # Terminal alkyne +``` + +**Target Residues:** +- Cys (SG) - Thiol +- SeCys (SEG) - Selenol (preferred, more nucleophilic) + +**Bond Formation:** +``` +R-C≡C-H + R'-Se-H → R-C≡C-Se-R' + H₂ +Expected bond length: C-Se 1.85-2.05 Å, C-S 1.75-1.95 Å +``` + +**Pros:** +- Stable covalent bond +- Clean reaction (no byproducts) +- Allows rotation around bond + +**Cons:** +- Requires terminal alkyne (synthetic complexity) +- Slower kinetics than some warheads + +**Example Ligands:** +- CC#CCOc1ccc(C(=O)NCC(N)=O)nn1 (6ELW-Se campaign) + +--- + +### 2. 
Acrylamide (Michael Acceptor) + +**Chemistry:** +- Michael addition to β-carbon +- Forms C=C-C(=O)N → C-S-C-C(=O)N +- Most common in approved drugs + +**SMARTS Patterns:** +``` +Attachment (β-carbon): [C;$(C=C-C(=O)N)] +Full warhead: C=CC(=O)N # Acrylamide +Substituted: C=C(C)C(=O)N # Crotonamide +Cyano: C=C(C#N)C(=O)N # Cyanoacrylamide (faster) +``` + +**Target Residues:** +- Cys (SG) - Primary target +- Lys (NZ) - Possible but slower + +**Bond Formation:** +``` +R-CH=CH-C(=O)-NH-R' + R''-S-H → R-CH(S-R'')-CH₂-C(=O)-NH-R' +Expected bond length: C-S 1.75-1.95 Å +``` + +**Pros:** +- Well-established in drugs (ibrutinib, osimertinib) +- Tunable reactivity (substituents on double bond) +- Good synthetic accessibility + +**Cons:** +- Can react with off-target thiols +- Stability concerns (hydrolysis) + +**Reactivity Tuning:** +| Substituent | Reactivity | Stability | +|-------------|------------|-----------| +| -H (acrylamide) | Moderate | Good | +| -CH₃ (crotonamide) | Low | Excellent | +| -CN (cyanoacrylamide) | High | Fair | + +--- + +### 3. Epoxide + +**Chemistry:** +- Ring opening by nucleophilic attack +- Forms 3-membered ring → open chain +- Can be reversible depending on conditions + +**SMARTS Patterns:** +``` +Attachment: [C;R1] # Carbon in exactly one ring of any size; use [C;r3] to require a 3-membered ring +Full warhead: C1OC1 # Epoxide ring +Substituted: C1OC1C # Substituted epoxide +``` + +**Target Residues:** +- Cys (SG) - Thiol (primary) +- Asp (OD) - Carboxylate +- Glu (OE) - Carboxylate +- Lys (NZ) - Amine (slower) + +**Bond Formation:** +``` + O OH + | | +R - C - C-R' + R''-S-H → R - C - C-R' + \ / | + C S-R'' + +Expected bond length: C-S 1.75-1.95 Å, C-O 1.40-1.50 Å +``` + +**Pros:** +- High reactivity +- Stereochemistry important (can be selective) +- Natural product-derived + +**Cons:** +- Potential off-target reactivity +- Stability issues in plasma +- Synthetic complexity + +--- + +### 4.
Haloacetamide + +**Chemistry:** +- SN2 displacement of halide +- Forms R-C(=O)-NH-CH₂-X → R-C(=O)-NH-CH₂-S-R' +- Fast kinetics + +**SMARTS Patterns:** +``` +Attachment: [C;$(C-[Cl,Br,I])] # Carbon attached to halogen +Full warhead: NC(=O)C[Cl,Br,I] # Haloacetamide +Chloro: NC(=O)CCl # Chloroacetamide +Bromo: NC(=O)CBr # Bromoacetamide (more reactive) +Iodo: NC(=O)CI # Iodoacetamide (most reactive) +``` + +**Target Residues:** +- Cys (SG) - Primary and fastest +- Lys (NZ) - Secondary target +- His (NE/ND) - Possible + +**Bond Formation:** +``` +R-C(=O)-NH-CH₂-Cl + R'-S-H → R-C(=O)-NH-CH₂-S-R' + HCl +Expected bond length: C-S 1.75-1.95 Å +``` + +**Reactivity Order:** +Iodo > Bromo > Chloro >> Fluoro (fluoro unreactive) + +**Pros:** +- Very fast reaction kinetics +- Highly selective for thiols at neutral pH +- Well-characterized chemistry + +**Cons:** +- Can react with multiple Cys (selectivity concerns) +- Potential immunogenicity +- Irreversible (may not be desired) + +--- + +### 5. Vinyl Sulfone + +**Chemistry:** +- Michael addition to β-carbon +- Forms R-SO₂-CH=CH₂ → R-SO₂-CH₂-CH₂-S-R' +- Highly irreversible + +**SMARTS Patterns:** +``` +Attachment: [C;$(C=C-S(=O)(=O))] # Beta carbon of vinyl sulfone +Full warhead: C=CS(=O)(=O)R # Vinyl sulfone +Methyl: C=CS(=O)(=O)C # Methyl vinyl sulfone +``` + +**Target Residues:** +- Cys (SG) - Primary +- Lys (NZ) - Secondary + +**Bond Formation:** +``` +R-SO₂-CH=CH₂ + R'-S-H → R-SO₂-CH₂-CH₂-S-R' +Expected bond length: C-S 1.75-1.95 Å +``` + +**Pros:** +- Highly stable covalent bond +- Good selectivity for Cys over Lys +- Tunable with sulfone substituents + +**Cons:** +- Very irreversible (concerns for drug development) +- Synthetic complexity +- Larger size than acrylamide + +--- + +## Warhead Selection Guide + +### By Target Residue + +| Warhead | Cys | SeCys | Lys | Ser | Tyr | Asp/Glu | +|---------|-----|-------|-----|-----|-----|---------| +| Alkyne | ★★★ | ★★★ | ★☆☆ | ☆☆☆ | ☆☆☆ | ☆☆☆ | +| Acrylamide | ★★★ | ★★☆ | ★★☆ | 
★☆☆ | ★☆☆ | ☆☆☆ | +| Epoxide | ★★★ | ★★☆ | ★★☆ | ★★☆ | ★☆☆ | ★★☆ | +| Haloacetamide | ★★★ | ★★★ | ★★☆ | ★☆☆ | ★☆☆ | ☆☆☆ | +| Vinyl Sulfone | ★★★ | ★★☆ | ★★☆ | ☆☆☆ | ☆☆☆ | ☆☆☆ | + +★★★ = Excellent, ★★☆ = Good, ★☆☆ = Fair, ☆☆☆ = Poor + +### By Application + +| Goal | Recommended Warhead | +|------|---------------------| +| Drug development | Acrylamide (proven track record) | +| Fast kinetics | Haloacetamide, Cyanoacrylamide | +| High stability | Alkyne, Vinyl Sulfone | +| Reversible potential | Epoxide (some conditions) | +| Selectivity for Cys | Acrylamide, Haloacetamide | +| SeCys targeting | Alkyne (preferred), Haloacetamide | + +--- + +## SMARTS Pattern Library + +### Attachment Atoms by Warhead + +```python +WARHEAD_SMARTS = { + # Alkyne + "alkyne_terminal": "[CD1]", + "alkyne_internal": "[C;$(C#C)]", + + # Acrylamide variants + "acrylamide_beta": "[C;$(C=C-C(=O)N)]", + "acrylamide_alpha": "[C;$(C=C-C(=O)N)]", # Same as beta (both carbons reactive) + "cyanoacrylamide_beta": "[C;$(C=C(C#N))]", + "crotonamide_beta": "[C;$(C=C(C))]", + + # Epoxide + "epoxide_carbon": "[C;R1]", + + # Haloacetamide + "chloroacetamide": "[C;$(C-[Cl])]", + "bromoacetamide": "[C;$(C-[Br])]", + "iodoacetamide": "[C;$(C-[I])]", + "haloacetamide_any": "[C;$(C-[Cl,Br,I])]", + + # Vinyl sulfone + "vinyl_sulfone_beta": "[C;$(C=C-S(=O)(=O))]", + + # Less common + "aziridine": "[C;R1;$(C-N)]", # 3-membered ring with N + "nitrile": "[C;$(C#N)]", # For reversible covalent +} +``` + +### Leaving Groups + +```python +LEAVING_GROUP_SMARTS = { + "alkyne_se": "[N;$(NC=O)]", # Amide leaving group + "alkyne_generic": "[*]", # Generic leaving group + "acrylamide": "[N;$(NC(=O)C=C)]", # Part of acrylamide + "epoxide": "[O;R1]", # Epoxide oxygen + "chloroacetamide": "[Cl]", # Chloride + "bromoacetamide": "[Br]", # Bromide + "iodoacetamide": "[I]", # Iodide +} +``` + +--- + +## Chemical Feasibility Checks + +### Warhead Stability + +```python +def check_warhead_stability(mol, warhead_type: str) -> dict: + 
""" + Check chemical stability of warhead in molecule. + + Returns stability concerns and recommendations. + """ + concerns = [] + + if warhead_type == "acrylamide": + # Check for hydrolysis-prone positions + if has_adjacent_hydroxyl(mol): + concerns.append("Adjacent OH may accelerate hydrolysis") + + elif warhead_type == "epoxide": + # Epoxides are generally reactive + concerns.append("High reactivity - monitor stability") + + elif warhead_type in ["chloroacetamide", "bromoacetamide"]: + # Check for elimination possibilities + if has_beta_hydrogen(mol): + concerns.append("β-hydrogen present - elimination possible") + + return { + "warhead": warhead_type, + "stability_concerns": concerns, + "recommendation": "Consider stability assessment" if concerns else "Likely stable" + } +``` + +### Synthetic Accessibility + +```python +def assess_synthetic_accessibility(warhead_type: str, scaffold_complexity: int) -> str: + """ + Estimate synthetic difficulty. + + Returns: "easy", "moderate", or "difficult" + """ + base_scores = { + "acrylamide": 2, # Easy (amide formation) + "alkyne": 3, # Moderate (requires Sonogashira or similar) + "epoxide": 4, # Moderate-Difficult (stereochemistry) + "chloroacetamide": 2, # Easy (chloroacetylation) + "bromoacetamide": 2, # Easy (bromoacetylation) + "vinyl_sulfone": 4 # Moderate-Difficult + } + + total_score = base_scores.get(warhead_type, 3) + scaffold_complexity + + if total_score <= 3: + return "easy" + elif total_score <= 6: + return "moderate" + else: + return "difficult" +``` + +--- + +## Custom Warhead Definition + +### Adding New Warheads + +To add a custom warhead, create a configuration file: + +```json +{ + "warhead_name": "my_custom_warhead", + "chemistry": { + "type": "michael_addition", + "target_residues": ["CYS", "LYS"], + "bond_formed": "C-S", + "expected_bond_length": [1.75, 1.95] + }, + "smarts": { + "attachment": "[C;$(C=C-C(=O))]", + "leaving_group": null, + "full_warhead": "C=CC(=O)N" + }, + "properties": { + 
"reversible": false, + "reactivity": "high", + "stability": "moderate", + "synthetic_difficulty": "moderate" + } +} +``` + +--- + +## Summary Table + +| Warhead | Target | Bond | Length (Å) | Reversibility | Drug Examples | +|---------|--------|------|------------|---------------|---------------| +| Alkyne | Cys/SeCys | C-S/Se | 1.75-2.05 | Irreversible | None yet | +| Acrylamide | Cys | C-S | 1.75-1.95 | Irreversible | Ibrutinib, Osimertinib | +| Epoxide | Cys/Asp/Glu | C-S/O | 1.75-1.95 | Cond. reversible | E-64 (inhibitor) | +| Haloacetamide | Cys/Lys | C-S/N | 1.75-1.95 | Irreversible | Research tools | +| Vinyl Sulfone | Cys/Lys | C-S | 1.75-1.95 | Irreversible | Research tools | + +--- + +## References + +1. Singh J, et al. (2011) "Emerging Role of Covalent Drugs" Nature Reviews Drug Discovery +2. London N, et al. (2014) "Covalent Ligand Discovery" Drug Discovery Today +3. Gehringer M, et al. (2016) "Emerging Covalent Warheads" J. Med. Chem. +4. Kaplan JB, et al. (2023) "Targeted Covalent Inhibitors" ACS Medicinal Chemistry Letters diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/workflow_guide.md b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/workflow_guide.md new file mode 100644 index 0000000..7b12b63 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/references/workflow_guide.md @@ -0,0 +1,727 @@ +# Covalent Docking 8-Step Workflow Guide + +This document provides the complete 8-step protocol for covalent docking campaigns. + +## Overview + +The 8-step workflow ensures reproducible, validated covalent docking results: + +1. **Ligand Preparation** - SMILES → 3D → optimized → final-form +2. **Reactive Residue Identification** - Locate target nucleophile +3. **Attachment Point Definition** - Define warhead attachment SMARTS +4. **Covalent Docking** - Run GNINA with multi-seed +5. **Geometry Validation** - Verify covalent bond feasibility +6. 
**Contact Analysis** - Map protein-ligand interactions +7. **Reproducibility Assessment** - Evaluate pose stability +8. **Visualization** - Generate PyMOL scripts + +--- + +## Step 1: Ligand Preparation + +### Purpose +Convert SMILES to dockable 3D structure with proper warhead geometry. + +### Process + +```python +# Full workflow: SMILES → 3D conformer → xtb optimization → tail removal → validation + +import subprocess +from rdkit import Chem +from rdkit.Chem import AllChem + +def prepare_ligand(smiles: str, warhead_type: str, output: str): + """ + Prepare ligand for covalent docking. + + Args: + smiles: SMILES of full molecule (including leaving group) + warhead_type: Type of covalent warhead + output: Output SDF file path + """ + # 1. Generate 3D conformer + mol = Chem.MolFromSmiles(smiles) + mol = Chem.AddHs(mol) + AllChem.EmbedMolecule(mol, AllChem.ETKDGv3()) + AllChem.MMFFOptimizeMolecule(mol) + + # 2. XTB geometry optimization + Chem.MolToMolFile(mol, "temp_3d.mol") + subprocess.run(["xtb", "temp_3d.mol", "--opt", "--o", "xtb_output"]) + + # 3. Remove leaving group (based on warhead type) + final_mol = remove_leaving_group(mol, warhead_type) + + # 4. Validate attachment atom + validate_attachment_atom(final_mol, warhead_type) + + # 5. 
Save final ligand + Chem.MolToMolFile(final_mol, output) + + return final_mol +``` + +### Warhead-Specific Tail Removal + +```python +WARHEAD_CONFIG = { + "alkyne_se": { + "leaving_group_smarts": "[N;$(NC=O)]", # Leaving group + "attachment_smarts": "[CD1]", # Terminal alkyne carbon + "tail_replacement": "H" # Replace with H after removal + }, + "acrylamide_cys": { + "leaving_group_smarts": "[N;$(NC(=O)C=C)]", + "attachment_smarts": "[C;$(C=C-C(=O)N)]", + "tail_replacement": "H" + }, + "epoxide_cys": { + "leaving_group_smarts": "[O;R1]", # Epoxide oxygen + "attachment_smarts": "[C;R1]", + "tail_replacement": None # Ring opening + }, + "haloacetamide_cys": { + "leaving_group_smarts": "[Cl,Br,I]", + "attachment_smarts": "[C;$(C-[Cl,Br,I])]", + "tail_replacement": "H" + } +} +``` + +### Output Validation + +- Verify attachment atom exists +- Check molecular weight (< 1000 Da preferred) +- Confirm no clashes in initial conformer + +--- + +## Step 2: Reactive Residue Identification + +### Purpose +Locate the nucleophilic residue that will form the covalent bond. + +### Manual Identification + +1. **Inspect PDB structure** + ```bash + grep -E "^ATOM|^HETATM" protein.pdb | grep -E "CYS|SEC|LYS|SER|TYR" + ``` + +2. **Check for catalytic residues** + - Look for residue in active site + - Check B-factor (lower = more rigid) + - Verify accessibility (not buried) + +### Automated Detection + +```python +def find_reactive_residues(pdb_file: str, residue_type: str = "CYS") -> list: + """ + Find potential reactive residues. 
+ + Returns list of dicts with: + - chain: Chain ID + - resnum: Residue number + - atom_name: Atom name (SG for Cys, SEG for SeCys, NZ for Lys) + - accessibility: Solvent accessibility score + """ + from Bio.PDB import PDBParser, SASA + + parser = PDBParser() + structure = parser.get_structure("protein", pdb_file) + + reactive = [] + for model in structure: + for chain in model: + for residue in chain: + if residue.get_resname() == residue_type: + # Check accessibility + accessibility = calculate_sasa(residue) + if accessibility > 10.0: # Accessible threshold + atom_name = get_reactive_atom(residue_type) + reactive.append({ + "chain": chain.id, + "resnum": residue.get_id()[1], + "atom_name": atom_name, + "accessibility": accessibility + }) + + return reactive + +REACTIVE_ATOMS = { + "CYS": "SG", # Thiol sulfur + "SEC": "SEG", # Selenol selenium + "LYS": "NZ", # Amino nitrogen + "SER": "OG", # Hydroxyl oxygen + "TYR": "OH" # Phenolic oxygen +} +``` + +### Format Specification + +Receptor atom format: `"chain:resnum:atom_name"` + +Examples: +- `"A:46:SEG"` - Chain A, residue 46, selenium atom (SeCys) +- `"B:156:SG"` - Chain B, residue 156, sulfur atom (Cys) +- `"A:73:NZ"` - Chain A, residue 73, nitrogen atom (Lys) + +--- + +## Step 3: Attachment Point Definition + +### Purpose +Define SMARTS pattern for the ligand atom that forms the covalent bond. 
+ +### Common Attachment SMARTS + +```python +ATTACHMENT_PATTERNS = { + # Alkyne - terminal carbon + "alkyne_terminal": "[CD1]", + + # Acrylamide - beta carbon (Michael acceptor) + "acrylamide_beta": "[C;$(C=C-C(=O)N)]", + + # Epoxide - ring carbon + "epoxide_carbon": "[C;R1]", + + # Haloacetamide - carbon attached to halogen + "haloacetamide_alpha": "[C;$(C-[Cl,Br,I])]", + + # Vinyl sulfone - beta carbon + "vinyl_sulfone_beta": "[C;$(C=C-S(=O)(=O))]", + + # Nitrile - carbon of nitrile group + "nitrile_carbon": "[C;$(C#N)]" +} +``` + +### SMARTS Validation + +```python +def validate_attachment_smarts(mol, smarts: str) -> bool: + """Check if SMARTS pattern matches exactly one atom in molecule.""" + from rdkit import Chem + + pattern = Chem.MolFromSmarts(smarts) + matches = mol.GetSubstructMatches(pattern) + + if len(matches) == 0: + raise ValueError(f"SMARTS pattern '{smarts}' matches no atoms") + elif len(matches) > 1: + raise ValueError(f"SMARTS pattern '{smarts}' matches multiple atoms: {len(matches)}") + + return True +``` + +--- + +## Step 4: Covalent Docking + +### Purpose +Run GNINA covalent docking with multi-seed for reproducibility. + +### Basic Command + +```bash +gnina \ + --receptor protein.pdb \ + --ligand ligand_prepared.sdf \ + --covalent_rec_atom A:46:SEG \ + --covalent_lig_atom_pattern "[CD1]" \ + --covalent_optimize_lig \ + --out docked_poses.sdf +``` + +### Multi-Seed Reproducibility Script + +```python +import subprocess +import json +import statistics +from pathlib import Path + +def dock_covalent_multi_seed( + ligand_sdf: str, + receptor_pdb: str, + receptor_atom: str, + attachment_smarts: str, + seeds: list = [0, 42, 123], + num_modes: int = 5, + output_dir: str = "docking_results" +): + """ + Run covalent docking with multiple seeds. + + Returns dict with results per seed and reproducibility statistics.
+ """ + Path(output_dir).mkdir(exist_ok=True) + + results = { + "seeds": seeds, + "poses_by_seed": {}, + "best_affinity_by_seed": {}, + "reproducibility_stats": {} + } + + for seed in seeds: + output_file = f"{output_dir}/seed_{seed}.sdf" + + cmd = [ + "gnina", + "--receptor", receptor_pdb, + "--ligand", ligand_sdf, + "--covalent_rec_atom", receptor_atom, + "--covalent_lig_atom_pattern", attachment_smarts, + "--covalent_optimize_lig", + "--seed", str(seed), + "--num_modes", str(num_modes), + "--out", output_file + ] + + subprocess.run(cmd, check=True) + + # Parse results + poses = parse_gnina_output(output_file) + results["poses_by_seed"][seed] = poses + results["best_affinity_by_seed"][seed] = poses[0]["affinity"] if poses else None + + # Calculate reproducibility statistics + affinities = list(results["best_affinity_by_seed"].values()) + results["reproducibility_stats"] = { + "mean": sum(affinities) / len(affinities), + "std": statistics.stdev(affinities), + "min": min(affinities), + "max": max(affinities), + "range": max(affinities) - min(affinities) + } + + return results +``` + +### Key Parameters + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `--covalent_rec_atom` | Required | Receptor atom (chain:resnum:atom) | +| `--covalent_lig_atom_pattern` | Required | Ligand attachment SMARTS | +| `--covalent_optimize_lig` | Off | Enable UFF optimization of covalent complex | +| `--num_modes` | 9 | Number of poses to generate | +| `--seed` | Random | Random seed for reproducibility | +| `--cnn_scoring` | rescore | Set to "none" for covalent docking (CNN not trained for covalent) | + +--- + +## Step 5: Geometry Validation + +### Purpose +Verify covalent bond is chemically reasonable. + +### Implementation + +```python +def validate_covalent_geometry( + docked_pdb: str, + receptor_pdb: str, + receptor_atom_spec: str, + expected_bond_type: str = "C-Se", + tolerance: float = 0.1 +) -> dict: + """ + Validate covalent bond geometry.
+ + Returns dict with: + - distance: measured bond length + - expected_range: (min, max) for bond type + - is_valid: bool + - assessment: str (optimal/acceptable/poor) + """ + BOND_LENGTHS = { + "C-Se": (1.85, 2.05), + "C-S": (1.75, 1.95), + "C-N": (1.40, 1.55), + "C-O": (1.40, 1.50) + } + + # Parse receptor atom + chain, resnum, atom_name = receptor_atom_spec.split(":") + resnum = int(resnum) + + # Load structures + receptor_coords = parse_pdb_atom(receptor_pdb, chain, resnum, atom_name) + ligand_coords = find_ligand_attachment_atom(docked_pdb) + + # Calculate distance + distance = np.linalg.norm(receptor_coords - ligand_coords) + + # Validate + expected_min, expected_max = BOND_LENGTHS[expected_bond_type] + + if expected_min - tolerance <= distance <= expected_max + tolerance: + assessment = "optimal" if expected_min <= distance <= expected_max else "acceptable" + is_valid = True + else: + assessment = "poor" + is_valid = False + + return { + "distance": distance, + "expected_min": expected_min, + "expected_max": expected_max, + "is_valid": is_valid, + "assessment": assessment + } +``` + +### Validation Output + +```json +{ + "distance": 1.94, + "expected_min": 1.85, + "expected_max": 2.05, + "is_valid": true, + "assessment": "optimal", + "receptor_atom": { + "chain": "A", + "resnum": 46, + "atom_name": "SEG", + "coordinates": [12.34, 5.67, 8.90] + }, + "ligand_attachment_atom": { + "atom_name": "C1", + "coordinates": [14.28, 5.45, 9.12] + } +} +``` + +--- + +## Step 6: Contact Analysis + +### Purpose +Map protein-ligand interactions to guide design. 
+ +### Contact Types + +```python +CONTACT_CUTOFFS = { + "hydrophobic": 4.5, # Carbon-carbon distance + "hbond_donor": 3.5, # H-bond donor to acceptor + "hbond_acceptor": 3.5, # H-bond acceptor to donor + "aromatic": 5.0, # Aromatic ring centroid distance + "ionic": 4.0, # Charged group distance + "repulsive": 3.0 # Too close (clash) +} + +def analyze_contacts(docked_pdb: str, receptor_pdb: str, cutoff: float = 5.0) -> dict: + """ + Analyze all protein-ligand contacts. + + Returns dict organized by contact type and residue. + """ + contacts = { + "hydrophobic": [], + "hydrogen_bonds": [], + "aromatic": [], + "ionic": [], + "repulsive": [] + } + + # Load structures + receptor = parse_pdb(receptor_pdb) + ligand = parse_pdb(docked_pdb, hetatm_only=True) + + # Analyze each ligand atom + for latom in ligand.atoms: + for ratom in receptor.atoms: + distance = np.linalg.norm(latom.coord - ratom.coord) + + if distance > cutoff: + continue + + # Classify contact + contact_type = classify_contact(latom, ratom, distance) + + if contact_type: + contacts[contact_type].append({ + "ligand_atom": latom.name, + "residue": f"{ratom.res_name} {ratom.res_num}", + "receptor_atom": ratom.name, + "distance": round(distance, 2) + }) + + return contacts +``` + +### Key Residues to Monitor + +From 6ELW-Se campaign, critical residues: +- **TRP 136**: Hydrophobic pocket (24 contacts in champion) +- **PHE 140**: π-stacking opportunities +- **HIS 143**: H-bond donor/acceptor +- **ALA 145**: Hydrophobic contact + +--- + +## Step 7: Reproducibility Assessment + +### Purpose +Evaluate pose stability across random seeds. + +### Statistics + +```python +def analyze_reproducibility(poses_by_seed: dict) -> dict: + """ + Analyze docking reproducibility across seeds. + + Returns dict with statistical analysis. 
+ """ + import statistics + + best_affinities = [poses[0]["affinity"] for poses in poses_by_seed.values()] + + analysis = { + "mean_affinity": statistics.mean(best_affinities), + "std_affinity": statistics.stdev(best_affinities), + "min_affinity": min(best_affinities), + "max_affinity": max(best_affinities), + "range": max(best_affinities) - min(best_affinities), + "best_seed": min(poses_by_seed.keys(), + key=lambda s: poses_by_seed[s][0]["affinity"]), + "recommendation": "" + } + + # Generate recommendation + std = analysis["std_affinity"] + if std < 0.5: + analysis["recommendation"] = "Excellent reproducibility. Results highly reliable." + analysis["reliability"] = "high" + elif std < 1.0: + analysis["recommendation"] = "Good reproducibility. Results reliable." + analysis["reliability"] = "good" + elif std < 1.5: + analysis["recommendation"] = "Moderate reproducibility. Consider more sampling." + analysis["reliability"] = "moderate" + else: + analysis["recommendation"] = "Poor reproducibility. Results uncertain. Increase seeds or check system." + analysis["reliability"] = "poor" + + return analysis +``` + +### Interpretation + +| σ (kcal/mol) | Assessment | Action | +|--------------|------------|--------| +| < 0.5 | Excellent | Proceed with confidence | +| 0.5 - 1.0 | Good | Standard reliability | +| 1.0 - 1.5 | Moderate | Consider increasing num_modes | +| > 1.5 | Poor | System may be too flexible; check preparation | + +--- + +## Step 8: Visualization + +### Purpose +Generate PyMOL scripts for structure analysis. 
+ +### PyMOL Script Generation + +```python +def generate_pymol_script( + docked_pdb: str, + receptor_pdb: str, + receptor_atom_spec: str, + output_path: str, + ligand_name: str = "ligand" +): + """Generate PyMOL visualization script.""" + + chain, resnum, atom_name = receptor_atom_spec.split(":") + + script = f''' +# Load structures +load {receptor_pdb}, receptor +load {docked_pdb}, {ligand_name} + +# Display settings +set cartoon_fancy_helices, 1 +set cartoon_fancy_sheets, 1 +set sphere_scale, 0.3 + +# Color receptor +color gray70, receptor + +# Color ligand by element +color cyan, {ligand_name} and elem C +color red, {ligand_name} and elem O +color blue, {ligand_name} and elem N + +# Highlight reactive residue +select reactive_res, receptor and chain {chain} and resi {resnum} +show sticks, reactive_res +color orange, reactive_res + +# Show covalent bond as distance +select ligand_atom, {ligand_name} and name C1 +select receptor_atom, receptor and chain {chain} and resi {resnum} and name {atom_name} +distance covalent_bond, ligand_atom, receptor_atom +set dash_color, yellow, covalent_bond +set dash_width, 3 + +# Show surrounding residues within 5A +select pocket, byres (receptor within 5 of {ligand_name}) +show sticks, pocket +color lime, pocket and elem C + +# Ligand representation +show sticks, {ligand_name} +show spheres, {ligand_name} + +# Labels +label reactive_res and name CA, "%s-%s" % (resn, resi) + +# Center view +zoom {ligand_name} + +# Save session +save session.pse +''' + + with open(output_path, 'w') as f: + f.write(script) + + return output_path +``` + +--- + +## Complete Example + +```python +#!/usr/bin/env python3 +"""Complete 8-step covalent docking workflow.""" + +import subprocess +import json + +def main(): + # Configuration + config = { + "smiles": "CC#CCOc1ccc(C(=O)NCC(N)=O)nn1", + "receptor": "6ELW-Se.pdb", + "receptor_atom": "A:46:SEG", + "attachment_smarts": "[CD1]", + "warhead_type": "alkyne_se", + "seeds": [0, 42, 123], + "num_modes": 5 
+ } + + print("=" * 60) + print("COVALENT DOCKING 8-STEP WORKFLOW") + print("=" * 60) + + # Step 1: Ligand Preparation + print("\n[1/8] Preparing ligand...") + subprocess.run([ + "python", "scripts/prepare_ligand.py", + "--smiles", config["smiles"], + "--warhead", config["warhead_type"], + "--output", "ligand_prepared.sdf" + ]) + + # Step 2-4: Docking + print("\n[2-4/8] Running covalent docking...") + result = dock_covalent_multi_seed( + ligand_sdf="ligand_prepared.sdf", + receptor_pdb=config["receptor"], + receptor_atom=config["receptor_atom"], + attachment_smarts=config["attachment_smarts"], + seeds=config["seeds"], + num_modes=config["num_modes"] + ) + + # Step 5: Geometry Validation + print("\n[5/8] Validating geometry...") + geom = validate_covalent_geometry( + docked_pdb="docking_results/seed_0.sdf", + receptor_pdb=config["receptor"], + receptor_atom_spec=config["receptor_atom"], + expected_bond_type="C-Se" + ) + print(f" Bond distance: {geom['distance']:.2f} Å ({geom['assessment']})") + + # Step 6: Contact Analysis + print("\n[6/8] Analyzing contacts...") + contacts = analyze_contacts( + docked_pdb="docking_results/seed_0.sdf", + receptor_pdb=config["receptor"] + ) + print(f" Hydrophobic: {len(contacts['hydrophobic'])}") + print(f" H-bonds: {len(contacts['hydrogen_bonds'])}") + + # Step 7: Reproducibility + print("\n[7/8] Assessing reproducibility...") + repro = analyze_reproducibility(result["poses_by_seed"]) + print(f" σ = {repro['std_affinity']:.2f} kcal/mol ({repro['reliability']})") + + # Step 8: Visualization + print("\n[8/8] Generating visualization...") + generate_pymol_script( + docked_pdb="docking_results/seed_0.sdf", + receptor_pdb=config["receptor"], + receptor_atom_spec=config["receptor_atom"], + output_path="visualize.pml" + ) + + print("\n" + "=" * 60) + print("WORKFLOW COMPLETE") + print(f"Best affinity: {repro['min_affinity']:.2f} kcal/mol") + print("=" * 60) + +if __name__ == "__main__": + main() +``` + +--- + +## Decision Tree + +``` 
+Start + │ + ▼ +Prepare Ligand + │ + ├──► Fails? ──► Check SMILES / Warhead type + │ + ▼ +Dock (Multi-seed) + │ + ├──► No poses? ──► Check receptor atom / SMARTS + │ + ▼ +Validate Geometry + │ + ├──► Poor? ──► Try different attachment SMARTS + │ + ▼ +Analyze Contacts + │ + ▼ +Check Reproducibility + │ + ├──► σ > 1.5? ──► Increase num_modes / Check system + │ + ▼ +Results Reliable? + │ + ├──► No ──► Iterate with analogs + │ + ▼ +Yes → Generate analogs / Proceed +``` diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/analyze_contacts.py b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/analyze_contacts.py new file mode 100644 index 0000000..1be5924 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/analyze_contacts.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +""" +Analyze protein-ligand contacts in docked poses. + +Identifies hydrogen bonds, hydrophobic contacts, aromatic interactions, and ionic contacts. 
+""" + +import argparse +import json +import numpy as np +from collections import defaultdict + + +def parse_pdb(pdb_file: str, hetatm_only: bool = False) -> list: + """Parse PDB file and return list of atoms.""" + atoms = [] + + with open(pdb_file) as f: + for line in f: + if not line.startswith(("ATOM", "HETATM")): + continue + + if hetatm_only and not line.startswith("HETATM"): + continue + + atom = { + "record": line[0:6].strip(), + "atom_num": int(line[6:11]), + "atom_name": line[12:16].strip(), + "res_name": line[17:20].strip(), + "chain": line[21], + "res_num": int(line[22:26]), + "x": float(line[30:38]), + "y": float(line[38:46]), + "z": float(line[46:54]), + "element": line[76:78].strip() if len(line) > 76 else line[12:16].strip()[0] + } + atoms.append(atom) + + return atoms + + +def calculate_distance(atom1: dict, atom2: dict) -> float: + """Calculate distance between two atoms.""" + coord1 = np.array([atom1["x"], atom1["y"], atom1["z"]]) + coord2 = np.array([atom2["x"], atom2["y"], atom2["z"]]) + return np.linalg.norm(coord1 - coord2) + + +def is_hydrophobic(atom: dict) -> bool: + """Check if atom is hydrophobic.""" + # Carbon atoms (except carbonyl) are hydrophobic + if atom["element"] == "C": + # Check if it's a carbonyl carbon + if atom["atom_name"].startswith("C") and "O" in atom["res_name"]: + return False + return True + return False + + +def is_hbond_donor(atom: dict) -> bool: + """Check if atom is H-bond donor.""" + # N-H or O-H groups + if atom["element"] in ["N", "O"]: + return True + return False + + +def is_hbond_acceptor(atom: dict) -> bool: + """Check if atom is H-bond acceptor.""" + # Lone pair bearing atoms + if atom["element"] in ["N", "O"]: + return True + return False + + +def is_aromatic(atom: dict) -> bool: + """Check if atom is part of aromatic system.""" + aromatic_residues = ["PHE", "TYR", "TRP", "HIS"] + return atom["res_name"] in aromatic_residues and atom["element"] == "C" + + +def is_charged(atom: dict) -> bool: + """Check if 
atom is charged.""" + # Simplified - would need proper charge assignment + charged_atoms = { + "ASP": ["OD1", "OD2"], + "GLU": ["OE1", "OE2"], + "LYS": ["NZ"], + "ARG": ["NH1", "NH2"], + "HIS": ["ND1", "NE2"] + } + + if atom["res_name"] in charged_atoms: + return atom["atom_name"] in charged_atoms[atom["res_name"]] + return False + + +def classify_contact(ligand_atom: dict, receptor_atom: dict, distance: float) -> str: + """Classify the type of contact.""" + + # Repulsive/Clash + if distance < 2.5: + return "repulsive" + + # Hydrophobic + if is_hydrophobic(ligand_atom) and is_hydrophobic(receptor_atom): + if distance < 4.5: + return "hydrophobic" + + # Hydrogen bond + if distance < 3.5: + if is_hbond_donor(ligand_atom) and is_hbond_acceptor(receptor_atom): + return "hydrogen_bond" + if is_hbond_acceptor(ligand_atom) and is_hbond_donor(receptor_atom): + return "hydrogen_bond" + + # Aromatic + if is_aromatic(ligand_atom) and is_aromatic(receptor_atom): + if distance < 5.0: + return "aromatic" + + # Ionic + if is_charged(ligand_atom) and is_charged(receptor_atom): + if distance < 4.0: + return "ionic" + + return None + + +def analyze_contacts(docked_pdb: str, receptor_pdb: str, cutoff: float = 5.0) -> dict: + """Analyze all protein-ligand contacts.""" + + # Load structures + receptor_atoms = parse_pdb(receptor_pdb) + ligand_atoms = parse_pdb(docked_pdb, hetatm_only=True) + + if not ligand_atoms: + print("Warning: No HETATM records found in docked file") + ligand_atoms = parse_pdb(docked_pdb) + + # Initialize contact storage + contacts = { + "hydrophobic": [], + "hydrogen_bond": [], + "aromatic": [], + "ionic": [], + "repulsive": [], + "all": [] + } + + # Analyze contacts + for latom in ligand_atoms: + for ratom in receptor_atoms: + distance = calculate_distance(latom, ratom) + + if distance > cutoff: + continue + + contact_type = classify_contact(latom, ratom, distance) + + contact_info = { + "ligand_atom": latom["atom_name"], + "receptor_residue": 
f"{ratom['res_name']} {ratom['res_num']}", + "receptor_atom": ratom["atom_name"], + "distance": round(distance, 2), + "type": contact_type + } + + contacts["all"].append(contact_info) + + if contact_type: + contacts[contact_type].append(contact_info) + + return contacts + + +def summarize_by_residue(contacts: dict) -> dict: + """Summarize contacts by receptor residue.""" + residue_contacts = defaultdict(lambda: defaultdict(int)) + + for contact in contacts["all"]: + residue = contact["receptor_residue"] + ctype = contact["type"] or "other" + residue_contacts[residue][ctype] += 1 + + return dict(residue_contacts) + + +def main(): + parser = argparse.ArgumentParser( + description="Analyze protein-ligand contacts" + ) + parser.add_argument("--docked", required=True, + help="Docked pose PDB file") + parser.add_argument("--receptor", required=True, + help="Receptor PDB file") + parser.add_argument("--cutoff", type=float, default=5.0, + help="Distance cutoff in Angstroms") + parser.add_argument("--output", help="Output JSON file") + parser.add_argument("--summary", action="store_true", + help="Print summary only") + + args = parser.parse_args() + + print("="*60) + print("CONTACT ANALYSIS") + print("="*60) + print(f"\nDocked file: {args.docked}") + print(f"Receptor file: {args.receptor}") + print(f"Distance cutoff: {args.cutoff} Å") + + # Analyze contacts + contacts = analyze_contacts(args.docked, args.receptor, args.cutoff) + + # Print summary + print("\n" + "-"*60) + print("CONTACT SUMMARY") + print("-"*60) + print(f"\nTotal contacts within {args.cutoff} Å: {len(contacts['all'])}") + print(f"\nBy type:") + print(f" Hydrophobic: {len(contacts['hydrophobic']):3d}") + print(f" H-bonds: {len(contacts['hydrogen_bond']):3d}") + print(f" Aromatic: {len(contacts['aromatic']):3d}") + print(f" Ionic: {len(contacts['ionic']):3d}") + print(f" Repulsive: {len(contacts['repulsive']):3d}") + + # Summarize by residue + by_residue = summarize_by_residue(contacts) + + print("\n" + 
"-"*60) + print("TOP RESIDUES BY CONTACT COUNT") + print("-"*60) + + sorted_residues = sorted( + by_residue.items(), + key=lambda x: sum(x[1].values()), + reverse=True + )[:10] + + for residue, types in sorted_residues: + total = sum(types.values()) + type_str = ", ".join([f"{k}:{v}" for k, v in types.items()]) + print(f" {residue:15s} {total:3d} contacts ({type_str})") + + # Detailed list if not summary mode + if not args.summary and contacts['all']: + print("\n" + "-"*60) + print("DETAILED CONTACT LIST (first 20)") + print("-"*60) + print(f"{'Ligand':<10} {'Residue':<15} {'Atom':<6} {'Dist':<6} {'Type'}") + print("-"*60) + + for c in contacts['all'][:20]: + print(f"{c['ligand_atom']:<10} {c['receptor_residue']:<15} " + f"{c['receptor_atom']:<6} {c['distance']:<6.2f} {c['type'] or 'other'}") + + # Prepare output + output = { + "cutoff": args.cutoff, + "summary": { + "total_contacts": len(contacts['all']), + "hydrophobic": len(contacts['hydrophobic']), + "hydrogen_bonds": len(contacts['hydrogen_bond']), + "aromatic": len(contacts['aromatic']), + "ionic": len(contacts['ionic']), + "repulsive": len(contacts['repulsive']) + }, + "by_residue": by_residue, + "contacts": contacts['all'] if not args.summary else None + } + + # Save to file if requested + if args.output: + with open(args.output, 'w') as f: + json.dump(output, f, indent=2) + print(f"\nResults saved to: {args.output}") + + print("\n" + "="*60) + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/batch_dock.py b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/batch_dock.py new file mode 100644 index 0000000..0aa6222 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/batch_dock.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 +""" +Batch docking script for covalent inhibitors. + +Docks multiple ligands in parallel and generates a summary report. 
+""" + +import argparse +import json +import subprocess +import concurrent.futures +from pathlib import Path +from glob import glob + + +def dock_single_ligand( + ligand_sdf: str, + receptor_pdb: str, + receptor_atom: str, + attachment_smarts: str, + output_dir: str, + seeds: list = None +) -> dict: + """Dock a single ligand.""" + + seeds = seeds or [0, 42, 123] + ligand_name = Path(ligand_sdf).stem + output_prefix = f"{output_dir}/{ligand_name}" + + result = { + "ligand": ligand_name, + "ligand_file": ligand_sdf, + "success": False, + "affinity": None, + "error": None + } + + try: + # Run GNINA with multi-seed + cmd = [ + "gnina", + "--receptor", receptor_pdb, + "--ligand", ligand_sdf, + "--covalent", receptor_atom, + "--cov_atom", attachment_smarts, + "--covalent_optimize", + "--num_modes", "5" + ] + + # Add seeds + for seed in seeds: + cmd.extend(["--seed", str(seed)]) + + cmd.extend(["--out", f"{output_prefix}_docked.sdf"]) + + # Run docking + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=300 + ) + + if proc.returncode != 0: + result["error"] = proc.stderr + return result + + # Parse output to get affinity + # This is simplified - real implementation would parse SDF output + affinity = parse_affinity_from_output(proc.stdout) + + result["success"] = True + result["affinity"] = affinity + result["output_file"] = f"{output_prefix}_docked.sdf" + + except subprocess.TimeoutExpired: + result["error"] = "Docking timeout" + except FileNotFoundError: + result["error"] = "GNINA not found" + except Exception as e: + result["error"] = str(e) + + return result + + +def parse_affinity_from_output(stdout: str) -> float: + """Parse affinity from GNINA output.""" + # Simplified - look for affinity in output + # Real implementation would parse SDF file + for line in stdout.split('\n'): + if 'Affinity' in line or 'CNNaffinity' in line: + try: + # Extract number from line + parts = line.split() + for part in parts: + try: + return float(part) + except 
ValueError: + continue + except: + pass + return -1.0 # Unknown + + +def main(): + parser = argparse.ArgumentParser( + description="Batch dock multiple covalent ligands" + ) + parser.add_argument("--ligands", required=True, + help="Ligand files (glob pattern or directory)") + parser.add_argument("--receptor", required=True, + help="Receptor PDB file") + parser.add_argument("--receptor_atom", required=True, + help="Receptor atom (format: chain:resnum:atom_name)") + parser.add_argument("--attachment_smarts", default="[CD1]", + help="Attachment atom SMARTS pattern") + parser.add_argument("--output_dir", default="batch_results", + help="Output directory") + parser.add_argument("--seeds", default="0,42,123", + help="Comma-separated random seeds") + parser.add_argument("--parallel", type=int, default=1, + help="Number of parallel processes") + parser.add_argument("--output_json", default="batch_results.json", + help="Output JSON summary") + + args = parser.parse_args() + + # Parse seeds + seeds = [int(s) for s in args.seeds.split(",")] + + # Find ligand files + if Path(args.ligands).is_dir(): + ligand_files = list(Path(args.ligands).glob("*.sdf")) + else: + ligand_files = [Path(f) for f in glob(args.ligands)] + + if not ligand_files: + print(f"Error: No ligand files found matching '{args.ligands}'") + return 1 + + print("="*60) + print("BATCH COVALENT DOCKING") + print("="*60) + print(f"\nReceptor: {args.receptor}") + print(f"Receptor atom: {args.receptor_atom}") + print(f"Attachment SMARTS: {args.attachment_smarts}") + print(f"Seeds: {seeds}") + print(f"Ligands: {len(ligand_files)}") + print(f"Parallel: {args.parallel}") + print(f"Output directory: {args.output_dir}") + + # Create output directory + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + + # Dock ligands + print("\n" + "-"*60) + print("DOCKING PROGRESS") + print("-"*60) + + results = [] + + if args.parallel > 1: + with concurrent.futures.ProcessPoolExecutor(max_workers=args.parallel) as executor: + 
futures = { + executor.submit( + dock_single_ligand, + str(ligand), + args.receptor, + args.receptor_atom, + args.attachment_smarts, + args.output_dir, + seeds + ): ligand for ligand in ligand_files + } + + for future in concurrent.futures.as_completed(futures): + ligand = futures[future] + try: + result = future.result() + results.append(result) + status = "✓" if result["success"] else "✗" + aff_str = f"{result['affinity']:.2f}" if result["affinity"] else "N/A" + print(f" {status} {result['ligand']}: {aff_str} kcal/mol") + except Exception as e: + print(f" ✗ {ligand.name}: Error - {e}") + results.append({ + "ligand": ligand.stem, + "success": False, + "error": str(e) + }) + else: + for ligand in ligand_files: + result = dock_single_ligand( + str(ligand), + args.receptor, + args.receptor_atom, + args.attachment_smarts, + args.output_dir, + seeds + ) + results.append(result) + + status = "✓" if result["success"] else "✗" + aff_str = f"{result['affinity']:.2f}" if result["affinity"] else "N/A" + print(f" {status} {result['ligand']}: {aff_str} kcal/mol") + + # Summary statistics + successful = [r for r in results if r["success"]] + failed = [r for r in results if not r["success"]] + + print("\n" + "-"*60) + print("SUMMARY") + print("-"*60) + print(f"\nTotal: {len(results)}") + print(f" Successful: {len(successful)}") + print(f" Failed: {len(failed)}") + + if successful: + affinities = [r["affinity"] for r in successful if r["affinity"]] + if affinities: + print(f"\nAffinity Statistics:") + print(f" Best: {min(affinities):.2f} kcal/mol") + print(f" Worst: {max(affinities):.2f} kcal/mol") + print(f" Mean: {sum(affinities)/len(affinities):.2f} kcal/mol") + + # Top 5 + sorted_results = sorted(successful, key=lambda x: x["affinity"] or 0) + print(f"\nTop 5:") + for i, r in enumerate(sorted_results[:5], 1): + print(f" {i}. 
{r['ligand']}: {r['affinity']:.2f} kcal/mol") + + if failed: + print(f"\nFailed ligands:") + for r in failed[:5]: + print(f" - {r['ligand']}: {r.get('error', 'Unknown error')}") + + # Save results + output_data = { + "receptor": args.receptor, + "receptor_atom": args.receptor_atom, + "attachment_smarts": args.attachment_smarts, + "seeds": seeds, + "summary": { + "total": len(results), + "successful": len(successful), + "failed": len(failed), + "best_affinity": min([r["affinity"] for r in successful if r["affinity"]], default=None) + }, + "results": results + } + + with open(args.output_json, 'w') as f: + json.dump(output_data, f, indent=2) + + print(f"\nResults saved to: {args.output_json}") + print("\n" + "="*60) + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/prepare_ligand.py b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/prepare_ligand.py new file mode 100644 index 0000000..d450e28 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/prepare_ligand.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +""" +Prepare ligand for covalent docking. + +Workflow: SMILES → 3D conformer → XTB optimization → tail removal → validation +""" + +import argparse +import subprocess +import tempfile +import os +from pathlib import Path + +try: + from rdkit import Chem + from rdkit.Chem import AllChem, Descriptors + RDKIT_AVAILABLE = True +except ImportError: + RDKIT_AVAILABLE = False + print("Warning: RDKit not available. 
Some features limited.") + + +WARHEAD_CONFIG = { + "alkyne_se": { + "name": "Terminal alkyne (SeCys target)", + "leaving_group_smarts": "[N;$(NC=O)]", + "attachment_smarts": "[CD1]", + "expected_bond": "C-Se" + }, + "alkyne_cys": { + "name": "Terminal alkyne (Cys target)", + "leaving_group_smarts": "[N;$(NC=O)]", + "attachment_smarts": "[CD1]", + "expected_bond": "C-S" + }, + "acrylamide_cys": { + "name": "Acrylamide (Cys target)", + "leaving_group_smarts": "[N;$(NC(=O)C=C)]", + "attachment_smarts": "[C;$(C=C-C(=O)N)]", + "expected_bond": "C-S" + }, + "epoxide_cys": { + "name": "Epoxide (Cys target)", + "leaving_group_smarts": "[O;R1]", + "attachment_smarts": "[C;R1]", + "expected_bond": "C-S" + }, + "haloacetamide_cys": { + "name": "Haloacetamide (Cys target)", + "leaving_group_smarts": "[Cl,Br,I]", + "attachment_smarts": "[C;$(C-[Cl,Br,I])]", + "expected_bond": "C-S" + } +} + + +def generate_3d_conformer(smiles: str, output_mol: str) -> bool: + """Generate 3D conformer from SMILES.""" + if not RDKIT_AVAILABLE: + print("Error: RDKit required for 3D generation") + return False + + mol = Chem.MolFromSmiles(smiles) + if mol is None: + print(f"Error: Invalid SMILES: {smiles}") + return False + + mol = Chem.AddHs(mol) + + # Generate 3D coordinates + AllChem.EmbedMolecule(mol, AllChem.ETKDGv3()) + AllChem.MMFFOptimizeMolecule(mol, mmffVariant='MMFF94') + + # Save + Chem.MolToMolFile(mol, output_mol) + return True + + +def run_xtb_optimization(input_mol: str, output_xyz: str) -> bool: + """Run XTB geometry optimization.""" + try: + result = subprocess.run( + ["xtb", input_mol, "--opt", "--o", output_xyz], + capture_output=True, + text=True, + timeout=300 + ) + return result.returncode == 0 + except FileNotFoundError: + print("Warning: XTB not found. 
Using unoptimized conformer.") + return False + except subprocess.TimeoutExpired: + print("Warning: XTB optimization timed out.") + return False + + +def remove_leaving_group(mol, warhead_type: str): + """Remove leaving group based on warhead type.""" + if warhead_type not in WARHEAD_CONFIG: + print(f"Warning: Unknown warhead type {warhead_type}, no removal performed") + return mol + + config = WARHEAD_CONFIG[warhead_type] + + # This is a simplified version - full implementation would use RDKit edits + # to actually remove the leaving group atoms + + return mol + + +def validate_attachment_atom(mol, warhead_type: str) -> dict: + """Validate that attachment atom exists and is accessible.""" + if warhead_type not in WARHEAD_CONFIG: + return {"valid": False, "error": f"Unknown warhead type: {warhead_type}"} + + config = WARHEAD_CONFIG[warhead_type] + + if not RDKIT_AVAILABLE: + return {"valid": True, "warning": "RDKit not available, skipping validation"} + + pattern = Chem.MolFromSmarts(config["attachment_smarts"]) + matches = mol.GetSubstructMatches(pattern) + + if len(matches) == 0: + return { + "valid": False, + "error": f"Attachment SMARTS '{config['attachment_smarts']}' not found" + } + elif len(matches) > 1: + return { + "valid": True, + "warning": f"Multiple attachment points found ({len(matches)}), using first" + } + + return { + "valid": True, + "attachment_idx": matches[0][0], + "expected_bond": config["expected_bond"] + } + + +def calculate_properties(smiles: str) -> dict: + """Calculate molecular properties.""" + if not RDKIT_AVAILABLE: + return {} + + mol = Chem.MolFromSmiles(smiles) + if mol is None: + return {} + + return { + "molecular_weight": Descriptors.MolWt(mol), + "logp": Descriptors.MolLogP(mol), + "hbd": Descriptors.NumHDonors(mol), + "hba": Descriptors.NumHAcceptors(mol), + "rotatable_bonds": Descriptors.NumRotatableBonds(mol), + "tpsa": Descriptors.TPSA(mol) + } + + +def main(): + parser = argparse.ArgumentParser( + description="Prepare 
ligand for covalent docking" + ) + parser.add_argument("--smiles", required=True, help="SMILES string of full molecule") + parser.add_argument("--warhead", default="alkyne_se", + choices=list(WARHEAD_CONFIG.keys()), + help="Warhead type") + parser.add_argument("--output", required=True, help="Output SDF file") + parser.add_argument("--ligand_id", help="Optional ligand identifier") + parser.add_argument("--optimize", action="store_true", default=True, + help="Run XTB optimization") + parser.add_argument("--no_optimize", action="store_true", + help="Skip XTB optimization") + + args = parser.parse_args() + + print(f"Preparing ligand: {args.ligand_id or 'unnamed'}") + print(f"SMILES: {args.smiles}") + print(f"Warhead: {WARHEAD_CONFIG[args.warhead]['name']}") + + # Calculate properties + props = calculate_properties(args.smiles) + if props: + print(f"\nProperties:") + print(f" MW: {props['molecular_weight']:.1f}") + print(f" LogP: {props['logp']:.2f}") + print(f" HBD/HBA: {props['hbd']}/{props['hba']}") + + with tempfile.TemporaryDirectory() as tmpdir: + # Step 1: Generate 3D conformer + print("\n[1/3] Generating 3D conformer...") + mol_file = os.path.join(tmpdir, "initial_3d.mol") + if not generate_3d_conformer(args.smiles, mol_file): + print("Error: Failed to generate 3D conformer") + return 1 + print(" ✓ 3D conformer generated") + + # Step 2: XTB optimization (optional) + if args.optimize and not args.no_optimize: + print("\n[2/3] Running XTB optimization...") + xtb_output = os.path.join(tmpdir, "xtb_opt.xyz") + if run_xtb_optimization(mol_file, xtb_output): + print(" ✓ XTB optimization complete") + mol_file = xtb_output + else: + print(" ⚠ XTB optimization failed or skipped") + else: + print("\n[2/3] Skipping XTB optimization") + + # Step 3: Remove leaving group and validate + print("\n[3/3] Processing for covalent docking...") + + if RDKIT_AVAILABLE: + mol = Chem.MolFromMolFile(mol_file) + if mol is None: + # Try XYZ format + mol = Chem.MolFromSmiles(args.smiles) + 
mol = Chem.AddHs(mol) + + # Remove leaving group + mol = remove_leaving_group(mol, args.warhead) + + # Validate attachment + validation = validate_attachment_atom(mol, args.warhead) + + if not validation["valid"]: + print(f" ✗ Validation failed: {validation['error']}") + return 1 + + if "warning" in validation: + print(f" ⚠ {validation['warning']}") + + # Save final + Chem.MolToMolFile(mol, args.output) + else: + # Without RDKit, just copy the file + import shutil + shutil.copy(mol_file, args.output) + + print(f" ✓ Final ligand saved: {args.output}") + + print("\n" + "="*60) + print("Ligand preparation complete") + print("="*60) + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/validate_geometry.py b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/validate_geometry.py new file mode 100644 index 0000000..b49e971 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/covalent-docking/scripts/validate_geometry.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Validate covalent bond geometry in docked poses. + +Measures receptor-ligand distance and compares to expected bond lengths. 
+""" + +import argparse +import json +import numpy as np +from pathlib import Path + + +def parse_atom_line(line: str) -> dict: + """Parse PDB ATOM/HETATM line.""" + return { + "record": line[0:6].strip(), + "atom_num": int(line[6:11]), + "atom_name": line[12:16].strip(), + "res_name": line[17:20].strip(), + "chain": line[21], + "res_num": int(line[22:26]), + "x": float(line[30:38]), + "y": float(line[38:46]), + "z": float(line[46:54]) + } + + +def find_receptor_atom(pdb_file: str, chain: str, res_num: int, atom_name: str) -> dict: + """Find specific atom in receptor PDB.""" + with open(pdb_file) as f: + for line in f: + if not line.startswith(("ATOM", "HETATM")): + continue + + atom = parse_atom_line(line) + if (atom["chain"] == chain and + atom["res_num"] == res_num and + atom["atom_name"] == atom_name): + return atom + + return None + + +def find_ligand_attachment_atom(pdb_file: str) -> dict: + """Find the ligand atom closest to where covalent bond should form.""" + # In covalent docking, the ligand should be positioned such that + # the warhead attachment atom is near the receptor reactive atom + + # For now, find the first HETATM that's likely the attachment point + # (closest to the receptor atom would require knowing both) + + hetatms = [] + with open(pdb_file) as f: + for line in f: + if line.startswith("HETATM"): + atom = parse_atom_line(line) + hetatms.append(atom) + + if not hetatms: + return None + + # Return first carbon atom (likely attachment point) + for atom in hetatms: + if atom["atom_name"].startswith("C"): + return atom + + return hetatms[0] + + +def calculate_distance(atom1: dict, atom2: dict) -> float: + """Calculate distance between two atoms.""" + coord1 = np.array([atom1["x"], atom1["y"], atom1["z"]]) + coord2 = np.array([atom2["x"], atom2["y"], atom2["z"]]) + return np.linalg.norm(coord1 - coord2) + + +def validate_bond_length(bond_type: str, distance: float, tolerance: float) -> dict: + """Validate bond length against expected range.""" + 
BOND_LENGTHS = { + "C-Se": (1.85, 2.05), + "C-S": (1.75, 1.95), + "C-N": (1.40, 1.55), + "C-O": (1.40, 1.50) + } + + if bond_type not in BOND_LENGTHS: + return { + "valid": False, + "error": f"Unknown bond type: {bond_type}" + } + + expected_min, expected_max = BOND_LENGTHS[bond_type] + + if expected_min - tolerance <= distance <= expected_max + tolerance: + if expected_min <= distance <= expected_max: + assessment = "optimal" + else: + assessment = "acceptable" + is_valid = True + else: + assessment = "poor" + is_valid = False + + return { + "valid": is_valid, + "assessment": assessment, + "distance": round(distance, 2), + "expected_min": expected_min, + "expected_max": expected_max, + "tolerance": tolerance + } + + +def main(): + parser = argparse.ArgumentParser( + description="Validate covalent bond geometry" + ) + parser.add_argument("--docked", required=True, + help="Docked pose PDB/SDF file") + parser.add_argument("--receptor", required=True, + help="Receptor PDB file") + parser.add_argument("--receptor_atom", required=True, + help="Receptor atom (format: chain:resnum:atom_name)") + parser.add_argument("--bond_type", default="C-Se", + choices=["C-Se", "C-S", "C-N", "C-O"], + help="Expected covalent bond type") + parser.add_argument("--tolerance", type=float, default=0.1, + help="Tolerance beyond expected range (Angstroms)") + parser.add_argument("--output", help="Output JSON file (optional)") + + args = parser.parse_args() + + print("="*60) + print("COVALENT BOND GEOMETRY VALIDATION") + print("="*60) + + # Parse receptor atom specification + try: + chain, resnum, atom_name = args.receptor_atom.split(":") + resnum = int(resnum) + except ValueError: + print(f"Error: Invalid receptor atom format. 
Use 'chain:resnum:atom_name'") + return 1 + + print(f"\nReceptor atom: {chain}:{resnum}:{atom_name}") + print(f"Expected bond type: {args.bond_type}") + + # Find receptor atom + receptor_atom = find_receptor_atom(args.receptor, chain, resnum, atom_name) + if receptor_atom is None: + print(f"Error: Could not find receptor atom {args.receptor_atom}") + return 1 + + print(f" Found at: ({receptor_atom['x']:.3f}, {receptor_atom['y']:.3f}, {receptor_atom['z']:.3f})") + + # Find ligand attachment atom + ligand_atom = find_ligand_attachment_atom(args.docked) + if ligand_atom is None: + print(f"Error: Could not find ligand attachment atom") + return 1 + + print(f"\nLigand atom: {ligand_atom['atom_name']}") + print(f" Found at: ({ligand_atom['x']:.3f}, {ligand_atom['y']:.3f}, {ligand_atom['z']:.3f})") + + # Calculate distance + distance = calculate_distance(receptor_atom, ligand_atom) + print(f"\nDistance: {distance:.2f} Å") + + # Validate + result = validate_bond_length(args.bond_type, distance, args.tolerance) + + print(f"\nExpected range: {result['expected_min']:.2f} - {result['expected_max']:.2f} Å") + print(f"Assessment: {result['assessment'].upper()}") + + if result['valid']: + print("\n✓ Geometry VALID") + else: + print("\n✗ Geometry INVALID") + print(f" Distance {distance:.2f} Å outside acceptable range") + + # Prepare output + output = { + "success": result['valid'], + "distance": result['distance'], + "expected_range": [result['expected_min'], result['expected_max']], + "assessment": result['assessment'], + "receptor_atom": { + "name": receptor_atom['atom_name'], + "residue": f"{receptor_atom['res_name']} {receptor_atom['res_num']}", + "chain": receptor_atom['chain'], + "coordinates": [receptor_atom['x'], receptor_atom['y'], receptor_atom['z']] + }, + "ligand_atom": { + "name": ligand_atom['atom_name'], + "coordinates": [ligand_atom['x'], ligand_atom['y'], ligand_atom['z']] + } + } + + # Save to file if requested + if args.output: + with open(args.output, 'w') as 
f: + json.dump(output, f, indent=2) + print(f"\nResults saved to: {args.output}") + + print("\n" + "="*60) + + return 0 if result['valid'] else 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/mcp_host/skills/scientific-skills/scientific-skills/feature-based-molecular-networking/SKILL.md b/mcp_host/skills/scientific-skills/scientific-skills/feature-based-molecular-networking/SKILL.md new file mode 100644 index 0000000..0adc875 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/feature-based-molecular-networking/SKILL.md @@ -0,0 +1,455 @@ +# Feature-Based Molecular Networking (FBMN) - Agent Skills + +## Domain Overview + +FBMN connects LC-MS/MS metabolomics data with bioactivity assays to identify bioactive compounds in complex mixtures (e.g., natural products, botanical extracts). + +**Core Principle**: Compounds with similar MS/MS fragmentation patterns are structurally related. If one is bioactive, its neighbors in the network may also be bioactive. + +## Standard Workflow + +### Stage 1: Data Preprocessing +1. **Input Requirements**: + - Raw MS data (.mzXML/.mzML) - centroided, centroid mode + - Feature quantification table (from MZmine/Optimus) + - Bioactivity data (EC50, CC50, % inhibition, etc.) + +2. **Feature Table Format**: + - Rows: Features (compounds) + - Columns: Sample intensities + - Metadata: m/z, RT, optional compound IDs + - Bioactivity row: Often embedded as first/last row + +3. **Concrete Data Format Examples**: + + **MZmine Feature Quantification Table** (CSV): + ``` + row ID,row m/z,row retention time,Extract.mzXML Peak area,F_5.mzXML Peak area,F_6.mzXML Peak area,... + BioactivityCHIKV,,,68,1,4,... + 1,270.279,1698,5690330.648,246596025.8,186949611.9,... + 2,271.283,1699,569378.051,40396489.5,30710029.19,... 
+ ``` + - First three columns are always: `row ID`, `row m/z`, `row retention time` + - Sample columns follow the pattern: `<sample_name>.mzXML Peak area` + - The bioactivity row has an identifier (e.g., `BioactivityCHIKV`) in `row ID`, empty m/z and RT, and activity values in sample columns + - Intensity values are peak areas (continuous, non-negative, can be zero) + - Retention time is in seconds + + **MGF Spectral File** (MS/MS spectra): + ``` + BEGIN IONS + FEATURE_ID=9 + PEPMASS=270.279162444 + RTINSECONDS=1697.6755 + SCANS=1 + MSLEVEL=2 + CHARGE=1+ + 88.075294 60009.355469 + 102.091164 493287.875000 + 116.106377 447821.937500 + 130.122665 232475.390625 + 144.137909 127955.078125 + END IONS + ``` + - Each spectrum is delimited by `BEGIN IONS` / `END IONS` + - Header fields: `FEATURE_ID` (links to feature table `row ID`), `PEPMASS` (precursor m/z), `RTINSECONDS` (retention time) + - Body: pairs of `m/z intensity` values (fragment peaks), one per line + - `PEPMASS` is the parent ion mass — this is the intact molecule's m/z + - Fragment peaks are the pieces produced when the parent ion breaks apart + + **Metadata File** (CSV, maps samples to bioactivity): + ``` + sample_id,filename,activity + Extract,Extract.mzXML Peak area,68.0 + F_5,F_5.mzXML Peak area,1.0 + F_6,F_6.mzXML Peak area,4.0 + ``` + - `filename` must exactly match column headers in the feature table + +### Stage 2: Molecular Networking +**Goal**: Connect structurally similar compounds via MS/MS spectral similarity. 
+ +**Key Parameters**: +| Parameter | Typical Range | Guidance | +|-----------|---------------|----------| +| Cosine tolerance | 0.02-0.1 Da | Lower = stricter matching | +| Min matched peaks | 4-18 | More peaks = higher confidence | +| Min cosine score | 0.3-0.7 | Higher = fewer but better edges | +| TopK | 10-50 | Keep only best N edges per node | +| Parent mass tolerance | 0.02-0.1 Da or None | Use to limit comparisons | + +**GNPS Defaults** (safe starting point): +- Cosine tolerance: 0.05 Da +- Min matched peaks: 6 +- Min cosine score: 0.3 +- TopK: 10 + +**Strict Settings** (publication quality): +- Cosine tolerance: 0.02 Da +- Min matched peaks: 18 +- Min cosine score: 0.7 +- TopK: 10 + +#### How Cosine Similarity Works (Algorithm) + +Two MS/MS spectra are compared by treating their fragment peaks as vectors and computing cosine similarity on the matched peaks: + +1. **Peak Matching**: For each peak in spectrum A, find the closest peak in spectrum B within `cosine_tolerance` (in Da). A peak can only be matched once (greedy assignment, best-first). + +2. **Score Calculation**: Given N matched peak pairs with intensities (a₁,b₁), (a₂,b₂), ..., (aₙ,bₙ): + ``` + cosine = Σ(aᵢ × bᵢ) / (√Σ(aᵢ²) × √Σ(bᵢ²)) + ``` + The result ranges from 0 (no similarity) to 1 (identical fragmentation). + +3. **Matched Peaks Count**: Reported alongside the score. Both the score AND the count must exceed thresholds (`min_cosine_score`, `min_matched_peaks`) for an edge to be created. + +4. **Intensity Weighting**: Peaks can be weighted by m/z^a × intensity^b. Common settings: `mz_power=0, intensity_power=1` (weight by intensity only, ignore m/z magnitude). + +**Modified Cosine** (for analog search): Before matching, shift all fragment peaks of one spectrum by the precursor mass difference (Δm/z = precursor_A − precursor_B). 
This detects structurally related compounds that differ by a modification (e.g., a hydroxylation adds 16 Da to the precursor, shifting all fragments that retain that moiety). Standard cosine is sufficient when comparing compounds with near-identical precursor masses. + +**Spectrum Preprocessing** (apply before comparison): +- Normalize intensities to 0–1 range (divide by max intensity) +- Remove peaks below a relative intensity threshold (e.g., 1%) +- Keep only peaks within a useful m/z range (e.g., 50–2000 Da) +- Require a minimum number of peaks per spectrum (e.g., 5) + +#### How to Build the Network from Similarities + +1. **Compute all-vs-all pairwise similarities**: For N spectra, compute the N×N similarity matrix. Each cell (i,j) contains the cosine score between spectrum i and j. This is symmetric and has 1.0 on the diagonal. + +2. **Optional: Parent mass filtering**: If `parent_mass_tolerance` is set, skip comparison for pairs where |precursor_mz_i − precursor_mz_j| > tolerance. This reduces computation and removes spurious matches between unrelated mass ranges. + +3. **TopK filtering**: For each node, sort its neighbors by similarity score (descending). Keep only the top K edges. This prevents hub nodes (common scaffolds) from dominating the network. + +4. **Score threshold**: Among the TopK edges, discard any with score < `min_cosine_score`. + +5. **Matched peaks threshold**: Also discard edges where the number of matched peaks < `min_matched_peaks`. + +6. **Build undirected graph**: An edge between nodes A and B exists if A has B in its filtered TopK OR B has A in its filtered TopK (symmetric union). Edge weight = cosine score. + +7. **Node attributes**: Each node stores `precursor_mz` and `retention_time` from the spectrum metadata. These are used later for bioactivity mapping and visualization. + +The resulting graph is typically stored as GraphML (Cytoscape-compatible) or as a NetworkX object for further analysis. 
+ +### Stage 3: Bioactivity Correlation +**Goal**: Find features whose intensity correlates with bioactivity. + +**Statistical Approach** (from Nothias et al. 2018): + +The goal is to test, for each of the M features, whether its intensity across samples correlates with bioactivity. The procedure, step by step: + +1. **Assemble vectors**: For each feature f, extract its intensity vector **x_f** = [intensity in sample 1, intensity in sample 2, ..., intensity in sample S] across all S samples. Separately, assemble the bioactivity vector **y** = [activity of sample 1, activity of sample 2, ..., activity of sample S]. Both vectors must be in the same sample order. + +2. **Z-score normalization** (scaling): For each feature vector **x_f**, compute z-scores independently: + ``` + x_scaled = (x - mean(x)) / std(x) + ``` + Do the same for the bioactivity vector **y**. This is equivalent to R's `scale()` function. Each vector is normalized independently (use its own mean and std). Scaling ensures that correlation reflects pattern similarity, not magnitude. + +3. **Pearson correlation**: For each feature, compute Pearson r between the scaled intensity vector and the scaled bioactivity vector: + ``` + r = Σ(x_scaled_i × y_scaled_i) / (n - 1) + ``` + (After z-score normalization, Pearson r simplifies to this, where n = S is the number of samples. This holds only if `std` in step 2 is the sample standard deviation with an n − 1 denominator, as in R's `scale()`; `numpy.std` defaults to an n denominator, so pass `ddof=1`.) The p-value tests H₀: r = 0. Since both vectors are already z-scored, this is equivalent to computing Pearson on the original data — scaling does not change the correlation coefficient, but it is conventional in this workflow and matters if combining with other methods. + +4. **Multiple testing correction**: You have M p-values (one per feature). Apply correction across all M tests: + - **Bonferroni**: p_corrected = p_raw × M. Very conservative — controls family-wise error rate. Use when you need high confidence (few false positives). + - **FDR (Benjamini-Hochberg)**: Controls false discovery rate. 
Less conservative — allows more discoveries at the cost of some false positives. Use for exploratory analysis. + +5. **Significance threshold**: Mark features with corrected p < 0.05 as significant. These are the candidate bioactive compounds. + +**Order of operations matters**: Scale first, then correlate, then correct. Do NOT correct before correlating, and do NOT scale the bioactivity vector using the intensity statistics (each vector uses its own mean/std). + +**Alternative Methods**: +- Spearman (rank-based, robust to outliers — use when bioactivity has extreme values or non-linear relationships) +- Random Forest feature importance (captures non-linear relationships, but requires more samples) +- Kendall tau (another rank-based method, more robust with small sample sizes) + +**Critical Step**: Match features to network nodes correctly! +- Don't match by ID (feature_120 ≠ spec_119) +- Match by m/z AND retention time +- Use nearest neighbor with tolerance (m/z within 0.01 Da) + +### Stage 4: Visualization +**Goal**: Show network with bioactivity overlay. + +**Essential Elements**: +1. **Node Size**: Bioactivity correlation strength (|r|) +2. **Node Color**: Retention time or Selectivity Index (SI) +3. **Labels**: m/z values for identification +4. **Interactivity**: Click to highlight connected nodes +5. 
**Edge Transparency**: Show similarity scores + +**Common Visualization Issues**: + +#### Issue: Auto-coloring overrides explicit colors +**Symptom**: Nodes appear yellow or wrong colors +**Root Cause**: vis.js uses `group` property to determine color +**Fix**: Remove `group` from node data, use only for color lookup: +```javascript +// WRONG - group property overrides color +{ id: 1, group: 2, color: {background: 'blue'} } + +// CORRECT - no group property +{ id: 1, color: {background: 'blue', border: 'darkblue', + highlight: {...}, hover: {...}} } +``` + +#### Issue: Bioactivity not showing +**Symptom**: All nodes have r = 0 or wrong values +**Root Cause**: Feature-to-node mapping mismatch +**Fix**: Match by nearest m/z + RT, not by ID + +## Validation Checklist + +### Data Integrity +- [ ] Feature table contains bioactivity row +- [ ] Samples in feature table match MS filenames +- [ ] m/z and RT values present for all features + +### Network Quality +- [ ] Expected number of nodes (should match features) +- [ ] Edge count reasonable (10-100 edges/node typical) +- [ ] Similarity scores distributed (not all 0 or all 1) + +### Bioactivity Mapping +- [ ] High r values (>0.8) for known bioactive compounds +- [ ] Check specific paper compounds if known +- [ ] Significant p-values after correction + +### Visualization +- [ ] No default colors (yellow/orange) appearing +- [ ] Node sizes proportional to |r| +- [ ] Click-to-highlight works +- [ ] Connected nodes show in sidebar + +## Parameter Selection Guide + +### Choosing Cosine Tolerance +- **0.02 Da**: High-res MS (Orbitrap, Q-TOF) - strict +- **0.05 Da**: Standard resolution - balanced +- **0.1 Da**: Low-res MS - permissive + +### Choosing Min Matched Peaks +- **4-6**: Exploratory, complex mixtures +- **10-12**: Balanced +- **18+**: High confidence, clean spectra + +### Choosing Min Cosine Score +- **0.1-0.3**: Exploratory, find analogs +- **0.5-0.7**: Publication quality +- **0.8-0.9**: Very high confidence only + 
+### Parent Mass Tolerance +- **Use when**: Want to prevent comparing compounds with very different masses +- **Don't use when**: Looking for analogs with different modifications +- **Typical**: 0.02-0.05 Da or None (compare all) + +## Common Bioactivity Assays + +| Assay | Readout | Typical Format | +|-------|---------|----------------| +| CHIKV | % inhibition | Row in feature table | +| DENV | EC50/CC50 | Separate metadata file | +| Cytotoxicity | CC50 | Separate metadata file | +| MIC | μg/mL | Row in feature table | + +## Expected Outputs + +### From Networking +- GraphML file (Cytoscape-compatible) +- Edge list with similarity scores +- Node attributes (m/z, RT) + +### From Statistics +- Correlation table (feature, r, p-value, significant) +- Random Forest importance (optional) +- PCoA/HCA plots (optional) + +### From Visualization +- Interactive HTML +- PNG/SVG static images + +## Paper-Specific Adaptations + +When reproducing a specific paper: + +1. **Identify their parameters**: Check methods section for cosine tolerance, min peaks +2. **Match their bioactivity assay**: Same cell line, same readout +3. **Use their known compounds**: Validate by finding same m/z values +4. **Replicate their figures**: Same network layout, same color scheme + +## Troubleshooting Framework + +### Problem: Too few edges +- Lower `min_cosine_score` +- Decrease `min_matched_peaks` +- Increase `cosine_tolerance` + +### Problem: Too many edges +- Raise `min_cosine_score` +- Increase `min_matched_peaks` +- Enable `parent_mass_tolerance` + +### Problem: No significant correlations +- Check metadata mapping (samples match?) +- Try FDR instead of Bonferroni +- Check bioactivity values (not all same?) 
+- Try Spearman instead of Pearson + +### Problem: Colors wrong in visualization +- Remove `group` property from node data +- Set explicit colors with all states (background, border, highlight, hover) +- Disable vis.js `groups` option + +## Key Literature + +**Original Bioactivity-Based Molecular Networking Paper**: +Nothias et al. (2018) "Bioactivity-Based Molecular Networking for the Discovery of Drug Leads in Natural Product Bioassay-Guided Fractionation" J. Nat. Prod. + +**Key Concepts**: +- Molecular networking (Watrous et al. 2012) +- Feature-based MN (Nothias et al. 2020) +- GNPS platform (Wang et al. 2016) + +## Implementation Notes + +**Language**: Python (most tools), R (optional stats), JavaScript (visualization) + +**Key Libraries**: +- matchms (networking) +- networkx (graph operations) +- scipy.stats (correlations) +- statsmodels (p-value correction) +- vis.js (visualization) + +**Data Formats**: +- Input: .mzXML/.mzML, .csv +- Network: .graphml, .gml +- Output: .csv, .html, .png + +## Agent Decision Protocol + +### When to Ask User + +**Always ask before proceeding if:** + +1. **Bioactivity assay unclear** + - No mention of cell line, virus strain, or readout method + - Bioactivity values don't match typical formats (not % inhibition, not EC50, not MIC) + - Multiple bioactivity columns with no description + +2. **No positive controls available** + - Paper mentions known bioactive compounds but no m/z values given + - Cannot validate network without reference compounds + - Unclear what "active" means (no threshold specified) + +3. **Data format unusual** + - Feature table not from MZmine/Optimus/OpenMS + - MS data not in .mzXML/.mzML format + - Missing required columns (m/z, RT, sample intensities) + +4. **Parameters far outside typical ranges** + - Cosine tolerance > 0.5 Da (extremely unusual) + - Min matched peaks > 50 (likely wrong) + - Network has < 10 edges or > 10,000 edges per node + +5. 
**Multiple valid approaches possible** + - Paper mentions both Pearson and Spearman without stating which was used + - Both FDR and Bonferroni mentioned + - Analog search vs. standard search unclear + +### When to Proceed Autonomously + +**Safe to proceed without asking if:** + +1. **Standard MZmine output detected** + - Columns: "row ID", "row m/z", "row retention time", "*.mzXML Peak area" + - Bioactivity row present ("BioactivityCHIKV", "Activity", etc.) + +2. **Common bioactivity assay** + - CHIKV/DENV/Zika (antiviral) + - Cytotoxicity (CC50) + - Antibacterial (MIC) + - Standard cancer cell lines + +3. **Parameters within typical ranges** + - Cosine tolerance: 0.02-0.1 Da + - Min matched peaks: 4-18 + - Min cosine: 0.1-0.7 + - TopK: 10-50 + +4. **Known validation compounds** + - Paper lists specific m/z values for bioactives + - Reference standards mentioned + - Can verify mapping is correct + +### Self-Correction Triggers + +**Detect these issues → Apply fix automatically:** + +| Symptom | Diagnosis | Fix | +|---------|-----------|-----| +| All nodes yellow/orange | `group` property overriding colors | Remove `group` from node data, keep only for lookup | +| Bioactivity r=0 for all nodes | Feature-to-node mapping failed | Switch from ID matching to m/z+RT matching | +| Zero significant features | P-value correction too strict | Try FDR instead of Bonferroni | +| Too few edges (< 100 total) | Parameters too strict | Lower min_cosine_score, decrease min_matched_peaks | +| Too many edges (> 50,000) | Parameters too loose | Raise min_cosine_score, enable parent_mass_tolerance | +| Known compound not found | m/z tolerance too strict | Increase to 0.01-0.02 Da matching window | +| All similarities = 0 | Cosine tolerance wrong | Check MS resolution, adjust tolerance | +| Node labels not visible | Font size too small or color wrong | Increase to 14-16px, ensure contrast | + +### Confidence Score System + +**High Confidence** (proceed autonomously): +- Standard formats 
detected ✓ +- Parameters in normal ranges ✓ +- Validation compounds match ✓ +- Network metrics reasonable ✓ + +**Medium Confidence** (proceed with logging): +- Unusual but valid parameter combination +- One validation compound mismatch +- Bioactivity format needs parsing + +**Low Confidence** (ask user): +- Multiple validation failures +- Unknown data format +- Contradictory parameters +- Missing critical metadata + +### Recovery Strategies + +**If pipeline fails at Stage 2 (Networking):** +1. Check spectra loaded correctly (not empty) +2. Try lower cosine tolerance (0.05 → 0.02) +3. Disable parent_mass_tolerance if enabled +4. Check min_matched_peaks not > actual peaks per spectrum + +**If pipeline fails at Stage 3 (Statistics):** +1. Verify metadata samples match feature table columns +2. Try without scaling (if bioactivity already normalized) +3. Switch correlation method (Pearson ↔ Spearman) +4. Disable p-value correction (for exploratory) + +**If visualization is wrong:** +1. Colors wrong → Remove `group` property +2. No bioactivity → Check mapping function +3. Wrong values → Verify nearest-neighbor tolerance +4. Layout messy → Adjust physics parameters + +## Learning + +When this workflow fails on a new paper: +1. Check if they used classical MN (Dereplicator) vs FBMN (MZmine) +2. Check if they used analog search (allows mass differences) +3. Check their specific bioactivity normalization method +4. Check if they used retention time alignment +5. 
Check instrument parameters (resolution affects cosine tolerance) + +**Build pattern library:** +- Track which parameters work for which instrument types +- Note common bioactivity formats by research field +- Record typical network sizes for different sample types +- Maintain mapping of known compound m/z values by species \ No newline at end of file diff --git a/mcp_host/skills/scientific-skills/scientific-skills/molecular_networking_matchms/SKILL.md b/mcp_host/skills/scientific-skills/scientific-skills/molecular_networking_matchms/SKILL.md new file mode 100644 index 0000000..9822e48 --- /dev/null +++ b/mcp_host/skills/scientific-skills/scientific-skills/molecular_networking_matchms/SKILL.md @@ -0,0 +1,279 @@ +--- +name: Molecular Networking with matchms +description: Comprehensive guide to molecular networking analysis using the matchms Python library for mass spectrometry data +version: 1.0 +license: MIT +tags: + - mass-spectrometry + - metabolomics + - molecular-networking + - bioinformatics + - cheminformatics +--- + +# Molecular Networking with matchms + +## Overview + +Molecular networking is a computational approach for organizing and visualizing mass spectrometry (MS/MS) data by connecting similar spectra in a network graph. This visualization helps researchers: + +- **Identify** families of structurally related molecules +- **Discover** new analogs of known compounds +- **Perform dereplication** (identify known compounds) +- **Guide** isolation and characterization of unknowns +- **Explore** chemical diversity in complex samples + +**matchms** is a Python library designed for importing, processing, and analyzing MS/MS data to create molecular networks. 
+ +### Key Features + +- Multiple file format support (MGF, mzML, mzXML, MSP, JSON) +- Flexible spectrum processing pipelines +- Various similarity algorithms (cosine-based, fingerprint-based) +- Network creation and export capabilities +- Integration with GNPS and Cytoscape +- Extensible through companion packages + +## Installation + +### Prerequisites + +- Python 3.10, 3.11, 3.12, or 3.13 +- Operating Systems: Linux, macOS, Windows +- Memory: 4-8GB RAM recommended + +### Installation Methods + +**Conda (Recommended):** + +**pip:** + +**Development:** + +### Related Packages + +### Verification + +## Quick Start + +## Core Concepts + +### Spectral Similarity + +- **Cosine Similarity**: Compares spectra as vectors in m/z-intensity space +- **Modified Cosine**: Accounts for precursor mass differences (finds analogs) +- **Fingerprint Similarity**: Compares molecular fingerprints + +### Network Structure + +- **Nodes**: Individual mass spectra (compounds/features) +- **Edges**: Connections with similarity above threshold +- **Clusters**: Groups of related compounds (molecular families) + +### Key Principles + +1. Similar structures produce similar fragmentation patterns +2. Network topology reveals chemical families +3. Known compounds help annotate unknowns +4. 
Metadata enhances analysis + +## Complete Workflow + +### Step 1: Load Spectra + +### Step 2: Process Spectra + +**Using SpectrumProcessor (Recommended):** + +**Using Individual Filters:** + +### Step 3: Calculate Similarities + +**CosineGreedy (Fast):** + +**ModifiedCosine (For Analogs):** + +**Parallel Processing:** + +### Step 4: Create Network + +### Step 5: Export Results + +## Code Templates + +### Template 1: Basic Workflow + +### Template 2: Analog Discovery + +### Template 3: Dereplication + +### Template 4: Batch Processing + +### Template 5: GNPS Preparation + +## Parameter Optimization + +### Similarity Parameters + +**tolerance**: m/z matching tolerance +- High-res MS: 0.005 - 0.05 Da +- Low-res MS: 0.1 - 0.5 Da +- Recommended: 0.02 Da (high-res), 0.1 Da (low-res) + +**mz_power**: m/z weighting +- 0.0 (default): No weighting +- 0.5-2.0: Emphasize high m/z peaks + +**intensity_power**: intensity weighting +- 0.5: Reduce noise +- 1.0 (default): Linear +- 2.0: Emphasize high peaks + +### Network Parameters + +**score_cutoff**: similarity threshold +- CosineGreedy: 0.7 - 0.8 +- ModifiedCosine: 0.65 - 0.75 +- Higher: Sparser networks +- Lower: Denser networks + +**max_links**: edges per node +- 5: Very sparse +- 10: Balanced (typical) +- 20: Dense + +**top_n**: candidates to consider +- Should be ≥ max_links +- Typical: 1.5-2x max_links + +**link_method**: +- 'single': More connections (exploratory) +- 'mutual': Stricter (cleaner networks) + +### Filter Parameters + +**n_required**: minimum peaks +- 5-10: Typical quality control +- Higher: Better reliability, less data + +**n_max**: maximum peaks +- 100 (recommended): Good balance +- Lower: Faster computation + +**mz_from/mz_to**: m/z range +- 10/1000: Small molecules +- 50/2000: Larger compounds + +## Integration + +### GNPS + +### Cytoscape + +### spec2vec + +## Troubleshooting + +### No peaks left after filtering + +**Cause**: Too aggressive filtering + +**Solutions**: +- Reduce `n_required` +- Increase 
`mz_to` +- Check input data quality + +### Empty network + +**Cause**: `score_cutoff` too high + +**Solutions**: +- Lower cutoff (try 0.6-0.65) +- Increase `max_links` +- Verify spectra are comparable + +### ModifiedCosine not working + +**Cause**: Missing `precursor_mz` + +**Solution**: + +### Memory errors + +**Solutions**: +- Process in chunks +- Use CosineGreedy (not CosineHungarian) +- Reduce peaks: `reduce_to_number_of_peaks(s, n_max=50)` +- Filter before similarity calculation + +### Slow performance + +**Solutions**: +- Use CosineGreedy (fastest) +- Reduce peaks early +- Use parallel processing +- Filter dataset first + +## Best Practices + +### Data Quality +- Validate input data +- Remove low-quality spectra +- Normalize intensities +- Check metadata consistency + +### Processing +- Start with default pipelines +- Document all parameters +- Save intermediate results +- Use version control + +### Networking +- Test different parameters +- Compare algorithms +- Validate with known compounds +- Use metadata + +### Performance +- Batch process large datasets +- Use parallel computation +- Reduce peaks early +- Choose appropriate algorithms + +### Reproducibility +- Save all parameters +- Export processed data +- Track library versions +- Share complete workflows + +## Data Format: MGF + +### Essential Metadata + +- `precursor_mz`: Precursor m/z +- `spectrum_id`: Unique identifier +- `charge`: Ion charge state +- `retention_time`: Retention time +- `compound_name`: Compound name +- `smiles`: SMILES string +- `inchi`: InChI string + +## Additional Resources + +- Documentation: https://matchms.readthedocs.io/ +- GitHub: https://github.com/matchms/matchms +- GNPS: https://gnps.ucsd.edu/ +- Cytoscape: https://cytoscape.org/ + +## Summary + +matchms provides: + +✓ Multiple file formats +✓ Flexible processing +✓ Various similarity algorithms +✓ Network creation and export +✓ Tool integration +✓ Extensible design + +This skill covers installation, workflows, API 
@mcp.tool
def create_new_skill(skill_name: str, knowledge_markdown: str) -> Dict[str, Any]:
    """
    Create a new custom skill with the provided knowledge documentation.

    This tool creates a new skill directory under SKILLS_DIR with a SKILL.md
    file containing the provided markdown content. The skill will then be
    available through list_all_skills() and show_skill() after creation.

    Args:
        skill_name (str): The name of the new skill. Must be lowercase, start
            with a letter or digit, and contain only letters, digits, hyphens
            or underscores. Examples: "my-custom-tool", "custom_analysis",
            "newpackage"
        knowledge_markdown (str): The markdown content for the skill
            documentation. Should include sections like Overview,
            Installation, Quick Start, Core Capabilities, etc. Can include
            YAML frontmatter with metadata (name, description, license).

    Returns:
        Dict[str, Any]: Dictionary containing:
            - status: "success" or "error"
            - skill_name: The requested skill name
            - skill_path: Path to the skill directory (present on success and
              on the "already exists" error; empty string on unexpected
              failures)
            - message: Additional information or error details

    Examples:
        >>> create_new_skill(
        ...     skill_name="my-custom-tool",
        ...     knowledge_markdown='''---
        ... name: my-custom-tool
        ... description: A custom tool for specialized analysis
        ... ---
        ...
        ... # My Custom Tool
        ...
        ... ## Overview
        ... This is a custom tool for specialized analysis workflows.
        ...
        ... ## Installation
        ... ```bash
        ... pip install my-custom-tool
        ... ```
        ...
        ... ## Quick Start
        ... ```python
        ... from my_custom_tool import analyze
        ... result = analyze(data)
        ... ```
        ... '''
        ... )
        {
            "status": "success",
            "skill_name": "my-custom-tool",
            "skill_path": "<SKILLS_DIR>/my-custom-tool",
            "message": "Skill 'my-custom-tool' created successfully. Use show_skill('my-custom-tool') to view it."
        }

    Notes:
        - Skill names must be unique - existing skills cannot be overwritten
        - The skill will be immediately available after creation
        - Consider including standard sections: Overview, Installation, Quick Start,
          Core Capabilities, Common Workflows, Troubleshooting, Additional Resources
    """
    # Local imports keep the tool self-contained regardless of the module's
    # top-level import block.
    import re
    import shutil

    skill_path = None
    # True only once *this call* has created the directory, so cleanup can
    # never delete a skill that somebody else created.
    created = False

    def _remove_partial() -> None:
        """Best-effort removal of a directory created by this call."""
        if created and skill_path is not None:
            shutil.rmtree(skill_path, ignore_errors=True)

    def _already_exists() -> Dict[str, Any]:
        """Build the error response for a name that is already taken."""
        return {
            "status": "error",
            "skill_name": skill_name,
            "skill_path": str(skill_path),
            "message": f"Skill '{skill_name}' already exists. Choose a different name."
        }

    try:
        # Validate skill name
        if not skill_name:
            return {
                "status": "error",
                "skill_name": skill_name,
                "message": "Skill name cannot be empty"
            }

        # fullmatch (not match + '$'): '$' also matches just before a trailing
        # newline, which would let "name\n" through as a directory name.
        if not isinstance(skill_name, str) or not re.fullmatch(
            r'[a-z0-9][a-z0-9_-]*', skill_name
        ):
            return {
                "status": "error",
                "skill_name": skill_name,
                "message": "Skill name must be lowercase, start with alphanumeric, and contain only letters, numbers, hyphens, or underscores"
            }

        # Check if skill already exists
        skill_path = SKILLS_DIR / skill_name
        if skill_path.exists():
            return _already_exists()

        # Validate knowledge_markdown is not empty
        if not knowledge_markdown or not knowledge_markdown.strip():
            return {
                "status": "error",
                "skill_name": skill_name,
                "message": "Knowledge markdown content cannot be empty"
            }

        # Ensure SKILLS_DIR exists
        SKILLS_DIR.mkdir(parents=True, exist_ok=True)

        # Create the skill directory atomically: if another caller created it
        # after the exists() check above, refuse instead of overwriting.
        try:
            skill_path.mkdir()
        except FileExistsError:
            return _already_exists()
        created = True

        # Create SKILL.md file
        skill_file = skill_path / "SKILL.md"
        with open(skill_file, 'w', encoding='utf-8') as f:
            f.write(knowledge_markdown)

        return {
            "status": "success",
            "skill_name": skill_name,
            "skill_path": str(skill_path),
            "message": f"Skill '{skill_name}' created successfully. Use show_skill('{skill_name}') to view it."
        }

    except PermissionError:
        # Remove a half-created directory so a retry is not rejected with
        # "already exists".
        _remove_partial()
        return {
            "status": "error",
            "skill_name": skill_name,
            "message": "Permission denied when creating skill directory. Check write permissions."
        }
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising.
        _remove_partial()
        return {
            "status": "error",
            "skill_name": skill_name,
            "skill_path": "",
            "message": f"Failed to create skill: {str(e)}"
        }
command -v python3.11 &> /dev/null; then - echo "Python 3.11 is not installed on your system." - read -p "Would you like to install Python 3.11? (y/n): " install_python - if [[ "$install_python" =~ ^[Yy]$ ]]; then - echo "Installing Python 3.11..." - if [[ "$OSTYPE" == "darwin"* ]]; then - # macOS - if command -v brew &> /dev/null; then - brew install python@3.11 - else - echo "Error: Homebrew not found. Please install Homebrew first or install Python 3.11 manually." - exit 1 - fi - elif [[ "$OSTYPE" == "linux-gnu"* ]]; then - # Linux - if command -v apt-get &> /dev/null; then - sudo apt-get update - sudo apt-get install -y python3.11 python3.11-venv python3.11-dev - elif command -v yum &> /dev/null; then - sudo yum install -y python3.11 - else - echo "Error: Unsupported package manager. Please install Python 3.11 manually." - exit 1 - fi - else - echo "Error: Unsupported operating system. Please install Python 3.11 manually." - exit 1 - fi - - if ! command -v python3.11 &> /dev/null; then - echo "Error: Python 3.11 installation failed." - exit 1 - fi - echo "Python 3.11 installed successfully!" - else - echo "Error: Python 3.11 is required to run this script." - exit 1 - fi +# Use PYTHON_PATH environment variable if set, otherwise default to python3 +PYTHON=${PYTHON_PATH:-python3} + +# Function to check if Python is available +check_python() { + if ! command -v "$PYTHON" &> /dev/null; then + echo "Error: Python not found at '$PYTHON'" + echo "Please set the PYTHON_PATH environment variable to your Python executable." + echo "Example: export PYTHON_PATH=/usr/bin/python3.11" + exit 1 else - echo "Python 3.11 found: $(python3.11 --version)" + echo "Python found: $($PYTHON --version) at $(which $PYTHON)" fi } -# Function to check if pip is installed for python3.11 +# Function to check if pip is installed check_pip() { - if ! python3.11 -m pip --version &> /dev/null; then - echo "pip is not installed for Python 3.11." + if ! 
$PYTHON -m pip --version &> /dev/null; then + echo "pip is not installed for $PYTHON." read -p "Would you like to install pip? (y/n): " install_pip if [[ "$install_pip" =~ ^[Yy]$ ]]; then echo "Installing pip..." - python3.11 -m ensurepip --upgrade - if ! python3.11 -m pip --version &> /dev/null; then + $PYTHON -m ensurepip --upgrade + if ! $PYTHON -m pip --version &> /dev/null; then echo "Error: pip installation failed." exit 1 fi @@ -63,7 +33,7 @@ check_pip() { exit 1 fi else - echo "pip found: $(python3.11 -m pip --version)" + echo "pip found: $($PYTHON -m pip --version)" fi } @@ -74,7 +44,7 @@ install_requirements() { read -p "Would you like to install dependencies from requirements.txt? (y/n): " install_deps if [[ "$install_deps" =~ ^[Yy]$ ]]; then echo "Installing dependencies..." - python3.11 -m pip install -r requirements.txt + $PYTHON -m pip install -r requirements.txt if [ $? -eq 0 ]; then echo "Dependencies installed successfully!" else @@ -94,7 +64,7 @@ install_requirements() { # Check and install prerequisites echo "=== Checking Prerequisites ===" -check_python311 +check_python check_pip install_requirements echo "=== Prerequisites Check Complete ===" @@ -133,19 +103,71 @@ fi # Check for processes using ports echo "Checking for processes using ports $START_PORT-$END_PORT..." 
PROCESSES_FOUND=false +PYTHON_PROCESSES_FOUND=false +declare -a BLOCKING_PIDS +declare -a BLOCKING_PORTS +declare -a BLOCKING_COMMANDS +declare -a PYTHON_PIDS +declare -a PYTHON_PORTS +declare -a PYTHON_COMMANDS for ((port=$START_PORT; port<=$END_PORT; port++)); do PID=$(lsof -ti :$port 2>/dev/null) if [ -n "$PID" ]; then - echo "Port $port is being used by process $PID" + # Get the full command path using ps + FULL_CMD=$(ps -p "$PID" -o command= 2>/dev/null) + echo "Port $port is being used by (PID: $PID):" + echo " $FULL_CMD" PROCESSES_FOUND=true + BLOCKING_PIDS+=("$PID") + BLOCKING_PORTS+=("$port") + BLOCKING_COMMANDS+=("$FULL_CMD") + + # Check if it's a Python process + if [[ "$FULL_CMD" == *python* ]]; then + PYTHON_PROCESSES_FOUND=true + PYTHON_PIDS+=("$PID") + PYTHON_PORTS+=("$port") + PYTHON_COMMANDS+=("$FULL_CMD") + fi fi done +# If Python processes found, ask user if they want to kill them +if [ "$PYTHON_PROCESSES_FOUND" = true ]; then + echo "" + echo "The following Python processes are blocking the required ports:" + for i in "${!PYTHON_PIDS[@]}"; do + echo " - Port ${PYTHON_PORTS[$i]} (PID: ${PYTHON_PIDS[$i]}):" + echo " ${PYTHON_COMMANDS[$i]}" + done + echo "" + read -p "Would you like to kill these Python processes? (y/n): " kill_processes + if [[ "$kill_processes" =~ ^[Yy]$ ]]; then + for pid in "${PYTHON_PIDS[@]}"; do + echo "Killing process $pid..." + kill -9 "$pid" 2>/dev/null + if [ $? -eq 0 ]; then + echo " ✓ Process $pid killed successfully" + else + echo " ✗ Failed to kill process $pid (may require sudo)" + fi + done + echo "" + else + echo "Python processes not killed. Some ports may be unavailable." + echo "" + fi +elif [ "$PROCESSES_FOUND" = true ]; then + echo "" + echo "Note: Non-Python processes are using ports but will not be killed automatically." 
+ echo "" +fi + # Calculate instance ID from workspace path (same logic as deploy.py) # This gives us the config filename that will be used WORKSPACE_ABS=$(cd "$WORKSPACE" 2>/dev/null && pwd || echo "$WORKSPACE") -INSTANCE_ID=$(python3.11 -c "import hashlib; import os; ws = os.path.abspath('$WORKSPACE'); print(hashlib.md5(ws.encode()).hexdigest()[:8])" 2>/dev/null || echo "unknown") +INSTANCE_ID=$($PYTHON -c "import hashlib; import os; ws = os.path.abspath('$WORKSPACE'); print(hashlib.md5(ws.encode()).hexdigest()[:8])" 2>/dev/null || echo "unknown") INSTANCE_CONFIG="config_${INSTANCE_ID}.json" # Check if workspace is new (doesn't exist) @@ -169,7 +191,7 @@ echo " Workspace: $WORKSPACE" echo "" echo "Deploying MCP servers..." -python3.11 deploy.py --config config.json --mcp-dir mcp_host --host_port_min "$START_PORT" --host_port_max "$END_PORT" --workspace $WORKSPACE & +$PYTHON deploy.py --config config.json --mcp-dir mcp_host --host_port_min "$START_PORT" --host_port_max "$END_PORT" --workspace $WORKSPACE & HOST_PID=$! wait $HOST_PID @@ -183,4 +205,4 @@ echo " Edit this file to enable/disable MCP services:" echo " 1. Edit $INSTANCE_CONFIG" echo " 2. Change 'enabled': false to 'enabled': true for services you want" echo " 3. Restart the deployment to apply changes" -echo "" \ No newline at end of file +echo ""