diff --git a/.gitignore b/.gitignore index e31375c1e..20c8c3627 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .idea/ docs/_build *.pyc +*.pyc.* +__pycache__/ *.log .tox /SPEC.md diff --git a/docs/changelog.rst b/docs/changelog.rst index e3889882c..558b9680a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -3,6 +3,12 @@ ChangeLog ========== +Next (unreleased) +------------------ + +1. 新增代理源 **谷德代理**; (2026-05-14) +2. 引入tox自动化测试, 放弃Python 3.7以下版本支持; (2026-05-14) + 2.4.2 (2024-01-18) ------------------ diff --git a/fetcher/proxyFetcher.py b/fetcher/proxyFetcher.py index 17a65bc2a..a46222101 100644 --- a/fetcher/proxyFetcher.py +++ b/fetcher/proxyFetcher.py @@ -16,6 +16,8 @@ import json from time import sleep +from lxml import etree + from util.webRequest import WebRequest @@ -24,12 +26,75 @@ class ProxyFetcher(object): proxy getter """ + @staticmethod + def _parse_proxies_from_text(text): + if not text: + return [] + proxy_pattern = re.compile(r'(?\s*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*?(\d+)', r.text) - for proxy in proxies: - yield ":".join(proxy) + request = WebRequest() + ti_url = "https://ip.ihuan.me/ti.html" + tqdl_url = "https://ip.ihuan.me/tqdl.html" + ti_resp = request.get(ti_url, timeout=10, verify=False) + form_data = {} + if ti_resp.tree is not None: + for input_tag in ti_resp.tree.xpath("//form//input[@name]"): + name = "".join(input_tag.xpath("./@name")).strip() + value = "".join(input_tag.xpath("./@value")).strip() + if name: + form_data[name] = value + + key = form_data.get("key") + if not key: + key_match = re.search(r'name=["\']key["\'][^>]*value=["\']([^"\']+)', ti_resp.text) + if not key_match: + key_match = re.search(r'key["\']?\s*[:=]\s*["\']([0-9a-f]{16,})', ti_resp.text) + key = key_match.group(1) if key_match else "" + + if not key: + return + + header = { + "Origin": "https://ip.ihuan.me", + "Referer": ti_url, + } + data = form_data.copy() + data.update({ + "num": "2000", + "port": "", + "kill_port": "", + "address": 
"",
+            "kill_address": "",
+            "anonymity": "",
+            "type": "",
+            "post": "",
+            "sort": "1",
+            "key": key,
+        })
+        r = request.post(tqdl_url, header=header, data=data, timeout=10, verify=False)
+        proxies = ProxyFetcher._parse_proxies_from_tree(r.tree)
+        proxies.extend(ProxyFetcher._parse_proxies_from_text(r.text))
+        for proxy in ProxyFetcher._yield_unique_proxies(proxies):
+            yield proxy
 
     @staticmethod
     def freeProxy09(page_count=1):
@@ -181,6 +284,55 @@ def freeProxy12():
             if ip and port:
                 yield "%s:%s" % (ip, port)
 
+    @staticmethod
+    def freeProxy13():
+        """ FreeVPNNode free proxy list https://cn.freevpnnode.com/free-proxy/ (the China-only page is kept commented out) """
+        # url = "https://cn.freevpnnode.com/free-proxy-for-china/"
+        url = "https://cn.freevpnnode.com/free-proxy/"
+        r = WebRequest().get(url, timeout=5, retry_time=1, verify=False)
+        proxies = ProxyFetcher._parse_proxies_from_tree(r.tree)
+        proxies.extend(ProxyFetcher._parse_proxies_from_text(r.text))
+        for proxy in ProxyFetcher._yield_unique_proxies(proxies):
+            yield proxy
+
+    @staticmethod
+    def freeProxy14():
+        """ SCDN proxy API https://proxy.scdn.io (empty country filter, first page of 100 entries) """
+        # url = "https://proxy.scdn.io/get_proxies.php?protocol=&country=%E4%B8%AD%E5%9B%BD&per_page=100&page=1"
+        url = "https://proxy.scdn.io/get_proxies.php?protocol=&country=&per_page=100&page=1"
+        r = WebRequest().get(url, timeout=5, retry_time=1, verify=False)
+        try:
+            data = r.json
+            proxies = []
+            table_html = data.get("table_html") if isinstance(data, dict) else ""
+            if table_html:
+                tree = etree.HTML("%s
" % table_html)
+                proxies.extend(ProxyFetcher._parse_proxies_from_tree(tree))
+
+            if not proxies:
+                proxies = ProxyFetcher._parse_proxies_from_json(data)
+            if not proxies:
+                proxies = ProxyFetcher._parse_proxies_from_text(r.text)
+            for proxy in ProxyFetcher._yield_unique_proxies(proxies):
+                yield proxy
+        except Exception as e:
+            print(e)
+
+    @staticmethod
+    def freeProxy15():
+        """ Geonode free proxy list https://geonode.com/free-proxy-list/ (no country filter; the CN-only query is kept commented out) """
+        # url = "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&country=CN"
+        url = "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc"
+        r = WebRequest().get(url, timeout=5, retry_time=1, verify=False)
+        try:
+            proxies = ProxyFetcher._parse_proxies_from_json(r.json)
+            if not proxies:
+                proxies = ProxyFetcher._parse_proxies_from_text(r.text)
+            for proxy in ProxyFetcher._yield_unique_proxies(proxies):
+                yield proxy
+        except Exception as e:
+            print(e)
+
     # @staticmethod
     # def wallProxy01():
     #     """
diff --git a/helper/check.py b/helper/check.py
index 937645c0f..0b732b84d 100644
--- a/helper/check.py
+++ b/helper/check.py
@@ -79,9 +79,11 @@ def preValidator(cls, proxy):
     @classmethod
     def regionGetter(cls, proxy):
+        """ Return the country_code that api.ip.sb reports for the proxy's IP, or 'error' if the lookup raises or the field is absent """
         try:
-            url = 'https://searchplugin.csdn.net/api/v1/ip/get?ip=%s' % proxy.proxy.split(':')[0]
+            url = 'https://api.ip.sb/geoip/%s' % proxy.proxy.split(':')[0]
             r = WebRequest().get(url=url, retry_time=1, timeout=2).json
-            return r['data']['address']
-        except:
+            # index instead of .get(): WebRequest.json falls back to {} on failure, and .get() would yield None rather than 'error'
+            return r['country_code']
+        except Exception:
             return 'error'
 
diff --git a/requirements.txt b/requirements.txt
index ab2fdbc49..ad68b2c07 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-requests==2.30.0
+requests==2.31.0
 gunicorn==19.9.0
 lxml==4.9.2
 redis==3.5.3
diff --git a/setting.py b/setting.py
index f6ed89c8a..e8a669f58 100644
--- a/setting.py
+++ b/setting.py
@@ -57,6 +57,9 @@
     "freeProxy10",
     "freeProxy11",
     "freeProxy12",
+    "freeProxy13",
+    "freeProxy14",
+    "freeProxy15",
]
 
 # ############# proxy validator #################
diff --git a/util/six.py b/util/six.py
index 14ee059ba..d31e12138 100644
--- a/util/six.py
+++ b/util/six.py
@@ -30,7 +30,10 @@ def iteritems(d, **kw):
     from urlparse import urlparse
 
 if PY3:
-    from imp import reload as reload_six
+    try:
+        from importlib import reload as reload_six
+    except ImportError:
+        from imp import reload as reload_six
 else:
     reload_six = reload
 
diff --git a/util/webRequest.py b/util/webRequest.py
index bf0555216..97164773a 100644
--- a/util/webRequest.py
+++ b/util/webRequest.py
@@ -86,8 +86,38 @@ def get(self, url, header=None, retry_time=3, retry_interval=5, timeout=5, *args
                 self.log.info("retry %s second after" % retry_interval)
                 time.sleep(retry_interval)
 
+    def post(self, url, header=None, retry_time=3, retry_interval=5, timeout=5, *args, **kwargs):
+        """
+        POST to url with retries; extra *args/**kwargs are passed through to requests.post
+        :param url: target url
+        :param header: optional dict merged into the dict returned by self.header
+        :param retry_time: number of attempts (at least one is always made)
+        :param retry_interval: seconds to sleep between attempts
+        :param timeout: network timeout handed to requests.post
+        :return: self; if every attempt fails, self.response is a bare Response() with status_code 200
+        """
+        headers = self.header
+        if header and isinstance(header, dict):
+            headers.update(header)
+        while True:
+            try:
+                self.response = requests.post(url, headers=headers, timeout=timeout, *args, **kwargs)
+                return self
+            except Exception as e:
+                self.log.error("requests: %s error: %s" % (url, str(e)))
+                retry_time -= 1
+                if retry_time <= 0:  # attempts exhausted: stop retrying and return a placeholder
+                    resp = Response()
+                    resp.status_code = 200  # NOTE(review): failure is reported as 200, so status alone cannot distinguish it from success
+                    self.response = resp
+                    return self
+                self.log.info("retry %s second after" % retry_interval)
+                time.sleep(retry_interval)
+
     @property
     def tree(self):
+        if not self.response.content:  # nothing to parse: hand callers None instead of calling etree.HTML
+            return None
         return etree.HTML(self.response.content)
 
     @property
@@ -101,4 +131,3 @@ def json(self):
         except Exception as e:
             self.log.error(str(e))
             return {}
-