Skip to content
Merged
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
.idea/
docs/_build
*.pyc
*.pyc.*
__pycache__/
*.log
.tox
/SPEC.md
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
ChangeLog
==========

Next (unreleased)
------------------

1. 新增代理源 **谷德代理**; (2026-05-14)
2. 引入tox自动化测试, 放弃Python 3.7以下版本支持; (2026-05-14)

2.4.2 (2024-01-18)
------------------

Expand Down
166 changes: 159 additions & 7 deletions fetcher/proxyFetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import json
from time import sleep

from lxml import etree

from util.webRequest import WebRequest


Expand All @@ -24,12 +26,75 @@ class ProxyFetcher(object):
proxy getter
"""

@staticmethod
def _parse_proxies_from_text(text):
if not text:
return []
proxy_pattern = re.compile(r'(?<![\d.])(\d{1,3}(?:\.\d{1,3}){3})(?:\s*:\s*|\s+)(\d{2,5})(?!\d)')
return ["%s:%s" % proxy for proxy in proxy_pattern.findall(text)]

@staticmethod
def _parse_proxies_from_json(data):
proxies = []
if isinstance(data, dict):
proxy = data.get("proxy") or data.get("addr") or data.get("address")
if proxy:
proxies.extend(ProxyFetcher._parse_proxies_from_text(str(proxy)))

ip = data.get("ip") or data.get("host") or data.get("server")
port = data.get("port")
if ip and port:
proxies.append("%s:%s" % (ip, port))

parsed_keys = {"proxy", "addr", "address", "ip", "host", "server", "port"}
for key, value in data.items():
if key in parsed_keys:
continue
proxies.extend(ProxyFetcher._parse_proxies_from_json(value))
elif isinstance(data, list):
for item in data:
proxies.extend(ProxyFetcher._parse_proxies_from_json(item))
elif isinstance(data, str):
proxies.extend(ProxyFetcher._parse_proxies_from_text(data))
return proxies

@staticmethod
def _parse_proxies_from_tree(tree):
proxies = []
if tree is None:
return proxies
for tr in tree.xpath("//tr"):
cells = [" ".join(td.xpath(".//text()")).strip() for td in tr.xpath("./td")]
if len(cells) < 2:
continue
ip = ""
port = ""
for cell in cells:
ip_match = re.search(r'\d{1,3}(?:\.\d{1,3}){3}', cell)
port_match = re.search(r'\b\d{2,5}\b', cell)
if ip_match and not ip:
ip = ip_match.group()
continue
if port_match and not port:
port = port_match.group()
if ip and port:
proxies.append("%s:%s" % (ip, port))
return proxies

@staticmethod
def _yield_unique_proxies(proxies):
seen = set()
for proxy in proxies:
if proxy not in seen:
seen.add(proxy)
yield proxy

@staticmethod
def freeProxy01():
"""
站大爷 https://www.zdaye.com/dayProxy.html
"""
start_url = "https://www.zdaye.com/dayProxy.html"
start_url = "https://www.zdaye.com/free/"
html_tree = WebRequest().get(start_url, verify=False).tree
latest_page_time = html_tree.xpath("//span[@class='thread_time_info']/text()")[0].strip()
from datetime import datetime
Expand Down Expand Up @@ -132,12 +197,50 @@ def freeProxy07():
@staticmethod
def freeProxy08():
    """
    Xiaohuan proxy (ip.ihuan.me).

    Loads ti.html to collect the named <input> fields of its form together
    with the ``key`` token, posts them (plus fixed query options) to
    tqdl.html, and yields every distinct "ip:port" found in the reply.
    :return: generator of "ip:port" strings; yields nothing when no key
             can be located on the form page
    """
    request = WebRequest()
    ti_url = "https://ip.ihuan.me/ti.html"
    tqdl_url = "https://ip.ihuan.me/tqdl.html"
    ti_resp = request.get(ti_url, timeout=10, verify=False)

    # Parse the form page once and reuse the tree.
    ti_tree = ti_resp.tree
    form_data = {}
    if ti_tree is not None:
        for input_tag in ti_tree.xpath("//form//input[@name]"):
            name = "".join(input_tag.xpath("./@name")).strip()
            value = "".join(input_tag.xpath("./@value")).strip()
            if name:
                form_data[name] = value

    key = form_data.get("key")
    if not key:
        # The form did not expose the key as an input: fall back to scanning
        # the raw markup, first for an input tag, then for a key assignment.
        key_match = re.search(r'name=["\']key["\'][^>]*value=["\']([^"\']+)', ti_resp.text)
        if not key_match:
            key_match = re.search(r'key["\']?\s*[:=]\s*["\']([0-9a-f]{16,})', ti_resp.text)
        key = key_match.group(1) if key_match else ""

    if not key:
        return

    header = {
        "Origin": "https://ip.ihuan.me",
        "Referer": ti_url,
    }
    # Start from whatever the form carried, then override the query options.
    data = form_data.copy()
    data.update({
        "num": "2000",
        "port": "",
        "kill_port": "",
        "address": "",
        "kill_address": "",
        "anonymity": "",
        "type": "",
        "post": "",
        "sort": "1",
        "key": key,
    })
    r = request.post(tqdl_url, header=header, data=data, timeout=10, verify=False)
    proxies = ProxyFetcher._parse_proxies_from_tree(r.tree)
    proxies.extend(ProxyFetcher._parse_proxies_from_text(r.text))
    for proxy in ProxyFetcher._yield_unique_proxies(proxies):
        yield proxy

@staticmethod
def freeProxy09(page_count=1):
Expand Down Expand Up @@ -181,6 +284,55 @@ def freeProxy12():
if ip and port:
yield "%s:%s" % (ip, port)

@staticmethod
def freeProxy13():
    """
    FreeVPNNode free proxy list https://cn.freevpnnode.com/free-proxy/

    :return: generator of distinct "ip:port" strings taken from the page's
             table rows and from its raw text
    """
    # NOTE: the China-only listing is https://cn.freevpnnode.com/free-proxy-for-china/
    page = WebRequest().get("https://cn.freevpnnode.com/free-proxy/", timeout=5, retry_time=1, verify=False)
    candidates = ProxyFetcher._parse_proxies_from_tree(page.tree) + ProxyFetcher._parse_proxies_from_text(page.text)
    for candidate in ProxyFetcher._yield_unique_proxies(candidates):
        yield candidate

@staticmethod
def freeProxy14():
    """
    SCDN proxy API (proxy.scdn.io).

    Tries three sources in turn and stops at the first that produces
    anything: the ``table_html`` fragment of the JSON reply, the JSON reply
    itself, and finally the raw response text.
    :return: generator of distinct "ip:port" strings; any exception raised
             while parsing or yielding is printed and ends the generator
    """
    # NOTE: China-only query:
    # https://proxy.scdn.io/get_proxies.php?protocol=&country=%E4%B8%AD%E5%9B%BD&per_page=100&page=1
    resp = WebRequest().get(
        "https://proxy.scdn.io/get_proxies.php?protocol=&country=&per_page=100&page=1",
        timeout=5, retry_time=1, verify=False)
    try:
        payload = resp.json
        found = []
        if isinstance(payload, dict):
            fragment = payload.get("table_html")
        else:
            fragment = ""
        if fragment:
            # The fragment holds bare rows; wrap it so it parses as a table.
            wrapped = etree.HTML("<table>%s</table>" % fragment)
            found += ProxyFetcher._parse_proxies_from_tree(wrapped)

        if not found:
            found = ProxyFetcher._parse_proxies_from_json(payload)
        if not found:
            found = ProxyFetcher._parse_proxies_from_text(resp.text)
        for candidate in ProxyFetcher._yield_unique_proxies(found):
            yield candidate
    except Exception as e:
        print(e)

@staticmethod
def freeProxy15():
    """
    Geonode free proxy list https://geonode.com/free-proxy-list/

    Parses the JSON reply first and falls back to scanning the raw response
    text when the JSON gives no proxies.
    :return: generator of distinct "ip:port" strings; any exception raised
             while parsing or yielding is printed and ends the generator
    """
    # NOTE: China-only query:
    # https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc&country=CN
    resp = WebRequest().get(
        "https://proxylist.geonode.com/api/proxy-list?limit=500&page=1&sort_by=lastChecked&sort_type=desc",
        timeout=5, retry_time=1, verify=False)
    try:
        found = ProxyFetcher._parse_proxies_from_json(resp.json) or ProxyFetcher._parse_proxies_from_text(resp.text)
        for candidate in ProxyFetcher._yield_unique_proxies(found):
            yield candidate
    except Exception as e:
        print(e)

# @staticmethod
# def wallProxy01():
# """
Expand Down
4 changes: 2 additions & 2 deletions helper/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def preValidator(cls, proxy):
@classmethod
def regionGetter(cls, proxy):
    """
    Look up the region of a proxy through the api.ip.sb geoip service.

    :param proxy: object whose ``proxy`` attribute is an "ip:port" string;
                  only the part before the first colon is queried
    :return: the ``country_code`` from the JSON reply, or 'error' when the
             request fails, the reply cannot be used, or the field is
             missing or empty
    """
    try:
        url = 'https://api.ip.sb/geoip/%s' % proxy.proxy.split(':')[0]
        r = WebRequest().get(url=url, retry_time=1, timeout=2).json
        # A reply without a country code is a failed lookup, not a None region.
        return r.get('country_code') or 'error'
    except Exception:
        # Best-effort lookup: any failure maps to the sentinel, while
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return 'error'

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
requests==2.30.0
requests==2.31.0
gunicorn==19.9.0
lxml==4.9.2
redis==3.5.3
Expand Down
3 changes: 3 additions & 0 deletions setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
"freeProxy10",
"freeProxy11",
"freeProxy12",
"freeProxy13",
"freeProxy14",
"freeProxy15",
]

# ############# proxy validator #################
Expand Down
5 changes: 4 additions & 1 deletion util/six.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ def iteritems(d, **kw):
from urlparse import urlparse

if PY3:
from imp import reload as reload_six
try:
from importlib import reload as reload_six
except ImportError:
from imp import reload as reload_six
else:
reload_six = reload

Expand Down
31 changes: 30 additions & 1 deletion util/webRequest.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,38 @@ def get(self, url, header=None, retry_time=3, retry_interval=5, timeout=5, *args
self.log.info("retry %s second after" % retry_interval)
time.sleep(retry_interval)

def post(self, url, header=None, retry_time=3, retry_interval=5, timeout=5, *args, **kwargs):
    """
    Send a POST request, retrying when the request raises.

    :param url: target url
    :param header: optional dict of extra headers merged into ``self.header``
    :param retry_time: attempt budget; one is consumed per failure and the
                       call gives up once it is no longer positive
    :param retry_interval: seconds slept between two attempts
    :param timeout: network timeout handed to ``requests.post``
    :return: self; ``self.response`` holds the response, or an empty
             placeholder Response with status_code 200 once the attempts
             are used up
    """
    request_headers = self.header
    if isinstance(header, dict) and header:
        request_headers.update(header)

    attempts_left = retry_time
    while True:
        try:
            self.response = requests.post(url, headers=request_headers, timeout=timeout, *args, **kwargs)
        except Exception as e:
            self.log.error("requests: %s error: %s" % (url, str(e)))
            attempts_left -= 1
            if attempts_left > 0:
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)
                continue
            # Out of attempts: hand back an empty placeholder response.
            placeholder = Response()
            placeholder.status_code = 200
            self.response = placeholder
        return self

@property
def tree(self):
    """
    Parsed HTML tree of the current response.

    :return: ``etree.HTML`` of the response content, or None when the
             content is empty
    """
    body = self.response.content
    return etree.HTML(body) if body else None

@property
Expand All @@ -101,4 +131,3 @@ def json(self):
except Exception as e:
self.log.error(str(e))
return {}

Loading