1111from lib .core .common import getSafeExString
1212from lib .core .common import popValue
1313from lib .core .common import pushValue
14- from lib .core .common import readInput
1514from lib .core .common import urlencode
1615from lib .core .convert import getBytes
1716from lib .core .convert import getUnicode
2423from lib .core .enums import REDIRECTION
2524from lib .core .exception import SqlmapBaseException
2625from lib .core .exception import SqlmapConnectionException
27- from lib .core .exception import SqlmapUserQuitException
2826from lib .core .settings import BING_REGEX
2927from lib .core .settings import DUCKDUCKGO_REGEX
3028from lib .core .settings import DUMMY_SEARCH_USER_AGENT
3735from thirdparty .six .moves import urllib as _urllib
3836from thirdparty .socks import socks
3937
40- def _search ( dork ):
def _fetch(url, headers, data=None):
    """
    Fetches and returns the (decoded) content of a search engine results page
    (or None in case of a connection issue)

    url     - address of the results page to be requested
    headers - dictionary of HTTP request headers to be sent
    data    - (optional) request body; when given, the request is sent as POST

    Note: body of an HTTP error response is returned too (decoded the same way
    as a regular one), so that the caller is able to recognize abuse/captcha
    pages which are commonly served along with an error status code
    """

    retVal = None

    try:
        req = _urllib.request.Request(url, data=getBytes(data) if data else None, headers=headers)
        conn = _urllib.request.urlopen(req)

        requestMsg = "HTTP request:\n %s %s" % ("POST" if data else "GET", url)
        requestMsg += " %s" % _http_client.HTTPConnection._http_vsn_str
        logger.log(CUSTOM_LOGGING.TRAFFIC_OUT, requestMsg)

        page = conn.read()
        responseHeaders = conn.info()

        responseMsg = "HTTP response (%s - %d):\n " % (conn.msg, conn.code)
        if conf.verbose <= 4:
            responseMsg += getUnicode(responseHeaders, UNICODE_ENCODING)
        elif conf.verbose > 4:
            responseMsg += "%s\n %s\n " % (responseHeaders, page)
        logger.log(CUSTOM_LOGGING.TRAFFIC_IN, responseMsg)

        page = decodePage(page, responseHeaders.get(HTTP_HEADER.CONTENT_ENCODING), responseHeaders.get(HTTP_HEADER.CONTENT_TYPE))
        retVal = getUnicode(page)  # Note: if decodePage call fails (Issue #4202)
    except _urllib.error.HTTPError as ex:
        # Note: error page has to be decoded too, as the Accept-Encoding request header makes a compressed body
        # possible (otherwise, plain-text markers of an abuse/captcha page would never be found by the caller)
        try:
            page = decodePage(ex.read(), ex.headers.get(HTTP_HEADER.CONTENT_ENCODING), ex.headers.get(HTTP_HEADER.CONTENT_TYPE))
            retVal = getUnicode(page)
        except Exception as _:
            # Note: best-effort only (None is returned), though leaving a trace instead of failing silently
            debugMsg = "problem occurred while trying to get "
            debugMsg += "an error page information (%s)" % getSafeExString(_)
            logger.debug(debugMsg)
    except (_urllib.error.URLError, _http_client.error, socket.error, socket.timeout, socks.ProxyError) as ex:
        # Note: connection issue is deliberately non-fatal here (None is returned to the caller)
        debugMsg = "unable to connect to '%s' (%s)" % (url, getSafeExString(ex))
        logger.debug(debugMsg)

    return retVal
10875
109- page = getUnicode (page ) # Note: if decodePage call fails (Issue #4202)
76+ def _search (dork ):
77+ """
78+ This method performs the effective search using the provided dork,
79+ trying the available search engines in order of (current) scraping
80+ reliability and returning the results of the first one that yields any
81+ (so that the failure of a single engine does not break the feature)
82+ """
11083
111- retVal = [_urllib .parse .unquote (match .group (1 ) or match .group (2 )) for match in re .finditer (GOOGLE_REGEX , page , re .I )]
84+ if not dork :
85+ return None
11286
113- if not retVal and "detected unusual traffic" in page :
114- warnMsg = "Google has detected 'unusual' traffic from "
115- warnMsg += "used IP address disabling further searches"
87+ retVal = []
88+ seen = set ()
11689
117- if conf .proxyList :
90+ requestHeaders = {
91+ HTTP_HEADER .USER_AGENT : dict (conf .httpHeaders ).get (HTTP_HEADER .USER_AGENT , DUMMY_SEARCH_USER_AGENT ),
92+ HTTP_HEADER .ACCEPT_ENCODING : HTTP_ACCEPT_ENCODING_HEADER_VALUE ,
93+ HTTP_HEADER .COOKIE : GOOGLE_CONSENT_COOKIE ,
94+ }
95+
96+ gpage = conf .googlePage if conf .googlePage > 1 else 1
97+ logger .info ("using search result page #%d" % gpage )
98+
99+ encoded = urlencode (dork , convall = True )
100+
101+ # Note: (name, url, POST data, regex, regex flags, match->link). Ordered by current scraping reliability; tried in turn until one yields results (DuckDuckGo currently being the only consistently scrapeable one)
102+ engines = (
103+ ("DuckDuckGo" , "https://html.duckduckgo.com/html/" , "q=%s&s=%d" % (encoded , (gpage - 1 ) * 30 ), DUCKDUCKGO_REGEX , re .I | re .S , lambda match : match .group (1 ).replace ("&" , "&" )),
104+ ("Bing" , "https://www.bing.com/search?q=%s&first=%d" % (encoded , (gpage - 1 ) * 10 + 1 ), None , BING_REGEX , re .I | re .S , lambda match : match .group (1 )),
105+ ("Google" , "https://www.google.com/search?q=%s&num=100&hl=en&complete=0&safe=off&filter=0&btnG=Search&start=%d" % (encoded , (gpage - 1 ) * 100 ), None , GOOGLE_REGEX , re .I , lambda match : match .group (1 ) or match .group (2 )),
106+ )
107+
108+ for name , url , data , regex , flags , extract in engines :
109+ page = _fetch (url , requestHeaders , data )
110+
111+ if not page :
112+ continue
113+
114+ count = 0
115+ for match in re .finditer (regex , page , flags ):
116+ link = _urllib .parse .unquote (extract (match ))
117+ if link and link not in seen :
118+ seen .add (link )
119+ retVal .append (link )
120+ count += 1
121+
122+ if count :
123+ logger .info ("found %d usable link%s using %s" % (count , 's' if count != 1 else "" , name ))
124+ break # Note: stop at the first engine that actually returns results (others are only fallbacks)
125+
126+ # Note: switch proxy (if available) when an abuse/captcha page was served (instead of pointlessly falling through to the next engine from the same blocked IP)
127+ if conf .proxyList and (("detected unusual traffic" in page ) or ("issue with the Tor Exit Node you are currently using" in page )):
128+ warnMsg = "%s has detected 'unusual' traffic from the used IP address" % name
118129 raise SqlmapBaseException (warnMsg )
119- else :
120- logger .critical (warnMsg )
121130
122131 if not retVal :
123- message = "no usable links found. What do you want to do?"
124- message += "\n [1] (re)try with DuckDuckGo (default)"
125- message += "\n [2] (re)try with Bing"
126- message += "\n [3] quit"
127- choice = readInput (message , default = '1' )
128-
129- if choice == '3' :
130- raise SqlmapUserQuitException
131- elif choice == '2' :
132- url = "https://www.bing.com/search?q=%s&first=%d" % (urlencode (dork , convall = True ), (gpage - 1 ) * 10 + 1 )
133- regex = BING_REGEX
134- else :
135- url = "https://html.duckduckgo.com/html/"
136- data = "q=%s&s=%d" % (urlencode (dork , convall = True ), (gpage - 1 ) * 30 )
137- regex = DUCKDUCKGO_REGEX
138-
139- try :
140- req = _urllib .request .Request (url , data = getBytes (data ), headers = requestHeaders )
141- conn = _urllib .request .urlopen (req )
142-
143- requestMsg = "HTTP request:\n GET %s" % url
144- requestMsg += " %s" % _http_client .HTTPConnection ._http_vsn_str
145- logger .log (CUSTOM_LOGGING .TRAFFIC_OUT , requestMsg )
146-
147- page = conn .read ()
148- code = conn .code
149- status = conn .msg
150- responseHeaders = conn .info ()
151- page = decodePage (page , responseHeaders .get ("Content-Encoding" ), responseHeaders .get ("Content-Type" ))
152-
153- responseMsg = "HTTP response (%s - %d):\n " % (status , code )
154-
155- if conf .verbose <= 4 :
156- responseMsg += getUnicode (responseHeaders , UNICODE_ENCODING )
157- elif conf .verbose > 4 :
158- responseMsg += "%s\n %s\n " % (responseHeaders , page )
159-
160- logger .log (CUSTOM_LOGGING .TRAFFIC_IN , responseMsg )
161- except _urllib .error .HTTPError as ex :
162- try :
163- page = ex .read ()
164- page = decodePage (page , ex .headers .get ("Content-Encoding" ), ex .headers .get ("Content-Type" ))
165- except socket .timeout :
166- warnMsg = "connection timed out while trying "
167- warnMsg += "to get error page information (%d)" % ex .code
168- logger .critical (warnMsg )
169- return None
170- except :
171- errMsg = "unable to connect"
172- raise SqlmapConnectionException (errMsg )
173-
174- page = getUnicode (page ) # Note: if decodePage call fails (Issue #4202)
175-
176- retVal = [_urllib .parse .unquote (match .group (1 ).replace ("&" , "&" )) for match in re .finditer (regex , page , re .I | re .S )]
177-
178- if not retVal and "issue with the Tor Exit Node you are currently using" in page :
179- warnMsg = "DuckDuckGo has detected 'unusual' traffic from "
180- warnMsg += "used (Tor) IP address"
181-
182- if conf .proxyList :
183- raise SqlmapBaseException (warnMsg )
184- else :
185- logger .critical (warnMsg )
132+ warnMsg = "no usable links found (search engines might be blocking the used IP address)"
133+ logger .critical (warnMsg )
186134
187135 return retVal
188136
@@ -206,6 +154,7 @@ def search(dork):
206154 return search (dork )
207155 else :
208156 raise
157+
209158 finally :
210159 kb .choices .redirect = popValue ()
211160
0 commit comments