44import logging
55import time
66import requests
7+ import httpx
78import tempfile
89import urllib3
910
@@ -43,6 +44,7 @@ def __init__(self):
4344 # If we use a proxy or Tor, we set this to True
4445 self ._proxy_works = False
4546 self .proxy_mode = None
47+ self ._proxies = {}
4648 # If we have a Tor server that we can refresh, we set this to True
4749 self ._tor_process = None
4850 self ._can_refresh_tor = False
@@ -183,8 +185,12 @@ def _use_proxy(self, http: str, https: str = None) -> bool:
183185 """
184186 if https is None :
185187 https = http
188+ if http [:4 ] != "http" :
189+ http = "http://" + http
190+ if https [:5 ] != "https" :
191+ https = "https://" + https
186192
187- proxies = {'http' : http , 'https' : https }
193+ proxies = {'http:// ' : http , 'https:// ' : https }
188194 if self .proxy_mode == ProxyMode .SCRAPERAPI :
189195 r = requests .get ("http://api.scraperapi.com/account" , params = {'api_key' : self ._API_KEY }).json ()
190196 if "error" in r :
@@ -198,7 +204,7 @@ def _use_proxy(self, http: str, https: str = None) -> bool:
198204 self ._proxy_works = self ._check_proxy (proxies )
199205
200206 if self ._proxy_works :
201- self ._session . proxies = proxies
207+ self ._proxies = proxies
202208 self ._new_session ()
203209
204210 return self ._proxy_works
@@ -353,8 +359,8 @@ def _get_webdriver(self):
353359 def _get_chrome_webdriver (self ):
354360 if self ._proxy_works :
355361 webdriver .DesiredCapabilities .CHROME ['proxy' ] = {
356- "httpProxy" : self ._session . proxies ['http' ],
357- "sslProxy" : self ._session . proxies ['https' ],
362+ "httpProxy" : self ._proxies ['http' ],
363+ "sslProxy" : self ._proxies ['https' ],
358364 "proxyType" : "MANUAL"
359365 }
360366
@@ -369,8 +375,8 @@ def _get_firefox_webdriver(self):
369375 if self ._proxy_works :
370376 # Redirect webdriver through proxy
371377 webdriver .DesiredCapabilities .FIREFOX ['proxy' ] = {
372- "httpProxy" : self ._session . proxies ['http' ],
373- "sslProxy" : self ._session . proxies ['https' ],
378+ "httpProxy" : self ._proxies ['http' ],
379+ "sslProxy" : self ._proxies ['https' ],
374380 "proxyType" : "MANUAL" ,
375381 }
376382
@@ -439,11 +445,12 @@ def _handle_captcha2(self, url):
439445 return self ._session
440446
441447 def _new_session (self ):
448+ init_kwargs = {}
442449 proxies = {}
443450 if self ._session :
444- proxies = self ._session . proxies
451+ proxies = self ._proxies
445452 self ._close_session ()
446- self ._session = requests . Session ()
453+ # self._session = httpx.Client ()
447454 self .got_403 = False
448455
449456 # Suppress the misleading traceback from UserAgent()
@@ -453,15 +460,18 @@ def _new_session(self):
453460 'accept' : 'text/html,application/xhtml+xml,application/xml' ,
454461 'User-Agent' : UserAgent ().random ,
455462 }
456- self ._session .headers .update (_HEADERS )
463+ # self._session.headers.update(_HEADERS)
464+ init_kwargs .update (headers = _HEADERS )
457465
458466 if self ._proxy_works :
459- self ._session .proxies = proxies
467+ init_kwargs ["proxies" ] = proxies #.get("http", None)
468+ self ._proxies = proxies
460469 if self .proxy_mode is ProxyMode .SCRAPERAPI :
461470 # SSL Certificate verification must be disabled for
462471 # ScraperAPI requests to work.
463472 # https://www.scraperapi.com/documentation/
464- self ._session .verify = False
473+ init_kwargs ["verify" ] = False
474+ self ._session = httpx .Client (** init_kwargs )
465475 self ._webdriver = None
466476
467477 return self ._session
@@ -496,7 +506,7 @@ def _fp_coroutine(self, timeout=1, wait_time=120):
496506 all_proxies = freeproxy .get_proxy_list ()
497507 if proxy in self ._dirty_freeproxies :
498508 continue
499- proxies = {'http' : proxy , 'https' : proxy }
509+ proxies = {'http:// ' : proxy , 'https:// ' : proxy }
500510 proxy_works = self ._check_proxy (proxies )
501511 if proxy_works :
502512 dirty_proxy = (yield proxy )