From 5f8f00f8a5db6b845eb59f5cdb02c9a5b9bd3984 Mon Sep 17 00:00:00 2001 From: VeNoMouS Date: Sat, 25 Feb 2023 19:12:03 +1300 Subject: [PATCH] Add turnstile to captcha providers --- cloudscraper/captcha/2captcha.py | 28 ++- cloudscraper/captcha/9kw.py | 66 +++---- cloudscraper/captcha/anticaptcha.py | 227 +++++++++++++++++-------- cloudscraper/captcha/capmonster.py | 13 +- cloudscraper/captcha/capsolver.py | 80 ++++++--- cloudscraper/captcha/deathbycaptcha.py | 14 +- 6 files changed, 285 insertions(+), 143 deletions(-) diff --git a/cloudscraper/captcha/2captcha.py b/cloudscraper/captcha/2captcha.py index 7fae7f3..1052e02 100644 --- a/cloudscraper/captcha/2captcha.py +++ b/cloudscraper/captcha/2captcha.py @@ -29,6 +29,11 @@ def __init__(self): super(captchaSolver, self).__init__('2captcha') self.host = 'https://2captcha.com' self.session = requests.Session() + self.captchaType = { + 'reCaptcha': 'userrecaptcha', + 'hCaptcha': 'hcaptcha', + 'turnstile': 'turnstile' + } # ------------------------------------------------------------------------------- # @@ -175,23 +180,16 @@ def _checkRequest(response): 'soft_id': 2905 } - data.update( - { - 'method': 'userrcaptcha', - 'googlekey': siteKey - } if captchaType == 'reCaptcha' else { - 'method': 'hcaptcha', - 'sitekey': siteKey - } - ) + data.update({ + 'method': self.captchaType[captchaType], + 'googlekey' if captchaType == 'reCaptcha' else 'sitekey': siteKey + }) if self.proxy: - data.update( - { - 'proxy': self.proxy, - 'proxytype': self.proxyType - } - ) + data.update({ + 'proxy': self.proxy, + 'proxytype': self.proxyType + }) response = polling2.poll( lambda: self.session.post( diff --git a/cloudscraper/captcha/9kw.py b/cloudscraper/captcha/9kw.py index 143def8..df3589d 100644 --- a/cloudscraper/captcha/9kw.py +++ b/cloudscraper/captcha/9kw.py @@ -12,30 +12,35 @@ ) from ..exceptions import ( - reCaptchaServiceUnavailable, - reCaptchaAPIError, - reCaptchaTimeout, - reCaptchaParameter, - reCaptchaBadJobID + CaptchaException, + CaptchaServiceUnavailable, + CaptchaAPIError, + CaptchaTimeout, + CaptchaParameter, + CaptchaBadJobID ) -from . import reCaptcha +from . import Captcha -class captchaSolver(reCaptcha): +class captchaSolver(Captcha): def __init__(self): super(captchaSolver, self).__init__('9kw') self.host = 'https://www.9kw.eu/index.cgi' self.maxtimeout = 180 self.session = requests.Session() + self.captchaType = { + 'reCaptcha': 'recaptchav2', + 'hCaptcha': 'hcaptcha' + } # ------------------------------------------------------------------------------- # @staticmethod def checkErrorStatus(response): if response.status_code in [500, 502]: - raise reCaptchaServiceUnavailable( + raise CaptchaServiceUnavailable( f'9kw: Server Side Error {response.status_code}' ) @@ -98,18 +103,18 @@ def checkErrorStatus(response): if response.text.startswith('{'): if response.json().get('error'): - raise reCaptchaAPIError(error_codes.get(int(response.json().get('error')))) + raise CaptchaAPIError(error_codes.get(int(response.json().get('error')))) else: error_code = int(re.search(r'^00(?P\d+)', response.text).groupdict().get('error_code', 0)) if error_code: - raise reCaptchaAPIError(error_codes.get(error_code)) + raise CaptchaAPIError(error_codes.get(error_code)) # ------------------------------------------------------------------------------- # def requestJob(self, jobID): if not jobID: - raise reCaptchaBadJobID( - "9kw: Error bad job id to request reCaptcha against." + raise CaptchaBadJobID( + "9kw: Error bad job id to request against." ) def _checkRequest(response): @@ -139,7 +144,7 @@ def _checkRequest(response): if response: return response.json().get('answer') else: - raise reCaptchaTimeout("9kw: Error failed to solve reCaptcha.") + raise CaptchaTimeout("9kw: Error failed to solve.") # ------------------------------------------------------------------------------- # @@ -152,11 +157,6 @@ def _checkRequest(response): return None - captchaMap = { - 'reCaptcha': 'recaptchav2', - 'hCaptcha': 'hcaptcha' - } - response = polling.poll( lambda: self.session.post( self.host, @@ -165,7 +165,7 @@ def _checkRequest(response): 'action': 'usercaptchaupload', 'interactive': 1, 'file-upload-01': siteKey, - 'oldsource': captchaMap[captchaType], + 'oldsource': self.captchaType[captchaType], 'pageurl': url, 'maxtimeout': self.maxtimeout, 'json': 1 @@ -180,33 +180,35 @@ def _checkRequest(response): if response: return response.json().get('captchaid') else: - raise reCaptchaBadJobID('9kw: Error no valid job id was returned.') + raise CaptchaBadJobID('9kw: Error no valid job id was returned.') # ------------------------------------------------------------------------------- # - - def getCaptchaAnswer(self, captchaType, url, siteKey, reCaptchaParams): + def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): jobID = None - if not reCaptchaParams.get('api_key'): - raise reCaptchaParameter("9kw: Missing api_key parameter.") + if not captchaParams.get('api_key'): + raise CaptchaParameter("9kw: Missing api_key parameter.") + + self.api_key = captchaParams.get('api_key') - self.api_key = reCaptchaParams.get('api_key') + if captchaParams.get('maxtimeout'): + self.maxtimeout = captchaParams.get('maxtimeout') - if reCaptchaParams.get('maxtimeout'): - self.maxtimeout = reCaptchaParams.get('maxtimeout') + if captchaParams.get('proxy'): + self.session.proxies = captchaParams.get('proxies') - if reCaptchaParams.get('proxy'): - self.session.proxies = reCaptchaParams.get('proxies') + if captchaType not in self.captchaType: + raise CaptchaException(f'9kw: {captchaType} is not supported by this provider.') try: jobID = self.requestSolve(captchaType, url, siteKey) return self.requestJob(jobID) except polling.TimeoutException: - raise reCaptchaTimeout( - f"9kw: reCaptcha solve took to long to execute 'captchaid' {jobID}, aborting." + raise CaptchaTimeout( + f"9kw: solve took to long to execute 'captchaid' {jobID}, aborting." ) -# ------------------------------------------------------------------------------- # +# ------------------------------------------------------------------------------- # captchaSolver() diff --git a/cloudscraper/captcha/anticaptcha.py b/cloudscraper/captcha/anticaptcha.py index ed35a55..b9ae808 100644 --- a/cloudscraper/captcha/anticaptcha.py +++ b/cloudscraper/captcha/anticaptcha.py @@ -1,31 +1,24 @@ from __future__ import absolute_import -from ..exceptions import ( - CaptchaParameter, - CaptchaTimeout, - CaptchaAPIError -) + +import requests try: from urlparse import urlparse except ImportError: from urllib.parse import urlparse +from ..exceptions import ( + CaptchaServiceUnavailable, + CaptchaAPIError, + CaptchaTimeout, + CaptchaParameter, + CaptchaBadJobID +) + try: - from python_anticaptcha import ( - AnticaptchaClient, - NoCaptchaTaskProxylessTask, - HCaptchaTaskProxyless, - NoCaptchaTask, - HCaptchaTask, - AnticaptchaException - ) + import polling2 except ImportError: - raise ImportError( - "Please install/upgrade the python module 'python_anticaptcha' via " - "pip install python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" - ) - -import sys + raise ImportError("Please install the python module 'polling2' via pip") from . import Captcha @@ -33,76 +26,172 @@ class captchaSolver(Captcha): def __init__(self): - if sys.modules['python_anticaptcha'].__version__ < '0.6': - raise ImportError( - "Please upgrade the python module 'python_anticaptcha' via " - "pip install -U python-anticaptcha or https://github.com/ad-m/python-anticaptcha/" - ) super(captchaSolver, self).__init__('anticaptcha') + self.host = 'https://api.anti-captcha.com' + self.session = requests.Session() + self.captchaType = { + 'reCaptcha': 'NoCaptchaTask', + 'hCaptcha': 'HCaptchaTask', + 'turnstile': 'TurnstileTask' + } # ------------------------------------------------------------------------------- # - def parseProxy(self, url, user_agent): - parsed = urlparse(url) + @staticmethod + def checkErrorStatus(response): + if response.status_code in [500, 502]: + raise CaptchaServiceUnavailable( + f'anticaptcha: Server Side Error {response.status_code}' + ) + + payload = response.json() + if payload['errorId'] >= 1: + if 'errorDescription' in payload: + raise CaptchaAPIError( + payload['errorDescription'] + ) + else: + raise CaptchaAPIError(payload['errorCode']) + + # ------------------------------------------------------------------------------- # + + def requestJob(self, taskID): + if not taskID: + raise CaptchaBadJobID( + 'anticaptcha: Error bad task id to request Captcha.' + ) - return dict( - proxy_type=parsed.scheme, - proxy_address=parsed.hostname, - proxy_port=parsed.port, - proxy_login=parsed.username, - proxy_password=parsed.password, - user_agent=user_agent + def _checkRequest(response): + self.checkErrorStatus(response) + + if response.ok and response.json()['status'] == 'ready': + return True + + return None + + response = polling2.poll( + lambda: self.session.post( + f'{self.host}/getTaskResult', + json={ + 'clientKey': self.clientKey, + 'taskId': taskID + }, + timeout=30 + ), + check_success=_checkRequest, + step=5, + timeout=180 ) + if response: + payload = response.json()['solution'] + if 'token' in payload: + return payload['token'] + else: + return payload['gRecaptchaResponse'] + else: + raise CaptchaTimeout( + "anticaptcha: Error failed to solve Captcha." + ) + # ------------------------------------------------------------------------------- # - def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): - if not captchaParams.get('api_key'): - raise CaptchaParameter("anticaptcha: Missing api_key parameter.") + def requestSolve(self, captchaType, url, siteKey): + def _checkRequest(response): + self.checkErrorStatus(response) - client = AnticaptchaClient(captchaParams.get('api_key')) - client.SOFT_ID = 959 + if response.ok and response.json()['taskId']: + return True - if captchaParams.get('proxy') and not captchaParams.get('no_proxy'): - captchaMap = { - 'reCaptcha': NoCaptchaTask, - 'hCaptcha': HCaptchaTask - } + return None - proxy = self.parseProxy( - captchaParams.get('proxy', {}).get('https'), - captchaParams.get('User-Agent', '') - ) + data = { + 'clientKey': self.clientKey, + 'task': { + 'websiteURL': url, + 'websiteKey': siteKey, + 'type': self.captchaType[captchaType] + }, + 'softId': 959 + } - task = captchaMap[captchaType]( - url, - siteKey, - **proxy - ) + if self.proxy: + data['task'].update(self.proxy) else: - captchaMap = { - 'reCaptcha': NoCaptchaTaskProxylessTask, - 'hCaptcha': HCaptchaTaskProxyless - } - task = captchaMap[captchaType](url, siteKey) + data['task']['type'] = f"{data['task']['type']}Proxyless" + + response = polling2.poll( + lambda: self.session.post( + f'{self.host}/createTask', + json=data, + allow_redirects=False, + timeout=30 + ), + check_success=_checkRequest, + step=5, + timeout=180 + ) - if not hasattr(client, 'createTaskSmee'): - raise NotImplementedError( - "Please upgrade 'python_anticaptcha' via pip or download it from " - "https://github.com/ad-m/python-anticaptcha/" + if response: + return response.json()['taskId'] + else: + raise CaptchaBadJobID( + 'anticaptcha: Error no task id was returned.' ) - job = client.createTaskSmee(task, timeout=180) + # ------------------------------------------------------------------------------- # - try: - job.join(maximum_time=180) - except (AnticaptchaException) as e: - raise CaptchaTimeout(f"{getattr(e, 'message', e)}") + def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): + taskID = None + + if not captchaParams.get('clientKey'): + raise CaptchaParameter( + "anticaptcha: Missing clientKey parameter." + ) - if 'solution' in job._last_result: - return job.get_solution_response() + self.clientKey = captchaParams.get('clientKey') + + if captchaParams.get('proxy') and not captchaParams.get('no_proxy'): + hostParsed = urlparse(captchaParams.get('proxy', {}).get('https')) + + if not hostParsed.scheme: + raise CaptchaParameter('Cannot parse proxy correctly, bad scheme') + + if not hostParsed.netloc: + raise CaptchaParameter('Cannot parse proxy correctly, bad netloc') + + ports = { + 'http': 80, + 'https': 443 + } + + self.proxy = { + 'proxyType': hostParsed.scheme, + 'proxyAddress': hostParsed.hostname, + 'proxyPort': hostParsed.port if hostParsed.port else ports[self.proxy['proxyType']], + 'proxyLogin': hostParsed.username, + 'proxyPassword': hostParsed.password, + } else: - raise CaptchaAPIError('Job did not return `solution` key in payload.') + self.proxy = None + + try: + taskID = self.requestSolve(captchaType, url, siteKey) + return self.requestJob(taskID) + except polling2.TimeoutException: + try: + if taskID: + self.reportJob(taskID) + except polling2.TimeoutException: + raise CaptchaTimeout( + "anticaptcha: Captcha solve took to long and also failed " + f"reporting the task with task id {taskID}." + ) + + raise CaptchaTimeout( + "anticaptcha: Captcha solve took to long to execute " + f"task id {taskID}, aborting." + ) # ------------------------------------------------------------------------------- # diff --git a/cloudscraper/captcha/capmonster.py b/cloudscraper/captcha/capmonster.py index 8442de0..5846b2d 100644 --- a/cloudscraper/captcha/capmonster.py +++ b/cloudscraper/captcha/capmonster.py @@ -29,6 +29,11 @@ def __init__(self): super(captchaSolver, self).__init__('capmonster') self.host = 'https://api.capmonster.cloud' self.session = requests.Session() + self.captchaType = { + 'reCaptcha': 'NoCaptchaTask', + 'hCaptcha': 'HCaptchaTask', + 'turnstile': 'TurnstileTask' + } # ------------------------------------------------------------------------------- # @@ -79,7 +84,11 @@ def _checkRequest(response): ) if response: - return response.json()['solution']['gRecaptchaResponse'] + payload = response.json()['solution'] + if 'token' in payload: + return payload['token'] + else: + return payload['gRecaptchaResponse'] else: raise CaptchaTimeout( "CapMonster: Error failed to solve Captcha." @@ -101,7 +110,7 @@ def _checkRequest(response): 'task': { 'websiteURL': url, 'websiteKey': siteKey, - 'type': 'NoCaptchaTask' if captchaType == 'reCaptcha' else 'HCaptchaTask' + 'type': self.captchaType[captchaType] }, 'softId': 37 } diff --git a/cloudscraper/captcha/capsolver.py b/cloudscraper/captcha/capsolver.py index 0a96579..79b70f6 100644 --- a/cloudscraper/captcha/capsolver.py +++ b/cloudscraper/captcha/capsolver.py @@ -2,6 +2,11 @@ import requests +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + from ..exceptions import ( CaptchaServiceUnavailable, CaptchaAPIError, @@ -10,7 +15,6 @@ CaptchaBadJobID ) - try: import polling2 except ImportError: @@ -21,14 +25,19 @@ class captchaSolver(Captcha): def __init__(self): + super(captchaSolver, self).__init__('capsolver') self.host = 'https://api.capsolver.com' self.session = requests.Session() - super(captchaSolver, self).__init__('capsolver') + self.captchaType = { + 'reCaptcha': 'ReCaptchaV2Task', + 'hCaptcha': 'HCaptchaTask', + 'turnstile': 'AntiCloudflareTask' + } # ------------------------------------------------------------------------------- # @staticmethod - def checkErrorStatus(response, request_type): + def checkErrorStatus(response, fnct): if response.status_code in [500, 502]: raise CaptchaServiceUnavailable(f'CapSolver: Server Side Error {response.status_code}') @@ -39,7 +48,7 @@ def checkErrorStatus(response, request_type): if rPayload.get('errorDescription', False) and 'Current system busy' not in rPayload['errorDescription']: raise CaptchaAPIError( - f"CapSolver: {request_type} -> {rPayload.get('errorDescription')}" + f"CapSolver -> {fnct} -> {rPayload.get('errorDescription')}" ) # ------------------------------------------------------------------------------- # @@ -49,12 +58,10 @@ def requestJob(self, jobID): raise CaptchaBadJobID("CapSolver: Error bad job id to request task result.") def _checkRequest(response): - self.checkErrorStatus(response, 'getTaskResult') + self.checkErrorStatus(response, 'requestJob') try: - rPayload = response.json() - if response.ok: - if rPayload.get("solution", {}).get('gRecaptchaResponse'): - return True + if response.ok and response.json()['status'] == 'ready': + return True except Exception: pass return None @@ -75,9 +82,11 @@ def _checkRequest(response): if response: try: - rPayload = response.json() - if rPayload.get('solution', {}).get('gRecaptchaResponse'): - return rPayload['solution']['gRecaptchaResponse'] + rPayload = response.json()['solution'] + if 'token' in rPayload: + return rPayload['token'] + else: + return rPayload['gRecaptchaResponse'] except Exception: pass @@ -88,6 +97,9 @@ def _checkRequest(response): # ------------------------------------------------------------------------------- # def requestSolve(self, captchaType, url, siteKey): + + # ------------------------------------------------------------------------------- # + def _checkRequest(response): self.checkErrorStatus(response, 'createTask') try: @@ -99,18 +111,30 @@ def _checkRequest(response): pass return None + # ------------------------------------------------------------------------------- # + + payload = { + 'clientKey': self.api_key, + 'appId': '9E717405-8C70-49B3-B277-7C2F2196484B', + 'task': { + 'type': self.captchaType[captchaType], + 'websiteURL': url, + 'websiteKey': siteKey + } + } + + if captchaType == 'turnstile': + payload['task']['metadata'] = {'type': 'turnstile'} + + if self.proxy: + payload['task']['proxy'] = self.proxy + else: + payload['task']['type'] = f"{self.captchaType[captchaType]}Proxyless" + response = polling2.poll( lambda: self.session.post( f'{self.host}/createTask', - json={ - 'clientKey': self.api_key, - 'appId': '9E717405-8C70-49B3-B277-7C2F2196484B', - 'task': { - 'type': 'HCaptchaTaskProxyless', - 'websiteURL': url, - 'websiteKey': siteKey - } - }, + json=payload, allow_redirects=False, timeout=30 ), @@ -133,9 +157,21 @@ def _checkRequest(response): def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): if not captchaParams.get('api_key'): raise CaptchaParameter("CapSolver: Missing api_key parameter.") - self.api_key = captchaParams.get('api_key') + if captchaParams.get('proxy') and not captchaParams.get('no_proxy'): + hostParsed = urlparse(captchaParams.get('proxy', {}).get('https')) + + if not hostParsed.scheme: + raise CaptchaParameter('Cannot parse proxy correctly, bad scheme') + + if not hostParsed.netloc: + raise CaptchaParameter('Cannot parse proxy correctly, bad netloc') + + self.proxy = captchaParams['proxy']['https'] + else: + self.proxy = None + try: jobID = self.requestSolve(captchaType, url, siteKey) return self.requestJob(jobID) diff --git a/cloudscraper/captcha/deathbycaptcha.py b/cloudscraper/captcha/deathbycaptcha.py index 33c5ef2..724b04d 100644 --- a/cloudscraper/captcha/deathbycaptcha.py +++ b/cloudscraper/captcha/deathbycaptcha.py @@ -13,6 +13,7 @@ raise ImportError("Please install the python module 'polling2' via pip") from ..exceptions import ( + CaptchaException, CaptchaServiceUnavailable, CaptchaTimeout, CaptchaParameter, @@ -29,6 +30,10 @@ def __init__(self): super(captchaSolver, self).__init__('deathbycaptcha') self.host = 'http://api.dbcapi.me/api' self.session = requests.Session() + self.captchaType = { + 'reCaptcha': '4', + 'hCaptcha': '7' + } # ------------------------------------------------------------------------------- # @@ -181,7 +186,7 @@ def _checkRequest(response): }) data.update({ - 'type': '4', + 'type': self.captchaType[captchaType], 'token_params': json.dumps(jPayload) }) else: @@ -197,7 +202,7 @@ def _checkRequest(response): }) data.update({ - 'type': '7', + 'type': self.captchaType[captchaType], 'hcaptcha_params': json.dumps(jPayload) }) @@ -246,6 +251,9 @@ def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): else: self.proxy = None + if captchaType not in self.captchaType: + raise CaptchaException(f'DeathByCaptcha: {captchaType} is not supported by this provider.') + try: jobID = self.requestSolve(captchaType, url, siteKey) return self.requestJob(jobID) @@ -262,7 +270,7 @@ def getCaptchaAnswer(self, captchaType, url, siteKey, captchaParams): f"DeathByCaptcha: Captcha solve took to long to execute job id {jobID}, aborting." ) - # ------------------------------------------------------------------------------- # + captchaSolver()