-
Notifications
You must be signed in to change notification settings - Fork 30
feat: improve behavior of HTTP redirects #182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
c552ce3
4d221e7
eeaeb8a
885dbfb
ef1f6b5
9bee88c
56242fe
bbfc6fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,11 +22,13 @@ | |
| from http.cookiejar import CookieJar | ||
| from os.path import basename | ||
| from typing import Dict, List, Optional, Tuple, Union | ||
| from urllib3.util.retry import Retry | ||
| from urllib.parse import urlparse | ||
|
|
||
| import requests | ||
| from requests.structures import CaseInsensitiveDict | ||
| from requests.exceptions import JSONDecodeError | ||
| from urllib3.exceptions import MaxRetryError | ||
| from urllib3.util.retry import Retry | ||
|
|
||
| from ibm_cloud_sdk_core.authenticators import Authenticator | ||
| from .api_exception import ApiException | ||
|
|
@@ -52,6 +54,10 @@ | |
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| MAX_REDIRECTS = 10 | ||
| SAFE_HEADERS = ['authorization', 'www-authenticate', 'cookie', 'cookie2'] | ||
|
|
||
|
|
||
| # pylint: disable=too-many-instance-attributes | ||
| # pylint: disable=too-many-locals | ||
| class BaseService: | ||
|
|
@@ -96,7 +102,7 @@ def __init__( | |
| service_url: str = None, | ||
| authenticator: Authenticator = None, | ||
| disable_ssl_verification: bool = False, | ||
| enable_gzip_compression: bool = False | ||
| enable_gzip_compression: bool = False, | ||
| ) -> None: | ||
| self.set_service_url(service_url) | ||
| self.http_client = requests.Session() | ||
|
|
@@ -280,6 +286,7 @@ def set_default_headers(self, headers: Dict[str, str]) -> None: | |
| else: | ||
| raise TypeError("headers parameter must be a dictionary") | ||
|
|
||
| # pylint: disable=too-many-branches | ||
| def send(self, request: requests.Request, **kwargs) -> DetailedResponse: | ||
| """Send a request and wrap the response in a DetailedResponse or APIException. | ||
|
|
||
|
|
@@ -294,7 +301,9 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse: | |
| """ | ||
| # Use a one minute timeout when our caller doesn't give a timeout. | ||
| # http://docs.python-requests.org/en/master/user/quickstart/#timeouts | ||
| kwargs = dict({"timeout": 60}, **kwargs) | ||
| # We also disable the default redirection, to have more granular control | ||
| # over the headers sent in each request. | ||
| kwargs = dict({'timeout': 60, 'allow_redirects': False}, **kwargs) | ||
| kwargs = dict(kwargs, **self.http_config) | ||
|
|
||
| if self.disable_ssl_verification: | ||
|
|
@@ -314,6 +323,38 @@ def send(self, request: requests.Request, **kwargs) -> DetailedResponse: | |
| try: | ||
| response = self.http_client.request(**request, cookies=self.jar, **kwargs) | ||
|
|
||
| # Handle HTTP redirects. | ||
| redirects_count = 0 | ||
| # Check if the response is a redirect to another host. | ||
| while response.is_redirect and response.next is not None and redirects_count < MAX_REDIRECTS: | ||
| redirects_count += 1 | ||
|
|
||
| # urllib3 has already prepared a request that can almost be used as-is. | ||
| next_request = response.next | ||
|
|
||
| # If both the original and the redirected URL are under the `.cloud.ibm.com` domain, | ||
| # copy the safe headers that are used for authentication purposes, | ||
| if self.service_url.endswith('.cloud.ibm.com') and urlparse(next_request.url).netloc.endswith( | ||
| '.cloud.ibm.com' | ||
| ): | ||
| original_headers = request.get('headers') | ||
| for header, value in original_headers.items(): | ||
| if header.lower() in SAFE_HEADERS: | ||
| next_request.headers[header] = value | ||
| # otherwise remove them manually, because `urllib3` doesn't strip all of them. | ||
| else: | ||
| for header in SAFE_HEADERS: | ||
| next_request.headers.pop(header, None) | ||
|
|
||
| response = self.http_client.send(next_request, **kwargs) | ||
|
|
||
| # If we reached the max number of redirects and the last response is still a redirect | ||
| # stop processing the response and return an error to the user. | ||
| if redirects_count == MAX_REDIRECTS and response.is_redirect: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not do the limit check immediately after incrementing
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense! I need to excuse myself so: initially I implemented the error handling in a different way then I forgot to refactor that check.. 😄 |
||
| raise MaxRetryError( | ||
| None, response.url, reason=f'reached the maximum number of redirects: {MAX_REDIRECTS}' | ||
| ) | ||
|
|
||
| # Process a "success" response. | ||
| if 200 <= response.status_code <= 299: | ||
| if response.status_code == 204 or request['method'] == 'HEAD': | ||
|
|
@@ -362,7 +403,7 @@ def prepare_request( | |
| params: Optional[dict] = None, | ||
| data: Optional[Union[str, dict]] = None, | ||
| files: Optional[Union[Dict[str, Tuple[str]], List[Tuple[str, Tuple[str, ...]]]]] = None, | ||
| **kwargs | ||
| **kwargs, | ||
| ) -> dict: | ||
| """Build a dict that represents an HTTP service request. | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| # coding=utf-8 | ||
| # pylint: disable=missing-docstring,protected-access,too-few-public-methods | ||
| # pylint: disable=missing-docstring,protected-access,too-few-public-methods,too-many-lines | ||
| import gzip | ||
| import json | ||
| import os | ||
|
|
@@ -788,6 +788,166 @@ def test_retry_config_external(): | |
| assert retry_err.value.reason == error | ||
|
|
||
|
|
||
| @responses.activate | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Need to add some additional tests:
Also, because you are completely overriding the default "follow redirects" behavior, I think you'll need to add some tests for redirect scenarios involving POST and a representative sample of redirect status codes. There are some subtle gotcha's when implementing redirects and we need to make sure that we are providing equivalent function as compared to the default "follow redirects" behavior.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tests are already there those scenarios. Copied from the tests def test_redirect_ibm_to_ibm_success():
url_from = 'http://region1.cloud.ibm.com/'
url_to = 'http://region2.cloud.ibm.com/'
...and def test_redirect_not_ibm_to_not_ibm_fail():
url_from = 'http://region1.notcloud.ibm.com/'
url_to = 'http://region2.notcloud.ibm.com/'
...Are you talking about something else?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's not the case. Still the # If redirects aren't being followed, store the response on the Request for Response.next().
if not allow_redirects:
try:
r._next = next(
self.resolve_redirects(r, request, yield_requests=True, **kwargs)
)
except StopIteration:
passLet's touch base later, to make sure we are on the same page! :)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Redirecting to the same host SHOULD allow the safe headers to be included in the redirected request, regardless of whether the host is inside or outside the cloud.ibm.com domain.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, let's discuss tomorrow.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ok it's my bad, I used the domain and host words interchangeably(which is incorrect, I know :))... |
||
| def test_redirect_ibm_to_ibm_success(): | ||
| url_from = 'http://region1.cloud.ibm.com/' | ||
| url_to = 'http://region2.cloud.ibm.com/' | ||
|
|
||
| safe_headers = { | ||
| 'Authorization': 'foo', | ||
| 'WWW-Authenticate': 'bar', | ||
| 'Cookie': 'baz', | ||
| 'Cookie2': 'baz2', | ||
| } | ||
|
|
||
| responses.add( | ||
| responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect' | ||
| ) | ||
| responses.add(responses.GET, url_to, status=200, body='successfully redirected') | ||
|
|
||
| service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator()) | ||
|
|
||
| prepped = service.prepare_request('GET', '', headers=safe_headers) | ||
| response = service.send(prepped) | ||
| result = response.get_result() | ||
|
|
||
| assert result.status_code == 200 | ||
| assert result.url == url_to | ||
| assert result.text == 'successfully redirected' | ||
|
|
||
| # Make sure the headers are included in the 2nd, redirected request. | ||
| redirected_headers = responses.calls[1].request.headers | ||
| for key in safe_headers: | ||
| assert key in redirected_headers | ||
|
|
||
|
|
||
| @responses.activate | ||
| def test_redirect_not_ibm_to_ibm_fail(): | ||
| url_from = 'http://region1.notcloud.ibm.com/' | ||
| url_to = 'http://region2.cloud.ibm.com/' | ||
|
|
||
| safe_headers = { | ||
| 'Authorization': 'foo', | ||
| 'WWW-Authenticate': 'bar', | ||
| 'Cookie': 'baz', | ||
| 'Cookie2': 'baz2', | ||
| } | ||
|
|
||
| responses.add( | ||
| responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect' | ||
| ) | ||
| responses.add(responses.GET, url_to, status=200, body='successfully redirected') | ||
|
|
||
| service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator()) | ||
|
|
||
| prepped = service.prepare_request('GET', '', headers=safe_headers) | ||
| response = service.send(prepped) | ||
| result = response.get_result() | ||
|
|
||
| assert result.status_code == 200 | ||
| assert result.url == url_to | ||
| assert result.text == 'successfully redirected' | ||
|
|
||
| # Make sure the headers have been excluded from the 2nd, redirected request. | ||
| redirected_headers = responses.calls[1].request.headers | ||
| for key in safe_headers: | ||
| assert key not in redirected_headers | ||
|
|
||
|
|
||
| @responses.activate | ||
| def test_redirect_ibm_to_not_ibm_fail(): | ||
| url_from = 'http://region1.cloud.ibm.com/' | ||
| url_to = 'http://region2.notcloud.ibm.com/' | ||
|
|
||
| safe_headers = { | ||
| 'Authorization': 'foo', | ||
| 'WWW-Authenticate': 'bar', | ||
| 'Cookie': 'baz', | ||
| 'Cookie2': 'baz2', | ||
| } | ||
|
|
||
| responses.add( | ||
| responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect' | ||
| ) | ||
| responses.add(responses.GET, url_to, status=200, body='successfully redirected') | ||
|
|
||
| service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator()) | ||
|
|
||
| prepped = service.prepare_request('GET', '', headers=safe_headers) | ||
| response = service.send(prepped) | ||
| result = response.get_result() | ||
|
|
||
| assert result.status_code == 200 | ||
| assert result.url == url_to | ||
| assert result.text == 'successfully redirected' | ||
|
|
||
| # Make sure the headers have been excluded from the 2nd, redirected request. | ||
| redirected_headers = responses.calls[1].request.headers | ||
| for key in safe_headers: | ||
| assert key not in redirected_headers | ||
|
|
||
|
|
||
| @responses.activate | ||
| def test_redirect_not_ibm_to_not_ibm_fail(): | ||
| url_from = 'http://region1.notcloud.ibm.com/' | ||
| url_to = 'http://region2.notcloud.ibm.com/' | ||
|
|
||
| safe_headers = { | ||
| 'Authorization': 'foo', | ||
| 'WWW-Authenticate': 'bar', | ||
| 'Cookie': 'baz', | ||
| 'Cookie2': 'baz2', | ||
| } | ||
|
|
||
| responses.add( | ||
| responses.GET, url_from, status=302, adding_headers={'Location': url_to}, body='just about to redirect' | ||
| ) | ||
| responses.add(responses.GET, url_to, status=200, body='successfully redirected') | ||
|
|
||
| service = BaseService(service_url=url_from, authenticator=NoAuthAuthenticator()) | ||
|
|
||
| prepped = service.prepare_request('GET', '', headers=safe_headers) | ||
| response = service.send(prepped) | ||
| result = response.get_result() | ||
|
|
||
| assert result.status_code == 200 | ||
| assert result.url == url_to | ||
| assert result.text == 'successfully redirected' | ||
|
|
||
| # Make sure the headers have been excluded from the 2nd, redirected request. | ||
| redirected_headers = responses.calls[1].request.headers | ||
| for key in safe_headers: | ||
| assert key not in redirected_headers | ||
|
|
||
|
|
||
| @responses.activate | ||
| def test_redirect_ibm_to_ibm_exhausted_fail(): | ||
| redirects = 11 | ||
| safe_headers = { | ||
| 'Authorization': 'foo', | ||
| 'WWW-Authenticate': 'bar', | ||
| 'Cookie': 'baz', | ||
| 'Cookie2': 'baz2', | ||
| } | ||
|
|
||
| for i in range(redirects): | ||
| responses.add( | ||
| responses.GET, | ||
| f'http://region{i+1}.cloud.ibm.com/', | ||
| status=302, | ||
| adding_headers={'Location': f'http://region{i+2}.cloud.ibm.com/'}, | ||
| body='just about to redirect', | ||
| ) | ||
|
|
||
| service = BaseService(service_url='http://region1.cloud.ibm.com/', authenticator=NoAuthAuthenticator()) | ||
|
|
||
| with pytest.raises(MaxRetryError) as ex: | ||
| prepped = service.prepare_request('GET', '', headers=safe_headers) | ||
| service.send(prepped) | ||
|
|
||
| assert ex.value.reason == 'reached the maximum number of redirects: 10' | ||
|
|
||
|
|
||
| @responses.activate | ||
| def test_user_agent_header(): | ||
| service = AnyServiceV1('2018-11-20', authenticator=NoAuthAuthenticator()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
self.service_url.endswith('.cloud.ibm.com')will most definitely not work in the general case. To see why, consider a service url likehttps://myhost.ibm.com/apiorhttps://myhost.ibm.com:8080. I think you'll need to parseself.service_urland extract the hostname from it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that's a good point and I already parse the url in the second part of the expression... :)