From 0e92aeb000115674144c7af82d43329418205e40 Mon Sep 17 00:00:00 2001 From: Idan Gazit Date: Tue, 1 May 2012 22:51:10 +0300 Subject: [PATCH] Fix unicode leaks Wrap stdlib quote, unquote, urlencode in versions that protect the internal usage of unicode. All data going out is encoded as UTF-8, and the results are read back in and decoded from UTF-8. --- oauthlib/common.py | 81 +++++++++++-------------- oauthlib/oauth1/rfc5849/__init__.py | 3 +- oauthlib/oauth1/rfc5849/parameters.py | 3 +- tests/oauth1/rfc5849/test_parameters.py | 2 +- 4 files changed, 39 insertions(+), 50 deletions(-) diff --git a/oauthlib/common.py b/oauthlib/common.py index fa8ffa02..86909ed7 100644 --- a/oauthlib/common.py +++ b/oauthlib/common.py @@ -10,6 +10,7 @@ """ import re +import urllib import urlparse @@ -24,55 +25,45 @@ def quote(s, safe=u'/'): - """A unicode-safe version of urllib.quote""" - # fastpath - if not s: - if s is None: - raise TypeError('None object cannot be quoted') - return s - cachekey = (safe, always_safe) - try: - (quoter, safe) = _safe_quoters[cachekey] - except KeyError: - safe_map = _safe_map.copy() - safe_map.update([(c, c) for c in safe]) - quoter = safe_map.__getitem__ - safe = always_safe + safe - _safe_quoters[cachekey] = (quoter, safe) - if not s.rstrip(safe): - return s - return u''.join(map(quoter, s)) - -_hexdig = u'0123456789ABCDEFabcdef' -_hextochr = dict((a + b, unichr(int(a + b, 16))) - for a in _hexdig for b in _hexdig) + encoded = s.encode("utf-8") + quoted = urllib.quote(encoded, safe) + return quoted.decode("utf-8") def unquote(s): - """A unicode-safe version of urllib.unquote""" - res = s.split('%') - # fastpath - if len(res) == 1: - return s - s = res[0] - for item in res[1:]: - try: - s += _hextochr[item[:2]] + item[2:] - except KeyError: - s += u'%' + item - except UnicodeDecodeError: - s += unichr(int(item[:2], 16)) + item[2:] - return s + encoded = s.encode("utf-8") + unquoted = urllib.unquote(encoded) + return unquoted.decode("utf-8") + + +def urlencode(params): + utf8_params = encode_params_utf8(params) + urlencoded = urllib.urlencode(utf8_params) + return urlencoded.decode("utf-8") + +def encode_params_utf8(params): + """Ensures that all parameters in a list of 2-element tuples are encoded to + bytestrings using UTF-8 + """ + encoded = [] + for k, v in params: + encoded.append(( + k.encode('utf-8') if isinstance(k, unicode) else k, + v.encode('utf-8') if isinstance(v, unicode) else v)) + return encoded -def unicode_params(params): - """Ensures that all parameters in a list of 2-element tuples are unicode""" - clean = [] + +def decode_params_utf8(params): + """Ensures that all parameters in a list of 2-element tuples are decoded to + unicode using UTF-8. + """ + decoded = [] for k, v in params: - clean.append(( - unicode(k, 'utf-8') if isinstance(k, str) else k, - unicode(v, 'utf-8') if isinstance(v, str) else v)) - return clean + decoded.append(( + k.decode('utf-8') if isinstance(k, str) else k, + v.decode('utf-8') if isinstance(v, str) else v)) + return decoded urlencoded = set(always_safe) | set(u'=&;%+~') @@ -104,7 +95,7 @@ def urldecode(query): params = urlparse.parse_qsl(query, keep_blank_values=True) # unicode all the things - return unicode_params(params) + return decode_params_utf8(params) def extract_params(raw): @@ -129,7 +120,7 @@ def extract_params(raw): params = None else: params = list(raw.items() if isinstance(raw, dict) else raw) - params = unicode_params(params) + params = decode_params_utf8(params) else: params = None diff --git a/oauthlib/oauth1/rfc5849/__init__.py b/oauthlib/oauth1/rfc5849/__init__.py index a10af1e4..327bae0b 100644 --- a/oauthlib/oauth1/rfc5849/__init__.py +++ b/oauthlib/oauth1/rfc5849/__init__.py @@ -11,9 +11,8 @@ import logging import urlparse -from urllib import urlencode -from oauthlib.common import Request +from oauthlib.common import Request, urlencode from . import parameters, signature, utils SIGNATURE_HMAC = u"HMAC-SHA1" diff --git a/oauthlib/oauth1/rfc5849/parameters.py b/oauthlib/oauth1/rfc5849/parameters.py index 3bf1a304..dee23a43 100644 --- a/oauthlib/oauth1/rfc5849/parameters.py +++ b/oauthlib/oauth1/rfc5849/parameters.py @@ -11,9 +11,8 @@ """ from urlparse import urlparse, urlunparse -from urllib import urlencode from . import utils -from oauthlib.common import extract_params +from oauthlib.common import extract_params, urlencode # TODO: do we need filter_params now that oauth_params are handled by Request? diff --git a/tests/oauth1/rfc5849/test_parameters.py b/tests/oauth1/rfc5849/test_parameters.py index e5ddf0f7..36b6fff9 100644 --- a/tests/oauth1/rfc5849/test_parameters.py +++ b/tests/oauth1/rfc5849/test_parameters.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -from urllib import urlencode +from oauthlib.common import urlencode from oauthlib.oauth1.rfc5849.parameters import (_append_params, prepare_headers, prepare_form_encoded_body, prepare_request_uri_query) from ...unittest import TestCase