Skip to content

Commit

Permalink
Fix unicode leaks
Browse files Browse the repository at this point in the history
Wrap stdlib quote, unquote, urlencode in versions that protect the
internal usage of unicode. All data going out is encoded as UTF-8, and
the results are read back in and decoded from UTF-8.
  • Loading branch information
idan committed May 1, 2012
1 parent dc59824 commit 0e92aeb
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 50 deletions.
81 changes: 36 additions & 45 deletions oauthlib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""

import re
import urllib
import urlparse


Expand All @@ -24,55 +25,45 @@


def quote(s, safe=u'/'):
"""A unicode-safe version of urllib.quote"""
# NOTE(review): this span is a flattened diff view (no +/- markers and no
# indentation), so two bodies appear back to back. The lines from
# "# fastpath" through the _hextochr table are presumably the removed
# hand-rolled implementation -- confirm against the commit.
# fastpath
if not s:
if s is None:
raise TypeError('None object cannot be quoted')
return s
cachekey = (safe, always_safe)
try:
(quoter, safe) = _safe_quoters[cachekey]
except KeyError:
safe_map = _safe_map.copy()
safe_map.update([(c, c) for c in safe])
quoter = safe_map.__getitem__
safe = always_safe + safe
_safe_quoters[cachekey] = (quoter, safe)
if not s.rstrip(safe):
return s
return u''.join(map(quoter, s))

_hexdig = u'0123456789ABCDEFabcdef'
_hextochr = dict((a + b, unichr(int(a + b, 16)))
for a in _hexdig for b in _hexdig)
# NOTE(review): the three lines below look like the replacement body:
# encode s to UTF-8, let urllib.quote percent-encode the encoded string,
# then decode the quoted result back from UTF-8.
encoded = s.encode("utf-8")
quoted = urllib.quote(encoded, safe)
return quoted.decode("utf-8")


def unquote(s):
"""A unicode-safe version of urllib.unquote"""
# NOTE(review): flattened diff view -- the lines from the split on '%'
# through the first "return s" are presumably the removed hand-rolled
# implementation (it looks up two-character hex pairs in _hextochr and
# keeps the literal '%' when the pair is not found); confirm against the
# commit.
res = s.split('%')
# fastpath
if len(res) == 1:
return s
s = res[0]
for item in res[1:]:
try:
s += _hextochr[item[:2]] + item[2:]
except KeyError:
s += u'%' + item
except UnicodeDecodeError:
s += unichr(int(item[:2], 16)) + item[2:]
return s
# NOTE(review): the three lines below look like the replacement body:
# encode s to UTF-8, let urllib.unquote decode the percent-escapes, then
# decode the result back from UTF-8.
encoded = s.encode("utf-8")
unquoted = urllib.unquote(encoded)
return unquoted.decode("utf-8")


def urlencode(params):
    """A unicode-safe version of urllib.urlencode

    The parameters are first passed through encode_params_utf8, then handed
    to urllib.urlencode; the resulting query string is decoded from UTF-8
    before it is returned.
    """
    query = urllib.urlencode(encode_params_utf8(params))
    return query.decode("utf-8")


def encode_params_utf8(params):
    """Return the 2-element tuples in params with every unicode key or value
    encoded to a UTF-8 bytestring.

    Items that are not unicode objects are passed through unchanged. The
    result is always a new list, in the same order as the input.
    """
    def to_utf8(item):
        # Only unicode objects are encoded; anything else is left alone.
        return item.encode('utf-8') if isinstance(item, unicode) else item

    return [(to_utf8(key), to_utf8(value)) for key, value in params]

# NOTE(review): flattened diff view -- two definitions are interleaved here.
# unicode_params (accumulating into `clean`) is presumably the removed
# version and decode_params_utf8 (accumulating into `decoded`) its
# replacement; the single "for k, v in params:" line appears to be shared
# diff context. Confirm against the commit.
def unicode_params(params):
"""Ensures that all parameters in a list of 2-element tuples are unicode"""
clean = []

def decode_params_utf8(params):
"""Ensures that all parameters in a list of 2-element tuples are decoded to
unicode using UTF-8.
"""
decoded = []
for k, v in params:
# Presumably the removed body: str items are converted with
# unicode(x, 'utf-8'); other items are kept as they are.
clean.append((
unicode(k, 'utf-8') if isinstance(k, str) else k,
unicode(v, 'utf-8') if isinstance(v, str) else v))
return clean
# Presumably the replacement body: str items are converted with
# x.decode('utf-8'); other items are kept as they are.
decoded.append((
k.decode('utf-8') if isinstance(k, str) else k,
v.decode('utf-8') if isinstance(v, str) else v))
return decoded


urlencoded = set(always_safe) | set(u'=&;%+~')
Expand Down Expand Up @@ -104,7 +95,7 @@ def urldecode(query):
params = urlparse.parse_qsl(query, keep_blank_values=True)

# unicode all the things
return unicode_params(params)
return decode_params_utf8(params)


def extract_params(raw):
Expand All @@ -129,7 +120,7 @@ def extract_params(raw):
params = None
else:
params = list(raw.items() if isinstance(raw, dict) else raw)
params = unicode_params(params)
params = decode_params_utf8(params)
else:
params = None

Expand Down
3 changes: 1 addition & 2 deletions oauthlib/oauth1/rfc5849/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@

import logging
import urlparse
from urllib import urlencode

from oauthlib.common import Request
from oauthlib.common import Request, urlencode
from . import parameters, signature, utils

SIGNATURE_HMAC = u"HMAC-SHA1"
Expand Down
3 changes: 1 addition & 2 deletions oauthlib/oauth1/rfc5849/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
"""

from urlparse import urlparse, urlunparse
from urllib import urlencode
from . import utils
from oauthlib.common import extract_params
from oauthlib.common import extract_params, urlencode


# TODO: do we need filter_params now that oauth_params are handled by Request?
Expand Down
2 changes: 1 addition & 1 deletion tests/oauth1/rfc5849/test_parameters.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from urllib import urlencode
from oauthlib.common import urlencode
from oauthlib.oauth1.rfc5849.parameters import (_append_params, prepare_headers,
prepare_form_encoded_body, prepare_request_uri_query)
from ...unittest import TestCase
Expand Down

0 comments on commit 0e92aeb

Please sign in to comment.