webapp/django/utils/encoding.py - gerrit-attic - Git at Google

 import types
 import urllib
 import datetime
 from django.utils.functional import Promise

 class DjangoUnicodeDecodeError(UnicodeDecodeError):
     """
     A UnicodeDecodeError that also remembers the object that failed to decode.

     The offending object is kept on ``obj`` and is appended, together with
     its type, to the standard UnicodeDecodeError message.
     """
     def __init__(self, obj, *args):
         # Everything after 'obj' is handed to UnicodeDecodeError unchanged.
         # The base class is called explicitly (no super()) as in the rest of
         # this module.
         UnicodeDecodeError.__init__(self, *args)
         self.obj = obj

     def __str__(self):
         base_message = UnicodeDecodeError.__str__(self)
         culprit = self.obj
         return '%s. You passed in %r (%s)' % (base_message, culprit, type(culprit))

 class StrAndUnicode(object):
     """
     Mix-in that derives __str__ from __unicode__.

     Subclasses provide __unicode__; __str__ then returns that text encoded
     as a UTF-8 bytestring.
     """
     def __str__(self):
         text = self.__unicode__()
         return text.encode('utf-8')

 def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Converts 's' to a unicode object, decoding bytestrings with the
     'encoding' codec.

     Promise instances are handed back untouched; everything else is
     delegated to force_unicode().

     If strings_only is True, don't convert (some) non-string-like objects.
     """
     if not isinstance(s, Promise):
         return force_unicode(s, encoding, strings_only, errors)
     # The input is the result of a gettext_lazy() call: keep it lazy.
     return s

 def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Similar to smart_unicode, except that lazy instances are resolved to
     strings, rather than kept as lazy objects.

     If strings_only is True, don't convert (some) non-string-like objects.
     """
     if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
         return s
     try:
         if not isinstance(s, basestring,):
             if hasattr(s, '__unicode__'):
                 s = unicode(s)
             else:
                 s = unicode(str(s), encoding, errors)
         elif not isinstance(s, unicode):
             # Note: We use .decode() here, instead of unicode(s, encoding,
             # errors), so that if s is a SafeString, it ends up being a
             # SafeUnicode at the end.
             s = s.decode(encoding, errors)
     except UnicodeDecodeError, e:
         raise DjangoUnicodeDecodeError(s, *e.args)
     return s

 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Returns a bytestring version of 's', encoded as specified in 'encoding'.

     If strings_only is True, don't convert (some) non-string-like objects.
     """
     if strings_only and isinstance(s, (types.NoneType, int)):
         return s
     if isinstance(s, Promise):
         return unicode(s).encode(encoding, errors)
     elif not isinstance(s, basestring):
         try:
             return str(s)
         except UnicodeEncodeError:
             return unicode(s).encode(encoding, errors)
     elif isinstance(s, unicode):
         return s.encode(encoding, errors)
     elif s and encoding != 'utf-8':
         return s.decode('utf-8', errors).encode(encoding, errors)
     else:
         return s

 def iri_to_uri(iri):
     """
     Convert an Internationalized Resource Identifier (IRI) portion to a URI
     portion that is suitable for inclusion in a URL.

     This is the algorithm from section 3.1 of RFC 3987.  However, since we are
     assuming input is either UTF-8 or unicode already, we can simplify things a
     little from the full method.

     Returns an ASCII string containing the encoded result, or None when
     'iri' is None.
     """
     # The list of safe characters here is constructed from the printable ASCII
     # characters that are not explicitly excluded by the list at the end of
     # section 3.1 of RFC 3987. "@", "'" and "~" are not in that exclusion
     # list, so they must be left unescaped as well.
     if iri is None:
         return iri
     return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
	import types
	import urllib
	import datetime
	from django.utils.functional import Promise

	class DjangoUnicodeDecodeError(UnicodeDecodeError):
	    """
	    A UnicodeDecodeError that also records the object that failed to decode.

	    The offending object is stored on ``obj`` and is appended, together
	    with its type, to the standard UnicodeDecodeError message.
	    """
	    def __init__(self, obj, *args):
	        self.obj = obj
	        # The remaining arguments go to UnicodeDecodeError unchanged.
	        UnicodeDecodeError.__init__(self, *args)

	    def __str__(self):
	        original = UnicodeDecodeError.__str__(self)
	        return '%s. You passed in %r (%s)' % (original, self.obj,
	                type(self.obj))

	class StrAndUnicode(object):
	    """
	    A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.

	    Useful as a mix-in: subclasses only need to define __unicode__.
	    """
	    def __str__(self):
	        return self.__unicode__().encode('utf-8')

	def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
	    """
	    Returns a unicode object representing 's'. Treats bytestrings using the
	    'encoding' codec.

	    Promise instances are returned as-is; all other input is passed to
	    force_unicode().

	    If strings_only is True, don't convert (some) non-string-like objects.
	    """
	    if isinstance(s, Promise):
	        # The input is the result of a gettext_lazy() call.
	        return s
	    return force_unicode(s, encoding, strings_only, errors)

	def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
	"""
	Similar to smart_unicode, except that lazy instances are resolved to
	strings, rather than kept as lazy objects.

	If strings_only is True, don't convert (some) non-string-like objects.
	"""
	if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
	return s
	try:
	if not isinstance(s, basestring,):
	if hasattr(s, '__unicode__'):
	s = unicode(s)
	else:
	s = unicode(str(s), encoding, errors)
	elif not isinstance(s, unicode):
	# Note: We use .decode() here, instead of unicode(s, encoding,
	# errors), so that if s is a SafeString, it ends up being a
	# SafeUnicode at the end.
	s = s.decode(encoding, errors)
	except UnicodeDecodeError, e:
	raise DjangoUnicodeDecodeError(s, *e.args)
	return s

	def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
	"""
	Returns a bytestring version of 's', encoded as specified in 'encoding'.

	If strings_only is True, don't convert (some) non-string-like objects.
	"""
	if strings_only and isinstance(s, (types.NoneType, int)):
	return s
	if isinstance(s, Promise):
	return unicode(s).encode(encoding, errors)
	elif not isinstance(s, basestring):
	try:
	return str(s)
	except UnicodeEncodeError:
	return unicode(s).encode(encoding, errors)
	elif isinstance(s, unicode):
	return s.encode(encoding, errors)
	elif s and encoding != 'utf-8':
	return s.decode('utf-8', errors).encode(encoding, errors)
	else:
	return s

	def iri_to_uri(iri):
	    """
	    Convert an Internationalized Resource Identifier (IRI) portion to a URI
	    portion that is suitable for inclusion in a URL.

	    This is the algorithm from section 3.1 of RFC 3987. However, since we are
	    assuming input is either UTF-8 or unicode already, we can simplify things a
	    little from the full method.

	    Returns an ASCII string containing the encoded result, or None when
	    'iri' is None.
	    """
	    # The list of safe characters here is constructed from the printable ASCII
	    # characters that are not explicitly excluded by the list at the end of
	    # section 3.1 of RFC 3987. "@", "'" and "~" are not in that exclusion
	    # list, so they must be left unescaped as well.
	    if iri is None:
	        return iri
	    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")