From 5e24c605d2926e23273089058741fe69e1b3030a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damir=20Jeli=C4=87?= Date: Wed, 19 Jun 2019 14:45:20 +0200 Subject: [PATCH] _compat: Change the to_native_str into a to_unicode_str function. The to_native_str function was supposed to produce Unicode decoded native strings for python2 and python3. Upon further consideration this doesn't make much sense since under python2 it would need to decode the bytes into a Unicode string and turn it back into a python2 str. The ability to use the replacement character requires us to use a Unicode string under python2 as well. --- python/olm/_compat.py | 12 ++++++------ python/olm/group_session.py | 4 ++-- python/olm/pk.py | 4 ++-- python/olm/session.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python/olm/_compat.py b/python/olm/_compat.py index 762371b..e1c0d63 100644 --- a/python/olm/_compat.py +++ b/python/olm/_compat.py @@ -48,8 +48,11 @@ def to_bytes(string): raise TypeError("Invalid type {}".format(type(string))) -def to_native_str(byte_string, errors="replace"): - """Turn a byte string into a native string decoding it as UTF-8. +def to_unicode_str(byte_string, errors="replace"): + """Turn a byte string into a unicode string. + + Should be used everywhere where the input byte string might not be trusted + and may contain invalid unicode values. Args: byte_string (bytes): The bytestring that will be converted to a native @@ -63,7 +66,4 @@ def to_native_str(byte_string, errors="replace"): Returns the decoded native string. """ - try: - return native_str(byte_string, errors=errors) - except TypeError: - return bytes(byte_string).decode(errors=errors) + return byte_string.decode(errors=errors) diff --git a/python/olm/group_session.py b/python/olm/group_session.py index 88f87f0..313e5fa 100644 --- a/python/olm/group_session.py +++ b/python/olm/group_session.py @@ -33,7 +33,7 @@ from future.utils import bytes_to_native_str # pylint: disable=no-name-in-module from _libolm import ffi, lib # type: ignore -from ._compat import URANDOM, to_bytearray, to_bytes, to_native_str +from ._compat import URANDOM, to_bytearray, to_bytes, to_unicode_str from ._finalize import track_for_finalization @@ -230,7 +230,7 @@ class InboundGroupSession(object): self._check_error(plaintext_length) - plaintext = to_native_str( + plaintext = to_unicode_str( ffi.unpack(plaintext_buffer, plaintext_length), errors=errors ) diff --git a/python/olm/pk.py b/python/olm/pk.py index 158c78d..18608b7 100644 --- a/python/olm/pk.py +++ b/python/olm/pk.py @@ -40,7 +40,7 @@ from future.utils import bytes_to_native_str from _libolm import ffi, lib # type: ignore -from ._compat import URANDOM, to_bytearray, to_native_str +from ._compat import URANDOM, to_bytearray, to_unicode_str from ._finalize import track_for_finalization @@ -361,7 +361,7 @@ class PkDecryption(object): # clear out copies of the plaintext lib.memset(plaintext_buffer, 0, max_plaintext_length) - return to_native_str(plaintext, errors=errors) + return to_unicode_str(plaintext, errors=errors) def _clear_pk_signing(pk_struct): diff --git a/python/olm/session.py b/python/olm/session.py index cf66582..f81b727 100644 --- a/python/olm/session.py +++ b/python/olm/session.py @@ -40,7 +40,7 @@ from future.utils import bytes_to_native_str # pylint: disable=no-name-in-module from _libolm import ffi, lib # type: ignore -from ._compat import URANDOM, to_bytearray, to_bytes, to_native_str +from ._compat import URANDOM, to_bytearray, to_bytes, to_unicode_str from ._finalize import track_for_finalization # This is imported only for type checking purposes @@ -318,7 +318,7 @@ class Session(object): plaintext_buffer, max_plaintext_length ) self._check_error(plaintext_length) - plaintext = to_native_str( + plaintext = to_unicode_str( ffi.unpack(plaintext_buffer, plaintext_length), errors=errors )