From 08b5113d80a5f9c6848de6a2cb3c917882962feb Mon Sep 17 00:00:00 2001 From: Isaac Jurado Date: Tue, 17 Mar 2015 18:19:43 +0100 Subject: [PATCH 1/5] Resolve locale data aliases eagerly Get rid of the LocaleDataDict because it slows down key lookups compared to a native dict. To do that, we need to resolve the aliases when loading the locale data from disk. Since this data is later cached, we incur a slight overhead for the initial load. However, this is well compensated for during the rest of the process execution. --- babel/core.py | 2 +- babel/localedata.py | 58 +++++++++++----------------------------- tests/test_localedata.py | 21 --------------- 3 files changed, 17 insertions(+), 64 deletions(-) diff --git a/babel/core.py b/babel/core.py index c08cfa737..c68286ecf 100644 --- a/babel/core.py +++ b/babel/core.py @@ -345,7 +345,7 @@ def __str__(self): @property def _data(self): if self.__data is None: - self.__data = localedata.LocaleDataDict(localedata.load(str(self))) + self.__data = localedata.load(str(self)) return self.__data def get_display_name(self, locale=None): diff --git a/babel/localedata.py b/babel/localedata.py index 437f49fae..3b3a25095 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -14,7 +14,7 @@ import os import threading -from collections import MutableMapping +from collections import Mapping from babel._compat import pickle @@ -100,6 +100,7 @@ def load(name, merge_inherited=True): _cache[name] = data finally: fileobj.close() + resolve_aliases(data, data) return data finally: _cache_lock.release() @@ -138,6 +139,20 @@ def merge(dict1, dict2): dict1[key] = val1 +def resolve_aliases(dic, base): + """Convert all aliases to values""" + for k, v in dic.items(): + if isinstance(v, Alias): + dic[k] = v.resolve(base) + elif isinstance(v, tuple): + alias, others = v + data = alias.resolve(base).copy() + merge(data, others) + dic[k] = data + elif isinstance(v, Mapping): + resolve_aliases(v, base) + + class Alias(object): """Representation of
an alias in the locale data. @@ -169,44 +184,3 @@ def resolve(self, data): alias, others = data data = alias.resolve(base) return data - - -class LocaleDataDict(MutableMapping): - """Dictionary wrapper that automatically resolves aliases to the actual - values. - """ - - def __init__(self, data, base=None): - self._data = data - if base is None: - base = data - self.base = base - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - - def __getitem__(self, key): - orig = val = self._data[key] - if isinstance(val, Alias): # resolve an alias - val = val.resolve(self.base) - if isinstance(val, tuple): # Merge a partial dict with an alias - alias, others = val - val = alias.resolve(self.base).copy() - merge(val, others) - if type(val) is dict: # Return a nested alias-resolving dict - val = LocaleDataDict(val, base=self.base) - if val is not orig: - self._data[key] = val - return val - - def __setitem__(self, key, value): - self._data[key] = value - - def __delitem__(self, key): - del self._data[key] - - def copy(self): - return LocaleDataDict(self._data.copy(), base=self.base) diff --git a/tests/test_localedata.py b/tests/test_localedata.py index 17ecd6392..397da0569 100644 --- a/tests/test_localedata.py +++ b/tests/test_localedata.py @@ -41,27 +41,6 @@ def test_merge_nested_dict_no_overlap(self): 'y': {'a': 11, 'b': 12} }, d1) - def test_merge_with_alias_and_resolve(self): - alias = localedata.Alias('x') - d1 = { - 'x': {'a': 1, 'b': 2, 'c': 3}, - 'y': alias - } - d2 = { - 'x': {'a': 1, 'b': 12, 'd': 14}, - 'y': {'b': 22, 'e': 25} - } - localedata.merge(d1, d2) - self.assertEqual({ - 'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, - 'y': (alias, {'b': 22, 'e': 25}) - }, d1) - d = localedata.LocaleDataDict(d1) - self.assertEqual({ - 'x': {'a': 1, 'b': 12, 'c': 3, 'd': 14}, - 'y': {'a': 1, 'b': 22, 'c': 3, 'd': 14, 'e': 25} - }, dict(d.items())) - def test_load(): assert localedata.load('en_US')['languages']['sv'] == 'Swedish' From 
65575c520b69d9e3c1154aac9276a1c135a83b73 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 29 Dec 2015 19:46:26 +0200 Subject: [PATCH 2/5] Make Locale objects immutable Refs #31 (https://github.com/python-babel/babel/issues/31) --- babel/core.py | 13 +++++++++++++ babel/localedata.py | 1 - tests/test_core.py | 7 +++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/babel/core.py b/babel/core.py index c68286ecf..beee3302b 100644 --- a/babel/core.py +++ b/babel/core.py @@ -151,6 +151,9 @@ def __init__(self, language, territory=None, script=None, variant=None): if not localedata.exists(identifier): raise UnknownLocaleError(identifier) + self.__immutable = True + + @classmethod def default(cls, category=None, aliases=LOCALE_ALIASES): """Return the system default locale for the specified category. @@ -342,6 +345,16 @@ def __str__(self): return get_locale_identifier((self.language, self.territory, self.script, self.variant)) + def __setattr__(self, key, value): + if key == "_Locale__data" or not getattr(self, "_Locale__immutable", False): + return super(Locale, self).__setattr__(key, value) + raise ValueError("%r is immutable." % self) + + def __delattr__(self, item): + if getattr(self, "_Locale__immutable", False): + raise ValueError("%r is immutable." % self) + super(Locale, self).__delattr__(item) + @property def _data(self): if self.__data is None: diff --git a/babel/localedata.py b/babel/localedata.py index 3b3a25095..9bcf7e26b 100644 --- a/babel/localedata.py +++ b/babel/localedata.py @@ -11,7 +11,6 @@ :copyright: (c) 2013 by the Babel Team. :license: BSD, see LICENSE for more details. 
""" - import os import threading from collections import Mapping diff --git a/tests/test_core.py b/tests/test_core.py index 54cf37dde..9453628d3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -71,6 +71,13 @@ def test_hash(): assert hash(locale_a) != hash(locale_c) +def test_locale_immutability(): + loc = Locale('en', 'US') + with pytest.raises(ValueError): + loc.language = 'xq' + assert loc.language == 'en' + + class TestLocaleClass: def test_attributes(self): locale = Locale('en', 'US') From 07b824f45ec56443bc3cae9e003f462937f3505c Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 29 Dec 2015 20:31:00 +0200 Subject: [PATCH 3/5] Cache Locale objects in `Locale._cache` Refs #31 (https://github.com/python-babel/babel/issues/31) --- babel/_compat.py | 14 ++++++++++++++ babel/_memoized.py | 24 ++++++++++++++++++++++++ babel/core.py | 29 +++++++++++++++++++++++++++-- tests/test_core.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 2 deletions(-) create mode 100644 babel/_memoized.py diff --git a/babel/_compat.py b/babel/_compat.py index 75abf9eb1..8c8f42a13 100644 --- a/babel/_compat.py +++ b/babel/_compat.py @@ -74,3 +74,17 @@ Decimal = _dec InvalidOperation = _invop ROUND_HALF_EVEN = _RHE + + +# From six 1.9.0. +# six is Copyright (c) 2010-2015 Benjamin Peterson. +# six is licensed under the MIT license. +def with_metaclass(meta, *bases): + """Create a base class with a metaclass.""" + # This requires a bit of explanation: the basic idea is to make a dummy + # metaclass for one level of class instantiation that replaces itself with + # the actual metaclass. 
+ class metaclass(meta): + def __new__(cls, name, this_bases, d): + return meta(name, bases, d) + return type.__new__(metaclass, 'temporary_class', (), {}) diff --git a/babel/_memoized.py b/babel/_memoized.py new file mode 100644 index 000000000..6cda6bb7f --- /dev/null +++ b/babel/_memoized.py @@ -0,0 +1,24 @@ +# TODO: This can't live in .util until the circular import of +# core -> util -> localtime -> win32 -> core is resolved. + + +class Memoized(type): + """ + Metaclass for memoization based on __init__ args/kwargs. + """ + + def __new__(mcs, name, bases, dict): + if "_cache" not in dict: + dict["_cache"] = {} + return type.__new__(mcs, name, bases, dict) + + def __memoized_init__(cls, *args, **kwargs): + if hasattr(cls, "_get_memo_key"): + key = cls._get_memo_key(args, kwargs) + else: + key = (args or None, frozenset(kwargs.items()) or None) + if key not in cls._cache: + cls._cache[key] = type.__call__(cls, *args, **kwargs) + return cls._cache[key] + + __call__ = __memoized_init__ # This aliasing makes tracebacks more understandable. diff --git a/babel/core.py b/babel/core.py index beee3302b..19c62c091 100644 --- a/babel/core.py +++ b/babel/core.py @@ -12,8 +12,9 @@ import os from babel import localedata -from babel._compat import pickle, string_types +from babel._compat import pickle, string_types, with_metaclass from babel.plural import PluralRule +from babel._memoized import Memoized __all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale', 'parse_locale'] @@ -89,7 +90,7 @@ def __init__(self, identifier): self.identifier = identifier -class Locale(object): +class Locale(with_metaclass(Memoized)): """Representation of a specific locale. >>> locale = Locale('en', 'US') @@ -121,6 +122,30 @@ class Locale(object): For more information see :rfc:`3066`. """ + #: The dictionary used by the locale cache metaclass. + _cache = {} + + @staticmethod + def _get_memo_key(args, kwargs): + # Getter for a cache key for the Memoized metaclass. 
+ # Since we know the argument names (language, territory, script, variant) + # for Locales, there's no need to use the inspect module or other heavy-duty + # machinery here. + # + # However, since this method is called fairly often, it's "unrolled" + # here and has a separate slow-path for the kwargs + args case. + nargs = len(args) + args = args + (None,) * (4 - nargs) + if kwargs: + get = kwargs.get + return ( + get('language', args[0]), + get('territory', args[1]), + get('script', args[2]), + get('variant', args[3]), + ) + return args + def __init__(self, language, territory=None, script=None, variant=None): """Initialize the locale object from the given identifier components. diff --git a/tests/test_core.py b/tests/test_core.py index 9453628d3..6b13d03f4 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -78,6 +78,34 @@ def test_locale_immutability(): assert loc.language == 'en' +def test_locale_caching(): + # Explicitly clear the cache dict now, if we've already loaded a locale in the past. + Locale._cache.clear() + assert not Locale._cache + + # (1) Just args + loc = Locale('en', 'US') + assert len(Locale._cache) == 1 # Cached something! + assert Locale._cache[('en', 'US', None, None)] is loc # Gotta be the same instance! + # (2) How about Locale.parse? + loc2 = Locale.parse('en_US') + assert len(Locale._cache) == 1 # No change here! + assert loc is loc2 # Still the same instance! + # (3) And kwargs, wildly misordered?! + loc3 = Locale(territory='US', variant=None, language='en') + assert len(Locale._cache) == 1 # Still no change! + assert loc is loc3 # Still the same instance! + + # Let's add some more locales! + Locale('fi', 'FI') + Locale('nb', 'NO') + Locale('sv', 'SE') + Locale('zh', 'CN', script='Hans') + Locale('zh', 'TW', script='Hant') + assert len(Locale._cache) == 6 # Cache GET! 
+ + + class TestLocaleClass: def test_attributes(self): locale = Locale('en', 'US') From 0278582968277fbf55e8e6af822a46eea2ec4a1b Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 29 Dec 2015 20:47:34 +0200 Subject: [PATCH 4/5] Teach Locale.parse to share the Locale cache --- babel/core.py | 4 ++++ tests/test_core.py | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/babel/core.py b/babel/core.py index 19c62c091..3587158bc 100644 --- a/babel/core.py +++ b/babel/core.py @@ -278,6 +278,10 @@ def parse(cls, identifier, sep='_', resolve_likely_subtags=True): raise TypeError('Unxpected value for identifier: %r' % (identifier,)) parts = parse_locale(identifier, sep=sep) + + if parts in cls._cache: # We've loaded this one before. + return cls._cache[parts] + input_id = get_locale_identifier(parts) def _try_load(parts): diff --git a/tests/test_core.py b/tests/test_core.py index 6b13d03f4..ef17151bb 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -105,6 +105,21 @@ def test_locale_caching(): assert len(Locale._cache) == 6 # Cache GET! +def test_locale_cache_shared_by_parse(): + # Test that Locale.parse() shares the cache and doesn't do (much) + # extra work loading locales. + + # Put a dummy object into the cache... + en_US_cache_key = ('en', 'US', None, None) + dummy = object() + Locale._cache[en_US_cache_key] = dummy + + try: + assert Locale.parse("en^US", sep="^") is dummy # That's a weird separator, man! + finally: + # Now purge our silliness (even in case this test failed) + Locale._cache.clear() + class TestLocaleClass: def test_attributes(self): From 33a90579a8691ae9481c9c3f5572c6f71fde3774 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 29 Dec 2015 20:49:51 +0200 Subject: [PATCH 5/5] Add an RLock around the Locale cache. 
Refs #31 (https://github.com/python-babel/babel/issues/31) --- babel/_memoized.py | 18 +++++++++++++++--- babel/core.py | 5 ++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/babel/_memoized.py b/babel/_memoized.py index 6cda6bb7f..ada9fbaa7 100644 --- a/babel/_memoized.py +++ b/babel/_memoized.py @@ -10,15 +10,27 @@ class Memoized(type): def __new__(mcs, name, bases, dict): if "_cache" not in dict: dict["_cache"] = {} + if "_cache_lock" not in dict: + dict["_cache_lock"] = None return type.__new__(mcs, name, bases, dict) def __memoized_init__(cls, *args, **kwargs): + lock = cls._cache_lock if hasattr(cls, "_get_memo_key"): key = cls._get_memo_key(args, kwargs) else: key = (args or None, frozenset(kwargs.items()) or None) - if key not in cls._cache: - cls._cache[key] = type.__call__(cls, *args, **kwargs) - return cls._cache[key] + + try: + return cls._cache[key] + except KeyError: + try: + if lock: + lock.acquire() + inst = cls._cache[key] = type.__call__(cls, *args, **kwargs) + return inst + finally: + if lock: + lock.release() __call__ = __memoized_init__ # This aliasing makes tracebacks more understandable. diff --git a/babel/core.py b/babel/core.py index 3587158bc..7c65d98f0 100644 --- a/babel/core.py +++ b/babel/core.py @@ -8,8 +8,8 @@ :copyright: (c) 2013 by the Babel Team. :license: BSD, see LICENSE for more details. """ - import os +import threading from babel import localedata from babel._compat import pickle, string_types, with_metaclass @@ -125,6 +125,9 @@ class Locale(with_metaclass(Memoized)): #: The dictionary used by the locale cache metaclass. _cache = {} + #: The lock used for the cache metaclass. + _cache_lock = threading.RLock() + @staticmethod def _get_memo_key(args, kwargs): # Getter for a cache key for the Memoized metaclass.