From c82387a90d01ca099393b733d35d892ce10d2cb2 Mon Sep 17 00:00:00 2001 From: Javier Macias-Guarasa Date: Fri, 16 Jun 2023 18:24:52 +0200 Subject: [PATCH 1/5] Added full list of media files The header of the ELAN eaf files can contain multiple media files. This is implemented as a simple list of dictionaries containing the data associated with each media file. --- speach/elan.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/speach/elan.py b/speach/elan.py index cb774a0..095f16d 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -1238,6 +1238,7 @@ class Doc(DataObject): def __init__(self, **kwargs): super().__init__(**kwargs) self.properties = OrderedDict() + self.media_descriptors = [] # JMG added this self.time_order = OrderedDict() self.__tiers = [] self.__tier_map = OrderedDict() # internal - map tierIDs to tier objects @@ -1557,6 +1558,8 @@ def _update_header_xml(self, node): # extract extra properties for prop_node in node.findall('PROPERTY'): self.properties[prop_node.get('NAME')] = prop_node.text + for media_node in node.findall('MEDIA_DESCRIPTOR'): + self.media_descriptors.append(media_node.attrib) def _add_tier_xml(self, tier_node) -> Tier: """ [Internal function] Parse a TIER XML node, create an ELANTier object and link it to this ELAN Doc From 91984ee210b07edb71f6a591c1dc93a14e881c7a Mon Sep 17 00:00:00 2001 From: Javier Macias-Guarasa Date: Wed, 26 Jun 2024 13:40:12 +0200 Subject: [PATCH 2/5] Updated version number referring to geintra The idea is to allow the generation of releases to be used internally in our group --- speach/__version__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/speach/__version__.py b/speach/__version__.py index 7206b0d..f6a4418 100644 --- a/speach/__version__.py +++ b/speach/__version__.py @@ -14,6 +14,6 @@ __issue__ = "https://github.com/neocl/speach/issues/" __maintainer__ = "Le Tuan Anh" __version_major__ = "0.1" # follow PEP-0440 -__version__ = "{}a15.post1".format(__version_major__) 
-__version_long__ = "{} - Alpha 15.post1".format(__version_major__) +__version__ = "{}a15.post1-geintra".format(__version_major__) +__version_long__ = "{} - Alpha 15.post1-geintra".format(__version_major__) __status__ = "3 - Alpha" From 56aa741816fd8706fdc6dcd895e2702b1a49405e Mon Sep 17 00:00:00 2001 From: Javier Macias-Guarasa Date: Wed, 26 Jun 2024 13:41:26 +0200 Subject: [PATCH 3/5] Updated properties and setters for future TODOs The idea for the future is to fix the @property and @*.setter related to media content. I did not fix it right now as this is not compulsory for our initial intended use, but it should be done to provide consistent functionality with media files. --- speach/elan.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/speach/elan.py b/speach/elan.py index 095f16d..dce3c4d 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -1524,29 +1524,41 @@ def time_units(self, value): @property def media_url(self): + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @property media_url is deprecated as there may be more than one in a general ELAN file.') # JMG added this return self._xml_media_node.get('MEDIA_URL') @media_url.setter def media_url(self, value): # TODO: what if __xml_header_node is None? + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @media_url.setter is deprecated as there may be more than one in a general ELAN file.') # JMG added this self._xml_media_node.set('MEDIA_URL', value) @property def mime_type(self): + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @property mime_type is deprecated as there may be more than one in a general ELAN file.') # JMG added this return self._xml_media_node.get('MIME_TYPE') @mime_type.setter def mime_type(self, value): # TODO: what if __xml_header_node is None? 
+ # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @media_type.setter is deprecated as there may be more than one in a general ELAN file.') # JMG added this self._xml_media_node.set('MIME_TYPE', value) @property def relative_media_url(self): + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @property relative_media_url is deprecated as there may be more than one in a general ELAN file.') # JMG added this return self._xml_media_node.get('RELATIVE_MEDIA_URL') @relative_media_url.setter def relative_media_url(self, value): # TODO: what if __xml_header_node is None? + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @relative_media_url.setter is deprecated as there may be more than one in a general ELAN file.') # JMG added this self._xml_media_node.set('RELATIVE_MEDIA_URL', value) def _update_header_xml(self, node): From 55ef98753575c59e094083b7fca679f2d0cecaad Mon Sep 17 00:00:00 2001 From: Javier Macias-Guarasa Date: Wed, 26 Jun 2024 13:44:08 +0200 Subject: [PATCH 4/5] Added time_origin property and setter Intended for future use, to allow relative time handling. In our group we are using elan files from different sources that might not be synchronized. time_origin is a way to unify the time references. 
I did not further implement it (TODO) --- speach/elan.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/speach/elan.py b/speach/elan.py index dce3c4d..9635fe2 100644 --- a/speach/elan.py +++ b/speach/elan.py @@ -1561,6 +1561,19 @@ def relative_media_url(self, value): print(f'Use of @relative_media_url.setter is deprecated as there may be more than one in a general ELAN file.') # JMG added this self._xml_media_node.set('RELATIVE_MEDIA_URL', value) + @property + def time_origin(self): + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @property time_origin is deprecated as there may be more than one in a general ELAN file.') # JMG added this + return self._xml_media_node.get('TIME_ORIGIN') + + @time_origin.setter + def time_origin(self, value): + # TODO: what if __xml_header_node is None? + # TODO JMG: Generalize this to allow for multiple media descriptors + print(f'Use of @time_origin.setter is deprecated as there may be more than one in a general ELAN file.') # JMG added this + self._xml_media_node.set('TIME_ORIGIN', value) + def _update_header_xml(self, node): """ [Internal function] Read ELAN doc information from a HEADER XML node @@ -1849,6 +1862,7 @@ def read_eaf(cls, eaf_path, encoding='utf-8', *args, **kwargs): def create(cls, media_file='audio.wav', media_url=None, relative_media_url=None, + time_origin=0, author="", *args, **kwargs): """ Create a new blank ELAN doc @@ -1869,6 +1883,7 @@ def create(cls, media_file='audio.wav', eaf.media_file = media_file eaf.media_url = media_url eaf.relative_media_url = relative_media_url + eaf.time_origin = time_origin eaf.date = datetime.now() if author: eaf.author = author From 5da16225f6daec7282be5184346412f36ffcca69 Mon Sep 17 00:00:00 2001 From: Javier Macias-Guarasa Date: Wed, 26 Jun 2024 13:51:19 +0200 Subject: [PATCH 5/5] Fixed version name to comply with PEP 440 --- speach/__version__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/speach/__version__.py b/speach/__version__.py index f6a4418..d57dec8 100644 --- a/speach/__version__.py +++ b/speach/__version__.py @@ -14,6 +14,6 @@ __issue__ = "https://github.com/neocl/speach/issues/" __maintainer__ = "Le Tuan Anh" __version_major__ = "0.1" # follow PEP-0440 -__version__ = "{}a15.post1-geintra".format(__version_major__) -__version_long__ = "{} - Alpha 15.post1-geintra".format(__version_major__) +__version__ = "{}a15.post1+geintra".format(__version_major__) +__version_long__ = "{} - Alpha 15.post1+geintra".format(__version_major__) __status__ = "3 - Alpha"