1010from geopy import Nominatim
1111from jsonpath_ng import ext as jp
1212
13- import osw .data . mining as dm
13+ import osw .utils . strings as strutil
1414from osw import wiki_tools as wt
1515from osw .auth import CredentialManager
1616from osw .core import OSW
17- from osw .data .mining import RegExPatternExtended
1817from osw .model import entity as model
18+ from osw .utils .regex_pattern import REGEX_PATTERN_LIB , REGEX_PATTERN_LIST
1919from osw .wtsite import WtSite
2020
2121# Constants
2222PACKAGE_ROOT_PATH = Path (__file__ ).parents [2 ]
2323CREDENTIALS_FILE_PATH_DEFAULT = PACKAGE_ROOT_PATH / "examples" / "accounts.pwd.yaml"
2424ENABLE_SORTING = True
25- REGEX_PATTERN : Dict [str , Union [str , Dict [str , str ]]] = {
26- "SAP OU number and name from DN" : {
27- "Pattern" : r"CN=(.+)([0-9]{10})-(.+),OU=Abteilungen" ,
28- "Groups" : {2 : "SAP OU number" , 3 : "SAP OU name" },
29- },
30- "Location name from DN" : {
31- "Pattern" : r"CN=[A-Za-z]+-(\d+)_L_([^_]+),OU=Standorte" ,
32- "Groups" : {1 : "SAP institute number" , 2 : "Location name" },
33- },
34- "Location/Site parts from DN" : {
35- "Pattern" : r"CN=[A-Za-z]+-(\d+)_L_(([^_^ ^-]+)-([^_^ ]+) (\d+)),OU=Standorte" ,
36- "Groups" : {
37- 1 : "SAP institute number" ,
38- 2 : "Site name" ,
39- 3 : "City" ,
40- 4 : "Street" ,
41- 5 : "House number" ,
42- },
43- },
44- "UUID from full page title" : {
45- "Pattern" : r"([A-Za-z]+):([A-Z]+)([a-z\d\-]+)" ,
46- "Groups" : {1 : "Namespace" , 2 : "Prefix" , 3 : "UUID" },
47- },
48- }
49- REGEX_PATTERN_LIST = [
50- RegExPatternExtended (
51- description = "SAP OU number and name from DN" ,
52- pattern = r"CN=(.+)([0-9]{10})-(.+),OU=Abteilungen" ,
53- group_keys = ["Something" , "SAP OU number" , "SAP OU name" ],
54- ),
55- RegExPatternExtended (
56- description = "Location name from DN" ,
57- pattern = r"CN=[A-Za-z]+\-(\d+)_L_([^_]+),OU=Standorte" ,
58- group_keys = ["SAP institute number" , "Location name" ],
59- ),
60- RegExPatternExtended (
61- description = "Location/Site parts from DN" ,
62- pattern = r"CN=[A-Za-z]+\-(\d+)_L_(([^_^ ^-]+)-([^_^ ]+) (\d+))," r"OU=Standorte" ,
63- group_keys = [
64- "SAP institute number" ,
65- "Site name" ,
66- "City" ,
67- "Street" ,
68- "House number" ,
69- ],
70- ),
71- RegExPatternExtended (
72- description = "UUID from full page title" ,
73- pattern = r"([A-Za-z]+):([A-Z]+)([a-z\d\-]+)" ,
74- group_keys = ["Namespace" , "Prefix" , "UUID" ],
75- ),
76- ]
25+ # For compatibility with the old version of the module
7726REGEX_PATTERN = {rep .description : rep .dict () for rep in REGEX_PATTERN_LIST }
78- REGEX_PATTERN_LIB = {rep .description : rep for rep in REGEX_PATTERN_LIST }
7927
8028
8129# Classes
@@ -203,7 +151,7 @@ def get_uuid_from_object_via_type(obj: Any) -> Union[uuid_module.UUID, None]:
203151 else :
204152 type_str = str (type_ )
205153 match = re .match (
206- pattern = REGEX_PATTERN ["UUID from full page title" ][ "Pattern" ] ,
154+ pattern = REGEX_PATTERN_LIB ["UUID from full page title" ]. pattern ,
207155 string = type_str ,
208156 )
209157 uuid_str = match .group (3 )
@@ -473,8 +421,8 @@ def nan_empty_or_none(inp: Any) -> bool:
473421
474422
475423def regex_match_list (
476- pattern : Union [str , dm .RegExPatternExtended ], list_of_strings : List [str ]
477- ) -> List [Union [str , dm .MatchResult ]]:
424+ pattern : Union [str , strutil .RegExPatternExtended ], list_of_strings : List [str ]
425+ ) -> List [Union [str , strutil .MatchResult ]]:
478426 """Returns a subset of the 'list_of_strings' that matched the regex 'pattern'.
479427
480428 Parameters
@@ -493,7 +441,7 @@ def regex_match_list(
493441 if re .match (pattern = pattern , string = string ):
494442 matches .append (string )
495443 return matches
496- elif isinstance (pattern , dm .RegExPatternExtended ):
444+ elif isinstance (pattern , strutil .RegExPatternExtended ):
497445 matches = []
498446 for string in list_of_strings :
499447 match_result_obj = pattern .match (string )
@@ -780,6 +728,30 @@ def get_entities_from_osw(
780728 return entities_from_osw
781729
782730
def full_page_title_to_uuid(full_page_title: str) -> uuid_module.UUID:
    """Extract the UUID embedded in a full page title.

    Parameters
    ----------
    full_page_title
        Title to parse. It is matched from its start against the
        "UUID from full page title" entry of REGEX_PATTERN_LIB.

    Returns
    -------
    The UUID built from the third capture group of the match.

    Raises
    ------
    ValueError
        If the title does not match the pattern, or if the captured text is
        not a valid UUID string.
    """
    match = re.match(
        pattern=REGEX_PATTERN_LIB["UUID from full page title"].pattern,
        string=full_page_title,
    )
    # re.match returns None on a non-match; fail with a clear message instead
    # of an AttributeError on 'NoneType'.
    if match is None:
        raise ValueError(
            f"Could not extract a UUID from full page title: {full_page_title!r}"
        )
    return uuid_module.UUID(match.group(3))
739+
740+
def osw_id_to_uuid(osw_id: str) -> uuid_module.UUID:
    """Extract the UUID embedded in an OSW ID.

    Parameters
    ----------
    osw_id
        OSW ID to parse. It is matched from its start against the
        "UUID from OSW ID" entry of REGEX_PATTERN_LIB.

    Returns
    -------
    The UUID built from the second capture group of the match.

    Raises
    ------
    ValueError
        If the OSW ID does not match the pattern, or if the captured text is
        not a valid UUID string.
    """
    match = re.match(
        pattern=REGEX_PATTERN_LIB["UUID from OSW ID"].pattern,
        string=osw_id,
    )
    # re.match returns None on a non-match; fail with a clear message instead
    # of an AttributeError on 'NoneType'.
    if match is None:
        raise ValueError(f"Could not extract a UUID from OSW ID: {osw_id!r}")
    return uuid_module.UUID(match.group(2))
748+
749+
def uuid_to_osw_id(uuid: uuid_module.UUID, prefix: str = "OSW") -> str:
    """Create an OSW ID from a UUID.

    Parameters
    ----------
    uuid
        The UUID to convert. Its string form is used with all hyphens removed.
    prefix
        Text placed directly in front of the hyphen-free UUID string.

    Returns
    -------
    The prefix immediately followed by the UUID string without hyphens, with
    no separating or trailing whitespace.
    """
    # str() rather than .hex so that a hyphenated UUID string is handled too.
    compact_uuid = str(uuid).replace("-", "")
    return f"{prefix}{compact_uuid}"
753+
754+
783755def uuid_to_full_page_title (
784756 uuid : Union [uuid_module .UUID , str ],
785757 wiki_ns : str = "Item" ,
0 commit comments