forked from rajhrikshit/nl2sql
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paththesaurus.py
More file actions
50 lines (40 loc) · 1.64 KB
/
thesaurus.py
File metadata and controls
50 lines (40 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import unicodedata
class Thesaurus:
    """In-memory mapping from a word to a list of its synonyms.

    Entries live in ``self.dictionary`` (word -> list of synonyms) and can
    be added one by one or bulk-loaded from a pipe-separated thesaurus file
    with :meth:`load`.
    """

    def __init__(self):
        """Create an empty thesaurus."""
        self.dictionary = {}

    def add_entry(self, word, synonyms):
        """Set the synonyms of *word*, replacing any existing entry.

        The *synonyms* object is stored as given (no copy is made).
        """
        self.dictionary[word] = synonyms

    def add_synonym_to_a_word(self, word, synonym):
        """Append a single *synonym* to *word*, creating the entry if needed."""
        # setdefault keeps this consistent with add_synonyms_to_a_word, which
        # also creates missing entries instead of raising KeyError.
        self.dictionary.setdefault(word, []).append(synonym)

    def add_synonyms_to_a_word(self, word, synonyms):
        """Append every item of *synonyms* to *word*, creating the entry if needed.

        The caller's *synonyms* sequence is never stored by reference, so
        later additions to the entry do not mutate the caller's object.
        """
        self.dictionary.setdefault(word, []).extend(synonyms)

    def get_synonyms_of_a_word(self, word):
        """Return the synonyms stored for *word*, or ``None`` if it is unknown."""
        return self.dictionary.get(word)

    def remove_accents(self, string):
        """Return ``str(string)`` with combining marks (accents) removed.

        The text is decomposed with NFKD normalization and every combining
        character is dropped, so ``"é"`` becomes ``"e"``.
        """
        nfkd_form = unicodedata.normalize('NFKD', str(string))
        return "".join(c for c in nfkd_form if not unicodedata.combining(c))

    @staticmethod
    def _generate_path(path):
        """Resolve *path* relative to the directory containing this module.

        An absolute *path* is returned unchanged by ``os.path.join``.
        """
        cwd = os.path.dirname(__file__)
        return os.path.join(cwd, path)

    def load(self, path, encoding=None):
        """Load entries from the thesaurus file at *path*.

        *path* is resolved with :meth:`_generate_path`. The first line of the
        file (the encoding-type line) is skipped. Every following non-blank
        line that does not contain ``(`` is treated as a headword line: the
        headword is the text before the first ``|``, and its synonyms are the
        ``|``-separated fields of the next line with the first field dropped.
        Accents are removed from headwords and synonyms.

        *encoding* is passed to :func:`open`; ``None`` (the default) keeps the
        platform default encoding.
        """
        with open(self._generate_path(path), encoding=encoding) as handle:
            # Strip line terminators so the last synonym of each entry does
            # not carry a trailing newline.
            content = [raw.rstrip("\r\n") for raw in handle]

        # content[0] is the encoding-type line: useless to parse. Pairing each
        # line with its successor also guarantees a headword on the very last
        # line (which has no synonym line after it) is skipped, not an error.
        for entry, following in zip(content[1:], content[2:]):
            if not entry.strip() or '(' in entry:
                continue
            word = self.remove_accents(entry.split("|")[0])
            # The first field of the following line is not a synonym.
            synonyms = self.remove_accents(following).split("|")[1:]
            self.add_synonyms_to_a_word(word, synonyms)

    def print_me(self):
        """Print every word followed by its synonym list, one per line."""
        for word, synonyms in self.dictionary.items():
            print(word)
            print(synonyms)