forked from CCWI/sentiment-analyser-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentimentProvider.py
More file actions
178 lines (139 loc) · 6.45 KB
/
SentimentProvider.py
File metadata and controls
178 lines (139 loc) · 6.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
from SatException import SatException
import semantria
import uuid
import re
import time
import json
from watson_developer_cloud import AlchemyLanguageV1
from watson_developer_cloud import watson_developer_cloud_service
# Uncomment when config file is present
from config import semantria_key, semantria_secret, alchemy_key, german_conf_twitter_active, german_conf, db_host, db_name, db_user, db_password, db_port
class SentimentProvider(object):
    """Base class for sentiment analysis providers.

    Stores a human-readable provider name plus a numeric provider id and
    exposes getter/setter access to both. Concrete providers override
    parse() to perform the actual analysis.
    """

    def __init__(self, name, provider_id):
        object.__init__(self)
        self._name = name
        self._provider_id = provider_id

    def name(self):
        """Return the provider's display name."""
        return self._name

    def setname(self, name):
        """Replace the provider's display name."""
        self._name = name

    def provider_id(self):
        """Return the provider's numeric id."""
        return self._provider_id

    def setprovider_id(self, provider_id):
        """Replace the provider's numeric id."""
        self._provider_id = provider_id

    def parse(self, input_texts, expected_lang):
        """Announce the parse run; subclasses do the real work."""
        print("Parsing with provider " + self._name)
class SemantriaProvider(SentimentProvider):
    """Sentiment provider backed by the Semantria API (German texts only)."""

    def __init__(self):
        SentimentProvider.__init__(self, 'Semantria', 1)

    def _collect_processed(self, session, config_id, id_map, label):
        """Fetch processed documents for one Semantria configuration.

        Keeps only documents whose id is known in id_map, mapping each back
        to the original comment id, and returns them as a list.

        BUG FIX: the original code removed unknown entries from the result
        list while iterating over that same list, which skips elements and
        could leave unmapped ids in the output. Building a new filtered
        list avoids that.
        """
        processed = []
        for data in session.getProcessedDocuments(config_id):
            if data["id"] in id_map:
                data["id"] = id_map[data["id"]]
                processed.append(data)
        # BUG FIX: these were Python-2 print statements, a syntax error
        # under Python 3 and inconsistent with the rest of the file.
        print("Added " + str(len(processed)) + " entries to " + label)
        return processed

    def parse(self, input_texts, expected_lang):
        """Analyze up to 100 input texts via Semantria.

        input_texts: list of dicts with at least "id" and "text" keys.
        expected_lang: must be "German" (the only supported language).
        Returns a list of SentimentResponse (mixed is always None; the
        Semantria result carries no mixed-sentiment indicator here).
        Raises SatException on too many inputs or an unsupported language.
        """
        SentimentProvider.parse(self, input_texts, expected_lang)
        if len(input_texts) > 100:
            raise SatException("Too many inputs. Input documents limited at 100 per API call!")
        # Split comments by length: short texts use the Twitter-tuned
        # configuration, longer ones the regular German configuration.
        docs_less140 = []
        docs_more140 = []
        id_map = {}
        for comment in input_texts:
            # Generate a unique request id; re-roll on the (astronomically
            # unlikely) collision, and remember the original comment id.
            comment_id = uuid.uuid4().hex
            while comment_id in id_map:
                comment_id = uuid.uuid4().hex
            id_map[comment_id] = comment["id"]
            # Strip URLs, which only add noise to sentiment scoring.
            comment["text"] = re.sub(r'https?://www\.[a-z\.0-9]+', '', comment["text"])
            comment["text"] = re.sub(r'www\.[a-z\.0-9]+', '', comment["text"])
            if len(comment["text"]) > 140:
                docs_more140.append({"id": comment_id, "text": comment["text"]})
            else:
                docs_less140.append({"id": comment_id, "text": comment["text"]})
        # Initialise JSON serialiser and create a Semantria session.
        serializer = semantria.JsonSerializer()
        session = semantria.Session(semantria_key, semantria_secret, serializer, use_compression=True)
        print("Setting Language: " + expected_lang)
        if expected_lang != "German":
            raise SatException("Only 'German' is supported!")
        lang_id_less140 = german_conf_twitter_active
        lang_id_more140 = german_conf
        # Send messages as batches to Semantria.
        if len(docs_more140) > 0:
            session.queueBatch(docs_more140, lang_id_more140)
        if len(docs_less140) > 0:
            session.queueBatch(docs_less140, lang_id_less140)
        # Poll until every queued document has been processed and retrieved.
        length_more140 = len(docs_more140)
        results_more140 = []
        length_less140 = len(docs_less140)
        results_less140 = []
        while (len(results_more140) < length_more140) or (len(results_less140) < length_less140):
            print("Retrieving processed results...", "\r\n")
            time.sleep(2)
            results_more140.extend(
                self._collect_processed(session, lang_id_more140, id_map, "result_more140"))
            results_less140.extend(
                self._collect_processed(session, lang_id_less140, id_map, "result_less140"))
        results = results_more140 + results_less140
        responses = []
        for result in results:
            responses.append(SentimentResponse(result['id'], result['sentiment_score'], None))
        return responses
class AlchemyProvider(SentimentProvider):
    """Sentiment provider backed by IBM Watson's AlchemyLanguage API."""

    def __init__(self):
        SentimentProvider.__init__(self, 'Alchemy', 2)
        self._alchemy_language = AlchemyLanguageV1(api_key=alchemy_key)

    def parse(self, input_texts, expected_lang):
        """Score each comment individually via Alchemy.

        input_texts: list of dicts with "id" and "text" keys.
        Empty comments are skipped; a WatsonException for one comment is
        logged and that comment is dropped rather than aborting the batch.
        Returns a list of SentimentResponse.
        """
        SentimentProvider.parse(self, input_texts, expected_lang)
        responses = []
        for comment in input_texts:
            text = comment["text"]
            print("Comment: " + text)
            try:
                if text is None or not text.strip():
                    print("Skipping comment. Text is empty!")
                    continue
                result = self._alchemy_language.sentiment(text=text, language=expected_lang.lower())
                print(json.dumps(result, indent=2))
                doc_sentiment = result["docSentiment"]
                # Alchemy omits "score"/"mixed" for neutral documents; treat
                # a missing field as 0.
                responses.append(SentimentResponse(comment["id"],
                                                   doc_sentiment.get("score", 0),
                                                   doc_sentiment.get("mixed", 0)))
            except watson_developer_cloud_service.WatsonException as e:
                print(str(e) + " Comment: " + text)
        return responses
class SentimentResponse(object):
    """Value object holding one sentiment result.

    Carries the analyzed document's id, its numeric sentiment score, and a
    mixed-sentiment indicator (None when the provider supplies none).
    """

    def __init__(self, id, sentiment_score, mixed):
        object.__init__(self)
        self._id = id
        self._sentiment_score = sentiment_score
        self._mixed = mixed

    def id(self):
        """Return the id of the analyzed document."""
        return self._id

    def setid(self, id):
        """Replace the document id."""
        self._id = id

    def sentiment_score(self):
        """Return the numeric sentiment score."""
        return self._sentiment_score

    def setsentiment_score(self, sentiment_score):
        """Replace the sentiment score."""
        self._sentiment_score = sentiment_score

    def mixed(self):
        """Return the mixed-sentiment indicator."""
        return self._mixed

    def setmixed(self, mixed):
        """Replace the mixed-sentiment indicator."""
        self._mixed = mixed