This repository was archived by the owner on May 14, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathskill_static_analysis.py
More file actions
92 lines (82 loc) · 3.73 KB
/
skill_static_analysis.py
File metadata and controls
92 lines (82 loc) · 3.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import re
import string
from bs4 import BeautifulSoup
import nlp_tools
def is_opening_utterance(pathtoinfopage, utterance):
invocation = None
with open(pathtoinfopage, 'r', encoding='utf-8') as file_in:
soup = BeautifulSoup(file_in.read(), 'html.parser')
loc = soup.find('div', id='a2s-skill-details').find_all(text=re.compile('Invocation Name:'))
tags = [tag.parent for tag in loc]
if len(tags) > 0:
names = BeautifulSoup(str(tags[0]), 'html.parser').find_all('span')
# print(names[1].get_text())
invocation = names[1].get_text()
if invocation is None:
tag_title = soup.find('h1', class_='a2s-title-content')
# print(title.get_text().strip())
if tag_title is not None:
title = tag_title.get_text().strip()
if nlp_tools.search_keyword(title, utterance):
return True
else:
if nlp_tools.search_keyword(invocation, utterance):
return True
return False
def create_custom_opening_utterance(pathtoinfopage):
invocation = None
utterance = "open "
with open(pathtoinfopage, 'r', encoding='utf-8') as file_in:
soup = BeautifulSoup(file_in.read(), 'html.parser')
loc = soup.find('div', id='a2s-skill-details').find_all(text=re.compile('Invocation Name:'))
tags = [tag.parent for tag in loc]
if len(tags) > 0:
names = BeautifulSoup(str(tags[0]), 'html.parser').find_all('span')
# print(names[1].get_text())
invocation = names[1].get_text()
if invocation is None:
tag_title = soup.find('h1', class_='a2s-title-content')
# print(title.get_text().strip())
if tag_title is not None:
title = tag_title.get_text().strip()
utterance += title
return utterance
else:
utterance += invocation
return utterance
def get_all_sample_utterances(pathtoinfopage):
list_utterances = list()
with open(pathtoinfopage, 'r', encoding='utf-8') as file_in:
soup = BeautifulSoup(file_in.read(), 'html.parser')
details = soup.find('div', id='a2s-product-details')
div_utterances = details.find('div', id='a2s-product-utterances')
if div_utterances is not None:
for div in div_utterances.find_all('li', class_='a-carousel-card'):
list_utterances.append(div.get_text().replace('\u201d', '').replace('\u201c', '').replace('\"', '').replace('\n', '').translate(str.maketrans('', '', string.punctuation)).strip().lower())
#.translate(str.maketrans('', '', string.punctuation))
# trim the unnecessary wake word and store utterances
for index, item in enumerate(list_utterances):
if 'Alexa' in item[:5] or 'alexa' in item[:5]:
for i, c in enumerate(item):
if c != ',' and c != ' ' and i > 4:
trim = item[i:]
list_utterances[index] = trim
break
return list_utterances
def get_additional_utterances_from_description(pathtoinfopage):
list_utterances = list()
# get description
list_desc = list()
with open(pathtoinfopage, 'r', encoding='utf-8') as file_in:
soup = BeautifulSoup(file_in.read(), 'html.parser')
div_desc = soup.body.find('div', attrs={'id': 'a2s-description'})
for tag in div_desc.find_all(['span', 'a']):
# print(tag.get_text())
list_desc.append(tag.get_text())
# identify utterances
for item in list_desc:
phrases_in_quotes = re.findall(r'"([^"]*)"', item)
for phrase in phrases_in_quotes:
if 'Alexa' in phrase[:5] or 'alexa' in phrase[:5]:
list_utterances.append(phrase)
return list_utterances