-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
117 lines (95 loc) · 3.35 KB
/
parser.py
File metadata and controls
117 lines (95 loc) · 3.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import sys
import os
def read_grammar(grammar_file):
'''
Opens and parses the given grammar file. It gives the terminal as the key
and the rules as the values. It strips on whitespace, "->", ";", and "|". It is not sensitive to ":".
This grammar mandates that there be arrows and semicolons
It uses the sliding window technique
'''
grammar = {}
arrow = "->"
semi = ";"
with open(grammar_file) as f:
for line in f:
if arrow not in line:
print('\nPlease input corrected Doty syntax Grammar. Your grammar is missing arrow(s)\n')
sys.exit(1)
elif semi not in line:
print('\nPlease input corrected Doty syntax Grammar. Your grammar is missing semicolon(s)\n')
sys.exit(1)
else:
lhs, rhs = line.strip().replace(semi, "").split("->")
rhs = rhs.split("|")
rhs = [rule.strip().split(" ") for rule in rhs]
if lhs in grammar:
grammar[lhs].extend(rhs)
else:
grammar[lhs] = rhs
return grammar
def grammar_parse(word, grammar):
'''
Parses the word usiing the given grammar. This is the CKY algorithm.
It uses a 2D list that is initialized with diagonal values and empty sets.
'''
length = len(word)
# Create a 2D list to represent the table, initialized with empty sets
table = []
for i in range(length):
row = []
for j in range(length):
row.append(set())
table.append(row)
# fill out the table using the CKY algorithm
for i in range(0, length):
# adds rules using the window
for lhs, rule in grammar.items():
for rhs in rule:
if len(rhs) == 1 and rhs[0] == word[i]:
# tests if terminal
table[i][i].add(lhs)
for i in range(i, -1, -1):
for k in range(j, i + 1):
for lhs, rule in grammar.items():
for rhs in rule:
# checks if accepted by terminal
if len(rhs) == 2 and rhs[0] in table[j][k] and rhs[1] in table[k + 1][i]:
T[j][i].add(lhs)
# tests if any terminals were accepted, which means string can be parsed
if len(table[0][length-1]) != 0:
#print("True")
return True
else:
#print("False")
return False
def main(grammar_file, string_file):
# Read the grammar
parsed_grammar = read_grammar(grammar_file)
file_path = string_file
check_file = os.path.getsize(string_file)
#print(check_file)
if check_file <= 1:
print("\nThe string file is empty. If this is because it is the empty string (epsilon), then it can be parsed by the grammar. If this was an error, please correctly enter the string.\n")
sys.exit(1)
else:
#print("bad")
with open(string_file) as f:
words = f.readline().strip()
# Parse the string using the CYK algorithm
result = grammar_parse(words, parsed_grammar)
# outputs result. Can comment out if you do not want a terminal response
if result == True:
print("\nTrue! The string CAN be generated by the grammar.\n")
else:
print("\nFalse! The string CANNOT be generated by the grammar.\n")
if __name__ == '__main__':
import argparse
if len(sys.argv) != 3:
print('\nUSAGE: python3 parser.py grammar_file string_file\n')
sys.exit(1)
# gives user information for program use
parser = argparse.ArgumentParser(description='CYK parser for a given grammar and string\n')
parser.add_argument('grammar', type=str, help='Path to grammar file in Doty parser syntax\n')
parser.add_argument('string', type=str, help='Path to string file in Chomsky normal form\n')
args = parser.parse_args()
main(args.grammar, args.string)