CKY-Python-Parser/parser.py at main · cutecatfann/CKY-Python-Parser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import sys
import os

def read_grammar(grammar_file):
	'''
	Opens and parses the given grammar file. It gives the terminal as the key
	and the rules as the values. It strips on whitespace, "->", ";", and "|". It is not sensitive to ":".

	This grammar mandates that there be arrows and semicolons

	It uses the sliding window technique
	'''
	grammar = {}
	arrow = "->"
	semi = ";"

	with open(grammar_file) as f:
		for line in f:
			if arrow not in line:
				print('\nPlease input corrected Doty syntax Grammar. Your grammar is missing arrow(s)\n')
				sys.exit(1)
			elif semi not in line:
				print('\nPlease input corrected Doty syntax Grammar. Your grammar is missing semicolon(s)\n')
				sys.exit(1)
			else:
				lhs, rhs = line.strip().replace(semi, "").split("->")
				rhs = rhs.split("|")
				rhs = [rule.strip().split(" ") for rule in rhs]
				if lhs in grammar:
					grammar[lhs].extend(rhs)
				else:
					grammar[lhs] = rhs
	return grammar


def grammar_parse(word, grammar):
	'''
	Parses the word usiing the given grammar. This is the CKY algorithm.
	It uses a 2D list that is initialized with diagonal values and empty sets.
	'''
	length = len(word)

	# Create a 2D list to represent the table, initialized with empty sets
	table = []
	for i in range(length):
		row = []
		for j in range(length):
			row.append(set())
		table.append(row)

	# fill out the table using the CKY algorithm
	for i in range(0, length):
		# adds rules using the window
		for lhs, rule in grammar.items():
			for rhs in rule:
				if len(rhs) == 1 and rhs[0] == word[i]:
					# tests if terminal
					table[i][i].add(lhs)

		for i in range(i, -1, -1):
			for k in range(j, i + 1):
				for lhs, rule in grammar.items():
					for rhs in rule:
						# checks if accepted by terminal
						if len(rhs) == 2 and rhs[0] in table[j][k] and rhs[1] in table[k + 1][i]:
							T[j][i].add(lhs)

	# tests if any terminals were accepted, which means string can be parsed
	if len(table[0][length-1]) != 0:
		#print("True")
		return True
	else:
		#print("False")
		return False


def main(grammar_file, string_file):
	# Read the grammar
	parsed_grammar = read_grammar(grammar_file)
	file_path = string_file

	check_file = os.path.getsize(string_file)
	#print(check_file)

	if check_file <= 1:
		print("\nThe string file is empty. If this is because it is the empty string (epsilon), then it can be parsed by the grammar. If this was an error, please correctly enter the string.\n")
		sys.exit(1)
	else:
		#print("bad")
		with open(string_file) as f:
			words = f.readline().strip()

	# Parse the string using the CYK algorithm
	result = grammar_parse(words, parsed_grammar)


	# outputs result. Can comment out if you do not want a terminal response
	if result == True:
		print("\nTrue! The string CAN be generated by the grammar.\n")
	else:
		print("\nFalse! The string CANNOT be generated by the grammar.\n")


if __name__ == '__main__':
	import argparse

	if len(sys.argv) != 3:
		print('\nUSAGE: python3 parser.py grammar_file string_file\n')
		sys.exit(1)

	# gives user information for program use
	parser = argparse.ArgumentParser(description='CYK parser for a given grammar and string\n')
	parser.add_argument('grammar', type=str, help='Path to grammar file in Doty parser syntax\n')
	parser.add_argument('string', type=str, help='Path to string file in Chomsky normal form\n')
	args = parser.parse_args()

	main(args.grammar, args.string)