-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlab3.py
More file actions
149 lines (137 loc) · 5.55 KB
/
lab3.py
File metadata and controls
149 lines (137 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#####################################################################################################
# Govinda KC #
# CS2302 Lab 3 #
# Instructor: Diego Aguirre #
# TA : Manoj Shah #
#####################################################################################################
# The purpose of this lab is to measure how closely related pairs of words are, in terms of cosine similarity.
# The cosine of the angle ranges from -1 to 1. A value close to 1 means the word pair is highly related,
# while a value close to -1 means the two words are opposite to each other.
# Importing the required Modules.
import math
import AVLTree
import RedBlackTree
from AVLTree import AVLTree
from RedBlackTree import RedBlackTree
from AVLTree import Node
# Function to read a given file.
def read_file(path='glove.6B.50d.txt'):
    """Load word embeddings from *path* into the module-level ``tree``.

    Each line is split on single spaces: the first token is the word and the
    remaining tokens are parsed as floats to form its embedding. Only words
    whose first character is alphabetic are inserted.

    Args:
        path: Embedding file to read. Defaults to the file name the script
            has always used, so existing ``read_file()`` calls are unchanged.

    Raises:
        ValueError: If an embedding component cannot be parsed as a float.
    """
    # ``with`` guarantees the file is closed even if parsing or insertion raises.
    with open(path, encoding="utf-8") as f:
        for line in f:
            _line = line.split(" ")
            word = _line[0]
            # Slice instead of index so an empty first token (a line that
            # starts with a space) is skipped rather than raising IndexError.
            if not word[:1].isalpha():
                continue
            embedding_array = [float(value) for value in _line[1:]]
            node = Node(word, embedding_array)
            # First try inserting a ready-made node, then fall back to the
            # (word, embedding) form -- presumably because the two tree
            # classes expose different insert() signatures (confirm).
            # ``except Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            try:
                tree.insert(node)
            except Exception:
                tree.insert(word, embedding_array)
# Give the options to user RedBlack or AVL Tree?
# Keep prompting until the user picks one of the two supported trees.
# Membership (``==``-based) is used instead of ``is``: whether two equal
# strings are the same object is an implementation detail, and ``is`` with a
# literal raises a SyntaxWarning on Python 3.8+.
while True:
    _input = input("Type 0 for Red-black tree and 1 for AVL tree: ")
    if _input in ('0', '1'):
        break
    print("Wrong input" )
# This is for RedBlackTree when user chose 0
# Compare by value (``==``); ``is`` tests object identity, which is not
# guaranteed for equal strings.
if _input == "0":
    tree = RedBlackTree()
    # Populate the tree from the embeddings file.
    read_file()
    # Report the node count and the height of the tree.
    print("RedBlack Tree has "+ str(len(tree)) + ' nodes')
    print('and')
    print("It's height is " + str(tree._height()))
    # The output file is opened (created/truncated) around the call exactly as
    # before; tree._write() is not handed the handle.
    # NOTE(review): confirm where _write() actually sends its output.
    with open("RedBlack_tree.txt", "w+", encoding = 'utf-8') as output_file:
        tree._write()
    # The tree is not modified below, so its height is computed once.
    max_depth = int(tree._height())
    # Ask for a depth until a valid one (0 <= depth < height) is entered.
    while True:
        _inputuser = input("Please enter the depth of nodes you would like printed to file: ")
        try:
            k = int(_inputuser)
        except ValueError:
            # Non-numeric input is treated like any other invalid depth
            # instead of crashing the script.
            k = -1
        if 0 <= k < max_depth:
            break
        print("Depth is not valid, please choose another depth size" )
    # Same open/call/close sequence for the depth file.
    with open("RB_depth.txt", "w+", encoding="utf-8") as depth_file:
        tree._depth(k)
    # NOTE(review): the flattened source does not show this line's nesting;
    # it is assumed to belong to the Red-black branch -- confirm.
    print('******************************************************************************************')
# This is for AVL Tree when user chose 1
# Compare by value (``==``); ``is`` tests object identity, which is not
# guaranteed for equal strings.
if _input == "1":
    tree = AVLTree()
    # Populate the tree from the embeddings file.
    read_file()
    # Report the node count and the height of the tree.
    print("AVL Tree has "+ str(tree._size())+' nodes')
    print('and')
    print("It's height is " + str(tree._height()))
    # The output file is opened (created/truncated) around the call exactly as
    # before; tree._write() is not handed the handle.
    # NOTE(review): confirm where _write() actually sends its output.
    with open("AVL_tree.txt", "w+", encoding = 'utf-8') as output_file:
        tree._write()
    # The tree is not modified below, so its height is computed once.
    max_depth = int(tree._height())
    # Ask for a depth until a valid one (0 <= depth < height) is entered.
    while True:
        _inputuser = input("Please enter the depth of nodes you would like printed to file: ")
        print()
        try:
            k = int(_inputuser)
        except ValueError:
            # Non-numeric input is treated like any other invalid depth
            # instead of crashing the script.
            k = -1
        if 0 <= k < max_depth:
            break
        print("Depth is not valid, please choose another depth size: ")
    # Same open/call/close sequence for the depth file.
    with open("AVL_depth.txt", "w+", encoding = 'utf-8') as depth_file:
        tree._depth(k)
# Read the word-pair file and report how related each pair is, in terms of cosine similarity.
def _cosine_similarity(e0, e1):
    """Return the cosine of the angle between two embedding vectors.

    Args:
        e0: First sequence of numbers.
        e1: Second sequence of numbers; components are paired with ``e0``
            positionally (pairing stops at the shorter sequence).

    Returns:
        dot(e0, e1) / (|e0| * |e1|), or ``None`` when either magnitude is
        zero (including empty vectors), where the cosine is undefined.
    """
    dot_prod = 0
    magnitude_0 = 0
    magnitude_1 = 0
    for a, b in zip(e0, e1):
        dot_prod += a * b
        magnitude_0 += a * a
        magnitude_1 += b * b
    denominator = math.sqrt(magnitude_0) * math.sqrt(magnitude_1)
    if denominator == 0:
        # Avoid ZeroDivisionError for zero-length or all-zero vectors.
        return None
    return dot_prod / denominator


# ``with`` closes the word-pair file, which was previously never closed.
with open('appendix.txt') as f:
    for line in f:
        # Whitespace split drops the trailing newline, so the second word is
        # looked up cleanly even when it is the last token on the line.
        _line = line.split()
        if len(_line) < 2:
            # Blank or incomplete line: there is no pair to compare.
            continue
        # Look up both words in the tree built earlier in the script.
        w0 = tree.search(_line[0])
        w1 = tree.search(_line[1])
        cosine_similarity = None
        if w0 is not None and w1 is not None:
            cosine_similarity = _cosine_similarity(w0.get_embedding(),
                                                   w1.get_embedding())
        if cosine_similarity is None:
            # Either a word is missing from the tree or the similarity is
            # undefined for its embedding.
            print('no comparison is found')
        else:
            print(_line[0],"",_line[1],"", cosine_similarity)