-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscripts.py
More file actions
97 lines (72 loc) · 2.66 KB
/
Copy pathscripts.py
File metadata and controls
97 lines (72 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
This file contains scripts that are commonly used to handle data from the data source (CSV file)
"""
import csv
import datetime
from collections import Counter

from stock_price import StockPrices
from constants import CSV_COLUMNS
def find_stock_most_headlines(file):
    """
    Find the stock with the most headlines in the given CSV

    The ticker is read from the last column of every row. Blank lines in the
    file are skipped. Every non-blank row is counted, so a header row (if the
    file has one) is tallied like any other row.

    :param file: the name of the CSV file to search
    :return: tuple containing the ticker and the number of articles i.e. (GOOGL, 1597);
             (None, 0) when the file contains no rows. If several tickers share the
             highest count, the one that appears first in the file is returned.
    """
    with open(file, newline='', encoding="utf8") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # csv.reader yields an empty list for a blank line; indexing it with
        # [-1] would raise IndexError, so such rows are filtered out here.
        headline_counts = Counter(row[-1] for row in reader if row)

    if not headline_counts:
        return None, 0

    # most_common keeps first-encountered order among equal counts.
    max_ticker, max_num_articles = headline_counts.most_common(1)[0]
    return max_ticker, max_num_articles
def get_rows_from_ticker(ticker, file):
    """
    Given a ticker, return all of the rows as a list that contain that ticker

    The ticker of a row is taken from its last column and compared for exact
    equality. Blank lines in the file are skipped.

    :param ticker: The ticker (i.e. GOOGL) to filter by
    :param file: The CSV file to search on
    :return: the rows pertaining to the ticker as a list (each row is a list of
             strings, in file order); an empty list when nothing matches
    """
    with open(file, newline='', encoding="utf8") as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        # `row and ...` guards against the empty list csv.reader produces for
        # a blank line, which would otherwise raise IndexError on row[-1].
        return [row for row in reader if row and row[-1] == ticker]
def get_date_range_from_rows(rows):
    """
    Given rows from the analyst ratings, return the min and max date

    The date is read from the column indexed by CSV_COLUMNS['DATE']. Only the
    part before the first space is used, and it must have the form
    "YYYY-MM-DD"; anything after the space is ignored.

    :param rows: the rows from the CSV
    :return: min and max date as a tuple of datetime.datetime objects
    :raises ValueError: if rows is empty (there is no meaningful date range),
                        or if a date field cannot be parsed
    """
    dates = []
    for row in rows:
        date_part = row[CSV_COLUMNS['DATE']].split(" ")[0]
        year, month, day = date_part.split("-")
        dates.append(datetime.datetime(int(year), int(month), int(day)))

    # Derive the bounds from the data itself rather than from hard-coded
    # sentinel years, which silently gave wrong results for dates outside the
    # assumed window and an inverted (min > max) range for empty input.
    if not dates:
        raise ValueError("cannot compute a date range from an empty list of rows")

    return min(dates), max(dates)
if __name__ == "__main__":
    # Demo run: look up every analyst-rating row for one ticker, work out the
    # span of dates those rows cover, then print the stock prices for that span.
    CSV_FILE = 'raw_analyst_ratings.csv'
    symbol = "MRK"

    # To pick the ticker with the most headlines instead of a fixed one, use
    # find_stock_most_headlines(CSV_FILE), which returns (ticker, count).
    first_day, last_day = get_date_range_from_rows(
        get_rows_from_ticker(symbol, CSV_FILE)
    )
    print(first_day, last_day)

    price_history = StockPrices(symbol, first_day, last_day)
    print(price_history.prices)