This repository was archived by the owner on Sep 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata.py
More file actions
76 lines (67 loc) · 2.21 KB
/
Copy pathdata.py
File metadata and controls
76 lines (67 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import v20
import yaml
import os
from datetime import datetime, timedelta
# DIR
# Directory holding the v20 config file: $HOME when set, otherwise the
# platform home directory (os.getenv alone returns None when HOME is unset,
# which would make os.path.join raise TypeError).
CFG_DIR = os.getenv("HOME", os.path.expanduser("~"))
# config file
# Parse the YAML config inside a context manager so the file handle is
# closed as soon as loading finishes instead of staying open for the
# lifetime of the process.
with open(os.path.join(CFG_DIR, ".v20.conf"), "r") as cfgStream:
    cfg = yaml.load(cfgStream, Loader=yaml.FullLoader)
# v20 API context built from the connection settings read from the config.
api = v20.Context(
    hostname=cfg["hostname"],
    port=cfg["port"],
    ssl=cfg["ssl"],
    datetime_format=cfg["datetime_format"],
    token=cfg["token"],
)
# get hourly candles from date, return dataframe of candles
def get_hourly_candle(dt, count, asset):
    """Fetch hourly (H1) candles for one instrument as a float DataFrame.

    Args:
        dt: Start time, sent to the API as ``fromTime``. The caller in this
            file passes an ISO-8601 string ending in "Z".
        count: Number of candles to request.
        asset: Instrument name sent as ``instrument`` (e.g. "EUR_USD").

    Returns:
        A DataFrame indexed by candle time (index named "time") with
        everything after the first "." in the timestamp removed; all
        remaining columns are cast to float. When the response contains no
        candles, an empty DataFrame with an empty "time" index is returned.
    """
    # arguments for v20
    kwargs = {
        "accountID": cfg["active_account"],
        "instrument": asset,
        "granularity": "H1",
        "smooth": False,
        "price": "BAM",
        "count": count,
        "fromTime": dt,
        "dailyAlignment": 0,
        "alignmentTimezone": "UTC",
    }
    r = api.instrument.candles(**kwargs)
    # NOTE(review): 200 is presumably the expected HTTP status for the v20
    # response accessor - confirm against the v20 library.
    ls = [c.dict() for c in r.get("candles", 200)]
    if not ls:
        # No candles: normalizing an empty list yields a frame without a
        # "time" column, so return a well-formed empty result instead of
        # failing with KeyError below.
        return pd.DataFrame(index=pd.Index([], name="time"))
    # pd.json_normalize is the public entry point; the pd.io.json alias is
    # deprecated.
    df = pd.json_normalize(ls)
    # Keep only the part of the timestamp before the first "."; `n` must be
    # passed by keyword and the pieces selected with .str[0] on current
    # pandas (unpacking the .str accessor is no longer supported).
    df.index = df["time"].str.split(".", n=1).str[0]
    df = df.drop(columns=["time"])
    return df.astype(float)
# save hourly candle data
def get_data(asset, dt, count, filename):
    """Download hourly candles in chunks and save them as a tab-separated file.

    Args:
        asset: Instrument name forwarded to ``get_hourly_candle``.
        dt: Start date as a "YYYY-MM-DD" string.
        count: Total number of candles wanted.
        filename: Path of the TSV file to write.

    Raises:
        ValueError: If ``dt`` does not match "%Y-%m-%d", or if no candles
            were fetched at all (including ``count <= 0``).
    """
    # convert date to isoformat
    dt = datetime.strptime(dt, "%Y-%m-%d").isoformat("T") + "Z"
    # get data in chunk (max size 5000)
    results = []
    while count > 0:
        chunk_sz = min(count, 5000)
        # Report before fetching so the printed start time is the one this
        # request actually uses, not the start of the following chunk.
        print("Loading {:} data from {:} for {:} bars".format(asset, dt, chunk_sz))
        d = get_hourly_candle(dt, chunk_sz, asset).reset_index()
        if d.empty:
            # Nothing came back for this window; stop rather than trying
            # to parse a timestamp out of an empty frame.
            break
        results.append(d)
        count -= len(d)
        if len(d) < chunk_sz:
            # Fewer candles than requested: the available history is
            # exhausted, so a further request would return nothing.
            break
        # Next chunk starts one hour after the latest candle received.
        latest = datetime.strptime(d["time"].max(), "%Y-%m-%dT%H:%M:%S")
        dt = (latest + timedelta(hours=1)).isoformat("T") + "Z"
    if not results:
        # pd.concat would raise an opaque "No objects to concatenate".
        raise ValueError("No candles returned for {:}".format(asset))
    # save to csv
    df = pd.concat(results, ignore_index=True)
    df = df.sort_values("time").reset_index(drop=True)
    df.to_csv(filename, sep="\t", index=False)
    print("Saved to " + filename)
# download training, testing data
# Each entry is (asset, start date, number of bars, output file); the
# datasets are fetched in the order listed.
for _asset, _start, _bars, _outfile in (
    ("EUR_USD", "2014-01-01", 30000, "EUR_USD_train.tsv"),
    ("GBP_USD", "2014-01-01", 30000, "GBP_USD_train.tsv"),
    ("EUR_USD", "2019-01-01", 2000, "EUR_USD_test.tsv"),
    ("GBP_USD", "2019-01-01", 2000, "GBP_USD_test.tsv"),
):
    get_data(_asset, _start, _bars, _outfile)