-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
73 lines (63 loc) · 2.23 KB
/
utils.py
File metadata and controls
73 lines (63 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from __future__ import division
from __future__ import print_function
import os
import math
import torch
from torch.autograd import Variable as Var
from vocab import Vocab
# loading GLOVE word vectors
# if .pth file is found, will load that
# else will load from .txt file & save
def load_word_vectors(path):
    """Load word vectors stored under the file prefix ``path``.

    When both ``path + '.pth'`` and ``path + '.vocab'`` exist, the cached
    tensor and vocabulary are loaded directly.  Otherwise ``path + '.txt'``
    is parsed (one entry per line: the word followed by its space-separated
    vector components), the words are written to ``path + '.vocab'`` and the
    tensor is saved to ``path + '.pth'`` so later calls take the fast path.

    Args:
        path: file path prefix, without extension.

    Returns:
        A ``(vocab, vectors)`` tuple: ``vocab`` is a ``Vocab`` built from the
        ``.vocab`` file and ``vectors`` is a tensor with one row per line of
        the ``.txt`` file.
    """
    cache_path = path + '.pth'
    vocab_path = path + '.vocab'
    text_path = path + '.txt'

    if os.path.isfile(cache_path) and os.path.isfile(vocab_path):
        print('==> File found, loading to memory')
        # NOTE: torch.load unpickles the file; only point this at cache
        # files that come from a trusted source.
        vectors = torch.load(cache_path)
        vocab = Vocab(filename=vocab_path)
        return vocab, vectors

    # saved file not found, read from txt file
    # and create tensors for word vectors
    print('==> File not found, preparing, be patient')

    # First pass: count the lines and take the vector dimension from the
    # first line.  The context manager guarantees the handle is closed.
    with open(text_path, 'r') as f:
        first_line = f.readline()
        count = sum(1 for _ in f) + (1 if first_line else 0)
    # The first field is the word itself; everything after it is the vector.
    dim = len(first_line.rstrip('\n').split(' ')) - 1

    # Second pass: fill the pre-allocated word list and tensor row by row.
    words = [None] * count
    vectors = torch.zeros(count, dim)
    with open(text_path, 'r') as f:
        for idx, line in enumerate(f):
            contents = line.rstrip('\n').split(' ')
            words[idx] = contents[0]
            vectors[idx] = torch.Tensor(list(map(float, contents[1:])))

    with open(vocab_path, 'w') as f:
        for word in words:
            f.write(word + '\n')
    vocab = Vocab(filename=vocab_path)
    torch.save(vectors, cache_path)
    return vocab, vectors
# write unique words from a set of files to a new file
def build_vocab(filenames, vocabfile):
    """Write the unique tokens found in ``filenames`` to ``vocabfile``.

    Every line of every input file is stripped of trailing newlines and
    split on single spaces; the distinct tokens are written in sorted order,
    one per line.

    Args:
        filenames: iterable of paths to the input text files.
        vocabfile: path of the vocabulary file to (over)write.
    """
    unique_tokens = set()
    for source_path in filenames:
        with open(source_path, 'r') as source:
            for raw_line in source:
                unique_tokens.update(raw_line.rstrip('\n').split(' '))

    with open(vocabfile, 'w') as sink:
        sink.writelines(entry + '\n' for entry in sorted(unique_tokens))
# mapping from scalar to vector
def map_label_to_target(label, num_classes):
    """Wrap a scalar label as a one-element long-tensor target.

    Args:
        label: value convertible with ``int()``; it is truncated to an
            integer class index.
        num_classes: unused by the current implementation.  It was only
            needed by an earlier, now disabled, encoding that built a
            ``(1, num_classes)`` target spreading a fractional label over
            its floor and ceiling classes.

    Returns:
        A ``Variable`` wrapping a ``LongTensor`` of size 1 that holds
        ``int(label)``.
    """
    class_index = torch.LongTensor([int(label)])
    return Var(class_index)