This repository was archived by the owner on Jan 3, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathevaluate.py
More file actions
executable file
·140 lines (101 loc) · 5.03 KB
/
evaluate.py
File metadata and controls
executable file
·140 lines (101 loc) · 5.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
__author__ = 'Dmitry Ustalov'
import argparse
import csv
import os
import pickle
import sys
from multiprocessing import cpu_count
import numpy as np
from gensim.models.word2vec import Word2Vec
from batch_sim.nn_vec import nn_vec
from projlearn import MODELS
def _build_argument_parser():
    """Construct the CLI parser for the evaluation script."""
    builder = argparse.ArgumentParser(description='Evaluation.')
    builder.add_argument('--w2v', default='all.norm-sz100-w10-cb0-it1-min100.w2v', nargs='?',
                         help='Path to the word2vec model.')
    builder.add_argument('--test', default='test.npz', nargs='?', help='Path to the test set.')
    builder.add_argument('--subsumptions', default='subsumptions-test.txt', nargs='?', help='Path to the test subsumptions.')
    builder.add_argument('--non_optimized', action='store_true', help='Disable most similar words calculation optimization.')
    builder.add_argument('--threads', nargs='?', type=int, default=cpu_count(), help='Number of threads.')
    builder.add_argument('path', nargs='*', help='List of the directories with results.')
    return builder


parser = _build_argument_parser()
args = vars(parser.parse_args())

# Bail out when the script is invoked with no arguments at all.
if len(sys.argv) <= 1:
    print('Usage: %s path...' % (sys.argv[0]), file=sys.stderr)
    sys.exit(1)
# Directory of this script; the word2vec model path is resolved against it.
WD = os.path.dirname(os.path.realpath(__file__))

# NOTE(review): load_word2vec_format/init_sims/syn0norm is the pre-1.0 gensim
# API — this script requires an old gensim release; confirm the pinned version.
w2v = Word2Vec.load_word2vec_format(os.path.join(WD, args['w2v']), binary=True, unicode_errors='ignore')
# Replace raw vectors with their L2-normalized form (saves memory; enables
# cosine similarity via plain dot products below).
w2v.init_sims(replace=True)

# Test matrices: X_index maps each example to a row of Z_all (hyponym
# vectors); Y_all holds the corresponding hypernym vectors.
with np.load(args['test']) as npz:
    X_index_test = npz['X_index']
    Y_all_test = npz['Y_all']
    Z_all_test = npz['Z_all']

# Gather one hyponym vector per test example via the index's first column.
X_all_test = Z_all_test[X_index_test[:, 0], :]

# The word pairs themselves, one (hyponym, hypernym) tuple per TSV row,
# in the same order as the rows of X_all_test / Y_all_test.
subsumptions_test = []

with open(args['subsumptions']) as f:
    reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        subsumptions_test.append((row[0], row[1]))

# Sanity check: the text pairs and the vector matrix must line up row-for-row.
assert len(subsumptions_test) == X_all_test.shape[0]
def extract(clusters, Y_hat_clusters):
    """Reassemble per-cluster predictions into the original example order.

    clusters: iterable of cluster labels, one per test example, in test order.
    Y_hat_clusters: mapping from cluster label to the array of predicted
    vectors for that cluster's examples, stored in test order within each
    cluster.

    Returns an array with one predicted vector per example, realigned so
    that row i corresponds to example i of the test set.
    """
    positions = dict.fromkeys(Y_hat_clusters, 0)
    predictions = []

    for label in clusters:
        offset = positions[label]
        predictions.append(Y_hat_clusters[label][offset])
        positions[label] = offset + 1

    # Every stored prediction must have been consumed exactly once.
    assert sum(positions.values()) == len(clusters)

    return np.array(predictions)
def compute_ats(measures, total=None):
    """Return the A@k accuracy for each cutoff level in *measures*.

    measures: list of dicts, one per cutoff k, each mapping a
    (hyponym, hypernym) pair to 1. (the hypernym was found in the top-k
    neighbours) or 0. (it was not).
    total: number of evaluated examples to divide by. Defaults to the size
    of the module-level test set, preserving the original behavior for
    existing callers; passing it explicitly removes the hidden dependency
    on the global `subsumptions_test`.
    """
    if total is None:
        total = len(subsumptions_test)
    return [sum(level.values()) / total for level in measures]
def compute_auc(ats):
    """Approximate the area under the A@k curve.

    Adds together every pair of adjacent accuracies (an unnormalized
    trapezoidal rule with unit spacing) and halves the total. An input
    with fewer than two points yields 0.0, exactly as the original
    list-comprehension sum did.
    """
    area = 0.0
    for left, right in zip(ats, ats[1:]):
        area += left + right
    return area / 2
# Evaluate every trained model found under each result directory given on
# the command line, reporting A@1..A@10 and an AUC-style summary.
for path in args['path']:
    print('Doing "%s" on "%s" and "%s".' % (path, args['test'], args['subsumptions']), flush=True)

    # NOTE(review): unpickling executes arbitrary code — only ever point
    # this at trusted, locally produced result directories.
    # Fixed: the original open() here was never closed; use a context manager.
    with open(os.path.join(path, 'kmeans.pickle'), 'rb') as pickle_file:
        kmeans = pickle.load(pickle_file)

    print('The number of clusters is %d.' % (kmeans.n_clusters), flush=True)

    # Assign each test example's (hypernym - hyponym) offset to a cluster;
    # each model predicts hypernym vectors separately per cluster.
    clusters_test = kmeans.predict(Y_all_test - X_all_test)

    for model in MODELS:
        try:
            # Fixed: '%' was previously applied to the *result* of
            # os.path.join('%s.test.npz') — harmless only by accident of
            # string concatenation; format the file name first.
            with np.load(os.path.join(path, '%s.test.npz' % model)) as npz:
                Y_hat_clusters = {int(cluster): npz[cluster] for cluster in npz.files}
        except FileNotFoundError:
            Y_hat_clusters = {}

        # A complete result file has one entry per cluster; otherwise skip.
        if kmeans.n_clusters != len(Y_hat_clusters):
            print('Missing the output for the model "%s"!' % model, file=sys.stderr, flush=True)
            continue

        # Realign the per-cluster predictions back into test-set order.
        Y_all_hat = extract(clusters_test, Y_hat_clusters)
        assert len(subsumptions_test) == Y_all_hat.shape[0]

        # measures[k-1] records, per word pair, whether the gold hypernym
        # appeared among the top-k nearest neighbours (k = 1..10).
        measures = [{} for _ in range(10)]

        if not args['non_optimized']:
            # normalize Y_all_hat to make dot product equal to cosine
            Y_all_hat_norm = Y_all_hat / np.linalg.norm(Y_all_hat, axis=1)[:, np.newaxis]
            print('nn_vec...')
            # Batched top-10 nearest-neighbour search over the whole test set
            # at once — much faster than per-example w2v.most_similar calls.
            similar_indices = nn_vec(Y_all_hat_norm, w2v.syn0norm, topn=10, sort=True, return_sims=False,
                                     nthreads=args['threads'], verbose=False)
            print('nn_vec results convert...')
            similar_words = [[w2v.index2word[ind] for ind in row] for row in similar_indices]
            print('done')

        for i, (hyponym, hypernym) in enumerate(subsumptions_test):
            if args['non_optimized']:
                # Slow path: one gensim most_similar query per example.
                Y_hat = Y_all_hat[i].reshape(X_all_test.shape[1], )
                actual = [w for w, _ in w2v.most_similar(positive=[Y_hat], topn=10)]
            else:
                actual = similar_words[i]

            # Hit at cutoff k iff the gold hypernym is within the top k.
            for j in range(0, len(measures)):
                measures[j][(hyponym, hypernym)] = 1. if hypernym in actual[:j + 1] else 0.

            # Periodic progress report with running scores.
            if (i + 1) % 100 == 0:
                ats = compute_ats(measures)
                auc = compute_auc(ats)
                ats_string = ', '.join(['A@%d=%.6f' % (j + 1, ats[j]) for j in range(len(ats))])
                print('%d examples out of %d done for "%s/%s": %s. AUC=%.6f.' % (
                    i + 1,
                    len(subsumptions_test),
                    path,
                    model,
                    ats_string,
                    auc),
                    file=sys.stderr, flush=True)

        # Final per-model summary over the full test set.
        ats = compute_ats(measures)
        auc = compute_auc(ats)
        ats_string = ', '.join(['A@%d=%.4f' % (j + 1, ats[j]) for j in range(len(ats))])
        print('For "%s/%s": overall %s. AUC=%.6f.' % (path, model, ats_string, auc), flush=True)