# TreeGrad/main.py at main · watml/TreeGrad · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from createTreeModel import createTreeModel, _classification_ids
from TreeGrad import treegrad_ranker, treestab
import numpy as np
from utilFuncs import treeUtility


# Experiment grid handed to `process_arg_dict` in the script entry point.
# Scalar entries are held fixed; list/range entries are the values to vary.
# NOTE(review): presumably `process_arg_dict` expands this into one flat
# configuration per combination (and adds 'path_results') -- confirm in args.py.
arg_dict = {
    # --- fixed ---
    'root': 'exp',
    'random_seed': 2025,
    # --- varied ---
    'dataset_id': [4538, 44, 43174, 1475, 41150, 41145, 41168, 44975, 4549],
    'sample_id': range(200),
    'n_estimators': [0, 5],
    'use_predicted_class': [0, 1],
    'method': {
        # Specs forwarded to `treestab`; the meaning of the pairs and of the
        # scalar 0.5 is defined there (TODO confirm).
        'semivalue': [
            (16, 1), (8, 1), (4, 1), (2, 1), (1, 1), (1, 2),
            (1, 4), (1, 8), (1, 16), (1, 32), 0.5,
        ],
        # Hyper-parameters swept for `treegrad_ranker`.
        'treegrad_ranker': {
            'T_max': [10, 50, 100],
            'lr': [1, 5, 10, 50],
            'optimizer': ['GA', 'Adam'],
        },
    },
}


def job(arg):
    """Run one experiment configuration and save its results to disk.

    Does nothing if ``arg['path_results']`` already exists, so an interrupted
    sweep can be restarted without redoing finished configurations.

    Parameters
    ----------
    arg : dict
        One configuration. Keys read here: ``path_results``, ``dataset_id``,
        ``n_estimators``, ``random_seed``, ``sample_id``,
        ``use_predicted_class``, ``method`` and, when ``method`` is
        ``'treegrad_ranker'``, also ``optimizer``, ``lr`` and ``T_max``.

    Returns
    -------
    None
        The output is a compressed ``.npz`` file holding a float64 array
        ``results`` of shape ``(3, len(x) + 1)``:

        * row 0, columns 1..: per-player scores from the chosen method
          (column 0 is never written and holds whatever ``np.empty`` left);
        * row 1: utility while players are added in descending score order,
          from the all-False mask (column 0) to the all-True mask (last);
        * row 2: utility while players are removed in descending score
          order, from the all-True mask (column 0) to the all-False mask.
    """
    # `os` is otherwise only imported under the `if __name__ == '__main__'`
    # guard, which is not executed when this module is imported (e.g. by
    # multiprocessing workers started with the "spawn" method). Importing it
    # here keeps `job` usable in that situation instead of raising NameError.
    import os

    # NOTE(review): np.savez_compressed appends '.npz' when the name lacks it;
    # this check only skips finished work if 'path_results' already carries
    # that extension -- confirm against process_arg_dict.
    if os.path.exists(arg['path_results']):
        return

    model, X_test, _ = createTreeModel(arg['dataset_id'], arg['n_estimators'],
                                       arg['random_seed'])
    x = X_test[arg['sample_id']]

    if arg['dataset_id'] in _classification_ids:
        predicted_proba = model.predict_proba(x[None, :])
        predicted_class = np.argmax(predicted_proba[0])
        if arg['use_predicted_class']:
            class_index = predicted_class
        else:
            # Pick a class different from the predicted one: a non-zero offset
            # modulo n_classes can never map back to predicted_class.
            n_classes = predicted_proba.shape[1]
            # Re-seeding the global RNG makes the offset depend only on
            # 'random_seed' (it also resets the state seen by later calls).
            np.random.seed(arg['random_seed'])
            offset = np.random.choice(np.arange(1, n_classes))
            class_index = (predicted_class + offset) % n_classes
    else:
        class_index = None

    results = np.empty((3, len(x) + 1), dtype=np.float64)
    if arg['method'] == 'treegrad_ranker':
        results[0, 1:] = treegrad_ranker(model, x, class_index, arg['optimizer'],
                                         arg['lr'], arg['T_max'])
    else:
        results[0, 1:] = treestab(model, x, arg['method'], class_index)

    util = treeUtility(model, x, class_index)
    # argsort is ascending; `[:0:-1]` reverses it and drops the lowest-scored
    # player. That last step would only reproduce the endpoint values, which
    # are evaluated directly below, so one evaluation per curve is saved.
    ranking = np.argsort(results[0, 1:])[:0:-1]

    subset_inc = np.zeros(len(x), dtype=bool)
    subset_dec = np.ones(len(x), dtype=bool)
    # Endpoints: row 1 goes from no players to all players, row 2 the reverse.
    results[1, 0] = util.evaluate(subset_inc, test=True)
    results[1, -1] = util.evaluate(subset_dec, test=True)
    results[2, 0], results[2, -1] = results[1, -1], results[1, 0]
    for i, player in enumerate(ranking):
        subset_inc[player] = True
        results[1, i+1] = util.evaluate(subset_inc, test=True)
        subset_dec[player] = False
        results[2, i+1] = util.evaluate(subset_dec, test=True)

    np.savez_compressed(arg['path_results'], results=results)


if __name__ == '__main__':
    import os
    # Cap the thread count of the numerical back-ends. With n CPUs and no cap,
    # every worker process may spawn n threads, i.e. n x n threads in total,
    # which can hurt performance. Keep n_processes x n_threads <= n_cpus.
    # The variables are meant to be set before the libraries reading them are
    # imported.
    # NOTE(review): numpy and the project modules are imported at the top of
    # this file, i.e. before these assignments run -- verify that the limits
    # still take effect in this process.
    NUM_THREAD = 1
    for _var in (
        "OMP_NUM_THREADS",
        "OPENBLAS_NUM_THREADS",
        "MKL_NUM_THREADS",
        "VECLIB_MAXIMUM_THREADS",
        "NUMEXPR_NUM_THREADS",
    ):
        os.environ[_var] = str(NUM_THREAD)

    import argparse
    from args import process_arg_dict
    from tqdm import tqdm
    import multiprocessing as mp
    import traceback

    # Command line: `-p N` selects the number of worker processes.
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", type=int, default=1, help="number of processes")
    n_processes = parser.parse_args().p
    print('number of processes:', n_processes)

    # One entry per configuration to run.
    args = process_arg_dict(arg_dict)
    n_total = len(args)

    try:
        if n_processes != 1:
            # Fan the jobs out to a pool; completion order does not matter
            # because every job writes its own result file.
            with mp.Pool(n_processes) as pool:
                for _ in tqdm(pool.imap_unordered(job, args), total=n_total):
                    pass
        else:
            # Single process: run the jobs in-line (easier to debug).
            for arg in tqdm(args, total=n_total):
                job(arg)
    except BaseException:
        # Append the traceback to err.txt for later inspection, then let the
        # exception propagate unchanged.
        with open('err.txt', "a") as f:
            f.write('\n')
            traceback.print_exc(file=f)
        raise