# folder_to_data.py - from the jaeioursh/MultiagentExplainability repository (main branch).
import csv
import glob
from numpy import *
from scipy import stats
import matplotlib.pyplot as plt

# Module-level dictionary, created empty at import time. folderToData below
# does not read or write it.
# NOTE(review): presumably filled in by callers or by code outside this
# view - confirm before relying on it.
data = {}

def folderToData(folderName, count = None):
    """Load every CSV file in a folder and reduce it to per-label means and errors.

    Each CSV row is read as ``label, v1, v2, ...`` where the values parse as
    floats. The set of labels is taken from the first file (in sorted path
    order):

    * A label starting with ``*`` is not averaged. Its values are taken from
      the first file only and stored under the label with the ``*`` removed.
    * Any other label is collected from every file into a
      ``(number of files, number of values)`` array, which is then replaced
      by its mean over the files. A companion ``<label>_err`` entry holds
      ``1.96 * stats.sem(...)`` over the files (the 95% confidence half-width
      under a normal approximation).

    Blank rows and rows with an empty label are skipped.

    Args:
        folderName: Directory containing the ``*.csv`` files. A trailing
            slash is optional.
        count: If not None, only the first ``count`` files (in sorted path
            order) are used.

    Returns:
        dict mapping each label (and ``<label>_err`` for averaged labels) to
        a numpy array of values.

    Raises:
        IndexError: If no CSV file is selected (empty folder or ``count`` of 0).
        KeyError: If a later file contains a non-starred label that the first
            file does not.
        ValueError: If a value does not parse as a float, or a row's length
            differs from the same label's row in the first file.

    Note:
        With a single file the standard error is undefined, so the ``_err``
        entries are expected to come back as NaN from ``stats.sem``.
    """
    # Local import: the file's import header does not bring in os.
    import os

    # glob returns paths in a filesystem-dependent order; sorting makes both
    # the "first file" (which defines the labels) and the ``count`` subset
    # reproducible between runs and machines.
    fileNameCol = sorted(glob.glob(os.path.join(folderName, "*.csv")))
    if count is not None:
        # A slice already stops at the end of the list, so no min() call is
        # needed (and ``min`` may be shadowed by ``from numpy import *``).
        fileNameCol = fileNameCol[:count]

    sampleCount = len(fileNameCol)
    print(sampleCount)
    if not fileNameCol:
        # Same exception type the original ``fileNameCol[0]`` produced,
        # but with a message that says what went wrong.
        raise IndexError("no CSV files found in %r" % (folderName,))

    rawData = {}
    averagingLabelCol = []

    # Discover the labels (and allocate storage) from the first file.
    with open(fileNameCol[0], newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row or not row[0]:
                continue  # blank line or empty label
            label = row[0]

            # Starred labels are labels we do not average.
            if label[0] == '*':
                rawData[label[1:]] = array(list(map(float, row[1:])))
            else:
                rawData[label] = zeros((sampleCount, len(row) - 1))
                averagingLabelCol.append(label)

    # Fill one row of each averaged label's array per file.
    for sampleIndex, fileName in enumerate(fileNameCol):
        with open(fileName, newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                if not row or not row[0] or row[0][0] == '*':
                    continue
                rawData[row[0]][sampleIndex, :] = list(map(float, row[1:]))

    # Get error (95% CI) and mean, overwriting the raw per-file samples.
    for label in averagingLabelCol:
        rawData[label + "_err"] = 1.96 * stats.sem(rawData[label])
        rawData[label] = mean(rawData[label], axis = 0)

    return rawData