-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
143 lines (118 loc) · 5.17 KB
/
utils.py
File metadata and controls
143 lines (118 loc) · 5.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import pandas as pd
import os, re
try:
    # Python 2 only: the 'sets' module was removed in Python 3.
    from sets import Set
except ImportError:
    # Builtin set is a drop-in replacement for sets.Set in Python 3.
    Set = set
# Directory containing this script; used to locate insn_eu_mapping.csv.
utils_script_dirpath = os.path.dirname(os.path.realpath(__file__))
class UnknownInstruction(Exception):
    """Raised when an instruction name cannot be matched to any execution unit."""
    def __init__(self, insn_name, occurence_count):
        # Build the human-readable message, then delegate to Exception.
        msg = "No exec unit found for insn '{0}' which occurs {1} times".format(insn_name, occurence_count)
        super(UnknownInstruction, self).__init__(msg)
def safe_pd_filter(df, field, value):
    """Filter df to rows where `field` equals `value` (or any element of list `value`).

    If `field` is not a column of df, prints a warning and returns df unchanged.
    After filtering, the column is dropped when only one distinct value remains.
    Raises Exception when `value` is an empty list or when no rows survive the filter.
    """
    if field not in df.columns.values:
        print("WARNING: field '{0}' not in df".format(field))
        return df

    if isinstance(value, list):
        if len(value) == 0:
            raise Exception("safe_pd_filter() passed an empty list of values")
        # isin() replaces the original hand-rolled chain of np.logical_or calls,
        # which crashed with NameError because numpy was never imported.
        df = df[df[field].isin(value)]
    else:
        df = df[df[field] == value]

    # The column carries no information if every remaining row holds the same
    # value, so drop it.  (Builtin set: Python 2's sets.Set no longer exists.)
    if len(set(df[field])) == 1:
        df = df.drop(field, axis=1)

    if df.shape[0] == 0:
        raise Exception("No rows left after filter: '{0}' == '{1}'".format(field, value))
    return df
def load_insn_eu_mapping():
    """Load insn_eu_mapping.csv (next to this script) into a dict mapping
    each 'exec_unit' value to the list of its 'instruction' values."""
    mapping_path = os.path.join(utils_script_dirpath, "insn_eu_mapping.csv")
    mapping_df = pd.read_csv(mapping_path)
    mapping = {}
    for _, row in mapping_df.iterrows():
        # Group instruction names under their execution unit.
        mapping.setdefault(row["exec_unit"], []).append(row["instruction"])
    return mapping
def map_insn_to_exec_unit(insn, mapping):
    """Return the execution-unit key in `mapping` that `insn` belongs to,
    or "" when no entry matches.

    Exact name membership is tried first across all units, so literal names
    win over regex patterns; a second pass treats each mapped name as a
    regular expression matched against the start of `insn`.
    """
    for unit in mapping.keys():
        if insn in mapping[unit]:
            return unit
    for unit in mapping.keys():
        for pattern in mapping[unit]:
            if re.match(pattern, insn):
                return unit
    return ""
def categorise_aggregated_instructions_tally(tally_filepath):
    """Aggregate per-instruction counts into per-execution-unit counts.

    Reads the CSV at `tally_filepath` (columns named 'insn.<name>' hold counts),
    maps each instruction to an execution unit via insn_eu_mapping.csv, and
    returns a DataFrame where the 'insn.*' columns are replaced by 'eu.*' and
    'mem.*' totals.  Raises UnknownInstruction if any instruction cannot be
    mapped to an execution unit.
    """
    eu_mapping = load_insn_eu_mapping()
    exec_units = eu_mapping.keys()
    eu_classes = ["eu."+eu for eu in exec_units]

    # keep_default_na=False so empty cells are not silently parsed as NaN.
    insn_tally = pd.read_csv(tally_filepath, keep_default_na=False)

    insn_colnames = [c for c in insn_tally.columns.values if c.startswith("insn.")]
    eu_tally = insn_tally.copy().drop(insn_colnames, axis=1)
    for euc in eu_classes:
        eu_tally[euc] = 0
    eu_tally["mem.loads"] = 0
    eu_tally["mem.stores"] = 0
    eu_tally["mem.load_spills"] = 0
    eu_tally["mem.store_spills"] = 0

    for insn_cn in insn_colnames:
        insn = insn_cn.split('.')[1].lower()
        count = insn_tally[insn_cn]
        # Memory pseudo-instructions are tallied directly, not via the EU mapping:
        if insn == "loads":
            eu_tally["mem.loads"] += count
            continue
        elif insn == "stores":
            eu_tally["mem.stores"] += count
            continue
        elif insn == "load_spills":
            eu_tally["mem.load_spills"] += count
            continue
        elif insn == "store_spills":
            eu_tally["mem.store_spills"] += count
            continue

        eu = map_insn_to_exec_unit(insn, eu_mapping)
        if eu == "":
            raise UnknownInstruction(insn, count.values.max())
        eu_tally["eu."+eu] += count

    if "eu.DISCARD" in eu_tally.keys():
        del eu_tally["eu.DISCARD"]

    if "kernel" in eu_tally.columns.values:
        # Builtin set() here: the Python 2 'sets.Set' used previously no longer
        # exists in Python 3 and crashed this branch at runtime.
        kernels = set(eu_tally["kernel"])
        if "compute_flux_edge" in kernels and "indirect_rw" in kernels:
            ## Good, have enough data to distinguish between spill-induced L1
            ## loads/stores and main memory loads/stores. Can address situations
            ## where assembly-loop-extractor failed to identify spills:
            rw_data = safe_pd_filter(eu_tally, "kernel", "indirect_rw")
            if rw_data.shape[0] == eu_tally[eu_tally["kernel"]=="compute_flux_edge"].shape[0]:
                ## Safe to merge:
                rw_data = rw_data.drop(columns=[c for c in rw_data.columns if c.startswith("eu.")])
                rw_data = rw_data.rename(columns={c:c+".rw" for c in rw_data.columns if c.startswith("mem.")})
                eu_tally = eu_tally.merge(rw_data)
                # Where the extractor reported zero spills, infer them as the
                # excess of this kernel's loads/stores over the indirect_rw baseline:
                f = eu_tally["mem.load_spills"]==0
                eu_tally.loc[f,"mem.load_spills"] = eu_tally.loc[f,"mem.loads"] - eu_tally.loc[f,"mem.loads.rw"]
                eu_tally.loc[f,"mem.loads"] = eu_tally.loc[f,"mem.loads.rw"]
                f = eu_tally["mem.store_spills"]==0
                eu_tally.loc[f,"mem.store_spills"] = eu_tally.loc[f,"mem.stores"] - eu_tally.loc[f,"mem.stores.rw"]
                eu_tally.loc[f,"mem.stores"] = eu_tally.loc[f,"mem.stores.rw"]
                eu_tally = eu_tally.drop(columns=[c for c in eu_tally.columns if c.endswith(".rw")])
    return eu_tally