diff --git a/Makefile.in b/Makefile.in index e8922f6..8053bc7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.14.1 from Makefile.am. +# Makefile.in generated by automake 1.14 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2013 Free Software Foundation, Inc. @@ -567,10 +567,9 @@ distcheck: dist && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build \ - && ../configure \ + && ../configure --srcdir=.. --prefix="$$dc_install_base" \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ - --srcdir=.. --prefix="$$dc_install_base" \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ diff --git a/aclocal.m4 b/aclocal.m4 index 0d3fc74..b85f6ae 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1,4 +1,4 @@ -# generated automatically by aclocal 1.14.1 -*- Autoconf -*- +# generated automatically by aclocal 1.14 -*- Autoconf -*- # Copyright (C) 1996-2013 Free Software Foundation, Inc. @@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.14' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. -m4_if([$1], [1.14.1], [], +m4_if([$1], [1.14], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) @@ -51,7 +51,7 @@ m4_define([_AM_AUTOCONF_VERSION], []) # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], -[AM_AUTOMAKE_VERSION([1.14.1])dnl +[AM_AUTOMAKE_VERSION([1.14])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) diff --git a/src/Makefile.in b/src/Makefile.in index e31baea..78a3989 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.14.1 from Makefile.am. +# Makefile.in generated by automake 1.14 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2013 Free Software Foundation, Inc. diff --git a/tests/Makefile.am b/tests/Makefile.am index 56c0ce3..c2929e6 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -3,7 +3,8 @@ CONFIG_ROOT=$(PIN_ROOT)/source/tools/Config include $(CONFIG_ROOT)/makefile.config -TEST_TOOL_ROOTS = cct_client deadspy_client redspy_client redspy_temporal_client redspy_temporal_approx_client cct_data_centric_client cct_client_mem_only cct_data_centric_client_tree_based redspy_spatial_client redspy_spatial_approx_client cct_metric_client cctlib_reader footprint_client footprint_client2 valueNum omp_datarace_client cache +TEST_TOOL_ROOTS = cct_client deadspy_client redspy_client redspy_temporal_client cct_data_centric_client cct_client_mem_only cct_data_centric_client_tree_based redspy_spatial_userdefine_client redspy_spatial_client cct_metric_client cctlib_reader footprint_client footprint_client2 valueNum omp_datarace_client + APP_ROOTS = deadWrites threaded #all: cct_client.so deadspy_client.so cct_data_centric_client.so cct_client_mem_only.so cct_data_centric_client_tree_based.so deadWrites @@ -44,18 +45,11 @@ $(OBJDIR)redspy_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_client$(OBJ_SUFFIX) $(C $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) $(OBJDIR)redspy_temporal_client$(OBJ_SUFFIX): redspy_temporal_client.cpp ../src/cctlib.H - $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< + $(CXX) -mavx -DENABLE_SAMPLING -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< $(OBJDIR)redspy_temporal_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_temporal_client$(OBJ_SUFFIX) $(CCTLIB_SHADOW_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_temporal_approx_client$(OBJ_SUFFIX): redspy_temporal_approx_client.cpp ../src/cctlib.H - $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< - -$(OBJDIR)redspy_temporal_approx_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_temporal_approx_client$(OBJ_SUFFIX) $(CCTLIB_SHADOW_BASED_LIBRARY) - $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) - - $(OBJDIR)omp_datarace_client$(OBJ_SUFFIX): omp_datarace_client.cpp ../src/cctlib.H shadow_memory.cpp $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< @@ -90,19 +84,19 @@ $(OBJDIR)cct_data_centric_client_tree_based$(OBJ_SUFFIX): cct_data_centric_clien $(OBJDIR)cct_data_centric_client_tree_based$(PINTOOL_SUFFIX): $(OBJDIR)cct_data_centric_client_tree_based$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_based $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX): redspy_spatial_client.cpp ../src/cctlib.H +$(OBJDIR)redspy_spatial_userdefine_client$(OBJ_SUFFIX): redspy_spatial_userdefine_client.cpp ../src/cctlib.H $(CXX) $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< -$(OBJDIR)redspy_spatial_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) +$(OBJDIR)redspy_spatial_userdefine_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_userdefine_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_with_addr $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_spatial_approx_client$(OBJ_SUFFIX): redspy_spatial_approx_client.cpp ../src/cctlib.H + +$(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX): redspy_spatial_client.cpp ../src/cctlib.H $(CXX) $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< -$(OBJDIR)redspy_spatial_approx_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_approx_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) +$(OBJDIR)redspy_spatial_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_with_addr $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) - $(OBJDIR)cct_metric_client$(OBJ_SUFFIX): cct_metric_client.cpp ../src/cctlib.H $(CXX) $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< diff --git a/tests/Makefile.in b/tests/Makefile.in index d5f5554..da412e0 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -1,4 +1,4 @@ -# Makefile.in generated by automake 1.14.1 from Makefile.am. +# Makefile.in generated by automake 1.14 from Makefile.am. # @configure_input@ # Copyright (C) 1994-2013 Free Software Foundation, Inc. @@ -208,7 +208,7 @@ top_srcdir = @top_srcdir@ #include ../Makefile.inc #PIN_ROOT=$(PIN_PATH) CONFIG_ROOT = $(PIN_ROOT)/source/tools/Config -TEST_TOOL_ROOTS = cct_client deadspy_client redspy_client redspy_temporal_client redspy_temporal_approx_client cct_data_centric_client cct_client_mem_only cct_data_centric_client_tree_based redspy_spatial_client redspy_spatial_approx_client cct_metric_client cctlib_reader footprint_client footprint_client2 valueNum omp_datarace_client cache +TEST_TOOL_ROOTS = cct_client deadspy_client redspy_client redspy_temporal_client cct_data_centric_client cct_client_mem_only cct_data_centric_client_tree_based redspy_spatial_userdefine_client redspy_spatial_client cct_metric_client cctlib_reader footprint_client footprint_client2 valueNum omp_datarace_client APP_ROOTS = deadWrites threaded #all: cct_client.so deadspy_client.so cct_data_centric_client.so cct_client_mem_only.so cct_data_centric_client_tree_based.so deadWrites ALLOW_UNUSED_BUT_SET = -Wno-unused-but-set-variable @@ -427,17 +427,11 @@ $(OBJDIR)redspy_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_client$(OBJ_SUFFIX) $(C $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) $(OBJDIR)redspy_temporal_client$(OBJ_SUFFIX): redspy_temporal_client.cpp ../src/cctlib.H - $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< + $(CXX) -mavx -DENABLE_SAMPLING -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< $(OBJDIR)redspy_temporal_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_temporal_client$(OBJ_SUFFIX) $(CCTLIB_SHADOW_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_temporal_approx_client$(OBJ_SUFFIX): redspy_temporal_approx_client.cpp ../src/cctlib.H - $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< - -$(OBJDIR)redspy_temporal_approx_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_temporal_approx_client$(OBJ_SUFFIX) $(CCTLIB_SHADOW_BASED_LIBRARY) - $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) - $(OBJDIR)omp_datarace_client$(OBJ_SUFFIX): omp_datarace_client.cpp ../src/cctlib.H shadow_memory.cpp $(CXX) -Wno-deprecated $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< @@ -468,16 +462,16 @@ $(OBJDIR)cct_data_centric_client_tree_based$(OBJ_SUFFIX): cct_data_centric_clien $(OBJDIR)cct_data_centric_client_tree_based$(PINTOOL_SUFFIX): $(OBJDIR)cct_data_centric_client_tree_based$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_based $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX): redspy_spatial_client.cpp ../src/cctlib.H +$(OBJDIR)redspy_spatial_userdefine_client$(OBJ_SUFFIX): redspy_spatial_userdefine_client.cpp ../src/cctlib.H $(CXX) $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< -$(OBJDIR)redspy_spatial_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) +$(OBJDIR)redspy_spatial_userdefine_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_userdefine_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_with_addr $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) -$(OBJDIR)redspy_spatial_approx_client$(OBJ_SUFFIX): redspy_spatial_approx_client.cpp ../src/cctlib.H +$(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX): redspy_spatial_client.cpp ../src/cctlib.H $(CXX) $(CCTLIB_TEST_FLAGS) $(COMP_OBJ)$@ $< -$(OBJDIR)redspy_spatial_approx_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_approx_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) +$(OBJDIR)redspy_spatial_client$(PINTOOL_SUFFIX): $(OBJDIR)redspy_spatial_client$(OBJ_SUFFIX) $(CCTLIB_TREE_BASED_LIBRARY) $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $< -L../src/$(OBJDIR) $(TOOL_LPATHS) -lcctlib_tree_with_addr $(TOOL_LIBS) $(CLIENT_LD_FLAGS) $(CLIENT_LIBS) $(OBJDIR)cct_metric_client$(OBJ_SUFFIX): cct_metric_client.cpp ../src/cctlib.H diff --git a/tests/redspy_spatial_userdefine_client.cpp b/tests/redspy_spatial_userdefine_client.cpp new file mode 100644 index 0000000..fef6567 --- /dev/null +++ b/tests/redspy_spatial_userdefine_client.cpp @@ -0,0 +1,668 @@ +// * BeginRiceCopyright ***************************************************** +// +// Copyright ((c)) 2002-2014, Rice University +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// * Neither the name of Rice University (RICE) nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// This software is provided by RICE and contributors "as is" and any +// express or implied warranties, including, but not limited to, the +// implied warranties of merchantability and fitness for a particular +// purpose are disclaimed. In no event shall RICE or contributors be +// liable for any direct, indirect, incidental, special, exemplary, or +// consequential damages (including, but not limited to, procurement of +// substitute goods or services; loss of use, data, or profits; or +// business interruption) however caused and on any theory of liability, +// whether in contract, strict liability, or tort (including negligence +// or otherwise) arising in any way out of the use of this software, even +// if advised of the possibility of such damage. +// +// ******************************************************* EndRiceCopyright * + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pin.H" + +//enable Data-centric +#define USE_TREE_BASED_FOR_DATA_CENTRIC +#define USE_TREE_WITH_ADDR +#include "cctlib.H" +using namespace std; +using namespace PinCCTLib; + +#define THREAD_MAX (1024) + +#define GEN_REG_NUM (16) +#define GEN_REG_LEN (8) +#define X87_REG_NUM (8) +#define X87_REG_LEN (10) +#define SIMD_REG_NUM (16) +#define SIMD_REG_LEN (32) + +#define SAME_RATE (0.1) +#define SAME_RECORD_LIMIT (0) +#define RED_RATE (0.9) +#define APPROX_RATE (0.01) + +#define ARRAY_UPDATE_THRESHOLD(a) (a/4) +#define MAKE_CONTEXT_PAIR(a, b) (((uint64_t)(a) << 32) | ((uint64_t)(b))) + +#define ARRAY_ANALYSIS_FN_NAME "Analyze_this_array" +#define REG_ANALYSIS_FN_NAME "Analyze_regs" + + +typedef struct valueGroup{ + list indexes; +}ValueGroup; + +typedef struct intraRedRecord{ + double redundancy; + uint32_t curCtxt; + list group; + list spatialRedInd; +}IntraRedRecord; + +typedef struct intraRegsRed{ + double genRegRed; + double x87RegRed; + double simdRegRed; +}IntraRegsRed; + +struct RedSpyThreadData{ + + long long numIns; +}; + +//helper struct used to + +// key for accessing TLS storage in the threads. initialized once in main() +static TLS_KEY client_tls_key; +static RedSpyThreadData* gSingleThreadedTData; + +// function to access thread-specific data +inline RedSpyThreadData* ClientGetTLS(const THREADID threadId) { +#ifdef MULTI_THREADED + RedSpyThreadData* tdata = + static_cast(PIN_GetThreadData(client_tls_key, threadId)); + return tdata; +#else + return gSingleThreadedTData; +#endif +} + + +INT32 Usage2() { + PIN_ERROR("Pin tool to gather calling context on each load and store.\n" + KNOB_BASE::StringKnobSummary() + "\n"); + return -1; +} + +// Main for RedSpy, initialize the tool, register instrumentation functions and call the target program. +static FILE* gTraceFile; +uint32_t lastStatic; +// Initialized the needed data structures before launching the target program +static void ClientInit(int argc, char* argv[]) { + // Create output file + char name[MAX_FILE_PATH] = "redspy_spatial_selected.out."; + char* envPath = getenv("CCTLIB_CLIENT_OUTPUT_FILE"); + + if(envPath) { + // assumes max of MAX_FILE_PATH + strcpy(name, envPath); + } + + gethostname(name + strlen(name), MAX_FILE_PATH - strlen(name)); + pid_t pid = getpid(); + sprintf(name + strlen(name), "%d", pid); + cerr << "\n Creating log file at:" << name << "\n"; + gTraceFile = fopen(name, "w"); + // print the arguments passed + fprintf(gTraceFile, "\n"); + + for(int i = 0 ; i < argc; i++) { + fprintf(gTraceFile, "%s ", argv[i]); + } + + fprintf(gTraceFile, "\n"); +} + +static unordered_map> arrayDataRed[THREAD_MAX]; +static unordered_map> regsRed[THREAD_MAX]; + + +VOID inline RecordIntraRegsRedundancy(uint32_t ctxt, IntraRegsRed redPair,THREADID threadId){ + + unordered_map>::iterator it; + it = regsRed[threadId].find(ctxt); + if(it == regsRed[threadId].end()){ + list newlist; + newlist.push_back(redPair); + regsRed[threadId].insert(std::pair>(ctxt,newlist)); + }else{ + it->second.push_back(redPair); + } +} + +VOID inline RecordIntraArrayRedundancy(string name, IntraRedRecord redPair,THREADID threadId){ + + unordered_map>::iterator it; + it = arrayDataRed[threadId].find(name); + if(it == arrayDataRed[threadId].end()){ + list newlist; + newlist.push_back(redPair); + arrayDataRed[threadId].insert(std::pair>(name,newlist)); + }else{ + it->second.push_back(redPair); + } +} + +static void CheckRegValues(CONTEXT * ctxt,THREADID threadId){ + + RedSpyThreadData* const tData = ClientGetTLS(threadId); + //ContextHandle_t curCtxtHandle = GetContextHandle(threadId, 0); + + //get values for general registers + UINT8 ** genRegs; + genRegs = (UINT8 **)malloc(GEN_REG_NUM * sizeof(UINT8 *)); + for (int i = 0; i < GEN_REG_NUM; ++i) { + genRegs[i] = (UINT8 *)malloc(GEN_REG_LEN * sizeof(UINT8)); + } + + PIN_GetContextRegval(ctxt,REG_RAX,genRegs[0]); + PIN_GetContextRegval(ctxt,REG_RBX,genRegs[1]); + PIN_GetContextRegval(ctxt,REG_RCX,genRegs[2]); + PIN_GetContextRegval(ctxt,REG_RDX,genRegs[3]); + + PIN_GetContextRegval(ctxt,REG_RBP,genRegs[4]); + PIN_GetContextRegval(ctxt,REG_RDI,genRegs[5]); + PIN_GetContextRegval(ctxt,REG_RSI,genRegs[6]); + PIN_GetContextRegval(ctxt,REG_RSP,genRegs[7]); + + PIN_GetContextRegval(ctxt,REG_R8,genRegs[8]); + PIN_GetContextRegval(ctxt,REG_R9,genRegs[9]); + PIN_GetContextRegval(ctxt,REG_R10,genRegs[10]); + PIN_GetContextRegval(ctxt,REG_R11,genRegs[11]); + PIN_GetContextRegval(ctxt,REG_R12,genRegs[12]); + PIN_GetContextRegval(ctxt,REG_R13,genRegs[13]); + PIN_GetContextRegval(ctxt,REG_R14,genRegs[14]); + PIN_GetContextRegval(ctxt,REG_R15,genRegs[15]); + + //get values for X87 registers + UINT8 ** x87Regs; + x87Regs = (UINT8 **)malloc(X87_REG_NUM * sizeof(UINT8 *)); + for (int i = 0; i < X87_REG_NUM; ++i) { + x87Regs[i] = (UINT8 *)malloc(X87_REG_LEN * sizeof(UINT8)); + } + + PIN_GetContextRegval(ctxt,REG_ST0,x87Regs[0]); + PIN_GetContextRegval(ctxt,REG_ST1,x87Regs[1]); + PIN_GetContextRegval(ctxt,REG_ST2,x87Regs[2]); + PIN_GetContextRegval(ctxt,REG_ST3,x87Regs[3]); + PIN_GetContextRegval(ctxt,REG_ST4,x87Regs[4]); + PIN_GetContextRegval(ctxt,REG_ST5,x87Regs[5]); + PIN_GetContextRegval(ctxt,REG_ST6,x87Regs[6]); + PIN_GetContextRegval(ctxt,REG_ST7,x87Regs[7]); + + //get values for SIMD registers + UINT8 ** simdRegs; + simdRegs = (UINT8 **)malloc(SIMD_REG_NUM * sizeof(UINT8 *)); + for (int i = 0; i < SIMD_REG_NUM; ++i) { + simdRegs[i] = (UINT8 *)malloc(SIMD_REG_LEN * sizeof(UINT8)); + } + + PIN_GetContextRegval(ctxt,REG_YMM0,simdRegs[0]); + PIN_GetContextRegval(ctxt,REG_YMM1,simdRegs[1]); + PIN_GetContextRegval(ctxt,REG_YMM2,simdRegs[2]); + PIN_GetContextRegval(ctxt,REG_YMM3,simdRegs[3]); + PIN_GetContextRegval(ctxt,REG_YMM4,simdRegs[4]); + PIN_GetContextRegval(ctxt,REG_YMM5,simdRegs[5]); + PIN_GetContextRegval(ctxt,REG_YMM6,simdRegs[6]); + PIN_GetContextRegval(ctxt,REG_YMM7,simdRegs[7]); + PIN_GetContextRegval(ctxt,REG_YMM8,simdRegs[8]); + PIN_GetContextRegval(ctxt,REG_YMM9,simdRegs[9]); + PIN_GetContextRegval(ctxt,REG_YMM10,simdRegs[10]); + PIN_GetContextRegval(ctxt,REG_YMM11,simdRegs[11]); + PIN_GetContextRegval(ctxt,REG_YMM12,simdRegs[12]); + PIN_GetContextRegval(ctxt,REG_YMM13,simdRegs[13]); + PIN_GetContextRegval(ctxt,REG_YMM14,simdRegs[14]); + PIN_GetContextRegval(ctxt,REG_YMM15,simdRegs[15]); + + int index = 0; + int i,j; + + //check redundancy in general registers + uint64_t valuesMap[GEN_REG_NUM]; + valuesMap[index++] = *(uint64_t *)(genRegs[0]); + + for (int i = 1; i < GEN_REG_NUM; ++i) { + + for (j = 0; j < index; ++j) { + if (*(uint64_t *)(genRegs[i]) == valuesMap[j]) { + break; + } + } + if (j >= index) { + valuesMap[index++] = *(uint64_t *)(genRegs[i]); + } + } + + float genRegRate = (float)index/GEN_REG_NUM; + + //check redundancy in x87 registers + UINT8 ** x87values; + x87values = (UINT8 **)malloc(X87_REG_NUM * sizeof(UINT8 *)); + for (int i = 0; i < X87_REG_NUM; ++i) { + x87values[i] = (UINT8 *)malloc(X87_REG_LEN * sizeof(UINT8)); + } + index = 0; + memcpy(x87values[index++], x87Regs[0], X87_REG_LEN * sizeof(UINT8)); + for (int i = 1; i < X87_REG_NUM; ++i) { + + for (j = 0; j < index; ++j) { + if (memcmp(x87values[j],x87Regs[i],X87_REG_LEN * sizeof(UINT8))==0) { + break; + } + } + if (j >= index) { + memcpy(x87values[index++], x87Regs[i], X87_REG_LEN * sizeof(UINT8)); + } + } + float x87RegRate = (float)index/X87_REG_NUM; + + //check redundancy in SIMD registers + UINT8 ** simdValues; + simdValues = (UINT8 **)malloc(SIMD_REG_NUM * sizeof(UINT8 *)); + for (int i = 0; i < SIMD_REG_NUM; ++i) { + simdValues[i] = (UINT8 *)malloc(SIMD_REG_LEN * sizeof(UINT8)); + } + index = 0; + memcpy(simdValues[index++], simdRegs[0], SIMD_REG_LEN * sizeof(UINT8)); + for (int i = 1; i < 8; ++i) { + + for (j = 0; j < index; ++j) { + if (memcmp(simdValues[j],simdRegs[i],SIMD_REG_LEN * sizeof(UINT8))==0) { + break; + } + } + if (j >= index) { + memcpy(simdValues[index++], simdRegs[i], SIMD_REG_LEN * sizeof(UINT8)); + } + } + float simdRegRate = (float)index/SIMD_REG_NUM; + + if (genRegRate > RED_RATE || x87RegRate > RED_RATE || simdRegRate > RED_RATE) { + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, 0); + IntraRegsRed newpair; + newpair.genRegRed = genRegRate; + newpair.x87RegRed = x87RegRate; + newpair.simdRegRed = simdRegRate; + RecordIntraRegsRedundancy(curCtxtHandle,newpair,threadId); + } +} + + +template +struct ArrayAnalysis{ + + typedef typename unordered_map>::iterator MyIterator; + + static __attribute__((always_inline)) bool CheckIntraArrayRedundancy(uint64_t begAddr, uint64_t endAddr, uint32_t stride, IntraRedRecord * newPair ){ + + unordered_map> valuesMap; + MyIterator mapIt; + list spatialRedIndex; + uint64_t address = begAddr; + uint32_t index = 0; + T valueLast = 0; + while(address < endAddr){ + + T value = *static_cast((void *)address); + + if(isApprox){ + T r = (value - valueLast)/value; + if (r < APPROX_RATE && r > -APPROX_RATE) + spatialRedIndex.push_back(index); + for(mapIt=valuesMap.begin(); mapIt != valuesMap.end(); ++mapIt){ + r = (value - mapIt->first)/value; + if (r < APPROX_RATE && r > -APPROX_RATE){ + mapIt->second.push_back(index); + break; + } + } + if(mapIt == valuesMap.end()){ + list newlist; + newlist.push_back(index); + valuesMap.insert(std::pair>(value,newlist)); + } + }else{ + if(value == valueLast) + spatialRedIndex.push_back(index); + mapIt = valuesMap.find(value); + if(mapIt == valuesMap.end()){ + list newlist; + newlist.push_back(index); + valuesMap.insert(std::pair>(value,newlist)); + }else{ + mapIt->second.push_back(index); + } + } + address += stride; + index++; + valueLast = value; + } + uint32_t numUniqueValue = valuesMap.size(); + double redRate = (double)(index - numUniqueValue)/index; + list maxList; + for (mapIt = valuesMap.begin(); mapIt != valuesMap.end(); ++mapIt){ + if(mapIt->second.size() > index*SAME_RATE){ + ValueGroup newGroup; + newGroup.indexes = mapIt->second; + maxList.push_back(newGroup); + } + } + if(redRate > RED_RATE || maxList.size() > SAME_RECORD_LIMIT){ + newPair->redundancy = redRate; + newPair->group = maxList; + newPair->spatialRedInd = spatialRedIndex; + return true; + } + return false; + } +}; + +static VOID InstrumentInsCallback(INS ins, VOID* v, const uint32_t opaqueHandle) { + ; +} + +void new_ARRAY_ANALYSIS_FN_NAME(char * name, void * addr, uint32_t typeSize, uint32_t stride, bool isApprox, THREADID threadId){ + //printf("name:%s, addr:%p, type:%d, stride:%d\n",name,addr,typeSize,stride); + string str(name); + + DataHandle_t dataHandle = GetDataObjectHandle(addr,threadId); + IntraRedRecord newRecord; + bool hasRedundant = false; + + if (isApprox) { + switch (typeSize) { + case 4: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + case 8: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + default: + assert(0 && "approx inappropriate type size, should not reach here!"); + break; + } + }else{ + + switch (typeSize) { + case 1: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + case 2: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + case 4: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + case 8: + hasRedundant = ArrayAnalysis::CheckIntraArrayRedundancy(dataHandle.beg_addr,dataHandle.end_addr,stride,&newRecord); + break; + default: + assert(0 && "unknow element size, should not reach here!"); + break; + } + } + if(hasRedundant){ + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, 0); + newRecord.curCtxt = curCtxtHandle; + RecordIntraArrayRedundancy( name, newRecord, threadId); + } +} +/* +VOID Overrides (IMG img, VOID * v) { + // Master setup + RTN rtn = RTN_FindByName (img, ARRAY_ANALYSIS_FN_NAME); + if (RTN_Valid (rtn)) { + + RTN_InsertCall (rtn, IPOINT_BEFORE, (AFUNPTR) CheckRegValues, IARG_CONTEXT, IARG_THREAD_ID,IARG_END); + // Define a function prototype that describes the application routine + // that will be replaced. + // + PROTO proto_master = PROTO_Allocate (PIN_PARG (void), CALLINGSTD_DEFAULT, + ARRAY_ANALYSIS_FN_NAME,PIN_PARG (char *),PIN_PARG (void *),PIN_PARG (uint32_t),PIN_PARG (uint32_t), PIN_PARG (bool), + PIN_PARG_END ()); + + // Replace the application routine with the replacement function. + // Additional arguments have been added to the replacement routine. + // + RTN_ReplaceSignature (rtn, AFUNPTR (new_ARRAY_ANALYSIS_FN_NAME), + IARG_PROTOTYPE, proto_master, + IARG_FUNCARG_ENTRYPOINT_VALUE, 0, + IARG_FUNCARG_ENTRYPOINT_VALUE, 1, + IARG_FUNCARG_ENTRYPOINT_VALUE, 2, + IARG_FUNCARG_ENTRYPOINT_VALUE, 3, + IARG_FUNCARG_ENTRYPOINT_VALUE, 4, + IARG_THREAD_ID, IARG_END); + // Free the function prototype. + PROTO_Free (proto_master); + } +}*/ + +VOID Overrides (IMG img, VOID * v) { + // Master setup + for( SEC sec= IMG_SecHead(img); SEC_Valid(sec); sec = SEC_Next(sec) ){ + for( RTN rtn= SEC_RtnHead(sec); RTN_Valid(rtn); rtn = RTN_Next(rtn) ){ + string rtnName = RTN_Name(rtn); + if (rtnName.find(ARRAY_ANALYSIS_FN_NAME) != std::string::npos) { + + // Define a function prototype that describes the application routine + // that will be replaced. + // + PROTO proto_master = PROTO_Allocate (PIN_PARG (void), CALLINGSTD_DEFAULT, + ARRAY_ANALYSIS_FN_NAME,PIN_PARG (char *),PIN_PARG (void *),PIN_PARG (uint32_t),PIN_PARG (uint32_t), PIN_PARG (bool), + PIN_PARG_END ()); + + // Replace the application routine with the replacement function. + // Additional arguments have been added to the replacement routine. + // + RTN_ReplaceSignature (rtn, AFUNPTR (new_ARRAY_ANALYSIS_FN_NAME), + IARG_PROTOTYPE, proto_master, + IARG_FUNCARG_ENTRYPOINT_VALUE, 0, + IARG_FUNCARG_ENTRYPOINT_VALUE, 1, + IARG_FUNCARG_ENTRYPOINT_VALUE, 2, + IARG_FUNCARG_ENTRYPOINT_VALUE, 3, + IARG_FUNCARG_ENTRYPOINT_VALUE, 4, + IARG_THREAD_ID, IARG_END); + // Free the function prototype. + PROTO_Free (proto_master); + }else if (rtnName.find(REG_ANALYSIS_FN_NAME) != std::string::npos) { + RTN_Open(rtn); + RTN_InsertCall (rtn, IPOINT_BEFORE, (AFUNPTR) CheckRegValues, IARG_CONTEXT, IARG_THREAD_ID,IARG_END); + RTN_Close(rtn); + } + } + } +} + + +struct RedundacyData { + ContextHandle_t dead; + ContextHandle_t kill; + uint64_t frequency; +}; + +static inline string ConvertListToString(list inlist){ + + list::iterator it = inlist.begin(); + uint32_t tmp = (*it); + string indexList = "[" + to_string(tmp) + ","; + it++; + while(it != inlist.end()){ + if(*it == tmp + 1){ + tmp = *it; + } + else{ + indexList += to_string(tmp) + "],[" + to_string(*it)+ ","; + tmp = *it; + } + it++; + } + indexList += to_string(tmp) + "]"; + return indexList; +} + + +static inline bool RedundacyCompare(const struct RedundacyData &first, const struct RedundacyData &second) { + return first.frequency > second.frequency ? true : false; +} + +static void PrintRedundancyPairs(THREADID threadId) { + + fprintf(gTraceFile,"\n*************** Intra Array Redundancy of Thread %d ***************\n",threadId); + unordered_map>::iterator itIntra; + + fprintf(gTraceFile,"========== Selected Dataobjecy Redundancy ==========\n"); + for(itIntra = arrayDataRed[threadId].begin(); itIntra != arrayDataRed[threadId].end(); ++itIntra){ + + fprintf(gTraceFile,"\nVariable %s: \n",(itIntra->first).c_str()); + + list::iterator listIt; + for(listIt = itIntra->second.begin(); listIt != itIntra->second.end(); ++listIt){ + + PrintFullCallingContext((*listIt).curCtxt); + fprintf(gTraceFile,"\nRed:%.2f, unique value large index group:\n",(*listIt).redundancy); + list::iterator groupIt; + int num = 0; + for (groupIt = (*listIt).group.begin(); groupIt != (*listIt).group.end(); ++groupIt) { + string indexlist = ConvertListToString((*groupIt).indexes); + fprintf(gTraceFile,"Group %d: %s\n",num, indexlist.c_str()); + } + string indexlist = ConvertListToString((*listIt).spatialRedInd); + fprintf(gTraceFile,"redundant spatial indexes:%s\n",indexlist.c_str()); + + } + fprintf(gTraceFile,"\n----------------------------"); + } + + fprintf(gTraceFile,"\n*************** Intra Registers Redundancy of Thread %d ***************\n",threadId); + unordered_map>::iterator itIntraReg; + + fprintf(gTraceFile,"========== ==========\n"); + for(itIntraReg = regsRed[threadId].begin(); itIntraReg != regsRed[threadId].end(); ++itIntraReg){ + + PrintFullCallingContext(itIntraReg->first); + + list::iterator listItReg; + for(listItReg = itIntraReg->second.begin(); listItReg != itIntraReg->second.end(); ++listItReg){ + + fprintf(gTraceFile,"\n general registers redundancy: %.2f\n",(*listItReg).genRegRed); + fprintf(gTraceFile,"\n X87 registers redundancy: %.2f\n",(*listItReg).x87RegRed); + fprintf(gTraceFile,"\n SIMD registers redundancy: %.2f\n",(*listItReg).simdRegRed); + } + fprintf(gTraceFile,"\n----------------------------"); + } +} + +// On each Unload of a loaded image, the accummulated redundancy information is dumped +static VOID ImageUnload(IMG img, VOID* v) { + fprintf(gTraceFile, "\n TODO .. Multi-threading is not well supported."); + THREADID threadid = PIN_ThreadId(); + fprintf(gTraceFile, "\nUnloading %s", IMG_Name(img).c_str()); + // Update gTotalInstCount first + PIN_LockClient(); + PrintRedundancyPairs(threadid); + PIN_UnlockClient(); + // clear redmap now + arrayDataRed[threadid].clear(); +} + +static VOID ThreadFiniFunc(THREADID threadId, const CONTEXT *ctxt, INT32 code, VOID *v) { + +} + +static VOID FiniFunc(INT32 code, VOID *v) { + // do whatever you want to the full CCT with footpirnt +} + + +static void InitThreadData(RedSpyThreadData* tdata){ + + tdata->numIns = 0; +} + +static VOID ThreadStart(THREADID threadid, CONTEXT* ctxt, INT32 flags, VOID* v) { + RedSpyThreadData* tdata = new RedSpyThreadData(); + InitThreadData(tdata); + // __sync_fetch_and_add(&gClientNumThreads, 1); + PIN_SetThreadData(client_tls_key, tdata, threadid); +#ifdef MULTI_THREADED + PIN_SetThreadData(client_tls_key, tdata, threadid); +#else + gSingleThreadedTData = tdata; +#endif +} + + +int main(int argc, char* argv[]) { + // Initialize PIN + if(PIN_Init(argc, argv)) + return Usage2(); + + // Initialize Symbols, we need them to report functions and lines + PIN_InitSymbols(); + + // Init Client + ClientInit(argc, argv); + // Intialize CCTLib + PinCCTLibInit(INTERESTING_INS_MEMORY_ACCESS, gTraceFile, InstrumentInsCallback, 0, true); + + + // Obtain a key for TLS storage. + client_tls_key = PIN_CreateThreadDataKey(0 /*TODO have a destructir*/); + // Register ThreadStart to be called when a thread starts. + PIN_AddThreadStartFunction(ThreadStart, 0); + + + // fini function for post-mortem analysis + PIN_AddThreadFiniFunction(ThreadFiniFunc, 0); + PIN_AddFiniFunction(FiniFunc, 0); + + IMG_AddInstrumentFunction(Overrides, 0); + + // Register ImageUnload to be called when an image is unloaded + IMG_AddUnloadFunction(ImageUnload, 0); + + // Launch program now + PIN_StartProgram(); + return 0; +} + + diff --git a/tests/redspy_temporal_client.cpp b/tests/redspy_temporal_client.cpp index 7d7ebd9..a1e3700 100644 --- a/tests/redspy_temporal_client.cpp +++ b/tests/redspy_temporal_client.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,19 @@ #include #include "pin.H" #include "cctlib.H" +#include +#include + +extern "C" { +#include "xed-interface.h" +#include "xed-common-hdrs.h" +} + +#include +#include +using google::sparse_hash_map; // namespace where class lives by default +using google::dense_hash_map; + using namespace std; using namespace PinCCTLib; @@ -77,21 +91,39 @@ using namespace PinCCTLib; #define MAX_WRITE_OPS_IN_INS (8) #define MAX_REG_LENGTH (64) -#define MAX_ALIAS_REGS (4) //EAX, EBX, ECX, EDX +#define MAX_SIMD_LENGTH (64) +#define MAX_SIMD_REGS (32) + +#define MAX_ALIAS_REGS (16) //EAX, EBX, ECX, EDX, EBP, EDI, ESI, ESP, R8-R15 #define MAX_ALIAS_REG_SIZE (8) //RAX is 64bits #define MAX_ALIAS_TYPE (3) //(RAX, EAX, AX),(AH),(AL) //different register group -#define ALIAS_REG_A (0) //RAX, EAX, AX, AH, or AL -#define ALIAS_REG_B (1) -#define ALIAS_REG_C (2) -#define ALIAS_REG_D (3) +enum AliasReg { + ALIAS_REG_A = 0, //RAX, EAX, AX, AH, or AL + ALIAS_REG_B, + ALIAS_REG_C, + ALIAS_REG_D, + ALIAS_REG_BP, + ALIAS_REG_DI, + ALIAS_REG_SI, + ALIAS_REG_SP, + ALIAS_REG_R8, + ALIAS_REG_R9, + ALIAS_REG_R10, + ALIAS_REG_R11, + ALIAS_REG_R12, + ALIAS_REG_R13, + ALIAS_REG_R14, + ALIAS_REG_R15}; //alias type, generic, high byte or low byte -#define ALIAS_GENERIC (0) // RAX, EAX, or AX -#define ALIAS_HIGH_BYTE (1) //AH -#define ALIAS_LOW_BYTE (2) // AL -#define ALIAS_HIGH_LOW (3) + +enum AliasGroup{ + ALIAS_GENERIC=0, // RAX, EAX, or AX + ALIAS_HIGH_BYTE, //AH + ALIAS_LOW_BYTE // AL +}; #if __BYTE_ORDER == __LITTLE_ENDIAN //alias begin bytes for different types @@ -120,7 +152,7 @@ using namespace PinCCTLib; #ifdef ENABLE_SAMPLING #define WINDOW_ENABLE 1000000 -#define WINDOW_DISABLE 1000000000 +#define WINDOW_DISABLE 100000000 #define WINDOW_CLEAN 10 #endif @@ -130,18 +162,29 @@ using namespace PinCCTLib; #define MAKE_CONTEXT_PAIR(a, b) (((uint64_t)(a) << 32) | ((uint64_t)(b))) +#define delta 0.01 + struct AddrValPair{ - void * address; uint8_t value[MAX_WRITE_OP_LENGTH]; -}; + void * address; +} __attribute__((aligned(16))); + +struct LargeReg{ + UINT8 value[MAX_SIMD_LENGTH]; +} __attribute__((aligned(32))); struct RedSpyThreadData{ + AddrValPair buffer[MAX_WRITE_OPS_IN_INS]; + struct LargeReg simdValue[MAX_SIMD_REGS]; + uint32_t regCtxt[REG_LAST]; UINT8 regValue[REG_LAST][MAX_REG_LENGTH]; UINT8 aliasValue[MAX_ALIAS_REGS][MAX_ALIAS_REG_SIZE]; uint32_t aliasCtxt[MAX_ALIAS_REGS][MAX_ALIAS_TYPE]; + uint32_t simdCtxt[MAX_SIMD_REGS]; + uint64_t bytesWritten; long long numIns; @@ -217,12 +260,15 @@ static const uint64_t READ_ACCESS_STATES [] = {/*0 byte */0, /*1 byte */ ONE_BYT static const uint64_t WRITE_ACCESS_STATES [] = {/*0 byte */0, /*1 byte */ ONE_BYTE_WRITE_ACTION, /*2 byte */ TWO_BYTE_WRITE_ACTION, /*3 byte */ 0, /*4 byte */ FOUR_BYTE_WRITE_ACTION, /*5 byte */0, /*6 byte */0, /*7 byte */0, /*8 byte */ EIGHT_BYTE_WRITE_ACTION}; static const uint8_t OVERFLOW_CHECK [] = {/*0 byte */0, /*1 byte */ 0, /*2 byte */ 0, /*3 byte */ 1, /*4 byte */ 2, /*5 byte */3, /*6 byte */4, /*7 byte */5, /*8 byte */ 6}; -static unordered_map RedMap[THREAD_MAX]; +static dense_hash_map RedMap[THREAD_MAX]; +static dense_hash_map ApproxRedMap[THREAD_MAX]; + +static inline void AddToRedTable(uint64_t key, uint16_t value, THREADID threadId) __attribute__((always_inline,flatten)); static inline void AddToRedTable(uint64_t key, uint16_t value, THREADID threadId) { #ifdef MULTI_THREADED LOCK_RED_MAP(); #endif - unordered_map::iterator it = RedMap[threadId].find(key); + dense_hash_map::iterator it = RedMap[threadId].find(key); if ( it == RedMap[threadId].end()) { RedMap[threadId][key] = value; } else { @@ -233,85 +279,334 @@ static inline void AddToRedTable(uint64_t key, uint16_t value, THREADID threadI #endif } +static inline void AddToApproximateRedTable(uint64_t key, uint16_t value, THREADID threadId) __attribute__((always_inline,flatten)); +static inline void AddToApproximateRedTable(uint64_t key, uint16_t value, THREADID threadId) { +#ifdef MULTI_THREADED + LOCK_RED_MAP(); +#endif + dense_hash_map::iterator it = ApproxRedMap[threadId].find(key); + if ( it == ApproxRedMap[threadId].end()) { + ApproxRedMap[threadId][key] = value; + } else { + it->second += value; + } +#ifdef MULTI_THREADED + UNLOCK_RED_MAP(); +#endif +} + + #ifdef ENABLE_SAMPLING static inline VOID EmptyCtxt(RedSpyThreadData* tData){ - int i; - for( i = 0; i< REG_LAST; ++i){ - tData->regCtxt[i] = 0; - } - /* - tData->numWinds++; - if(tData->numWinds > WINDOW_CLEAN){ - long count = tData->bytesWritten; - long delNum = 0; - //printf("size of the map %lu, total reg written %lu\n",count,tData->numRegWritten); - unordered_map::iterator it,ittmp; - for (it = RedMap[threadId].begin(); it != RedMap[threadId].end();) { - //printf("%lu\n",(*it).second); - if((*it).second * 100.0 < count){ - delNum += (*it).second; - ittmp = it; - it++; - RedMap[threadId].erase(ittmp); - }else - it++; - } - tData->numWinds=0; - tData->bytesWritten -= delNum; - }*/ + memset(&tData->regCtxt, 0, sizeof(uint32_t)*REG_LAST); + memset(&tData->aliasCtxt, 0, sizeof(uint32_t)*MAX_ALIAS_REGS*MAX_ALIAS_TYPE); + memset(&tData->regValue, 0, REG_LAST*MAX_REG_LENGTH); + memset(&tData->aliasValue, 0, MAX_ALIAS_REGS*MAX_ALIAS_REG_SIZE); } static ADDRINT IfEnableSample(THREADID threadId){ RedSpyThreadData* const tData = ClientGetTLS(threadId); - if(tData->sampleFlag){ - return 1; - } - return 0; + return tData->sampleFlag; } #endif +static inline bool IsFloatInstruction(ADDRINT ip) { + xed_decoded_inst_t xedd; + xed_state_t xed_state; + xed_decoded_inst_zero_set_mode(&xedd, &xed_state); + + if(XED_ERROR_NONE == xed_decode(&xedd, (const xed_uint8_t*)(ip), 15)) { + xed_iclass_enum_t iclassType = xed_decoded_inst_get_iclass(&xedd); + if (iclassType >= XED_ICLASS_F2XM1 && iclassType <=XED_ICLASS_FYL2XP1) { + return true; + } + if (iclassType >= XED_ICLASS_VEXTRACTF128 && iclassType <=XED_ICLASS_VINSERTI128) { + return true; + } + if (iclassType >= XED_ICLASS_VRCPPS && iclassType <= XED_ICLASS_VSQRTSS) { + return true; + } + if (iclassType >= XED_ICLASS_VSUBPD && iclassType <= XED_ICLASS_VXORPS) { + return true; + } + switch (iclassType) { + case XED_ICLASS_ADDPD: + case XED_ICLASS_ADDPS: + case XED_ICLASS_ADDSD: + case XED_ICLASS_ADDSS: + case XED_ICLASS_ADDSUBPD: + case XED_ICLASS_ADDSUBPS: + case XED_ICLASS_ANDNPD: + case XED_ICLASS_ANDNPS: + case XED_ICLASS_ANDPD: + case XED_ICLASS_ANDPS: + case XED_ICLASS_BLENDPD: + case XED_ICLASS_BLENDPS: + case XED_ICLASS_BLENDVPD: + case XED_ICLASS_BLENDVPS: + case XED_ICLASS_CMPPD: + case XED_ICLASS_CMPPS: + case XED_ICLASS_CMPSD: + case XED_ICLASS_CMPSD_XMM: + case XED_ICLASS_COMISD: + case XED_ICLASS_COMISS: + case XED_ICLASS_CVTDQ2PD: + case XED_ICLASS_CVTDQ2PS: + case XED_ICLASS_CVTPD2PS: + case XED_ICLASS_CVTPI2PD: + case XED_ICLASS_CVTPI2PS: + case XED_ICLASS_CVTPS2PD: + case XED_ICLASS_CVTSD2SS: + case XED_ICLASS_CVTSI2SD: + case XED_ICLASS_CVTSI2SS: + case XED_ICLASS_CVTSS2SD: + case XED_ICLASS_DIVPD: + case XED_ICLASS_DIVPS: + case XED_ICLASS_DIVSD: + case XED_ICLASS_DIVSS: + case XED_ICLASS_DPPD: + case XED_ICLASS_DPPS: + case XED_ICLASS_HADDPD: + case XED_ICLASS_HADDPS: + case XED_ICLASS_HSUBPD: + case XED_ICLASS_HSUBPS: + case XED_ICLASS_MAXPD: + case XED_ICLASS_MAXPS: + case XED_ICLASS_MAXSD: + case XED_ICLASS_MAXSS: + case XED_ICLASS_MINPD: + case XED_ICLASS_MINPS: + case XED_ICLASS_MINSD: + case XED_ICLASS_MINSS: + case XED_ICLASS_MOVAPD: + case XED_ICLASS_MOVAPS: + case XED_ICLASS_MOVD: + case XED_ICLASS_MOVHLPS: + case XED_ICLASS_MOVHPD: + case XED_ICLASS_MOVHPS: + case XED_ICLASS_MOVLHPS: + case XED_ICLASS_MOVLPD: + case XED_ICLASS_MOVLPS: + case XED_ICLASS_MOVMSKPD: + case XED_ICLASS_MOVMSKPS: + case XED_ICLASS_MOVNTPD: + case XED_ICLASS_MOVNTPS: + case XED_ICLASS_MOVNTSD: + case XED_ICLASS_MOVNTSS: + case XED_ICLASS_MOVSD: + case XED_ICLASS_MOVSD_XMM: + case XED_ICLASS_MOVSS: + case XED_ICLASS_MULPD: + case XED_ICLASS_MULPS: + case XED_ICLASS_MULSD: + case XED_ICLASS_MULSS: + case XED_ICLASS_ORPD: + case XED_ICLASS_ORPS: + case XED_ICLASS_ROUNDPD: + case XED_ICLASS_ROUNDPS: + case XED_ICLASS_ROUNDSD: + case XED_ICLASS_ROUNDSS: + case XED_ICLASS_SHUFPD: + case XED_ICLASS_SHUFPS: + case XED_ICLASS_SQRTPD: + case XED_ICLASS_SQRTPS: + case XED_ICLASS_SQRTSD: + case XED_ICLASS_SQRTSS: + case XED_ICLASS_SUBPD: + case XED_ICLASS_SUBPS: + case XED_ICLASS_SUBSD: + case XED_ICLASS_SUBSS: + case XED_ICLASS_VADDPD: + case XED_ICLASS_VADDPS: + case XED_ICLASS_VADDSD: + case XED_ICLASS_VADDSS: + case XED_ICLASS_VADDSUBPD: + case XED_ICLASS_VADDSUBPS: + case XED_ICLASS_VANDNPD: + case XED_ICLASS_VANDNPS: + case XED_ICLASS_VANDPD: + case XED_ICLASS_VANDPS: + case XED_ICLASS_VBLENDPD: + case XED_ICLASS_VBLENDPS: + case XED_ICLASS_VBLENDVPD: + case XED_ICLASS_VBLENDVPS: + case XED_ICLASS_VBROADCASTSD: + case XED_ICLASS_VBROADCASTSS: + case XED_ICLASS_VCMPPD: + case XED_ICLASS_VCMPPS: + case XED_ICLASS_VCMPSD: + case XED_ICLASS_VCMPSS: + case XED_ICLASS_VCOMISD: + case XED_ICLASS_VCOMISS: + case XED_ICLASS_VCVTDQ2PD: + case XED_ICLASS_VCVTDQ2PS: + case XED_ICLASS_VCVTPD2PS: + case XED_ICLASS_VCVTPH2PS: + case XED_ICLASS_VCVTPS2PD: + case XED_ICLASS_VCVTSD2SS: + case XED_ICLASS_VCVTSI2SD: + case XED_ICLASS_VCVTSI2SS: + case XED_ICLASS_VCVTSS2SD: + case XED_ICLASS_VDIVPD: + case XED_ICLASS_VDIVPS: + case XED_ICLASS_VDIVSD: + case XED_ICLASS_VDIVSS: + case XED_ICLASS_VDPPD: + case XED_ICLASS_VDPPS: + case XED_ICLASS_VMASKMOVPD: + case XED_ICLASS_VMASKMOVPS: + case XED_ICLASS_VMAXPD: + case XED_ICLASS_VMAXPS: + case XED_ICLASS_VMAXSD: + case XED_ICLASS_VMAXSS: + case XED_ICLASS_VMINPD: + case XED_ICLASS_VMINPS: + case XED_ICLASS_VMINSD: + case XED_ICLASS_VMINSS: + case XED_ICLASS_VMOVAPD: + case XED_ICLASS_VMOVAPS: + case XED_ICLASS_VMOVD: + case XED_ICLASS_VMOVHLPS: + case XED_ICLASS_VMOVHPD: + case XED_ICLASS_VMOVHPS: + case XED_ICLASS_VMOVLHPS: + case XED_ICLASS_VMOVLPD: + case XED_ICLASS_VMOVLPS: + case XED_ICLASS_VMOVMSKPD: + case XED_ICLASS_VMOVMSKPS: + case XED_ICLASS_VMOVNTPD: + case XED_ICLASS_VMOVNTPS: + case XED_ICLASS_VMOVSD: + case XED_ICLASS_VMOVSS: + case XED_ICLASS_VMOVUPD: + case XED_ICLASS_VMOVUPS: + case XED_ICLASS_VMULPD: + case XED_ICLASS_VMULPS: + case XED_ICLASS_VMULSD: + case XED_ICLASS_VMULSS: + case XED_ICLASS_VORPD: + case XED_ICLASS_VORPS: + case XED_ICLASS_VPABSD: + case XED_ICLASS_VPADDD: + case XED_ICLASS_VPCOMD: + case XED_ICLASS_VPCOMUD: + case XED_ICLASS_VPERMILPD: + case XED_ICLASS_VPERMILPS: + case XED_ICLASS_VPERMPD: + case XED_ICLASS_VPERMPS: + case XED_ICLASS_VPGATHERDD: + case XED_ICLASS_VPGATHERQD: + case XED_ICLASS_VPHADDBD: + case XED_ICLASS_VPHADDD: + case XED_ICLASS_VPHADDUBD: + case XED_ICLASS_VPHADDUWD: + case XED_ICLASS_VPHADDWD: + case XED_ICLASS_VPHSUBD: + case XED_ICLASS_VPHSUBWD: + case XED_ICLASS_VPINSRD: + case XED_ICLASS_VPMACSDD: + case XED_ICLASS_VPMACSSDD: + case XED_ICLASS_VPMASKMOVD: + case XED_ICLASS_VPMAXSD: + case XED_ICLASS_VPMAXUD: + case XED_ICLASS_VPMINSD: + case XED_ICLASS_VPMINUD: + case XED_ICLASS_VPROTD: + case XED_ICLASS_VPSUBD: + case XED_ICLASS_XORPD: + case XED_ICLASS_XORPS: + return true; + + default: return false; + } + } else { + assert(0 && "failed to disassemble instruction"); + return false; + } +} +/* +static inline bool IsFloatInstruction(ADDRINT ip, uint32_t oper) { + xed_decoded_inst_t xedd; + xed_state_t xed_state; + xed_decoded_inst_zero_set_mode(&xedd, &xed_state); + + if(XED_ERROR_NONE == xed_decode(&xedd, (const xed_uint8_t*)(ip), 15)) { + xed_operand_element_type_enum_t TypeOperand = xed_decoded_inst_operand_element_type(&xedd,oper); + if(TypeOperand == XED_OPERAND_ELEMENT_TYPE_SINGLE || TypeOperand == XED_OPERAND_ELEMENT_TYPE_DOUBLE || TypeOperand == XED_OPERAND_ELEMENT_TYPE_FLOAT16 || TypeOperand == XED_OPERAND_ELEMENT_TYPE_LONGDOUBLE) + return true; + return false; + } else { + assert(0 && "failed to disassemble instruction"); + return false; + } +}*/ + +static inline uint16_t FloatOperandSize(ADDRINT ip, uint32_t oper) { + xed_decoded_inst_t xedd; + xed_state_t xed_state; + xed_decoded_inst_zero_set_mode(&xedd, &xed_state); + + if(XED_ERROR_NONE == xed_decode(&xedd, (const xed_uint8_t*)(ip), 15)) { + xed_operand_element_type_enum_t TypeOperand = xed_decoded_inst_operand_element_type(&xedd,oper); + if(TypeOperand == XED_OPERAND_ELEMENT_TYPE_SINGLE || TypeOperand == XED_OPERAND_ELEMENT_TYPE_FLOAT16) + return 4; + if (TypeOperand == XED_OPERAND_ELEMENT_TYPE_DOUBLE) { + return 8; + } + if (TypeOperand == XED_OPERAND_ELEMENT_TYPE_LONGDOUBLE) { + return 16; + } + assert(0 && "float instruction with unknown operand\n"); + return 0; + } else { + assert(0 && "failed to disassemble instruction\n"); + return 0; + } +} + /*********************************************************************************/ /* register analysis */ /*********************************************************************************/ -template +/**************** handleing align registers ****************/ +template struct HandleAliasRegisters{ - static __attribute__((always_inline)) void CheckUpdateGenericAlias(uint8_t regId, uint8_t byteOffset, T value, uint32_t opaqueHandle, THREADID threadId) { + static __attribute__((always_inline)) void CheckUpdateGenericAlias(uint8_t regId, T value, uint32_t opaqueHandle, THREADID threadId) { RedSpyThreadData* const tData = ClientGetTLS(threadId); ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); - T * where = (T *)(&tData->aliasValue[regId][byteOffset]); - - if (*where == value) { - AddToRedTable(MAKE_CONTEXT_PAIR(tData->aliasCtxt[regId][ALIAS_GENERIC],curCtxtHandle),sizeof(T),threadId); - }else - * where = value; - tData->aliasCtxt[regId][ALIAS_GENERIC] = curCtxtHandle; - tData->aliasCtxt[regId][ALIAS_HIGH_BYTE] = curCtxtHandle; - tData->aliasCtxt[regId][ALIAS_LOW_BYTE] = curCtxtHandle; - } - static __attribute__((always_inline)) void CheckUpdateHighLowAlias(uint8_t regId, uint8_t byteOffset, uint8_t regType, T value, uint32_t opaqueHandle, THREADID threadId) { - - RedSpyThreadData* const tData = ClientGetTLS(threadId); - ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + //alias begin bytes for different types + #if __BYTE_ORDER == __LITTLE_ENDIAN + uint8_t byteOffset = aliasGroup == ALIAS_HIGH_BYTE ? 1 : 0; + #else + #error "unknown endianness" + #endif + T * where = (T *)(&tData->aliasValue[regId][byteOffset]); if (*where == value) { - AddToRedTable(MAKE_CONTEXT_PAIR(tData->aliasCtxt[regId][regType],curCtxtHandle),sizeof(T),threadId); - }else - * where = value; + AddToRedTable(MAKE_CONTEXT_PAIR(tData->aliasCtxt[regId][aliasGroup], curCtxtHandle), sizeof(T), threadId); + }else { + *where = value; + } tData->aliasCtxt[regId][ALIAS_GENERIC] = curCtxtHandle; - tData->aliasCtxt[regId][regType] = curCtxtHandle; + if(aliasGroup == ALIAS_GENERIC){ + tData->aliasCtxt[regId][ALIAS_HIGH_BYTE] = curCtxtHandle; + tData->aliasCtxt[regId][ALIAS_LOW_BYTE] = curCtxtHandle; + } else { + tData->aliasCtxt[regId][aliasGroup] = curCtxtHandle; + } } }; -template +/**************** handleing general registers ****************/ +template struct HandleGeneralRegisters{ static __attribute__((always_inline)) void CheckValues(T value, REG reg, uint32_t opaqueHandle, THREADID threadId) { @@ -321,7 +616,7 @@ struct HandleGeneralRegisters{ T * regBefore = (T *)(&tData->regValue[reg][0]); - if (* regBefore == value && tData->regCtxt[reg]) { + if (* regBefore == value ) { AddToRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[reg],curCtxtHandle),sizeof(T),threadId); }else * regBefore = value; @@ -329,26 +624,305 @@ struct HandleGeneralRegisters{ } }; -static inline VOID CheckLargeRegAfterWrite(PIN_REGISTER* regRef, REG reg, uint32_t regSize, uint32_t opaqueHandle, THREADID threadId){ +//lenInt64: 1(X87), 2(XMM), 4(YMM), 8(ZMM) +template +struct HandleSpecialRegisters{ + + //check the MM_x part registers in X87 + static __attribute__((always_inline)) void CheckRegValues(PIN_REGISTER* regRef, REG regID, uint32_t opaqueHandle, THREADID threadId){ + + RedSpyThreadData* const tData = ClientGetTLS(threadId); + + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + + if(lenInt64 == 1){ + uint64_t *oldValue = (uint64_t*)&(tData->regValue[regID][0]); + if(*oldValue == regRef->qword[0]) + AddToRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[regID],curCtxtHandle),8,threadId); + else + *oldValue = regRef->qword[0]; + + tData->regCtxt[regID] = curCtxtHandle; + }else if(lenInt64 == 2){ + + uint64_t *oldValue1 = (uint64_t*)&(tData->simdValue[regID].value); + uint64_t *oldValue2 = (uint64_t*)&(tData->simdValue[regID].value[8]); + if(*oldValue1 == regRef->qword[0] && *oldValue2 == regRef->qword[1]) + AddToRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regID],curCtxtHandle),16,threadId); + else{ + *oldValue1 = regRef->qword[0]; + *oldValue2 = regRef->qword[1]; + } + tData->simdCtxt[regID] = curCtxtHandle; + + }else{ + + uint64_t *oldValue; + bool isRedundant = true; + for(int i = 0,j = 0; i < lenInt64; ++i, j += 8){ + oldValue = (uint64_t*)&(tData->simdValue[regID].value[j]); + if(*oldValue != regRef->qword[i]){ + isRedundant = false; + *oldValue = regRef->qword[i]; + } + } + + if(isRedundant) + AddToRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regID],curCtxtHandle),lenInt64*8,threadId); + + tData->simdCtxt[regID] = curCtxtHandle; + } + } + + static __attribute__((always_inline)) void CheckSIMDRegValues(PIN_REGISTER* regRef, uint8_t simdID, uint32_t opaqueHandle, THREADID threadId){ + + RedSpyThreadData* const tData = ClientGetTLS(threadId); + + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + + if(lenInt64 == 2){ + + uint64_t *oldValue1 = (uint64_t*)&(tData->simdValue[simdID].value[0]); + uint64_t *oldValue2 = (uint64_t*)&(tData->simdValue[simdID].value[8]); + if(*oldValue1 == regRef->qword[0] && *oldValue2 == regRef->qword[1]) + AddToRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[simdID],curCtxtHandle),16,threadId); + else{ + *oldValue1 = regRef->qword[0]; + *oldValue2 = regRef->qword[1]; + } + }else{ + + uint64_t *oldValue; + bool isRedundant = true; + for(int i = 0,j = 0; i < lenInt64; ++i, j += 8){ + oldValue = (uint64_t*)&(tData->simdValue[simdID].value[j]); + if(*oldValue != regRef->qword[i]){ + isRedundant = false; + *oldValue = regRef->qword[i]; + } + } + + if(isRedundant) + AddToRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[simdID],curCtxtHandle),lenInt64*8,threadId); + } + tData->simdCtxt[simdID] = curCtxtHandle; + } +}; + +/**************** handleing registers approximation ****************/ +//static void Check10BytesReg(PIN_REGISTER* regRef, REG reg, uint32_t opaqueHandle, THREADID threadId)__attribute__((always_inline)); +static void Check10BytesReg(CONTEXT * ctxt, REG reg, uint32_t opaqueHandle, THREADID threadId){ RedSpyThreadData* const tData = ClientGetTLS(threadId); - ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); - int i; - bool isRedundantWrite = true; - for(i = 0; i < 16; ++i){ - if(tData->regValue[reg][i] != regRef->byte[i]){ - isRedundantWrite = false; - tData->regValue[reg][i] = regRef->byte[i]; + UINT8 * valueAfter; + valueAfter = (UINT8 *)malloc(10*sizeof(UINT8)); + PIN_GetContextRegval(ctxt,reg,valueAfter); + + uint64_t * upperOld = (uint64_t*)&(tData->regValue[reg][2]); + uint64_t * upperNew = (uint64_t*)&(valueAfter[2]); + + uint16_t * lowOld = (uint16_t*)&(tData->regValue[reg][0]); + uint16_t * lowNew = (uint16_t*)(valueAfter); + + if((*lowOld & 0xfff0) == (*lowNew & 0xfff0) && *upperNew == *upperOld){ + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[reg],curCtxtHandle),10,threadId); + *lowOld = *lowNew; + }else + memcpy(&tData->regValue[reg][0], valueAfter, 10); + tData->regCtxt[reg] = curCtxtHandle; +} + +//approximate general registers +template +struct ApproxGeneralRegisters{ + + static __attribute__((always_inline)) void CheckValues(PIN_REGISTER* regRef, uint32_t reg, uint32_t opaqueHandle, THREADID threadId){ + + RedSpyThreadData* const tData = ClientGetTLS(threadId); + + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + + if(isAlias){ + uint8_t byteOffset = 0; + + T newValue; + if(sizeof(T) == 8) + newValue = regRef->dbl[0]; + else + newValue = regRef->flt[0]; + + T oldValue = *((T*)(&tData->aliasValue[reg][byteOffset])); + T rate = (newValue - oldValue)/oldValue; + if( rate <= delta && rate >= -delta ){ + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->aliasCtxt[reg][ALIAS_GENERIC],curCtxtHandle),sizeof(T),threadId); + } + if( newValue != oldValue) + *((T*)(&tData->aliasValue[reg][byteOffset])) = newValue; + + tData->aliasCtxt[reg][ALIAS_GENERIC] = curCtxtHandle; + tData->aliasCtxt[reg][ALIAS_HIGH_BYTE] = curCtxtHandle; + tData->aliasCtxt[reg][ALIAS_LOW_BYTE] = curCtxtHandle; + + }else{ + T newValue; + if(sizeof(T) == 8) + newValue = regRef->dbl[0]; + else + newValue = regRef->flt[0]; + + T oldValue = *((T*)(&tData->regValue[reg][0])); + T rate = (newValue - oldValue)/oldValue; + if(rate <= delta && rate >= -delta) { + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[reg],curCtxtHandle),sizeof(T),threadId); + } + if(newValue != oldValue) + *((T*)(&tData->regValue[reg][0])) = newValue; + tData->regCtxt[reg] = curCtxtHandle; } } +}; + +//approximate SIMD registers, simdType:0(XMM), 1(YMM), 2(ZMM) +template +struct ApproxLargeRegisters{ - if(isRedundantWrite && tData->regCtxt[reg]!=0) { - AddToRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[reg],curCtxtHandle),regSize,threadId); + static __attribute__((always_inline)) void CheckValues(PIN_REGISTER* regRef, uint8_t regInd, uint32_t opaqueHandle, THREADID threadId){ + + RedSpyThreadData* const tData = ClientGetTLS(threadId); + + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + + if(simdType == 0){ + + if(sizeof(T) == 4){ + __m128 oldValue = _mm_load_ps( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m128 newValue = _mm_loadu_ps( reinterpret_cast (regRef)); + + __m128 result = _mm_sub_ps(newValue,oldValue); + + result = _mm_div_ps(result,oldValue); + float rates[4] __attribute__((aligned(16))); + _mm_store_ps(rates,result); + + uint8_t redCount = 0; + if(rates[0] <= delta && rates[0] >= -delta) redCount++; + if(rates[1] <= delta && rates[1] >= -delta) redCount++; + if(rates[2] <= delta && rates[2] >= -delta) redCount++; + if(rates[3] <= delta && rates[3] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regInd],curCtxtHandle),4*redCount,threadId); + _mm_store_ps(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + + }else if(sizeof(T) == 8){ + __m128d oldValue = _mm_load_pd( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m128d newValue = _mm_loadu_pd( reinterpret_cast (regRef)); + + __m128d result = _mm_sub_pd(newValue,oldValue); + + result = _mm_div_pd(result,oldValue); + + double rate[2]; + _mm_storel_pd(&rate[0],result); + _mm_storeh_pd(&rate[1],result); + + uint8_t redCount = 0; + if(rate[0] <= delta && rate[0] >=-delta) redCount++; + if(rate[1] <= delta && rate[1] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regInd],curCtxtHandle),8*redCount,threadId); + _mm_store_pd(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + }else ; + + }else if(simdType == 1){ + + if(sizeof(T) == 4){ + __m256 oldValue = _mm256_load_ps( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m256 newValue = _mm256_loadu_ps( reinterpret_cast (regRef)); + + __m256 result = _mm256_sub_ps(newValue,oldValue); + + result = _mm256_div_ps(result,oldValue); + float rates[8] __attribute__((aligned(32))); + _mm256_store_ps(rates,result); + + uint8_t redCount = 0; + for(int i = 0; i < 7; ++i) + if(rates[i] <= delta && rates[i] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regInd],curCtxtHandle),4*redCount,threadId); + _mm256_store_ps(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + + }else if(sizeof(T) == 8){ + __m256d oldValue = _mm256_load_pd( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m256d newValue = _mm256_loadu_pd( reinterpret_cast (regRef)); + + __m256d result = _mm256_sub_pd(newValue,oldValue); + + result = _mm256_div_pd(result,oldValue); + + double rate[4] __attribute__((aligned(32))); + _mm256_store_pd(rate,result); + + uint8_t redCount = 0; + if(rate[0] <= delta && rate[0] >=-delta) redCount++; + if(rate[1] <= delta && rate[1] >= -delta) redCount++; + if(rate[2] <= delta && rate[2] >=-delta) redCount++; + if(rate[3] <= delta && rate[3] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regInd],curCtxtHandle),8*redCount,threadId); + _mm256_store_pd(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + }else ; + + }else ;/*else{ + + if(sizeof(T) == 4){ + __m512 oldValue = _mm512_load_ps( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m512 newValue = _mm512_loadu_ps( reinterpret_cast (regRef)); + + __m512 result = _mm512_sub_ps(newValue,oldValue); + + result = _mm512_div_ps(result,oldValue); + float rates[16] __attribute__((aligned(64))); + _mm512_store_ps(rates,result); + + uint8_t redCount = 0; + for(int i = 0; i < 15; ++i) + if(rates[i] <= delta && rates[i] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->simdCtxt[regInd],curCtxtHandle),4*redCount,threadId); + _mm512_store_ps(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + + }else if(sizeof(T) == 8){ + __m512d oldValue = _mm512_load_pd( reinterpret_cast (&(tData->simdValue[regInd].value[0]))); + __m512d newValue = _mm512_loadu_pd( reinterpret_cast (regRef)); + + __m512d result = _mm512_sub_pd(newValue,oldValue); + + result = _mm512_div_pd(result,oldValue); + + double rates[8] __attribute__((aligned(64))); + _mm512_store_ps(rates,result); + + uint8_t redCount = 0; + for(int i = 0; i < 7; ++i) + if(rates[i] <= delta && rates[i] >= -delta) redCount++; + + if(redCount) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(tData->regCtxt[reg],curCtxtHandle),8*redCount,threadId); + _mm512_store_pd(reinterpret_cast (&(tData->simdValue[regInd].value[0])),newValue); + }else ; + }*/ + + tData->simdCtxt[regInd] = curCtxtHandle; } - tData->regCtxt[reg] = curCtxtHandle; -} +}; static inline uint32_t GetAliasIDs(REG reg){ uint8_t regGroup = 0; @@ -378,6 +952,67 @@ static inline uint32_t GetAliasIDs(REG reg){ case REG_DX: regGroup = ALIAS_REG_D; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; case REG_DH: regGroup = ALIAS_REG_D; byteInd = ALIAS_BYTES_INDEX_8_H; type = ALIAS_HIGH_BYTE; break; case REG_DL: regGroup = ALIAS_REG_D; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_LOW_BYTE; break; + + case REG_RBP: regGroup = ALIAS_REG_BP; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_EBP: regGroup = ALIAS_REG_BP; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_BP: regGroup = ALIAS_REG_BP; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_BPL: regGroup = ALIAS_REG_BP; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_RDI: regGroup = ALIAS_REG_DI; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_EDI: regGroup = ALIAS_REG_DI; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_DI: regGroup = ALIAS_REG_DI; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_DIL: regGroup = ALIAS_REG_DI; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_RSI: regGroup = ALIAS_REG_SI; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_ESI: regGroup = ALIAS_REG_SI; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_SI: regGroup = ALIAS_REG_SI; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_SIL: regGroup = ALIAS_REG_SI; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_RSP: regGroup = ALIAS_REG_SP; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_ESP: regGroup = ALIAS_REG_SP; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_SP: regGroup = ALIAS_REG_SP; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_SPL: regGroup = ALIAS_REG_SP; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R8: regGroup = ALIAS_REG_R8; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R8D: regGroup = ALIAS_REG_R8; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R8W: regGroup = ALIAS_REG_R8; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R8B: regGroup = ALIAS_REG_R8; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R9: regGroup = ALIAS_REG_R9; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R9D: regGroup = ALIAS_REG_R9; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R9W: regGroup = ALIAS_REG_R9; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R9B: regGroup = ALIAS_REG_R9; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R10: regGroup = ALIAS_REG_R10; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R10D: regGroup = ALIAS_REG_R10; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R10W: regGroup = ALIAS_REG_R10; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R10B: regGroup = ALIAS_REG_R10; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R11: regGroup = ALIAS_REG_R11; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R11D: regGroup = ALIAS_REG_R11; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R11W: regGroup = ALIAS_REG_R11; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R11B: regGroup = ALIAS_REG_R11; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R12: regGroup = ALIAS_REG_R12; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R12D: regGroup = ALIAS_REG_R12; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R12W: regGroup = ALIAS_REG_R12; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R12B: regGroup = ALIAS_REG_R12; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R13: regGroup = ALIAS_REG_R13; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R13D: regGroup = ALIAS_REG_R13; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R13W: regGroup = ALIAS_REG_R13; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R13B: regGroup = ALIAS_REG_R13; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R14: regGroup = ALIAS_REG_R14; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R14D: regGroup = ALIAS_REG_R14; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R14W: regGroup = ALIAS_REG_R14; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R14B: regGroup = ALIAS_REG_R14; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + + case REG_R15: regGroup = ALIAS_REG_R15; byteInd = ALIAS_BYTES_INDEX_64; type = ALIAS_GENERIC; break; + case REG_R15D: regGroup = ALIAS_REG_R15; byteInd = ALIAS_BYTES_INDEX_32; type = ALIAS_GENERIC; break; + case REG_R15W: regGroup = ALIAS_REG_R15; byteInd = ALIAS_BYTES_INDEX_16; type = ALIAS_GENERIC; break; + case REG_R15B: regGroup = ALIAS_REG_R15; byteInd = ALIAS_BYTES_INDEX_8_L; type = ALIAS_GENERIC; break; + default: assert(0 && "not alias registers! should not reach here!"); break; } uint32_t aliasGroupByteType = ((uint32_t)regGroup << 16) | ((uint32_t)byteInd << 8) | ((uint32_t)type); @@ -406,6 +1041,54 @@ inline bool RegHasAlias(REG reg){ case REG_BL: case REG_CL: case REG_DL: + case REG_RBP: + case REG_EBP: + case REG_BP: + case REG_BPL: + case REG_RDI: + case REG_EDI: + case REG_DI: + case REG_DIL: + case REG_RSI: + case REG_ESI: + case REG_SI: + case REG_SIL: + case REG_RSP: + case REG_ESP: + case REG_SP: + case REG_SPL: + case REG_R8: + case REG_R8D: + case REG_R8W: + case REG_R8B: + case REG_R9: + case REG_R9D: + case REG_R9W: + case REG_R9B: + case REG_R10: + case REG_R10D: + case REG_R10W: + case REG_R10B: + case REG_R11: + case REG_R11D: + case REG_R11W: + case REG_R11B: + case REG_R12: + case REG_R12D: + case REG_R12W: + case REG_R12B: + case REG_R13: + case REG_R13D: + case REG_R13W: + case REG_R13B: + case REG_R14: + case REG_R14D: + case REG_R14W: + case REG_R14B: + case REG_R15: + case REG_R15D: + case REG_R15W: + case REG_R15B: return true; default: return false; } @@ -413,64 +1096,130 @@ inline bool RegHasAlias(REG reg){ #ifdef ENABLE_SAMPLING -#define HANDLE_LARGEREG() \ +#define HANDLE_SPECIALREG(LEN,REG_ID) \ INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ -INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) CheckLargeRegAfterWrite, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, reg, IARG_UINT32, regSize, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleSpecialRegisters::CheckRegValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) -#define HANDLE_ALIAS_GENERIC(T, ID, OFFSET) \ -INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END); \ -INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateGenericAlias, IARG_UINT32, ID, IARG_UINT32, OFFSET, IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +#define HANDLE_LARGEREG_APPROX(T, SIMD_TYPE, REG_ID) \ +INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) ApproxLargeRegisters::CheckValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) -#define HANDLE_ALIAS_HIGHLOW(ID, OFFSET, TYPE) \ +#define HANDLE_ALIAS_REG(T, ALIAS_GRP, ID) \ INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END); \ -INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateHighLowAlias, IARG_UINT32, ID, IARG_UINT32, OFFSET, IARG_UINT32, TYPE, IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateGenericAlias, IARG_UINT32, ID, IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) #define HANDLE_GENERAL(T) \ INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END); \ -INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleGeneralRegisters::CheckValues,IARG_REG_VALUE,reg,IARG_UINT32, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleGeneralRegisters::CheckValues,IARG_REG_VALUE,reg,IARG_UINT32, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) + +#define HANDLE_APPROXREG(T, IS_ALIAS, REG_ID) \ +INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) ApproxGeneralRegisters::CheckValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) + +#define HANDLE_10BYTES_APPROX(REG_ID) \ +INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) Check10BytesReg, IARG_CONTEXT, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) #else -#define HANDLE_LARGEREG() \ -INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) CheckLargeRegAfterWrite, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, reg, IARG_UINT32, regSize, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) +#define HANDLE_SPECIALREG(LEN,REG_ID) \ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleSpecialRegisters::CheckRegValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) -#define HANDLE_ALIAS_GENERIC(T, ID, OFFSET) \ -INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateGenericAlias, IARG_UINT32, ID, IARG_UINT32, OFFSET, IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +#define HANDLE_LARGEREG_APPROX(T, SIMD_TYPE, REG_ID) \ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) ApproxLargeRegisters::CheckValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) -#define HANDLE_ALIAS_HIGHLOW(ID, OFFSET, TYPE) \ -INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateHighLowAlias, IARG_UINT32, ID, IARG_UINT32, OFFSET, IARG_UINT32, TYPE, IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +#define HANDLE_ALIAS_REG(T, ALIAS_GRP, ID) \ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleAliasRegisters::CheckUpdateGenericAlias, IARG_UINT32, ID,IARG_REG_VALUE, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) #define HANDLE_GENERAL(T) \ -INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleGeneralRegisters::CheckValues,IARG_REG_VALUE,reg,IARG_UINT32, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) HandleGeneralRegisters::CheckValues,IARG_REG_VALUE,reg,IARG_UINT32, reg, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_END) + +#define HANDLE_APPROXREG(T, IS_ALIAS, REG_ID) \ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) ApproxGeneralRegisters::CheckValues, IARG_REG_CONST_REFERENCE,reg, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) + +#define HANDLE_10BYTES_APPROX(REG_ID) \ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) Check10BytesReg, IARG_CONTEXT, IARG_UINT32, REG_ID, IARG_UINT32, opaqueHandle, IARG_THREAD_ID,IARG_END) #endif -static inline void InstrumentAliasReg(INS ins, REG reg, uint32_t opaqueHandle){ +static inline void InstrumentAliasReg(INS ins, REG reg, uint16_t oper, uint32_t opaqueHandle){ + uint32_t regSize = REG_Size(reg); uint32_t aliasIDs = GetAliasIDs(reg); uint8_t regId = static_cast(((aliasIDs) & 0x00ffffff) >> 16 ); - uint8_t byteOffset = static_cast(((aliasIDs) & 0x0000ffff) >> 8 ); - uint8_t type; - switch (REG_Size(reg)) { - case 8: HANDLE_ALIAS_GENERIC(uint64_t, regId, byteOffset); break; - case 4: HANDLE_ALIAS_GENERIC(uint32_t, regId, byteOffset); break; - case 2: HANDLE_ALIAS_GENERIC(uint16_t, regId, byteOffset); break; - case 1: type = static_cast((aliasIDs) & 0x000000ff); - HANDLE_ALIAS_HIGHLOW(regId, byteOffset,type); break; - default: break; + if (IsFloatInstruction(INS_Address(ins))){ + switch (regSize) { + case 1: + case 2: + case 4: HANDLE_APPROXREG(float, true, regId); break; + case 8: HANDLE_APPROXREG(double, true, regId); break; + default: break; + } + }else{ + switch (regSize) { + case 8: HANDLE_ALIAS_REG(uint64_t, ALIAS_GENERIC, regId); break; + case 4: HANDLE_ALIAS_REG(uint32_t, ALIAS_GENERIC, regId); break; + case 2: HANDLE_ALIAS_REG(uint16_t, ALIAS_GENERIC, regId); break; + case 1: if (REG_is_Lower8(reg)){ + HANDLE_ALIAS_REG(uint8_t, ALIAS_LOW_BYTE, regId); + }else{ + HANDLE_ALIAS_REG(uint8_t, ALIAS_HIGH_BYTE, regId); + }break; + default: break; + } + } } -static inline void InstrumentGeneralReg(INS ins, REG reg, uint32_t opaqueHandle){ +static inline void InstrumentGeneralReg(INS ins, REG reg, uint16_t oper, uint32_t opaqueHandle){ uint32_t regSize = REG_Size(reg); - switch(regSize) { - case 1: HANDLE_GENERAL(uint8_t); break; - case 2: HANDLE_GENERAL(uint16_t); break; - case 4: HANDLE_GENERAL(uint32_t); break; - case 8: HANDLE_GENERAL(uint64_t); break; - default: { - HANDLE_LARGEREG(); break; + + if (IsFloatInstruction(INS_Address(ins))){ + unsigned int operSize = FloatOperandSize(INS_Address(ins),oper); + switch (regSize) { + case 1: + case 2: + case 4: HANDLE_APPROXREG(float, false, reg); break; + case 8: HANDLE_APPROXREG(double, false, reg); break; + case 10: HANDLE_10BYTES_APPROX(reg); break; + case 16: { + switch (operSize) { + case 4: HANDLE_LARGEREG_APPROX(float,0,reg-REG_XMM_BASE);break; + case 8: HANDLE_LARGEREG_APPROX(double,0,reg-REG_XMM_BASE);break; + default: assert(0 && "handle large reg with large operand size\n"); break; + } + }break; + case 32:{ + switch (operSize) { + case 4: HANDLE_LARGEREG_APPROX(float,1,reg-REG_YMM_BASE);break; + case 8: HANDLE_LARGEREG_APPROX(double,1,reg-REG_YMM_BASE);break; + default: assert(0 && "handle large reg with large operand size\n"); break; + } + }break; + case 64: { + switch (operSize) { + case 4: HANDLE_LARGEREG_APPROX(float,2,reg-REG_ZMM_BASE);break; + case 8: HANDLE_LARGEREG_APPROX(double,2,reg-REG_ZMM_BASE);break; + default: assert(0 && "handle large reg with large operand size\n"); break; + } + }break; + default: assert(0 && "not recoganized register size for floating instruction!\n"); + } + }else{ + if (REG_is_in_X87(reg)) { + HANDLE_SPECIALREG(1,reg); + return; + } + switch(regSize) { + case 1: HANDLE_GENERAL(uint8_t); break; + case 2: HANDLE_GENERAL(uint16_t); break; + case 4: HANDLE_GENERAL(uint32_t); break; + case 8: HANDLE_GENERAL(uint64_t); break; + case 16: HANDLE_SPECIALREG(2,reg-REG_XMM_BASE); break; + case 32: HANDLE_SPECIALREG(4,reg-REG_YMM_BASE); break; + case 64: HANDLE_SPECIALREG(8,reg-REG_ZMM_BASE); break; + default: assert(0 && "not recoganized register size for integer instruction!\n"); break; } } } @@ -479,36 +1228,42 @@ static inline void InstrumentGeneralReg(INS ins, REG reg, uint32_t opaqueHandle) /*********************** memory temporal redundancy functions **************************/ /***************************************************************************************/ -template +template struct UnrolledLoop{ static __attribute__((always_inline)) void Body(function func){ func(start); // Real loop body - UnrolledLoop:: Body(func); // unroll next iteration + UnrolledLoop:: Body(func); // unroll next iteration } static __attribute__((always_inline)) void BodySamePage(ContextHandle_t * __restrict__ prevIP, const ContextHandle_t handle, THREADID threadId){ if(conditional) { // report in RedTable - AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[start], handle), 1, threadId); + if(approx) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[start], handle), 1, threadId); + else + AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[start], handle), 1, threadId); } // Update context prevIP[start] = handle; - UnrolledLoop:: BodySamePage(prevIP, handle, threadId); // unroll next iteration + UnrolledLoop:: BodySamePage(prevIP, handle, threadId); // unroll next iteration } static __attribute__((always_inline)) void BodyStraddlePage(uint64_t addr, const ContextHandle_t handle, THREADID threadId){ uint8_t * status = GetOrCreateShadowBaseAddress((uint64_t)addr + start); ContextHandle_t * prevIP = (ContextHandle_t*)(status + PAGE_OFFSET(((uint64_t)addr + start)) * sizeof(ContextHandle_t)); if (conditional) { // report in RedTable - AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0 /* 0 is correct*/ ], handle), 1, threadId); + if(approx) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[0 /* 0 is correct*/ ], handle), 1, threadId); + else + AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0 /* 0 is correct*/ ], handle), 1, threadId); } // Update context prevIP[0] = handle; - UnrolledLoop:: BodyStraddlePage(addr, handle, threadId); // unroll next iteration + UnrolledLoop:: BodyStraddlePage(addr, handle, threadId); // unroll next iteration } }; -template -struct UnrolledLoop{ +template +struct UnrolledLoop{ static __attribute__((always_inline)) void Body(function func){} static __attribute__((always_inline)) void BodySamePage(ContextHandle_t * __restrict__ prevIP, const ContextHandle_t handle, THREADID threadId){} static __attribute__((always_inline)) void BodyStraddlePage(uint64_t addr, const ContextHandle_t handle, THREADID threadId){} @@ -535,19 +1290,112 @@ struct UnrolledConjunction{ }; -template +template struct RedSpyAnalysis{ static __attribute__((always_inline)) bool IsWriteRedundant(void * &addr, THREADID threadId){ RedSpyThreadData* const tData = ClientGetTLS(threadId); AddrValPair * avPair = & tData->buffer[bufferOffset]; addr = avPair->address; - switch(AccessLen){ - case 1: return *((uint8_t*)(&avPair->value)) == *(static_cast(avPair->address)); - case 2: return *((uint16_t*)(&avPair->value)) == *(static_cast(avPair->address)); - case 4: return *((uint32_t*)(&avPair->value)) == *(static_cast(avPair->address)); - case 8: return *((uint64_t*)(&avPair->value)) == *(static_cast(avPair->address)); - default: return memcmp(&avPair->value, avPair->address, AccessLen) == 0; + + if(isApprox){ + if(AccessLen>=32){ + if(sizeof(T) == 4){ + __m256 oldValue = _mm256_load_ps( reinterpret_cast (&avPair->value)); + __m256 newValue = _mm256_loadu_ps( reinterpret_cast (avPair->address)); + + __m256 result = _mm256_sub_ps(newValue,oldValue); + + result = _mm256_div_ps(result,oldValue); + float rates[8] __attribute__((aligned(32))); + _mm256_store_ps(rates,result); + + for(int i = 0; i < 8; ++i){ + if(rates[i] < -delta || rates[i] > delta) { + return false; + } + } + return true; + + }else if(sizeof(T) == 8){ + __m256d oldValue = _mm256_load_pd( reinterpret_cast (&avPair->value)); + __m256d newValue = _mm256_loadu_pd( reinterpret_cast (avPair->address)); + + __m256d result = _mm256_sub_pd(newValue,oldValue); + + result = _mm256_div_pd(result,oldValue); + + double rates[4] __attribute__((aligned(32))); + _mm256_store_pd(rates,result); + + for(int i = 0; i < 4; ++i){ + if(rates[i] < -delta || rates[i] > delta) { + return false; + } + } + return true; + } + }else if(AccessLen == 16){ + if(sizeof(T) == 4){ + __m128 oldValue = _mm_load_ps( reinterpret_cast (&avPair->value)); + __m128 newValue = _mm_loadu_ps( reinterpret_cast (avPair->address)); + + __m128 result = _mm_sub_ps(newValue,oldValue); + + result = _mm_div_ps(result,oldValue); + float rates[4] __attribute__((aligned(16))); + _mm_store_ps(rates,result); + + for(int i = 0; i < 4; ++i){ + if(rates[i] < -delta || rates[i] > delta) { + return false; + } + } + return true; + + }else if(sizeof(T) == 8){ + __m128d oldValue = _mm_load_pd( reinterpret_cast (&avPair->value)); + __m128d newValue = _mm_loadu_pd( reinterpret_cast (avPair->address)); + + __m128d result = _mm_sub_pd(newValue,oldValue); + + result = _mm_div_pd(result,oldValue); + + double rate[2]; + _mm_storel_pd(&rate[0],result); + _mm_storeh_pd(&rate[1],result); + + if(rate[0] < -delta || rate[0] > delta) + return false; + if(rate[1] < -delta || rate[1] > delta) + return false; + return true; + } + }else if(AccessLen == 10){ + UINT8 newValue[10]; + memcpy(newValue, addr, AccessLen); + + uint64_t * upperOld = (uint64_t*)&(avPair->value[2]); + uint64_t * upperNew = (uint64_t*)&(newValue[2]); + + uint16_t * lowOld = (uint16_t*)&(avPair->value[0]); + uint16_t * lowNew = (uint16_t*)&(newValue[0]); + + if((*lowOld & 0xfff0) == (*lowNew & 0xfff0) && *upperNew == *upperOld){ + return true; + } + return false; + }else{ + T newValue = *(static_cast(avPair->address)); + T oldValue = *((T*)(&avPair->value)); + + T rate = (newValue - oldValue)/oldValue; + if( rate <= delta && rate >= -delta ) return true; + else return false; + } + }else{ + return *((T*)(&avPair->value)) == *(static_cast(avPair->address)); } + return false; } static __attribute__((always_inline)) VOID RecordNByteValueBeforeWrite(void* addr, THREADID threadId){ @@ -557,13 +1405,28 @@ struct RedSpyAnalysis{ AddrValPair * avPair = & tData->buffer[bufferOffset]; avPair->address = addr; - switch(AccessLen){ - case 1: *((uint8_t*)(&avPair->value)) = *(static_cast(addr)); break; - case 2: *((uint16_t*)(&avPair->value)) = *(static_cast(addr)); break; - case 4: *((uint32_t*)(&avPair->value)) = *(static_cast(addr)); break; - case 8: *((uint64_t*)(&avPair->value)) = *(static_cast(addr)); break; - default:memcpy(&avPair->value, addr, AccessLen); - } + if(AccessLen >= 32){ + if(sizeof(T) == 4){ + __m256 newValue = _mm256_loadu_ps( reinterpret_cast (addr)); + _mm256_store_ps(reinterpret_cast (&avPair->value), newValue); + + }else if(sizeof(T) == 8){ + __m256d newValue = _mm256_loadu_pd(reinterpret_cast (addr)); + _mm256_store_pd(reinterpret_cast (&avPair->value), newValue); + } + }else if(AccessLen == 16){ + if(sizeof(T) == 4){ + __m128 newValue = _mm_loadu_ps( reinterpret_cast (addr)); + _mm_store_ps(reinterpret_cast (&avPair->value), newValue); + + }else if(sizeof(T) == 8){ + __m128d newValue = _mm_loadu_pd(reinterpret_cast (addr)); + _mm_store_pd(reinterpret_cast (&avPair->value), newValue); + } + }else if(AccessLen == 10){ + memcpy(&avPair->value, addr, AccessLen); + }else + *((T*)(&avPair->value)) = *(static_cast(addr)); } static __attribute__((always_inline)) VOID CheckNByteValueAfterWrite(uint32_t opaqueHandle, THREADID threadId){ @@ -582,37 +1445,73 @@ struct RedSpyAnalysis{ // All from same ctxt? if (UnrolledConjunction<0, AccessLen, 1>::BodyContextCheck(prevIP)) { // report in RedTable - AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), AccessLen, threadId); + if(isApprox) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), AccessLen, threadId); + else + AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), AccessLen, threadId); // Update context - UnrolledLoop<0, AccessLen, 1, false /* redundancy is updated outside*/>::BodySamePage(prevIP, curCtxtHandle, threadId); + UnrolledLoop<0, AccessLen, 1, false, /* redundancy is updated outside*/ isApprox>::BodySamePage(prevIP, curCtxtHandle, threadId); } else { // different contexts - UnrolledLoop<0, AccessLen, 1, true /* redundancy is updated inside*/>::BodySamePage(prevIP, curCtxtHandle, threadId); + UnrolledLoop<0, AccessLen, 1, true, /* redundancy is updated inside*/ isApprox>::BodySamePage(prevIP, curCtxtHandle, threadId); } } else { // Write across a 64-K page boundary // First byte is on this page though - AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), 1, threadId); + if(isApprox) + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), 1, threadId); + else + AddToRedTable(MAKE_CONTEXT_PAIR(prevIP[0], curCtxtHandle), 1, threadId); // Update context prevIP[0] = curCtxtHandle; // Remaining bytes [1..AccessLen] somewhere will across a 64-K page boundary - UnrolledLoop<1, AccessLen, 1, true /* update redundancy */>::BodyStraddlePage( (uint64_t) addr, curCtxtHandle, threadId); + UnrolledLoop<1, AccessLen, 1, true, /* update redundancy */ isApprox>::BodyStraddlePage( (uint64_t) addr, curCtxtHandle, threadId); } } else { // No redundancy. // Just update contexts if(isAccessWithinPageBoundary) { // Update context - UnrolledLoop<0, AccessLen, 1, false /* not redundant*/>::BodySamePage(prevIP, curCtxtHandle, threadId); + UnrolledLoop<0, AccessLen, 1, false, /* not redundant*/ isApprox>::BodySamePage(prevIP, curCtxtHandle, threadId); } else { // Write across a 64-K page boundary // Update context prevIP[0] = curCtxtHandle; // Remaining bytes [1..AccessLen] somewhere will across a 64-K page boundary - UnrolledLoop<1, AccessLen, 1, false /* dont update redundancy */>::BodyStraddlePage( (uint64_t) addr, curCtxtHandle, threadId); - + UnrolledLoop<1, AccessLen, 1, false, /* not redundant*/ isApprox>::BodyStraddlePage( (uint64_t) addr, curCtxtHandle, threadId); + } + } + } + static __attribute__((always_inline)) VOID ApproxCheckAfterWrite(uint32_t opaqueHandle, THREADID threadId){ + RedSpyThreadData* const tData = ClientGetTLS(threadId); + void * addr; + bool isRedundantWrite = IsWriteRedundant(addr, threadId); + + ContextHandle_t curCtxtHandle = GetContextHandle(threadId, opaqueHandle); + + UINT32 const interv = sizeof(T); + uint8_t* status = GetOrCreateShadowBaseAddress((uint64_t)addr); + ContextHandle_t * __restrict__ prevIP = (ContextHandle_t*)(status + PAGE_OFFSET((uint64_t)addr) * sizeof(ContextHandle_t)); + + if(isRedundantWrite){ + for(UINT32 index = 0 ; index < AccessLen; index+=interv){ + status = GetOrCreateShadowBaseAddress((uint64_t)addr + index); + prevIP = (ContextHandle_t*)(status + PAGE_OFFSET(((uint64_t)addr + index)) * sizeof(ContextHandle_t)); + // report in RedTable + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[0 /* 0 is correct*/ ], curCtxtHandle), interv, threadId); + // Update context + prevIP[0] = curCtxtHandle; + } + }else{ + for(UINT32 index = 0 ; index < AccessLen; index+=interv){ + status = GetOrCreateShadowBaseAddress((uint64_t)addr + index); + prevIP = (ContextHandle_t*)(status + PAGE_OFFSET(((uint64_t)addr + index)) * sizeof(ContextHandle_t)); + // report in RedTable + AddToApproximateRedTable(MAKE_CONTEXT_PAIR(prevIP[0 /* 0 is correct*/ ], curCtxtHandle), interv, threadId); + // Update context + prevIP[0] = curCtxtHandle; } } } @@ -657,11 +1556,17 @@ static inline VOID CheckAfterLargeWrite(UINT32 accessLen, uint32_t bufferOffset #ifdef ENABLE_SAMPLING -#define HANDLE_CASE(NUM, BUFFER_INDEX) \ -case (NUM):{INS_InsertIfPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ -INS_InsertThenPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis<(NUM), (BUFFER_INDEX)>::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ +#define HANDLE_CASE(T, ACCESS_LEN, BUFFER_INDEX, IS_APPROX) \ +INS_InsertIfPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ +INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis::CheckNByteValueAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END) + +#define HANDLE_APPROX_CASE(T, ACCESS_LEN, BUFFER_INDEX, IS_APPROX) \ +INS_InsertIfPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ +INS_InsertThenPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ INS_InsertIfPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ -INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis<(NUM), (BUFFER_INDEX)>::CheckNByteValueAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END);}break +INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis::ApproxCheckAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END) #define HANDLE_LARGE() \ INS_InsertIfPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)IfEnableSample, IARG_THREAD_ID,IARG_END);\ @@ -671,9 +1576,13 @@ INS_InsertThenPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) CheckAfterLargeWrite, #else -#define HANDLE_CASE(NUM, BUFFER_INDEX) \ -case (NUM):{INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis<(NUM), (BUFFER_INDEX)>::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ -INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis<(NUM), (BUFFER_INDEX)>::CheckNByteValueAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END);}break +#define HANDLE_CASE(T, ACCESS_LEN, BUFFER_INDEX, IS_APPROX) \ +INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis::CheckNByteValueAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END) + +#define HANDLE_APPROX_CASE(T, ACCESS_LEN, BUFFER_INDEX, IS_APPROX) \ +INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RedSpyAnalysis::RecordNByteValueBeforeWrite, IARG_MEMORYOP_EA, memOp, IARG_THREAD_ID, IARG_END);\ +INS_InsertPredicatedCall(ins, IPOINT_AFTER, (AFUNPTR) RedSpyAnalysis::ApproxCheckAfterWrite, IARG_UINT32, opaqueHandle, IARG_THREAD_ID, IARG_INST_PTR,IARG_END) #define HANDLE_LARGE() \ INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR) RecordValueBeforeLargeWrite, IARG_MEMORYOP_EA, memOp, IARG_MEMORYWRITE_SIZE, IARG_UINT32, readBufferSlotIndex, IARG_THREAD_ID, IARG_END);\ @@ -698,16 +1607,41 @@ template struct RedSpyInstrument{ static __attribute__((always_inline)) void InstrumentReadValueBeforeAndAfterWriting(INS ins, UINT32 memOp, uint32_t opaqueHandle){ UINT32 refSize = INS_MemoryOperandSize(ins, memOp); - switch(refSize) { - HANDLE_CASE(1, readBufferSlotIndex); - HANDLE_CASE(2, readBufferSlotIndex); - HANDLE_CASE(4, readBufferSlotIndex); - HANDLE_CASE(8, readBufferSlotIndex); - HANDLE_CASE(10, readBufferSlotIndex); - HANDLE_CASE(16, readBufferSlotIndex); - - default: { - HANDLE_LARGE(); + + if (IsFloatInstruction(INS_Address(ins))) { + unsigned int operSize = FloatOperandSize(INS_Address(ins),INS_MemoryOperandIndexToOperandIndex(ins,memOp)); + switch(refSize) { + case 1: + case 2: assert(0 && "memory write floating data with unexptected small size"); + case 4: HANDLE_APPROX_CASE(float, 4, readBufferSlotIndex, true); break; + case 8: HANDLE_APPROX_CASE(double, 8, readBufferSlotIndex, true); break; + case 10: HANDLE_APPROX_CASE(uint8_t, 10, readBufferSlotIndex, true); break; + case 16: { + switch (operSize) { + case 4: HANDLE_APPROX_CASE(float, 16, readBufferSlotIndex, true); break; + case 8: HANDLE_APPROX_CASE(double, 16, readBufferSlotIndex, true); break; + default: assert(0 && "handle large mem write with unexpected operand size\n"); break; + } + }break; + case 32: { + switch (operSize) { + case 4: HANDLE_APPROX_CASE(float, 32, readBufferSlotIndex, true); break; + case 8: HANDLE_APPROX_CASE(double, 32, readBufferSlotIndex, true); break; + default: assert(0 && "handle large mem write with unexpected operand size\n"); break; + } + }break; + default: assert(0 && "unexpected large memory writes\n"); break; + } + }else{ + switch(refSize) { + case 1: HANDLE_CASE(uint8_t, 1, readBufferSlotIndex, false); break; + case 2: HANDLE_CASE(uint16_t, 2, readBufferSlotIndex, false); break; + case 4: HANDLE_CASE(uint32_t, 4, readBufferSlotIndex, false); break; + case 8: HANDLE_CASE(uint64_t, 8, readBufferSlotIndex, false); break; + + default: { + HANDLE_LARGE(); + } } } } @@ -734,9 +1668,7 @@ static inline bool REG_IsIgnorable(REG reg){ return true; else if(reg == REG_MXCSR) return true; - else if(reg == REG_GFLAGS || reg == REG_FLAGS) - return true; - else if(reg == REG_ST0) + else if(REG_is_flags(reg)) return true; return false; } @@ -804,16 +1736,9 @@ static VOID InstrumentInsCallback(INS ins, VOID* v, const uint32_t opaqueHandle) continue; if (RegHasAlias(reg)) { - switch (reg) { - case REG_RAX: HANDLE_ALIAS_GENERIC(uint64_t, ALIAS_REG_A, ALIAS_BYTES_INDEX_64); break; - case REG_EAX: HANDLE_ALIAS_GENERIC(uint32_t, ALIAS_REG_A, ALIAS_BYTES_INDEX_32); break; - case REG_EBX: HANDLE_ALIAS_GENERIC(uint32_t, ALIAS_REG_B, ALIAS_BYTES_INDEX_32); break; - case REG_ECX: HANDLE_ALIAS_GENERIC(uint32_t, ALIAS_REG_C, ALIAS_BYTES_INDEX_32); break; - case REG_EDX: HANDLE_ALIAS_GENERIC(uint32_t, ALIAS_REG_D, ALIAS_BYTES_INDEX_32); break; - default: InstrumentAliasReg(ins, reg , opaqueHandle); break; - } + InstrumentAliasReg(ins, reg , oper, opaqueHandle); }else{ - InstrumentGeneralReg(ins, reg, opaqueHandle); + InstrumentGeneralReg(ins, reg, oper, opaqueHandle); } } } @@ -825,7 +1750,7 @@ static VOID InstrumentInsCallback(INS ins, VOID* v, const uint32_t opaqueHandle) inline VOID UpdateAndCheck(uint32_t count, uint32_t bytes, THREADID threadId) { RedSpyThreadData* const tData = ClientGetTLS(threadId); - tData->bytesWritten += bytes; + if(tData->sampleFlag){ tData->numIns += count; if(tData->numIns > WINDOW_ENABLE){ @@ -840,12 +1765,17 @@ inline VOID UpdateAndCheck(uint32_t count, uint32_t bytes, THREADID threadId) { tData->numIns = 0; } } + if (tData->sampleFlag) { + tData->bytesWritten += bytes; + } } inline VOID Update(uint32_t count, uint32_t bytes, THREADID threadId){ RedSpyThreadData* const tData = ClientGetTLS(threadId); tData->numIns += count; - tData->bytesWritten += bytes; + if (tData->sampleFlag) { + tData->bytesWritten += bytes; + } } //instrument the trace, count the number of ins in the trace, decide to instrument or not @@ -881,15 +1811,15 @@ static void InstrumentTrace(TRACE trace, void* f) { } if (BBL_InsTail(bbl) == BBL_InsHead(bbl)) { - BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32,totBytes, IARG_THREAD_ID,IARG_END); + BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32,totBytes, IARG_THREAD_ID, IARG_CALL_ORDER, CALL_ORDER_FIRST, IARG_END); }else if(INS_IsIndirectBranchOrCall(BBL_InsTail(bbl))){ - BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32,totBytes, IARG_THREAD_ID,IARG_END); + BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32,totBytes, IARG_THREAD_ID,IARG_CALL_ORDER, CALL_ORDER_FIRST, IARG_END); }else{ if (check) { - BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32, totBytes, IARG_THREAD_ID,IARG_END); + BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)UpdateAndCheck,IARG_UINT32, totInsInBbl, IARG_UINT32, totBytes, IARG_THREAD_ID,IARG_CALL_ORDER, CALL_ORDER_FIRST, IARG_END); check = false; } else { - BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)Update,IARG_UINT32, totInsInBbl, IARG_UINT32, totBytes, IARG_THREAD_ID, IARG_END); + BBL_InsertCall(bbl,IPOINT_BEFORE,(AFUNPTR)Update,IARG_UINT32, totInsInBbl, IARG_UINT32, totBytes, IARG_THREAD_ID, IARG_CALL_ORDER, CALL_ORDER_FIRST, IARG_END); check = true; } } @@ -957,7 +1887,7 @@ static void PrintRedundancyPairs(THREADID threadId) { fprintf(gTraceFile, "*************** Dump Data from Thread %d ****************\n", threadId); #ifdef MERGING - for (unordered_map::iterator it = RedMap[threadId].begin(); it != RedMap[threadId].end(); ++it) { + for (dense_hash_map::iterator it = RedMap[threadId].begin(); it != RedMap[threadId].end(); ++it) { ContextHandle_t dead = DECODE_DEAD((*it).first); ContextHandle_t kill = DECODE_KILL((*it).first); @@ -986,7 +1916,75 @@ static void PrintRedundancyPairs(THREADID threadId) { } } #else - for (unordered_map::iterator it = RedMap[threadId].begin(); it != RedMap[threadId].end(); ++it) { + for (dense_hash_map::iterator it = RedMap[threadId].begin(); it != RedMap[threadId].end(); ++it) { + RedundacyData tmp = { DECODE_DEAD ((*it).first), DECODE_KILL((*it).first), (*it).second}; + tmpList.push_back(tmp); + grandTotalRedundantBytes += tmp.frequency; + } +#endif + + fprintf(gTraceFile, "\n Total redundant bytes = %f %%\n", grandTotalRedundantBytes * 100.0 / ClientGetTLS(threadId)->bytesWritten); + + sort(tmpList.begin(), tmpList.end(), RedundacyCompare); + vector::iterator listIt; + int cntxtNum = 0; + for (vector::iterator listIt = tmpList.begin(); listIt != tmpList.end(); ++listIt) { + if (cntxtNum < MAX_REDUNDANT_CONTEXTS_TO_LOG) { + fprintf(gTraceFile, "\n======= (%f) %% ======\n", (*listIt).frequency * 100.0 / grandTotalRedundantBytes); + if ((*listIt).dead == 0) { + fprintf(gTraceFile, "\n Prepopulated with by OS\n"); + } else { + PrintFullCallingContext((*listIt).dead); + } + fprintf(gTraceFile, "\n---------------------Redundantly written by---------------------------\n"); + PrintFullCallingContext((*listIt).kill); + } + else { + break; + } + cntxtNum++; + } +} + +static void PrintApproximationRedundancyPairs(THREADID threadId) { + vector tmpList; + vector::iterator tmpIt; + + uint64_t grandTotalRedundantBytes = 0; + fprintf(gTraceFile, "*************** Dump Data(delta=%.2f%%) from Thread %d ****************\n", delta*100,threadId); + +#ifdef MERGING + for (dense_hash_map::iterator it = ApproxRedMap[threadId].begin(); it != ApproxRedMap[threadId].end(); ++it) { + ContextHandle_t dead = DECODE_DEAD((*it).first); + ContextHandle_t kill = DECODE_KILL((*it).first); + + for(tmpIt = tmpList.begin();tmpIt != tmpList.end(); ++tmpIt){ + if(dead == 0 || ((*tmpIt).dead) == 0){ + continue; + } + if (!HaveSameCallerPrefix(dead,(*tmpIt).dead)) { + continue; + } + if (!HaveSameCallerPrefix(kill,(*tmpIt).kill)) { + continue; + } + bool ct1 = IsSameSourceLine(dead,(*tmpIt).dead); + bool ct2 = IsSameSourceLine(kill,(*tmpIt).kill); + if(ct1 && ct2){ + (*tmpIt).frequency += (*it).second; + grandTotalRedundantBytes += (*it).second; + grandTotalRedundantIns += 1; + break; + } + } + if(tmpIt == tmpList.end()){ + RedundacyData tmp = { dead, kill, (*it).second}; + tmpList.push_back(tmp); + grandTotalRedundantBytes += tmp.frequency; + } + } +#else + for (dense_hash_map::iterator it = ApproxRedMap[threadId].begin(); it != ApproxRedMap[threadId].end(); ++it) { RedundacyData tmp = { DECODE_DEAD ((*it).first), DECODE_KILL((*it).first), (*it).second}; tmpList.push_back(tmp); grandTotalRedundantBytes += tmp.frequency; @@ -1024,9 +2022,11 @@ static VOID ImageUnload(IMG img, VOID* v) { // Update gTotalInstCount first PIN_LockClient(); PrintRedundancyPairs(threadid); + PrintApproximationRedundancyPairs(threadid); PIN_UnlockClient(); // clear redmap now RedMap[threadid].clear(); + ApproxRedMap[threadid].clear(); } static VOID ThreadFiniFunc(THREADID threadid, const CONTEXT *ctxt, INT32 code, VOID *v) { @@ -1041,10 +2041,14 @@ static void InitThreadData(RedSpyThreadData* tdata){ tdata->sampleFlag = true; tdata->numIns = 0; tdata->numWinds = 0; + for (int i = 0; i < THREAD_MAX; ++i) { + RedMap[i].set_empty_key(0); + ApproxRedMap[i].set_empty_key(0); + } } static VOID ThreadStart(THREADID threadid, CONTEXT* ctxt, INT32 flags, VOID* v) { - RedSpyThreadData* tdata = new RedSpyThreadData(); + RedSpyThreadData* tdata = (RedSpyThreadData*)memalign(32,sizeof(RedSpyThreadData)); InitThreadData(tdata); // __sync_fetch_and_add(&gClientNumThreads, 1); #ifdef MULTI_THREADED