diff --git a/Assignment-3/CPP/AEHelper.cpp b/Assignment-3/CPP/AEHelper.cpp new file mode 100644 index 0000000..2b6f78d --- /dev/null +++ b/Assignment-3/CPP/AEHelper.cpp @@ -0,0 +1,345 @@ +//===- AEHelper.cpp -- Abstract Interpretation harness --// +// +// SVF: Static Value-Flow Analysis +// +// Copyright (C) <2013-2022> +// + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . +// +//===----------------------------------------------------------------------===// +/* + * Harness for Assignment-3 abstract interpretation. + * + * Owns the harness-side `AbstractExecution::*` methods that students don't + * design: + * - Interprocedural WTO construction (initWTO) + * - Stub / checkpoint sub-dispatch (handleStubFunctions / + * handleCheckpointStubs) — invoked + * from the student's handleCallSite + * in Assignment_3.cpp. + * - External-API whitelist (isExternalCallForAssignment) + * - Abstract-state helpers (getAbsValue / updateAbsValue / + * loadValue / storeValue / GEP* / + * getAbsStateFromTrace / postAbsTrace) + * - Validator (ensureAllAssertsValidated) + * + * Pure bug-reporting concerns (AEReporter class + JSON / coverage summary) + * live in AEReporter.cpp. The analysis driver (runOnModule / analyse / + * handleCallSite / reportBufOverflow / reportNullDeref) and the six student + * tasks live in Assignment_3.cpp. 
+ */
+
+#include "Assignment_3.h"
+// harness-only: the abstract-state helpers and the post-trace accessor need
+// the full AbstractInterpretation definition. Student code (Assignment_3.cpp)
+// never includes this header, so it cannot reach AbsExtAPI / getUtils.
+#include "AE/Svfexe/AbstractInterpretation.h"
+#include "WPA/Andersen.h"
+// NOTE(review): the header name on this line was missing in this copy of the
+// patch; <sstream> is assumed because the file uses std::stringstream —
+// confirm against the original commit.
+#include <sstream>
+
+using namespace SVF;
+
+/// Whitelist of external-call names the assignment expects students to model
+/// in `updateStateOnExtCall`. Covers:
+///   - Assignment-specific stubs:      `mem_insert`, `str_insert`
+///   - Memory family:                  `memcpy`, `memmove`, `memset`
+///   - String family:                  `strcpy`, `strncpy`, `strcat`,
+///                                     `strncat`, `strlen`, `wcslen`
+///   - Ground-truth checkpoint stubs:  `SAFE_/UNSAFE_BUFACCESS`,
+///                                     `SAFE_/UNSAFE_PTRDEREF`
+///
+/// The library APIs are matched by substring because Clang emits the memory
+/// family as LLVM intrinsics (e.g. `llvm.memcpy.p0.p0.i64`) and the substring
+/// is preserved in the mangled name.
+///
+/// @param func  Callee to classify; a null pointer is treated as "not on the
+///              whitelist".
+/// @return true when the callee's name is one of the exact stub names or
+///         contains one of the library-API substrings, false otherwise.
+bool AbstractExecution::isExternalCallForAssignment(const SVF::FunObjVar* func) {
+    if (!func)
+        return false;
+    const std::string& name = func->getName();
+
+    // Stub names are compared exactly: they are plain C identifiers declared
+    // by the assignment, so no intrinsic-style decoration is expected.
+    static const Set<std::string> exactStubs = {
+        "mem_insert", "str_insert",
+        "UNSAFE_BUFACCESS", "SAFE_BUFACCESS",
+        "UNSAFE_PTRDEREF", "SAFE_PTRDEREF"};
+    if (exactStubs.count(name) > 0)
+        return true;
+
+    // Library APIs are matched by substring (see the function comment).
+    static const std::vector<std::string> apiSubstrings = {
+        "memcpy", "memmove", "memset",
+        "strcpy", "strncpy", "strcat", "strncat",
+        "strlen", "wcslen"};
+    for (const std::string& key : apiSubstrings) {
+        if (name.find(key) != std::string::npos)
+            return true;
+    }
+    return false;
+}
+
+// ---------------------------------------------------------------------------
+// WTO construction. Each (mutually) recursive function's entry node becomes
+// a WTO cycle head because intra-SCC call edges are turned into back-edges.
+// The same widening/narrowing machinery used for loops then drives recursion
+// to a fixpoint via handleICFGCycle; recursive callsites are filtered out in
+// handleCallSite via `inSameCallGraphSCC`.
+// ---------------------------------------------------------------------------
+
+/// Build one interprocedural WTO per call-graph-SCC entry function and store
+/// it in `funcToWTO`.
+///
+/// Runs Andersen's analysis on `svfir`, computes the call-graph SCCs, and for
+/// every defined function that is an SCC entry (no caller at all, or at least
+/// one caller outside its SCC) constructs an ICFGWTO rooted at the function's
+/// entry ICFG node. The SCC's function set is handed to ICFGWTO together with
+/// the entry node.
+void AbstractExecution::initWTO() {
+    ander = AndersenWaveDiff::createAndersenWaveDiff(svfir);
+    Andersen::CallGraphSCC* callGraphScc = ander->getCallGraphSCC();
+    callGraphScc->find();
+    auto callGraph = ander->getCallGraph();
+
+    for (auto it = callGraph->begin(); it != callGraph->end(); ++it) {
+        const FunObjVar* fun = it->second->getFunction();
+        // Declarations have no body, hence nothing to order.
+        if (fun->isDeclaration())
+            continue;
+
+        NodeID repNodeId = callGraphScc->repNode(it->second->getId());
+        const NodeBS& cgSCCNodes = callGraphScc->subNodes(repNodeId);
+
+        // Only SCC-entry functions own a WTO; the remaining members of the
+        // SCC are skipped here.
+        bool isEntry = it->second->getInEdges().empty();
+        for (auto inEdge : it->second->getInEdges()) {
+            if (!cgSCCNodes.test(inEdge->getSrcID())) {
+                isEntry = true;
+                break;  // one caller outside the SCC is enough
+            }
+        }
+        if (!isEntry)
+            continue;
+
+        // Collect every function of this SCC for the WTO constructor.
+        Set<const FunObjVar*> funcScc;
+        for (const auto& node : cgSCCNodes)
+            funcScc.insert(callGraph->getGNode(node)->getFunction());
+
+        // The raw pointer is stored in funcToWTO; this function never frees it.
+        auto* wto = new ICFGWTO(icfg->getFunEntryICFGNode(fun), funcScc);
+        wto->init();
+        funcToWTO[fun] = wto;
+    }
+}
+
+/// Verify that every ground-truth stub call site was reached by the student's
+/// analysis (added to `assert_points` via handleCallSite -> handleStubFunctions
+/// / handleCheckpointStubs). A missed stub site means the student's
+/// control-flow logic skipped a place the grader cares about.
+///
+/// Recognised stubs:
+///   - svf_assert / svf_assert_eq         : abstract-state assertion checks
+///   - UNSAFE_PTRDEREF / SAFE_PTRDEREF    : null-deref ground truth
+///   - UNSAFE_BUFACCESS / SAFE_BUFACCESS  : buffer-access ground truth
+///
+/// Additionally requires that the number of reported bugs is at least the
+/// number of UNSAFE_* stubs in the program.
+void AbstractExecution::ensureAllAssertsValidated() {
+    static const Set<std::string> kAssertStubs = {"svf_assert", "svf_assert_eq"};
+    static const Set<std::string> kCheckpointStubs = {
+        "UNSAFE_PTRDEREF", "SAFE_PTRDEREF",
+        "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"};
+
+    // Number of UNSAFE_* stub call sites found in the ICFG.
+    u32_t unsafe_to_be_verified = 0;
+    for (auto it = svfir->getICFG()->begin(); it != svfir->getICFG()->end(); ++it) {
+        const ICFGNode* node = it->second;
+        const CallICFGNode* call = SVFUtil::dyn_cast<CallICFGNode>(node);
+        if (!call)
+            continue;
+        // Call sites without a resolved callee are skipped.
+        const FunObjVar* fun = call->getCalledFunction();
+        if (!fun)
+            continue;
+        const std::string& name = fun->getName();
+        const bool isAssertStub = kAssertStubs.count(name) > 0;
+        const bool isCheckpointStub = kCheckpointStubs.count(name) > 0;
+        if (!isAssertStub && !isCheckpointStub)
+            continue;
+        // Prefix test: rfind(..., 0) only matches at position 0.
+        if (name.rfind("UNSAFE_", 0) == 0)
+            unsafe_to_be_verified++;
+        if (!bugReporter.isAssertionPoint(call)) {
+            std::stringstream ss;
+            ss << "The stub function callsite (" << name
+               << ") was not reached by the student's control flow: "
+               << call->toString();
+            std::cerr << ss.str() << std::endl;
+            // Enforced with assert: the abort only happens when assertions
+            // are compiled in (NDEBUG not defined).
+            assert(false);
+        }
+    }
+
+    assert(unsafe_to_be_verified <= bugReporter.getBugReporter().getBugSet().size() &&
+           "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported");
+}
+
+// ---------------------------------------------------------------------------
+// Ground-truth helpers used by handleCheckpointStubs. Computed from SVF
+// primitives only so the stub verdict cannot be biased by student bugs.
+// ---------------------------------------------------------------------------
+
+namespace {
+/// Harness-only ground-truth check for buffer-access safety.
+///
+/// @param as     Abstract state to read the pointer's value from.
+/// @param svfir  SVFIR used to resolve base objects and object variables.
+/// @param value  Pointer variable being accessed.
+/// @param len    Offset interval to add to the object's base offset.
+/// @return false as soon as one pointed-to object with a constant byte size
+///         has `baseOffset + len` with an upper bound >= that size; true
+///         otherwise (including when `value` does not hold addresses).
+bool harnessSafeAccess(AbstractState& as, SVFIR* svfir, const ValVar* value,
+                       const IntervalValue& len) {
+    const AbstractValue& ptrVal = as[value->getId()];
+    if (!ptrVal.isAddr())
+        return true;
+    for (const auto& addr : ptrVal.getAddrs()) {
+        // Black-hole and null addresses carry no size to check against.
+        if (AbstractState::isBlackHoleObjAddr(addr) || AbstractState::isNullMem(addr))
+            continue;
+        NodeID objId = as.getIDFromAddr(addr);
+        const BaseObjVar* baseObj = svfir->getBaseObject(objId);
+        if (!baseObj || baseObj->isBlackHoleObj() || !baseObj->isConstantByteSize())
+            continue;
+        u32_t size = baseObj->getByteSizeOfObj();
+
+        // A GEP object starts at its constant field index; anything else
+        // starts at offset 0.
+        // NOTE(review): the field index is added directly to `len` and
+        // compared with a byte size — confirm the units agree.
+        IntervalValue baseOffset(0);
+        const SVFVar* svfVar = svfir->getGNode(objId);
+        if (auto* gepObj = SVFUtil::dyn_cast<GepObjVar>(svfVar))
+            baseOffset = IntervalValue(static_cast<s64_t>(gepObj->getConstantFieldIdx()));
+
+        IntervalValue offset = baseOffset + len;
+        if (offset.ub().getIntNumeral() >= static_cast<s64_t>(size))
+            return false;
+    }
+    return true;
+}
+
+/// Harness-only ground-truth check for pointer-dereference safety.
+///
+/// @param as     Abstract state to read the pointer's value from.
+/// @param value  Pointer variable being dereferenced.
+/// @return false when `value` is null, is the IRGraph::NullPtr variable, or
+///         may point to the null address or to freed memory; true otherwise
+///         (including when `value` does not hold addresses).
+bool harnessSafeDeref(AbstractState& as, const ValVar* value) {
+    if (!value || value->getId() == IRGraph::NullPtr)
+        return false;
+    const AbstractValue& absVal = as[value->getId()];
+    if (!absVal.isAddr())
+        return true;
+    for (const auto& addr : absVal.getAddrs()) {
+        if (AbstractState::isBlackHoleObjAddr(addr))
+            continue;
+        if (AbstractState::isNullMem(addr))
+            return false;
+        if (as.isFreedMem(addr))
+            return false;
+    }
+    return true;
+}
+} // namespace
+
+/// Validate the SAFE/UNSAFE checkpoint stub functions. Validation uses the
+/// harness-only `harnessSafeAccess` / `harnessSafeDeref` helpers, NOT the
+/// student's `canSafelyAccessMemory` / `canSafelyDerefPtr` — so the stub
+/// verdict cannot be biased by student bugs.
+void AbstractExecution::handleCheckpointStubs(const CallICFGNode* callNode) {
+    // Record coverage first so the site counts as reached even when the
+    // argument-count guards below return early.
+    bugReporter.noteAssertionPoint(callNode);
+    const std::string fun_name = callNode->getCalledFunction()->getName();
+    if (fun_name == "SAFE_BUFACCESS" || fun_name == "UNSAFE_BUFACCESS") {
+        if (callNode->arg_size() < 2)
+            return;
+        AbstractState& as = getAbsStateFromTrace(callNode);
+        IntervalValue len = as[callNode->getArgument(1)->getId()].getInterval();
+        // A bottom length is treated as 0.
+        if (len.isBottom())
+            len = IntervalValue(0);
+        const ValVar* ptr = callNode->getArgument(0);
+        // The helper receives len - 1 and flags offsets whose upper bound is
+        // >= the object size.
+        if (!harnessSafeAccess(as, svfir, ptr, len - IntervalValue(1)))
+            reportBufOverflow(callNode);
+    }
+    else if (fun_name == "SAFE_PTRDEREF" || fun_name == "UNSAFE_PTRDEREF") {
+        if (callNode->arg_size() < 1)
+            return;
+        AbstractState& as = getAbsStateFromTrace(callNode);
+        const ValVar* ptr = callNode->getArgument(0);
+        if (!harnessSafeDeref(as, ptr))
+            reportNullDeref(callNode);
+    }
+}
+
+/// Handle the abstract-state assertion stubs. `svf_assert(expr)` requires the
+/// expression to hold true; `svf_assert_eq(a, b)` requires the two intervals
+/// to be equal. Both record the call site in `assert_points` so
+/// `ensureAllAssertsValidated` can verify coverage.
+///
+/// @param callNode  Call site whose callee is `svf_assert` or
+///                  `svf_assert_eq`; any other callee is ignored.
+void AbstractExecution::handleStubFunctions(const SVF::CallICFGNode* callNode) {
+    const std::string& stubName = callNode->getCalledFunction()->getName();
+    if (stubName == "svf_assert") {
+        bugReporter.noteAssertionPoint(callNode);
+        u32_t arg0 = callNode->getArgument(0)->getId();
+        AbstractState& as = getAbsStateFromTrace(callNode);
+
+        // An unbounded interval cannot establish the asserted condition.
+        if (as[arg0].getInterval().is_infinite()) {
+            SVFUtil::errs() << "svf_assert Fail. " << callNode->toString() << "\n";
+            assert(false);
+        }
+        else {
+            // The assertion holds only when the argument is exactly [1, 1].
+            if (as[arg0].getInterval().equals(IntervalValue(1, 1))) {
+                std::stringstream ss;
+                ss << "The assertion (" << callNode->toString() << ")"
+                   << " is successfully verified!!\n";
+                SVFUtil::outs() << ss.str() << std::endl;
+            }
+            else {
+                std::stringstream ss;
+                ss << "The assertion (" << callNode->toString() << ")"
+                   << " is unsatisfiable!!\n";
+                SVFUtil::outs() << ss.str() << std::endl;
+                assert(false);
+            }
+        }
+        return;
+    }
+    else if (stubName == "svf_assert_eq") {
+        // Fix: record the site, exactly as the svf_assert branch does.
+        // ensureAllAssertsValidated requires isAssertionPoint() for
+        // svf_assert_eq call sites as well; without this call every such
+        // site would be reported as "not reached".
+        bugReporter.noteAssertionPoint(callNode);
+        u32_t arg0 = callNode->getArgument(0)->getId();
+        u32_t arg1 = callNode->getArgument(1)->getId();
+        AbstractState& as = getAbsStateFromTrace(callNode);
+        if (as[arg0].getInterval().equals(as[arg1].getInterval())) {
+            SVFUtil::errs() << SVFUtil::sucMsg("The assertion is successfully verified!!\n");
+        }
+        else {
+            SVFUtil::errs() << "svf_assert_eq Fail. " << callNode->toString() << "\n";
+            assert(false);
+        }
+        return;
+    }
+}
+
+// ===========================================================================
+// Abstract-state helpers — wrap operations on the underlying
+// AbstractInterpretation singleton. Defined here (not in the header) so
+// student code never sees AbstractInterpretation/AbsExtAPI directly.
+// ===========================================================================
+namespace SVF {
+
+/// Each helper below forwards its arguments unchanged to the same-named
+/// member of `ai` and returns that call's result.
+/// NOTE(review): `ai` is dereferenced without a null check — presumably the
+/// analysis driver assigns it before any helper runs; confirm.
+const AbstractValue& AbstractExecution::getAbsValue(const ValVar* var, const ICFGNode* node) {
+    return ai->getAbsValue(var, node);
+}
+const AbstractValue& AbstractExecution::getAbsValue(const ObjVar* var, const ICFGNode* node) {
+    return ai->getAbsValue(var, node);
+}
+const AbstractValue& AbstractExecution::getAbsValue(const SVFVar* var, const ICFGNode* node) {
+    return ai->getAbsValue(var, node);
+}
+void AbstractExecution::updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node) {
+    ai->updateAbsValue(var, val, node);
+}
+void AbstractExecution::updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node) {
+    ai->updateAbsValue(var, val, node);
+}
+void AbstractExecution::updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node) {
+    ai->updateAbsValue(var, val, node);
+}
+AbstractValue AbstractExecution::loadValue(const ValVar* pointer, const ICFGNode* node) {
+    return ai->loadValue(pointer, node);
+}
+void AbstractExecution::storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node) {
+    ai->storeValue(pointer, val, node);
+}
+AddressValue AbstractExecution::getGepObjAddrs(const ValVar* pointer, IntervalValue offset) {
+    return ai->getGepObjAddrs(pointer, offset);
+}
+IntervalValue AbstractExecution::getGepElementIndex(const GepStmt* gep) {
+    return ai->getGepElementIndex(gep);
+}
+IntervalValue AbstractExecution::getGepByteOffset(const GepStmt* gep) {
+    return ai->getGepByteOffset(gep);
+}
+u32_t AbstractExecution::getAllocaInstByteSize(const AddrStmt* addr) {
+    return ai->getAllocaInstByteSize(addr);
+}
+
+// harness-only post-trace accessors (need full AbstractInterpretation type).
+/// Returns the abstract state that `ai` holds for `node`, obtained by
+/// indexing `*ai` with the node.
+AbstractState& AbstractExecution::getAbsStateFromTrace(const ICFGNode* node) {
+    return (*ai)[node];
+}
+/// Returns a reference to the trace exposed by `ai->getTrace()`.
+/// NOTE(review): the template arguments of `Map` appear to have been lost in
+/// this copy of the patch — restore them from the declaration in
+/// Assignment_3.h.
+Map& AbstractExecution::postAbsTrace() {
+    return ai->getTrace();
+}
+
+} // namespace SVF
diff --git a/Assignment-3/CPP/AEReporter.cpp b/Assignment-3/CPP/AEReporter.cpp index 0974f6a..fbf4255 100644 --- a/Assignment-3/CPP/AEReporter.cpp +++ b/Assignment-3/CPP/AEReporter.cpp @@ -1,4 +1,4 @@ -//===- AEReporter.cpp -- Abstract Interpretation harness --// +//===- AEReporter.cpp -- Abstract Interpretation bug reporter --// // // SVF: Static Value-Flow Analysis // @@ -20,17 +20,16 @@ // //===----------------------------------------------------------------------===// /* - * Helper Functions for Abstract Interpretation and buffer overflow detection + * Bug-reporter implementations for the Assignment-3 harness. * - * Created on: Feb 19, 2024 + * Holds the JSON-escape utility, target-report matching, and the AEReporter + * coverage / summary methods. All other harness state (AbstractExecution + * driver, ICFG/WTO traversal, call-site dispatch, abstract-state helpers) + * lives in AEHelper.cpp. */ -#include "Assignment_3.h" -// harness-only: the facade implementation and the post-trace accessor need the -// full AbstractInterpretation definition. Student code (Assignment_3.cpp) never -// includes this header, so it cannot reach AbsExtAPI / getUtils. -#include "AE/Svfexe/AbstractInterpretation.h" -#include "WPA/Andersen.h" +#include "AEReporter.h" +#include "Graphs/ICFG.h" #include #include @@ -95,48 +94,6 @@ static bool ass3ReportMatchesTarget(const AssignmentBugReport& report, return fileSeen && lineSeen; } -// Branch refinement, statement transfer functions (updateAbsState + -// updateStateOn*), the GEP-offset tracking, getAccessOffset, the memory-safety -// helpers (canSafelyAccessMemory / canSafelyDerefPtr) and the buffer/null -// checkers are all student TODOs this year and live in Assignment_3.cpp. 
The -// stub validators in this file deliberately do not call them — they compute -// ground truth from SVF primitives only, then query the BugReporter for the -// student's verdict. - -/// Report a buffer overflow for a given ICFG node -void AbstractExecution::reportBufOverflow(const ICFGNode* node) { - // Create an exception with the node's string representation - AEException bug(node->toString()); - // Add the bug to the reporter using the helper - bugReporter.addBugToReporter("buffer-overflow", bug, node); -} - -/// Report a nullptr dereference for a given ICFG node -void AbstractExecution::reportNullDeref(const ICFGNode* node) { - AEException bug(node->toString()); - bugReporter.addBugToReporter("nullptr-deref", bug, node); -} - -bool AbstractExecution::isExternalCallForAssignment(const SVF::FunObjVar* func) { - Set extFuncs = { - "mem_insert", "str_insert", - "UNSAFE_BUFACCESS", "SAFE_BUFACCESS", - "UNSAFE_PTRDEREF", "SAFE_PTRDEREF"}; - if (extFuncs.find(func->getName()) != extFuncs.end()) { - return true; - } else { - return false; - } -} - -void AbstractExecution::runOnModule(SVF::ICFG* _icfg) { - svfir = PAG::getPAG(); - icfg = _icfg; - analyse(); - if (!bugReporter.getCaseConfig().emitJson) - bugReporter.printReport(); -} - u32_t AEReporter::getTotalNodeCount(const ICFG* icfg) const { if (!icfg) return 0; @@ -197,465 +154,3 @@ void AEReporter::writeJsonSummary(std::ostream& os, const ICFG* icfg, os << "]\n"; os << "}\n"; } - -/** - * @brief Build the interprocedural WTO per call-graph SCC entry. - * - * Each (mutually) recursive function's entry node becomes a WTO cycle head - * because intra-SCC call edges are turned into back-edges. The same - * widening/narrowing machinery used for loops then drives recursion to a - * fixpoint via handleICFGCycle; there is no separate "is recursive?" check. 
- */ -void AbstractExecution::initWTO() { - ander = AndersenWaveDiff::createAndersenWaveDiff(svfir); - // Find the strongly connected components of the call graph so we can hand - // each SCC's member set to ICFGWTO below. - Andersen::CallGraphSCC* callGraphScc = ander->getCallGraphSCC(); - callGraphScc->find(); - auto callGraph = ander->getCallGraph(); - - // Build one interprocedural WTO per call-graph-SCC entry function. The - // SCC's function set is passed to ICFGWTO so that call edges *into* a - // callee in the same SCC become back-edges: a (mutually) recursive - // function's entry node then shows up as a WTO cycle head, and the same - // widening/narrowing machinery used for loops drives it to a fixpoint. - for (auto it = callGraph->begin(); it != callGraph->end(); ++it) { - const FunObjVar* fun = it->second->getFunction(); - if (fun->isDeclaration()) - continue; - - NodeID repNodeId = callGraphScc->repNode(it->second->getId()); - const NodeBS& cgSCCNodes = callGraphScc->subNodes(repNodeId); - - // Only SCC-entry functions (with a caller outside the SCC, or no - // caller at all) own a WTO; intra-SCC members are reached via the - // entry's interprocedural WTO. - bool isEntry = it->second->getInEdges().empty(); - for (auto inEdge : it->second->getInEdges()) - if (!cgSCCNodes.test(inEdge->getSrcID())) - isEntry = true; - if (!isEntry) - continue; - - Set funcScc; - for (const auto& node : cgSCCNodes) - funcScc.insert(callGraph->getGNode(node)->getFunction()); - - auto* wto = new ICFGWTO(icfg->getFunEntryICFGNode(fun), funcScc); - wto->init(); - funcToWTO[fun] = wto; - } - -} - -// updateGepObjOffsetFromBase / hasGepObjOffsetFromBase / getGepObjOffsetFromBase -// / getAccessOffset / updateAbsState / mergeStatesFromPredecessors / branch -// refinement (isCmpBranchFeasible / isSwitchBranchFeasible / isBranchFeasible) -// are all student TODOs this year and live in Assignment_3.cpp. 
- - -/// handle global node -void AbstractExecution::handleGlobalNode() { - AbstractState as; - const ICFGNode* node = icfg->getGlobalICFGNode(); - bugReporter.noteAnalyzed(node); - postAbsTrace()[node] = preAbsTrace[node]; - // The null pointer carries the dedicated null memory address so that - // pointer-vs-null comparisons and null dereferences can be detected. - postAbsTrace()[node][IRGraph::NullPtr] = AddressValue(NullMemAddr); - // Global Node, we just need to handle addr, load, store, copy and gep - for (const SVFStmt* stmt : node->getSVFStmts()) { - updateAbsState(stmt); - } -} - -/// Verify that every ground-truth stub call site in the program was reached -/// by the student's analysis (added to `assert_points` via handleCallSite -> -/// handleStubFunctions / handleCheckpointStubs). A missed stub site means -/// the student's control-flow logic skipped a place the grader cares about. -/// -/// Recognised stubs: -/// - svf_assert / svf_assert_eq : abstract-state assertion checks -/// - UNSAFE_PTRDEREF / SAFE_PTRDEREF : null-deref ground truth -/// - UNSAFE_BUFACCESS / SAFE_BUFACCESS : buffer-access ground truth -/// -/// Additionally requires that the number of reported bugs is at least the -/// number of UNSAFE_* stubs in the program. 
-void AbstractExecution::ensureAllAssertsValidated() { - static const Set kAssertStubs = {"svf_assert", "svf_assert_eq"}; - static const Set kCheckpointStubs = { - "UNSAFE_PTRDEREF", "SAFE_PTRDEREF", - "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"}; - u32_t unsafe_to_be_verified = 0; - for (auto it = svfir->getICFG()->begin(); it != svfir->getICFG()->end(); ++it) { - const ICFGNode* node = it->second; - const CallICFGNode* call = SVFUtil::dyn_cast(node); - if (!call) - continue; - const FunObjVar* fun = call->getCalledFunction(); - if (!fun) - continue; - const std::string& name = fun->getName(); - const bool isAssertStub = kAssertStubs.count(name) > 0; - const bool isCheckpointStub = kCheckpointStubs.count(name) > 0; - if (!isAssertStub && !isCheckpointStub) - continue; - if (name.rfind("UNSAFE_", 0) == 0) - unsafe_to_be_verified++; - if (!bugReporter.isAssertionPoint(call)) { - std::stringstream ss; - ss << "The stub function callsite (" << name - << ") was not reached by the student's control flow: " - << call->toString(); - std::cerr << ss.str() << std::endl; - assert(false); - } - } - - assert(unsafe_to_be_verified <= bugReporter.getBugReporter().getBugSet().size() && - "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported"); -} - - -/** - * @brief The driver program - * - * This function conducts the overall analysis of the program by initializing and processing - * various components of the control flow graph (ICFG) and handling global nodes and WTO cycles. - * It marks recursive functions, initializes WTOs for each function, and processes the main function. - */ -void AbstractExecution::analyse() { - // Init WTOs for all functions, and handle Global ICFGNode of SVFModule - initWTO(); - // AbstractStateManager was folded into AbstractInterpretation upstream; the - // header AE/Svfexe/AbstractStateManager.h was removed. 
Use the singleton - // AbstractInterpretation; it pulls SVFIR from PAG::getPAG() internally and - // does not need an explicit Andersen analysis to be passed in. - ai = &AbstractInterpretation::getAEInstance(); - - // Handle the global node - handleGlobalNode(); - - // Process the main function if it exists - if (const FunObjVar* fun = svfir->getFunObjVar("main")) { - // arguments of main are initialised as \top to represent all possible inputs - for (u32_t i = 0; i < fun->arg_size(); ++i) { - AbstractState& as = getAbsStateFromTrace(icfg->getGlobalICFGNode()); - as[fun->getArg(i)->getId()] = IntervalValue::top(); - } - //assert the main function exist - assert(svfir->getFunObjVar("main") != nullptr && "Main function not found"); - handleFunction(svfir->getICFG()->getFunEntryICFGNode(svfir->getFunObjVar("main"))); - } - return; -} - -/** - * @brief Handle a node in the ICFG - * - * This function handles a node in the ICFG by merging the abstract states of its predecessors, - * updating the abstract state based on the node's statements, and handling stub functions. - * It also checks if the abstract state has reached a fixpoint and returns the result. 
- * Return true means the abstract state has changed - * Return false means the abstract state has reached a fixpoint or is infeasible - * - * @param node The node to be handled - * @return True if the abstract state has changed, false if it has reached a fixpoint or is infeasible - */ -bool AbstractExecution::handleICFGNode(const ICFGNode* node) { - AbstractState tmpEs; - bool is_feasible = mergeStatesFromPredecessors(node, tmpEs); - if (!is_feasible) { - SVFUtil::errs() << "Infeasible for node " << node->getId() << "\n"; - return false; - } - bugReporter.noteAnalyzed(node); - preAbsTrace[node] = tmpEs; - // Store the last abstract state, used to check if the abstract state has reached a fixpoint - AbstractState last_as = postAbsTrace()[node]; - postAbsTrace()[node] = preAbsTrace[node]; - for (const SVFStmt* stmt : node->getSVFStmts()) { - updateAbsState(stmt); - } - - if (const CallICFGNode* callNode = SVFUtil::dyn_cast(node)) { - // Bug checking for external API calls happens inside handleCallSite, - // after the API value summary is applied. - handleCallSite(callNode); - } - else { - // Implicit dereference / GEP overflow checks on ordinary statements. - nullptrDerefDetection(node); - bufOverflowDetection(node); - } - // If the abstract state is the same as the last abstract state, return false because we have reached fixpoint - if (postAbsTrace()[node] == last_as) { - return false; - } - return true; -} -// updateAbsState now lives in Assignment_3.cpp (student TODO). - -namespace { -/// Harness-only ground-truth check for buffer access safety. Computed from -/// SVF primitives (base object size + GepObjVar::getConstantFieldIdx) so it -/// never depends on the student's gepObjOffsetFromBase map or -/// canSafelyAccessMemory implementation. 
-bool harnessSafeAccess(AbstractState& as, SVFIR* svfir, const ValVar* value, - const IntervalValue& len) { - AbstractValue ptrVal = as[value->getId()]; - if (!ptrVal.isAddr()) - return true; - for (const auto& addr : ptrVal.getAddrs()) { - if (AbstractState::isBlackHoleObjAddr(addr) || AbstractState::isNullMem(addr)) - continue; - NodeID objId = as.getIDFromAddr(addr); - const BaseObjVar* baseObj = svfir->getBaseObject(objId); - if (!baseObj || baseObj->isBlackHoleObj() || !baseObj->isConstantByteSize()) - continue; - u32_t size = baseObj->getByteSizeOfObj(); - IntervalValue baseOffset(0); - const SVFVar* svfVar = svfir->getGNode(objId); - if (auto* gepObj = SVFUtil::dyn_cast(svfVar)) - baseOffset = IntervalValue((s64_t)gepObj->getConstantFieldIdx()); - IntervalValue offset = baseOffset + len; - if (offset.ub().getIntNumeral() >= (s64_t)size) - return false; - } - return true; -} - -/// Harness-only ground-truth check for pointer-dereference safety. -bool harnessSafeDeref(AbstractState& as, const ValVar* value) { - if (!value || value->getId() == IRGraph::NullPtr) - return false; - const AbstractValue& absVal = as[value->getId()]; - if (!absVal.isAddr()) - return true; - for (const auto& addr : absVal.getAddrs()) { - if (AbstractState::isBlackHoleObjAddr(addr)) - continue; - if (AbstractState::isNullMem(addr)) - return false; - if (as.isFreedMem(addr)) - return false; - } - return true; -} -} // namespace - -/** - * @brief Handle a call site in the control flow graph - * - * This function processes a call site by updating the abstract state, handling the called function, - * and managing the call stack. It resumes the execution state after the function call. 
- * - * @param node The call site node to be handled - */ -void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { - // Get the callee function associated with the call site - const FunObjVar* callee = callNode->getCalledFunction(); - if (!callee) - return; - std::string fun_name = callee->getName(); - if (fun_name == "svf_assert" || fun_name == "svf_assert_eq") { - handleStubFunctions(callNode); - } - else if (fun_name == "SAFE_BUFACCESS" || fun_name == "UNSAFE_BUFACCESS" || - fun_name == "SAFE_PTRDEREF" || fun_name == "UNSAFE_PTRDEREF") { - // Ground-truth checkpoints for the buffer/nullptr checkers. - handleCheckpointStubs(callNode); - } - else if (fun_name == "nd" || fun_name == "rand") { - NodeID lhsId = callNode->getRetICFGNode()->getActualRet()->getId(); - postAbsTrace()[callNode][lhsId] = AbstractValue(IntervalValue::top()); - } - else if (SVFUtil::isExtCall(callee)) { - // External API value summaries. The student implements the memory and - // string families (memcpy/memset/strcpy/strcat/...) plus the - // assignment-specific mem_insert/str_insert stubs in updateStateOnExtCall; - // unmodelled functions fall back to SVF inside that dispatcher. After - // propagating values we run the bug checkers on the API's - // pointer/length arguments. - updateStateOnExtCall(callNode); - nullptrDerefDetection(callNode); - bufOverflowDetection(callNode); - } - else { - // Skip recursive callsites (within the same call-graph SCC): the - // interprocedural WTO built in initWTO() already encoded this as a - // back-edge, so the outer cycle's widen/narrow iteration in - // handleICFGCycle drives the recursion to a fixpoint. Mirrors - // SVF's `AbstractInterpretation::skipRecursiveCall`. 
- const FunObjVar* caller = callNode->getCaller(); - if (caller && ander && ander->inSameCallGraphSCC(caller, callee)) - return; - handleFunction(svfir->getICFG()->getFunEntryICFGNode(callee)); - const RetICFGNode* retNode = callNode->getRetICFGNode(); - if (postAbsTrace().count(callNode)) - postAbsTrace()[retNode] = postAbsTrace()[callNode]; - } -} - -/** - * @brief Validate the SAFE/UNSAFE checkpoint stub functions. - * - * These stubs encode the ground truth for the bug checkers. Validation uses - * the harness-only `harnessSafeAccess` / `harnessSafeDeref` helpers above, - * NOT the student's `canSafelyAccessMemory` / `canSafelyDerefPtr` — so the - * stub verdict cannot be biased by student bugs. - */ -void AbstractExecution::handleCheckpointStubs(const CallICFGNode* callNode) { - bugReporter.noteAssertionPoint(callNode); - const std::string fun_name = callNode->getCalledFunction()->getName(); - if (fun_name == "SAFE_BUFACCESS" || fun_name == "UNSAFE_BUFACCESS") { - if (callNode->arg_size() < 2) - return; - AbstractState& as = getAbsStateFromTrace(callNode); - IntervalValue len = as[callNode->getArgument(1)->getId()].getInterval(); - if (len.isBottom()) - len = IntervalValue(0); - const ValVar* ptr = callNode->getArgument(0); - if (!harnessSafeAccess(as, svfir, ptr, len - IntervalValue(1))) - reportBufOverflow(callNode); - } - else if (fun_name == "SAFE_PTRDEREF" || fun_name == "UNSAFE_PTRDEREF") { - if (callNode->arg_size() < 1) - return; - AbstractState& as = getAbsStateFromTrace(callNode); - const ValVar* ptr = callNode->getArgument(0); - if (!harnessSafeDeref(as, ptr)) - reportNullDeref(callNode); - } -} - -void AbstractExecution::handleFunction(const ICFGNode* funEntry) { - // Iterate the function's interprocedural WTO components in WTO order. - // Singletons are handled directly; cycles (loop heads and recursive - // function entries) are driven to a fixpoint by handleICFGCycle. 
- // Recursive callsites are filtered out earlier in handleCallSite via - // `inSameCallGraphSCC`, so handleFunction never re-enters itself. - const FunObjVar* fun = funEntry->getFun(); - auto it = funcToWTO.find(fun); - if (it == funcToWTO.end()) - return; - for (const ICFGWTOComp* comp : it->second->getWTOComponents()) { - if (const ICFGSingletonWTO* singleton = SVFUtil::dyn_cast(comp)) { - handleICFGNode(singleton->getICFGNode()); - } - else if (const ICFGCycleWTO* cycle = SVFUtil::dyn_cast(comp)) { - handleICFGCycle(cycle); - } - } -} - -/** - * @brief Handle stub functions for verifying abstract interpretation results - * - * This function handles specific stub functions (`svf_assert` and `OVERFLOW`) to check whether - * the abstract interpretation results are as expected. For `svf_assert(expr)`, the expression must hold true. - * For `svf_assert_eq(a, b)`, the two intervals must be equal. - * - * @param callnode The call node representing the stub function to be handled - */ - -void AbstractExecution::handleStubFunctions(const SVF::CallICFGNode* callNode) { - // Handle the 'svf_assert' stub function - if (callNode->getCalledFunction()->getName() == "svf_assert") { - bugReporter.noteAssertionPoint(callNode); - // If the condition is false, the program is infeasible - u32_t arg0 = callNode->getArgument(0)->getId(); - AbstractState& as = getAbsStateFromTrace(callNode); - - // Check if the interval for the argument is infinite - if (as[arg0].getInterval().is_infinite()) { - SVFUtil::errs() << "svf_assert Fail. 
" << callNode->toString() << "\n"; - assert(false); - } - else { - if (as[arg0].getInterval().equals(IntervalValue(1, 1))) { - std::stringstream ss; - ss << "The assertion (" << callNode->toString() << ")" - << " is successfully verified!!\n"; - SVFUtil::outs() << ss.str() << std::endl; - } - else { - std::stringstream ss; - ss << "The assertion (" << callNode->toString() << ")" - << " is unsatisfiable!!\n"; - SVFUtil::outs() << ss.str() << std::endl; - assert(false); - } - } - return; - } - else if (callNode->getCalledFunction()->getName() == "svf_assert_eq") { - u32_t arg0 = callNode->getArgument(0)->getId(); - u32_t arg1 = callNode->getArgument(1)->getId(); - AbstractState& as = getAbsStateFromTrace(callNode); - if (as[arg0].getInterval().equals(as[arg1].getInterval())) - { - SVFUtil::errs() << SVFUtil::sucMsg("The assertion is successfully verified!!\n"); - } - else - { - SVFUtil::errs() <<"svf_assert_eq Fail. " << callNode->toString() << "\n"; - assert(false); - } - return; - } -} - -// =========================================================================== -// State-manager primitives — thin forwarders to the underlying -// AbstractInterpretation singleton. Defined here (not in the header) so -// student code never sees AbstractInterpretation/AbsExtAPI directly. 
-// =========================================================================== -namespace SVF { - -const AbstractValue& AbstractExecution::getAbsValue(const ValVar* var, const ICFGNode* node) { - return ai->getAbsValue(var, node); -} -const AbstractValue& AbstractExecution::getAbsValue(const ObjVar* var, const ICFGNode* node) { - return ai->getAbsValue(var, node); -} -const AbstractValue& AbstractExecution::getAbsValue(const SVFVar* var, const ICFGNode* node) { - return ai->getAbsValue(var, node); -} -void AbstractExecution::updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node) { - ai->updateAbsValue(var, val, node); -} -void AbstractExecution::updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node) { - ai->updateAbsValue(var, val, node); -} -void AbstractExecution::updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node) { - ai->updateAbsValue(var, val, node); -} -AbstractValue AbstractExecution::loadValue(const ValVar* pointer, const ICFGNode* node) { - return ai->loadValue(pointer, node); -} -void AbstractExecution::storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node) { - ai->storeValue(pointer, val, node); -} -AddressValue AbstractExecution::getGepObjAddrs(const ValVar* pointer, IntervalValue offset) { - return ai->getGepObjAddrs(pointer, offset); -} -IntervalValue AbstractExecution::getGepElementIndex(const GepStmt* gep) { - return ai->getGepElementIndex(gep); -} -IntervalValue AbstractExecution::getGepByteOffset(const GepStmt* gep) { - return ai->getGepByteOffset(gep); -} -u32_t AbstractExecution::getAllocaInstByteSize(const AddrStmt* addr) { - return ai->getAllocaInstByteSize(addr); -} - -// harness-only post-trace accessors (need full AbstractInterpretation type). 
-AbstractState& AbstractExecution::getAbsStateFromTrace(const ICFGNode* node) { - return (*ai)[node]; -} -Map& AbstractExecution::postAbsTrace() { - return ai->getTrace(); -} - -} // namespace SVF diff --git a/Assignment-3/CPP/AEReporter.h b/Assignment-3/CPP/AEReporter.h index 4641596..b9cf55b 100644 --- a/Assignment-3/CPP/AEReporter.h +++ b/Assignment-3/CPP/AEReporter.h @@ -122,12 +122,21 @@ namespace SVF { } void printReport() { - if (_nodeToBugInfo.size() > 0) { - std::cerr << "######################Buffer Overflow (" + std::to_string(_nodeToBugInfo.size()) - + " found)######################\n"; - std::cerr << "---------------------------------------------\n"; - for (auto& it : _nodeToBugInfo) { - std::cerr << it.second << "\n---------------------------------------------\n"; + if (_reports.empty()) + return; + // Group by kind so buffer-overflow and nullptr-deref reports + // are clearly distinguished in the terminal output. + Map> grouped; + for (const auto& r : _reports) + grouped[r.kind].push_back(&r); + std::cerr << "###################### Bug Reports (" + << _reports.size() << " total) ######################\n"; + for (const auto& kv : grouped) { + std::cerr << "--- " << kv.first + << " (" << kv.second.size() << ") ---\n"; + for (const auto* r : kv.second) { + std::cerr << r->message + << "\n---------------------------------------------\n"; } } } diff --git a/Assignment-3/CPP/Assignment_3.cpp b/Assignment-3/CPP/Assignment_3.cpp index 51142db..28de172 100644 --- a/Assignment-3/CPP/Assignment_3.cpp +++ b/Assignment-3/CPP/Assignment_3.cpp @@ -26,41 +26,135 @@ */ #include "Assignment_3.h" +// Needed by the analysis driver below (runOnModule / analyse reach +// AbstractInterpretation::getAEInstance; handleCallSite uses Andersen's SCC +// check). 
+#include "AE/Svfexe/AbstractInterpretation.h" +#include "WPA/Andersen.h" using namespace SVF; using namespace SVFUtil; // =========================================================================== -// Student TODOs -// =========================================================================== -// Implement abstract interpretation for verification and bug detection. The -// harness (AEReporter.cpp) drives the analysis and calls into the six entry -// points below (please do not delete); You are free to add any internal -// classes and helper methods you need within Assignment_3.h and -// Assignment_3.cpp. +// Analysis driver entry points (pre-implemented). // =========================================================================== -void AbstractExecution::updateAbsState(const SVFStmt* stmt) { - // TODO: dispatch on the statement subtype and update the abstract state. +/// Entry point invoked from test-ae.cpp. Records the SVFIR and the ICFG +/// pointer, runs the analysis, and (unless the case is in JSON mode) prints +/// the bug-reporter summary. +void AbstractExecution::runOnModule(SVF::ICFG* _icfg) { + svfir = PAG::getPAG(); + icfg = _icfg; + analyse(); + if (!getReporter().getCaseConfig().emitJson) + getReporter().printReport(); } -bool AbstractExecution::mergeStatesFromPredecessors(const ICFGNode* block, AbstractState& as) { - // TODO: join predecessor post-states (with branch refinement) into `as`. - return false; +/// Build the interprocedural WTO, initialise the AbstractInterpretation +/// singleton, replay the global ICFG node, then start the analysis at main. +void AbstractExecution::analyse() { + initWTO(); + // AbstractStateManager was folded into AbstractInterpretation upstream + // (the AE/Svfexe/AbstractStateManager.h header was removed). Use the + // AbstractInterpretation singleton; it pulls SVFIR from PAG::getPAG() + // internally and does not need an explicit Andersen analysis to be + // passed in. 
+ ai = &AbstractInterpretation::getAEInstance(); + + handleGlobalNode(); + + if (const FunObjVar* fun = svfir->getFunObjVar("main")) { + // Arguments of main are initialised as \top to represent all + // possible inputs. + for (u32_t i = 0; i < fun->arg_size(); ++i) { + AbstractState& as = getAbsStateFromTrace(icfg->getGlobalICFGNode()); + as[fun->getArg(i)->getId()] = IntervalValue::top(); + } + assert(svfir->getFunObjVar("main") != nullptr && "Main function not found"); + handleFunction(svfir->getICFG()->getFunEntryICFGNode(svfir->getFunObjVar("main"))); + } } -void AbstractExecution::handleICFGCycle(const ICFGCycleWTO* cycle) { - // TODO: iterate the cycle body to a fixpoint (widening optional). +/// Record a buffer-overflow bug. +void AbstractExecution::reportBufOverflow(const ICFGNode* node) { + AEException bug(node->toString()); + getReporter().addBugToReporter("buffer-overflow", bug, node); } -void AbstractExecution::bufOverflowDetection(const ICFGNode* node) { - // TODO: detect out-of-bounds memory accesses at `node`. +/// Record a nullptr-dereference bug. +void AbstractExecution::reportNullDeref(const ICFGNode* node) { + AEException bug(node->toString()); + getReporter().addBugToReporter("nullptr-deref", bug, node); +} + +// =========================================================================== +// Student TODOs — driver entry points +// =========================================================================== +// The harness's `analyse()` (above) calls `handleGlobalNode()` once for the +// SVFModule's global ICFG node and `handleFunction(main_entry)` to start the +// per-function analysis. A typical layering is: +// handleFunction walks the interprocedural WTO components and dispatches +// singletons to handleICFGNode / cycles to handleICFGCycle. +// handleICFGNode merges predecessor states (Task 2), runs the per-statement +// transfer functions (Task 1), routes call sites via +// handleCallSite, and runs the bug checkers (Tasks 5 / 6). 
+// handleICFGCycle iterates the cycle body to a fixpoint with widening / +// narrowing (Task 3). +// You are free to deviate from this skeleton as long as the test driver's +// expectations hold. Helper methods for Tasks 1, 2, 4, 5, 6 are yours to +// design — override the matching no-op virtuals (updateAbsState, +// mergeStatesFromPredecessors, updateStateOnExtCall, bufOverflowDetection, +// nullptrDerefDetection) if you want handleCallSite to drive into them. +// =========================================================================== + +void AbstractExecution::handleGlobalNode() { + // TODO: initialise the global ICFG node's state and replay the global + // statements through your statement transfer functions. } -void AbstractExecution::nullptrDerefDetection(const ICFGNode* node) { - // TODO: detect nullptr dereferences at `node`. +void AbstractExecution::handleFunction(const ICFGNode* funEntry) { + // TODO: walk the function's interprocedural WTO components (singletons + // vs. cycles) and dispatch to handleICFGNode / handleICFGCycle. } -void AbstractExecution::updateStateOnExtCall(const SVF::CallICFGNode* call) { - // TODO: model memory/string library calls and assignment-specific stubs. +bool AbstractExecution::handleICFGNode(const ICFGNode* node) { + // TODO: merge predecessor states, run the per-statement transfer + // functions, handle call sites (delegating to handleCallSite for call + // nodes), and return whether the post-state changed. + return false; +} + +void AbstractExecution::handleICFGCycle(const ICFGCycleWTO* cycle) { + // TODO: iterate the cycle body to a fixpoint (widening / narrowing). +} + +// =========================================================================== +// Student TODO — handleCallSite. 
+// +// Dispatch a call ICFG node based on its callee: +// +// * svf_assert / svf_assert_eq -> handleStubFunctions(call) +// * SAFE_/UNSAFE_BUFACCESS, SAFE_/UNSAFE_PTRDEREF +// -> handleCheckpointStubs(call) +// * nd / rand -> nondeterministic return: +// set the actual-return +// variable to TOP. +// * other external callees (SVFUtil::isExtCall) +// -> updateStateOnExtCall, then +// run the bug checkers +// (nullptrDerefDetection + +// bufOverflowDetection) on +// the call's arguments. +// * non-extern callees -> skip recursive callsites +// using Andersen's +// inSameCallGraphSCC, then +// inline by calling +// handleFunction on the +// callee's entry ICFG node +// and forward the call +// node's post-state to the +// return ICFG node. +// =========================================================================== +void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { + // TODO } diff --git a/Assignment-3/CPP/Assignment_3.h b/Assignment-3/CPP/Assignment_3.h index e2405f5..d270816 100644 --- a/Assignment-3/CPP/Assignment_3.h +++ b/Assignment-3/CPP/Assignment_3.h @@ -31,19 +31,45 @@ namespace SVF { class AbstractInterpretation; class AndersenWaveDiff; - /// Abstract Execution class + + /// Student-facing AbstractExecution class. + /// + /// The harness (AEHelper.cpp / AEReporter.cpp) provides interprocedural + /// WTO construction, stub / checkpoint sub-dispatch, the external-API + /// whitelist, the abstract-state helpers wrapping the underlying + /// AbstractInterpretation singleton, and the assertion-coverage + /// validator. Assignment_3.cpp owns the analysis driver + /// (runOnModule / analyse / report* forwarders, all pre-implemented) + /// plus five student TODOs: the four driver entry points + /// (handleGlobalNode / handleFunction / handleICFGNode / + /// handleICFGCycle) and `handleCallSite`. 
You design the rest of the + /// six tasks and have your handleICFGNode dispatch into them however + /// you see fit — override the matching no-op virtuals (updateAbsState, + /// mergeStatesFromPredecessors, updateStateOnExtCall, + /// bufOverflowDetection, nullptrDerefDetection) if you want your + /// handleCallSite to call into your code. + /// + /// General analysis engine + /// 1. Statement transfer functions -- typically inside handleICFGNode + /// 2. Branch refinement -- typically inside handleICFGNode + /// 3. Cycle and recursion fixpoint -- typically inside handleICFGCycle + /// 4. External-API value summaries -- typically inside handleCallSite + /// + /// Bug checkers + /// 5. Buffer-overflow checker + /// 6. Nullptr-dereference checker class AbstractExecution { public: - /// Constructor + // ==================================================================== + // Construction / lifetime (harness) + // ==================================================================== explicit AbstractExecution(const AssignmentCaseConfig& config = AssignmentCaseConfig()) : bugReporter(config) { } - /// Harness reporter accessor (used by test-ae.cpp for JSON summary). - AEReporter& getReporter() { return bugReporter; } - const AEReporter& getReporter() const { return bugReporter; } - - virtual void runOnModule(ICFG* icfg); + virtual ~AbstractExecution() { + // `ai` is the AbstractInterpretation singleton; SVF owns its lifetime. + } static AbstractExecution& getAEInstance() { @@ -51,32 +77,56 @@ namespace SVF { return instance; } - /// Handle global variables and initializations - void handleGlobalNode(); + /// Harness reporter accessor (used by test-ae.cpp for the JSON summary). 
+ AEReporter& getReporter() { return bugReporter; } + const AEReporter& getReporter() const { return bugReporter; } - /// Driver of the program + // ==================================================================== + // Harness driver (do not modify) + // + // Entry points called by the test driver (test-ae.cpp): + // runOnModule -- build SVFIR + ICFG and kick off analyse(). + // analyse -- initWTO, init globals, dispatch into the + // student-implemented handleFunction. + // initWTO -- build one interprocedural WTO per call-graph + // SCC entry function (Andersen-based). + // ensureAllAssertsValidated + // -- after analysis, verify every ground-truth + // stub call site was reached and that the + // UNSAFE_* counts match reported bug counts. + // ==================================================================== + virtual void runOnModule(ICFG* icfg); virtual void analyse(); + void initWTO(); + void ensureAllAssertsValidated(); - /// Handle state updates for each type of SVF statement - virtual void updateAbsState(const SVFStmt* stmt); - - /// Fuction used to implement buffer overflow detection - virtual void bufOverflowDetection(const ICFGNode* node); - /// Function used to implement nullptr dereference detection - virtual void nullptrDerefDetection(const ICFGNode* node); + // ==================================================================== + // Harness dispatch + bug-reporting facade + // + // The student's per-node driver (handleICFGNode) calls handleCallSite + // for each CallICFGNode; the harness then routes svf_assert / + // SAFE_*/UNSAFE_* stubs to the appropriate checker, and forwards + // non-extern callees back into handleFunction (with the recursive + // callsite skip rule documented in AEHelper.cpp). 
+ // ==================================================================== + void handleCallSite(const CallICFGNode* callnode); + void handleStubFunctions(const CallICFGNode* call); + void handleCheckpointStubs(const CallICFGNode* callnode); + bool isExternalCallForAssignment(const SVF::FunObjVar* func); - /// Report a buffer overflow for a given ICFG node void reportBufOverflow(const ICFGNode* node); - /// Report a nullptr dereference for a given ICFG node void reportNullDeref(const ICFGNode* node); - /// External-API value summaries (student TODO). - void updateStateOnExtCall(const SVF::CallICFGNode* extCallNode); - - /// State-manager primitives (forward to the underlying - /// AbstractInterpretation). Use these from the statement transfer - /// functions and the external-API summaries. - ///@{ + // ==================================================================== + // Abstract-state helpers + // + // Pre-implemented operations on the abstract domain: reading / writing + // the abstract value of a variable, loading / storing through an + // abstract pointer, and computing GEP byte / element offsets and + // alloca byte sizes. Use these from the transfer functions and the + // external-API summaries instead of touching the underlying + // AbstractInterpretation singleton directly. 
+ // ==================================================================== const AbstractValue& getAbsValue(const ValVar* var, const ICFGNode* node); const AbstractValue& getAbsValue(const ObjVar* var, const ICFGNode* node); const AbstractValue& getAbsValue(const SVFVar* var, const ICFGNode* node); @@ -92,61 +142,69 @@ namespace SVF { IntervalValue getGepElementIndex(const GepStmt* gep); IntervalValue getGepByteOffset(const GepStmt* gep); u32_t getAllocaInstByteSize(const AddrStmt* addr); - ///@} - /// Handle stub functions for verifying abstract interpretation results - void handleStubFunctions(const CallICFGNode* call); - - /// Build the (interprocedural) WTO for each call-graph SCC entry. - void initWTO(); - - /// Merge predecessor states into the current node's pre-state. - bool mergeStatesFromPredecessors(const ICFGNode* curNode, AbstractState& as); - - /// Handle a call site in the control flow graph - void handleCallSite(const CallICFGNode* callnode); - /// Validate the SAFE/UNSAFE checkpoint stub functions - void handleCheckpointStubs(const CallICFGNode* callnode); - bool isExternalCallForAssignment(const SVF::FunObjVar* func); + /// Read-only access to the abstract state at an ICFG node (pulled from + /// AbstractInterpretation's owned trace). + AbstractState& getAbsStateFromTrace(const ICFGNode* node); - /// Handle a function in the ICFG + // ==================================================================== + // STUDENT TODOs — Driver entry points + // ==================================================================== + // The harness's `analyse()` calls `handleGlobalNode()` once and then + // `handleFunction(main_entry)`. From there the per-function flow, + // per-node merging + transfer, and cycle / recursion fixpoint are + // your responsibility. 
A typical layering: handleFunction walks the + // interprocedural WTO, handleICFGNode merges predecessor states and + // runs the per-statement transfer functions, handleICFGCycle drives + // loops / recursion to a fixpoint with widening / narrowing. You + // are free to deviate as long as the test driver's expectations + // (covered stubs, reported bugs) hold. + // ==================================================================== + void handleGlobalNode(); void handleFunction(const ICFGNode* funEntry); - bool handleICFGNode(const ICFGNode* node); - void handleICFGCycle(const ICFGCycleWTO* cycle); - /// Return its abstract state given an ICFGNode (defined in the helper). - AbstractState& getAbsStateFromTrace(const ICFGNode* node); - - void ensureAllAssertsValidated(); - - /// Destructor - virtual ~AbstractExecution() { - // `ai` is the AbstractInterpretation singleton; SVF owns its lifetime. - } + // ==================================================================== + // Optional hooks for the rest of the six tasks (no-op by default). + // + // The pre-implemented handleCallSite (in Assignment_3.cpp) routes + // ordinary external-API calls through updateStateOnExtCall and then + // nullptrDerefDetection / bufOverflowDetection. Override these + // virtuals if you want your value-summary modelling (Task 4) and + // your bug checkers (Tasks 5 / 6) to run during the analysis. How + // you structure them internally — per-statement transfer helpers + // for Task 1, predecessor join with branch refinement for Task 2, + // memory-safety predicates for Tasks 5 / 6, length-of-string + // helpers shared across summaries and checker — is your design. 
+ // ==================================================================== + virtual void updateAbsState(const SVFStmt* stmt) {} + virtual bool mergeStatesFromPredecessors(const ICFGNode* curNode, AbstractState& as) { return false; } + virtual void updateStateOnExtCall(const SVF::CallICFGNode* extCallNode) {} + virtual void bufOverflowDetection(const ICFGNode* node) {} + virtual void nullptrDerefDetection(const ICFGNode* node) {} protected: - /// SVFIR and ICFG + // Harness state shared with student methods. SVFIR* svfir; ICFG* icfg; /// Andersen pointer analysis (owns the call graph + SCC used to drive /// the interprocedural WTO); created in initWTO(). AndersenWaveDiff* ander = nullptr; - /// Map a function to its corresponding WTO + /// One interprocedural WTO per call-graph-SCC entry function. Map funcToWTO; /// Abstract trace immediately before an ICFGNode. Map preAbsTrace; - /// The "post" trace lives inside the manager (defined in the helper). + /// Post-trace lives inside the AbstractInterpretation singleton; this + /// accessor exposes it as a Map. Map& postAbsTrace(); private: AEReporter bugReporter; - /// Handle to the underlying state manager. Used by the merged - /// `getAbsValue` / `updateAbsValue` / `loadValue` / `storeValue` / GEP - /// primitives above, and by the harness-only post-trace accessors. + /// Underlying AbstractInterpretation singleton — backs the + /// abstract-state helpers above and the post-trace accessor. AbstractInterpretation* ai = nullptr; }; diff --git a/Assignment-3/Python/AEHelper.py b/Assignment-3/Python/AEHelper.py new file mode 100644 index 0000000..60a7512 --- /dev/null +++ b/Assignment-3/Python/AEHelper.py @@ -0,0 +1,739 @@ +"""Harness for Assignment-3 abstract interpretation. 
+ +Owns the AbstractExecution class's harness-side methods — interprocedural +WTO construction (initWto), stub / checkpoint sub-dispatchers +(handleStubFunction, handleCheckpointStubs) invoked from the student's +handleCallSite override in Assignment_3.py, the external-API whitelist +(isExternalCallForAssignment), the abstract-state helpers that wrap +AbstractInterpretation, and the validator (ensureAllAssertsValidated). + +The AEReporter class (pure bug reporting + JSON / coverage summary plus +the GEP / strlen / memcpy helpers used by the bug checkers) lives in +AEReporter.py. The analysis driver (analyse / handleCallSite / +reportBufOverflow / reportNullDeref) and the six student tasks live in +Assignment_3.py. +""" + +from AEReporter import AEReporter + +from abc import abstractmethod + +from pysvf import ICFG, ICFGNode +from typing import List, Dict, Set, Optional +import pysvf +import faulthandler +faulthandler.enable() + +import pysvf +from pysvf import IntervalValue, AddressValue, AbstractValue, AbstractState +import sys +from pysvf.enums import OpCode, Predicate +class WTOCycleDepth: + def __init__(self): + self._heads: List[ICFGNode] = [] + + def add(self, head: ICFGNode): + self._heads.append(head) + + def __iter__(self): + return iter(self._heads) + + def __str__(self): + return f"{self._heads}" + + def __repr__(self): + return f"{self._heads}" + + def compare(self, other): + if self == other: + return 0 + this_it = iter(self) + other_it = iter(other) + while this_it: + if not other_it: + return 1 + elif this_it == other_it: + this_it = next(this_it) + other_it = next(other_it) + else: + return 2 + if not other_it: + return 0 + else: + return -1 + + def __lt__(self, other): + return self.compare(other) == -1 + + def __le__(self, other): + return self.compare(other) <= 0 + + def __eq__(self, other): + return self.compare(other) == 0 + + def __ge__(self, other): + return self.compare(other) >= 0 + + def __gt__(self, other): + return self.compare(other) == 
1 + + + +class ICFGWTOComp: + def __init__(self, node: ICFGNode): + self.node = node + + + def getICFGNode(self) -> ICFGNode: + return self.node + + @abstractmethod + def accept(self, visitor): + pass + + +class ICFGWTONode(ICFGWTOComp): + def __init__(self, node: ICFGNode): + self.node = node + + def accept(self, visitor): + visitor.visitNode(self) + + +class ICFGWTOCycle(ICFGWTOComp): + def __init__(self, head: ICFGWTONode, components: List[ICFGWTOComp]): + self.head = head + self.components = components + + def accept(self, visitor): + visitor.visit(self) + + +class ICFGWTO: + + class WTOCycleDepthBuilder: + def __init__(self, node_to_wto_cycle_depth): + self.wto_cycle_depth = WTOCycleDepth() + self.node_to_wto_cycle_depth = node_to_wto_cycle_depth + + def visit(self, cycle: ICFGWTOCycle): + head = cycle.head.getICFGNode() + previous_cycle_depth = self.wto_cycle_depth + self.node_to_wto_cycle_depth[head] = self.wto_cycle_depth + self.wto_cycle_depth = WTOCycleDepth() + self.wto_cycle_depth.add(head) + for component in cycle.components: + component.accept(self) + self.wto_cycle_depth = previous_cycle_depth + + def visitNode(self, node: ICFGWTONode): + self.node_to_wto_cycle_depth[node.getICFGNode()] = self.wto_cycle_depth + + + def __init__(self, graph: ICFG, entry: ICFGNode, scc=None): + self.graph = graph + self.entry = entry + # Interprocedural WTO: `scc` is the set of FunObjVar *ids* in this + # call-graph SCC. Call edges into a callee in the same SCC are then + # followed (becoming back-edges), so a (mutually) recursive function's + # entry shows up as a WTO cycle head -- exactly like the C++ ICFGWTO. + # If no SCC is given, the SCC is the entry's own function. 
+ if scc: + self.scc_fun_ids = set(scc) + else: + self.scc_fun_ids = {entry.getFun().getId()} + self.components: List[ICFGWTOComp] = [] + self.all_components : Set[ICFGWTOComp] = set() + self.head_ref_to_cycle: Dict[ICFGNode, ICFGWTOCycle] = {} + self.node_to_depth: Dict[ICFGNode, int] = {} + + self._num = 0 + self._CDN: Dict[ICFGNode, int] = {} + self._stack: List[ICFGNode] = [] + + def init(self): + self.visit(self.entry, self.components) + self._CDN.clear() + self._stack.clear() + self.build_node_to_depth() + + + def component(self, node: ICFGNode) -> ICFGWTOCycle: + partition = [] + for succ in self.get_successors(node): + if self._CDN.get(succ, 0) == 0: + self.visit(succ, partition) + head = ICFGWTONode(node) + ptr = ICFGWTOCycle(head, partition) + self.head_ref_to_cycle[node] = ptr + return ptr + + + def visit(self, node: ICFGNode, components: List[ICFGWTOComp]): + head = 0 # CycleDepthNumber head(0) + min = 0 # CycleDepthNumber min(0) + loop = False # bool loop + self._stack.append(node) # push(node) + self._num += 1 # _num += CycleDepthNumber(1) + head = self._num # head = _num + self._CDN[node] = head # setCDN(node, head) + for succ in self.get_successors(node): # forEachSuccessor(node, [&](const NodeT* succ) + succ_dfn = self._CDN.get(succ, 0) # CycleDepthNumber succ_dfn = getCDN(succ) + if succ_dfn == 0: # if (succ_dfn == CycleDepthNumber(0)) + min = self.visit(succ, components) # min = visit(succ, partition) + else: + min = succ_dfn # min = succ_dfn + if min <= head: # if (min <= head) + head = min # head = min + loop = True # loop = true + + if head == self._CDN[node]: # if (head == getCDN(node)) + self._CDN[node] = 0x7fffffff # setCDN(node, UINT_MAX) + element = self._stack.pop() + if loop: + while element != node: + self._CDN[element] = 0 # setCDN(element, 0) + element = self._stack.pop() # element = pop() + components.insert(0, self.component(node)) # partition.push_front(component(node)) + else: + components.insert(0, ICFGWTONode(node)) # 
partition.push_front(newNode(node)) + return head + + + def get_successors(self, node: ICFGNode) -> List[ICFGNode]: + # Interprocedural successor relation, mirroring C++ ICFGWTO::getSuccessors. + successors = [] + if isinstance(node, pysvf.CallICFGNode): + for e in node.getOutEdges(): + callee_entry = e.getDstNode() + if callee_entry.getFun().getId() in self.scc_fun_ids: + # caller & callee in the same SCC -> follow the call edge + successors.append(callee_entry) + else: + # different SCC -> shortcut to the local return node + successors.append(node.getRetICFGNode()) + else: + for e in node.getOutEdges(): + succ = e.getDstNode() + # Only stay within the SCC (intra edges, and return edges back + # into an SCC function). + if succ.getFun().getId() in self.scc_fun_ids: + successors.append(succ) + return successors + + + def build_node_to_depth(self): + builder = self.WTOCycleDepthBuilder(self.node_to_depth) + for component in self.components: + component.accept(builder) + + + def __str__(self): + return f"ICFGWTO: {self.components}" + + +class AbstractExecution: + def __init__(self, pag: pysvf.SVFIR): + self.svfir = pag + self.icfg = pag.getICFG() + self.call_site_stack = [] + self.func_to_wto = {} + self.pre_abs_trace = {} + # Owns the post-trace and is the backing store for AbsExtAPI as well + # as the GEP/load/store helpers (getGepByteOffset etc.). Replaces + # the old `self.post_abs_trace` dict so reads/writes on + # `self.post_abs_trace[node]` go through the mgr's trace. + # AbstractStateManager was folded into AbstractInterpretation upstream + # (the AbstractStateManager.h header was removed). Use the + # AbstractInterpretation singleton; it pulls SVFIR from PAG::getPAG() + # internally and does not need an explicit Andersen instance. + self.ai = pysvf.AbstractInterpretation.getAEInstance() + # Alias preserved so existing call-sites `self.post_abs_trace[node]` + # keep working. The mgr supports __getitem__/__setitem__/__contains__. 
+ self.post_abs_trace = self.ai + self.buf_overflow_helper = AEReporter(self.svfir, self.ai) + + self.widen_delay = 3 + self.addressMask = 0x7f000000 + self.flippedAddressMask = (self.addressMask^0xffffffff) + + # ------------------------------------------------------------------ + # Optional hooks for Tasks 1, 2, 4, 5, 6. The pre-implemented + # handleCallSite (in Assignment_3.py) routes ordinary external-API + # calls through updateStateOnExtCall and then nullptrDerefDetection / + # bufOverflowDetection. Override these on your Assignment3 subclass + # if you want your value-summary modelling and bug checkers to run. + # ------------------------------------------------------------------ + def updateAbsState(self, stmt): + pass + + def mergeStatesFromPredecessors(self, block): + return False, AbstractState() + + def updateStateOnExtCall(self, call): + pass + + def bufOverflowDetection(self, node): + pass + + def nullptrDerefDetection(self, node): + pass + + + """ + Initialize the interprocedural WTO per call-graph SCC entry. + + Each (mutually) recursive function's entry node becomes a WTO cycle head + because intra-SCC call edges are turned into back-edges. The same + widening/narrowing machinery used for loops then drives recursion to a + fixpoint via handleICFGCycle; there is no separate "is recursive?" check. + """ + def initWto(self): + callgraphScc = pysvf.getCallGraphSCC() + self._callgraph_scc = callgraphScc + callgraph = self.svfir.getCallGraph() + self._callgraph = callgraph + + # SCC membership comes from pysvf: CallGraphSCC.subNodes(rep) returns + # the call-graph node IDs in the SCC represented by 'rep'. We only + # need it to feed ICFGWTO so intra-SCC call edges become back-edges. + cgid_to_fun = {} + for node in callgraph.getNodes(): + cgid_to_fun[node.getId()] = node.getFunction() + + # Build one interprocedural WTO per call-graph-SCC entry function. An + # SCC entry is a member with a caller outside the SCC (or no caller). 
+        # Intra-SCC members are reached via the entry's interprocedural WTO.
+        self.func_to_wto = {}
+        for node in callgraph.getNodes():
+            fun = node.getFunction()
+            if fun.isDeclaration():
+                continue
+            cgid = node.getId()
+            rep = callgraphScc.repNode(cgid)
+            scc_cgids = set(callgraphScc.subNodes(rep))
+
+            in_edges = list(node.getInEdges())
+            is_entry = (len(in_edges) == 0)
+            for e in in_edges:
+                if e.getSrcID() not in scc_cgids:  # caller outside the SCC
+                    is_entry = True
+            if not is_entry:
+                continue
+
+            func_scc_ids = {cgid_to_fun[c].getId() for c in scc_cgids}
+            wto = ICFGWTO(self.icfg, self.icfg.getFunEntryICFGNode(fun), func_scc_ids)
+            wto.init()
+            # Key by function id: pybind FunObjVar wrappers are not guaranteed to
+            # hash consistently across calls, so don't use the object as a key.
+            self.func_to_wto[fun.getId()] = wto
+
+
+
+    def getVirtualMemAddress(self, idx: int) -> int:
+        """Return the virtual memory address for ``idx``.
+
+        The address is ``self.addressMask + idx``.
+
+        :param idx: Index to turn into an address (``initObjVar`` passes a
+            variable id).
+        :return: ``self.addressMask + idx``.
+        """
+        return self.addressMask + idx
+
+
+    # handleGlobalNode / handleFunction / handleICFGNode are student TODOs
+    # this year and live in Assignment_3.py.
+    #
+    # The description below was written for handleGlobalNode, which used to
+    # be defined at this point in the class:
+    #   1. Initializes the abstract state for the global ICFG node in both
+    #      pre- and post-abstract traces.
+    #   2. Sets the initial value of variable 0 to an address value of 0.
+    #   3. Iterates through all statements in the global ICFG node and
+    #      updates the abstract state accordingly.
+
+    # handleCallSite is part of the analysis driver and lives in
+    # Assignment_3.py.
+
+    def inSameCallGraphSCC(self, fun1, fun2) -> bool:
+        """Return True when ``fun1`` and ``fun2`` share a call-graph SCC representative.
+
+        Returns False when ``initWto`` has not stored the SCC / call graph on
+        this object yet, or when either function has no call-graph node.
+        """
+        scc = getattr(self, "_callgraph_scc", None)
+        cg = getattr(self, "_callgraph", None)
+        if scc is None or cg is None:
+            return False
+        n1 = cg.getCallGraphNodeByFunObj(fun1)
+        n2 = cg.getCallGraphNodeByFunObj(fun2)
+        if n1 is None or n2 is None:
+            return False
+        return scc.repNode(n1.getId()) == scc.repNode(n2.getId())
+
+    # Whitelist of external-call names the assignment expects students to
+    # model in `updateStateOnExtCall`. Mirrors the C++ side: exact match for
+    # assignment-specific and checkpoint stubs; substring match for library
+    # APIs whose Clang lowering yields LLVM intrinsics (e.g. `llvm.memcpy.*`).
+    _EXT_EXACT_STUBS = frozenset({
+        "mem_insert", "str_insert",
+        "UNSAFE_BUFACCESS", "SAFE_BUFACCESS",
+        "UNSAFE_PTRDEREF", "SAFE_PTRDEREF",
+    })
+    _EXT_API_SUBSTRINGS = (
+        "memcpy", "memmove", "memset",
+        "strcpy", "strncpy", "strcat", "strncat",
+        "strlen", "wcslen",
+    )
+
+    def isExternalCallForAssignment(self, func) -> bool:
+        """Return True when ``func``'s name is on the external-call whitelist.
+
+        The name matches if it equals an entry of ``_EXT_EXACT_STUBS`` or
+        contains an entry of ``_EXT_API_SUBSTRINGS`` as a substring.
+        """
+        name = func.getName()
+        if name in self._EXT_EXACT_STUBS:
+            return True
+        return any(key in name for key in self._EXT_API_SUBSTRINGS)
+
+
+    def handleStubFunction(self, callNode: pysvf.CallICFGNode):
+        """Validate an ``svf_assert`` / ``svf_assert_eq`` stub call.
+
+        The call site is first recorded as an assertion point so that
+        ``ensureAllAssertsValidated`` counts it as reached; both stub names
+        are in that method's list of required sites.
+
+        * ``svf_assert(c)``: verified when the interval of argument 0 is
+          exactly ``[1, 1]`` or ``[-1, -1]``; a top interval or any other
+          value fails.
+        * ``svf_assert_eq(a, b)``: verified when the intervals of arguments 0
+          and 1 are equal (same check as SVF's C++ ``svf_assert_eq`` handler).
+
+        Calls to any other function are ignored.
+
+        :param callNode: The call node representing the stub function.
+        :type callNode: pysvf.CallICFGNode
+        :raises AssertionError: If the assertion cannot be verified.
+        """
+        fun_name = callNode.getCalledFunction().getName()
+        if fun_name not in ("svf_assert", "svf_assert_eq"):
+            return
+
+        self.buf_overflow_helper.noteAssertionPoint(callNode)
+        abstract_state = self.post_abs_trace[callNode]
+        arg0 = callNode.getArgument(0).getId()
+
+        if fun_name == "svf_assert_eq":
+            arg1 = callNode.getArgument(1).getId()
+            if abstract_state[arg0].getInterval().equals(abstract_state[arg1].getInterval()):
+                print(f"The assertion ({callNode}) is successfully verified!!")
+                return
+            print(f"svf_assert_eq Fail. {callNode}")
+            # Raised explicitly: a bare `assert False` is removed under `python -O`.
+            raise AssertionError(f"svf_assert_eq Fail. {callNode}")
+
+        # Check if the interval for the argument is infinite
+        if abstract_state[arg0].getInterval().isTop():
+            print(f"svf_assert Fail. {callNode}")
+            raise AssertionError(f"svf_assert Fail. {callNode}")
+        if (abstract_state[arg0].getInterval().equals(IntervalValue(1, 1)) or
+                abstract_state[arg0].getInterval().equals(IntervalValue(-1, -1))):
+            print(f"The assertion ({callNode}) is successfully verified!!")
+        else:
+            print(f"The assertion ({callNode}) is unsatisfiable!!")
+            raise AssertionError(f"The assertion ({callNode}) is unsatisfiable!!")
+
+
+    def handleCheckpointStubs(self, callNode: pysvf.CallICFGNode):
+        """SAFE_/UNSAFE_ checkpoints: ground-truth bug markers.
+
+        Records the call site in ``assert_points`` so
+        :py:meth:`ensureAllAssertsValidated` can verify the student's control
+        flow reached it. The harness reports a bug iff its independent
+        ground-truth check (bypassing the student's predicates) sees one.
+        """
+        self.buf_overflow_helper.noteAssertionPoint(callNode)
+        fun_name = callNode.getCalledFunction().getName()
+        abstract_state = self.post_abs_trace[callNode]
+        if fun_name in ("SAFE_BUFACCESS", "UNSAFE_BUFACCESS"):
+            if callNode.arg_size() < 2:
+                return
+            length = abstract_state[callNode.getArgument(1).getId()].getInterval()
+            if length.isBottom():
+                length = IntervalValue(0)
+            ptr = callNode.getArgument(0)
+            # The access is checked at offset `length - 1`.
+            if not self._harnessSafeAccess(abstract_state, ptr, length - IntervalValue(1)):
+                self.buf_overflow_helper.reportBufOverflow(
+                    callNode, f"buffer-overflow at {callNode}")
+        elif fun_name in ("SAFE_PTRDEREF", "UNSAFE_PTRDEREF"):
+            if callNode.arg_size() < 1:
+                return
+            ptr = callNode.getArgument(0)
+            if not self._harnessSafeDeref(abstract_state, ptr):
+                self.buf_overflow_helper.reportBufOverflow(
+                    callNode, f"nullptr-deref at {callNode}")
+
+    def _harnessSafeAccess(self, abstract_state, value, length: IntervalValue) -> bool:
+        """Return False when an access at offset ``length`` may exceed a pointee of ``value``.
+
+        Non-address values are treated as safe. Black-hole / null addresses
+        and objects without a constant byte size are skipped.
+        """
+        ptr_val = abstract_state[value.getId()]
+        if not ptr_val.isAddr():
+            return True
+        for addr in ptr_val.getAddrs():
+            if pysvf.AbstractState.isBlackHoleObjAddr(addr) or pysvf.AbstractState.isNullMem(addr):
+                continue
+            obj_id = abstract_state.getIDFromAddr(addr)
+            base_obj = self.svfir.getBaseObject(obj_id)
+            if base_obj is None or base_obj.isBlackHoleObj() or not base_obj.isConstantByteSize():
+                continue
+            size = base_obj.getByteSizeOfObj()
+            gnode = self.svfir.getGNode(obj_id)
+            # A GepObjVar contributes its constant field index as the base offset.
+            base_offset = IntervalValue(gnode.getConstantFieldIdx()) if isinstance(gnode, pysvf.GepObjVar) else IntervalValue(0)
+            offset = base_offset + length
+            if int(offset.ub()) >= size:
+                return False
+        return True
+
+    def _harnessSafeDeref(self, abstract_state, value) -> bool:
+        """Return False when dereferencing ``value`` may hit null or freed memory.
+
+        A missing value or a constant null pointer is unsafe; a non-address
+        value is treated as safe; black-hole addresses are skipped.
+        """
+        if value is None or isinstance(value, pysvf.ConstNullPtrValVar):
+            return False
+        abs_val = abstract_state[value.getId()]
+        if not abs_val.isAddr():
+            return True
+        for addr in abs_val.getAddrs():
+            if pysvf.AbstractState.isBlackHoleObjAddr(addr):
+                continue
+            if pysvf.AbstractState.isNullMem(addr):
+                return False
+            if abstract_state.isFreedMem(addr):
+                return False
+        return True
+
+
+    # mergeStatesFromPredecessors is a student TODO this year and lives in
+    # Assignment_3.py.
+
+    def isBranchFeasible(self, intraEdge: pysvf.IntraCFGEdge, abstractState: pysvf.AbstractState) -> bool :
+        """Return whether ``intraEdge`` can be taken under ``abstractState``.
+
+        When the first in-edge of the edge condition is a ``CmpStmt`` the
+        compare-branch check is used; in every other case (no in-edges, or a
+        different statement kind) the switch-branch check is used.
+        """
+        cmp_var = intraEdge.getCondition()
+        cmp_in_edges = cmp_var.getInEdges()
+        if len(cmp_in_edges) != 0 and isinstance(cmp_in_edges[0], pysvf.CmpStmt):
+            return pysvf.AbstractState.isCmpBranchFeasible(
+                self.svfir, cmp_in_edges[0], intraEdge.getSuccessorCondValue(), abstractState)
+        return pysvf.AbstractState.isSwitchBranchFeasible(
+            self.svfir, cmp_var, intraEdge.getSuccessorCondValue(), abstractState)
+
+
+    def ensureAllAssertsValidated(self):
+        """Verify the student's control flow reached every ground-truth stub.
+
+        Recognised stubs:
+          * ``svf_assert`` / ``svf_assert_eq``       -- abstract-state assertions
+          * ``UNSAFE_PTRDEREF`` / ``SAFE_PTRDEREF``  -- null-deref ground truth
+          * ``UNSAFE_BUFACCESS`` / ``SAFE_BUFACCESS`` -- buffer-access ground truth
+
+        A missed stub site means the student's control-flow logic skipped a
+        place the grader cares about. Additionally requires that the number
+        of reported bugs is at least the number of ``UNSAFE_*`` stubs.
+
+        :raises AssertionError: If a stub call site was never recorded as an
+            assertion point, or if fewer bugs were reported than there are
+            ``UNSAFE_*`` stub call sites.
+        """
+        assert_stubs = {"svf_assert", "svf_assert_eq"}
+        checkpoint_stubs = {"UNSAFE_PTRDEREF", "SAFE_PTRDEREF",
+                            "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"}
+        unsafe_to_be_verified = 0
+        for node in self.svfir.getICFG().getNodes():
+            if not isinstance(node, pysvf.CallICFGNode):
+                continue
+            called_function = node.getCalledFunction()
+            if not called_function:
+                continue
+            name = called_function.getName()
+            if name not in assert_stubs and name not in checkpoint_stubs:
+                continue
+            if name.startswith("UNSAFE_"):
+                unsafe_to_be_verified += 1
+            if not self.buf_overflow_helper.isAssertionPoint(node):
+                raise AssertionError(
+                    f"The stub function callsite ({name}) was not reached by "
+                    f"the student's control flow: {node}"
+                )
+        # Raised explicitly (same exception type and message as before) so the
+        # check still runs under `python -O`, like the per-site check above.
+        if unsafe_to_be_verified > len(self.buf_overflow_helper.node_to_bug_info):
+            raise AssertionError(
+                "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported")
+
+
+
+
+    # analyse / updateAbsState / handleCallSite / reportBufOverflow /
+    # reportNullDeref live on the student side in Assignment_3.py.
+
+    """
+    Initialize an object variable in the abstract state.
+
+    This function determines the initial abstract value for a given object variable
+    based on its type and properties. It handles various types of object variables,
+    including constants, global variables, and complex structures, and assigns
+    appropriate abstract values such as intervals or addresses.
+
+    Steps:
+    1. Retrieve the base object associated with the given object variable.
+    2. Check the type of the object variable:
+       - For constant integer or floating-point variables, return their exact value as an interval.
+       - For null pointers, return an interval representing zero.
+       - For global variables, return an address value based on a virtual memory address.
+       - For constant arrays or structures, return a top interval to represent unknown values.
+    3. For other types of object variables, return an address value based on a virtual memory address.
+
+    :param objVar: The object variable to initialize.
+    :type objVar: pysvf.ObjVar
+    :return: The initialized abstract value for the object variable.
+    :rtype: pysvf.AbstractValue
+    """
+    def initObjVar(self, objVar: pysvf.ObjVar):
+        var_id = objVar.getId()
+        obj = self.svfir.getBaseObject(var_id).asBaseObjVar()
+        if obj.isConstDataObjVar() or obj.isConstantArray() or obj.isConstantStruct():
+            if isinstance(objVar, pysvf.ConstIntObjVar):
+                numeral = objVar.getSExtValue()
+                return IntervalValue(numeral, numeral)
+
+            elif isinstance(objVar, pysvf.ConstFPObjVar):
+                return IntervalValue(objVar.getFPValue(), objVar.getFPValue())
+
+            elif isinstance(objVar, pysvf.ConstNullPtrObjVar):
+                return IntervalValue(0,0)
+
+            elif isinstance(objVar, pysvf.GlobalObjVar):
+                return AddressValue(self.getVirtualMemAddress(var_id))
+
+            elif obj.isConstantArray() or obj.isConstantStruct():
+                return IntervalValue.top()
+            else:
+                return IntervalValue.top()
+        else:
+            return AddressValue(self.getVirtualMemAddress(var_id))
+
+
+    def updateStateOnAddr(self, addr: pysvf.AddrStmt):
+        """Initialise the RHS object of ``addr`` and copy its value to the LHS."""
+        node = addr.getICFGNode()
+        abstract_state = self.post_abs_trace[node]
+        assert isinstance(abstract_state, AbstractState)
+        abstract_state[addr.getRHSVarID()] = AbstractValue(self.initObjVar(addr.getRHSVar().asObjVar()))
+        abstract_state[addr.getLHSVarID()] = abstract_state[addr.getRHSVarID()]
+
+
+    def updateStateOnCmp(self, cmp: pysvf.CmpStmt):
+        """Evaluate the comparison ``cmp`` and store the result in its result variable.
+
+        Two cases are handled, each only when BOTH operands qualify:
+
+        * both operands are intervals: the result is the interval computed by
+          the interval comparison for the predicate;
+        * both operands are addresses: equality predicates compare the
+          address sets, ordering predicates compare the single addresses when
+          both sets have exactly one element, and the result is top otherwise.
+
+        Signed and unsigned integer predicates are treated alike in both
+        cases.
+
+        :param cmp: The compare statement to evaluate.
+        :type cmp: pysvf.CmpStmt
+        """
+        node = cmp.getICFGNode()
+        abstract_state = self.post_abs_trace[node]
+        assert isinstance(abstract_state, AbstractState)
+        op0 = cmp.getOpVar(0)
+        op1 = cmp.getOpVar(1)
+        res = cmp.getResId()
+        # Both operands must be intervals (the second test used to re-check op0).
+        if abstract_state.getVar(op0.getId()).isInterval() and abstract_state.getVar(op1.getId()).isInterval():
+            # Any predicate not matched below keeps this default of [0, 0].
+            res_val = IntervalValue(0)
+            lhs = abstract_state[op0.getId()].getInterval()
+            rhs = abstract_state[op1.getId()].getInterval()
+            predicate = cmp.getPredicate()
+            if predicate in (Predicate.ICMP_EQ, Predicate.FCMP_OEQ, Predicate.FCMP_UEQ):
+                res_val = lhs.eq_interval(rhs)
+            elif predicate in (Predicate.ICMP_NE, Predicate.FCMP_ONE, Predicate.FCMP_UNE):
+                res_val = lhs.ne_interval(rhs)
+            elif predicate in (Predicate.ICMP_SGT, Predicate.ICMP_UGT, Predicate.FCMP_OGT, Predicate.FCMP_UGT):
+                res_val = (lhs > rhs)
+            elif predicate in (Predicate.ICMP_SGE, Predicate.ICMP_UGE, Predicate.FCMP_OGE, Predicate.FCMP_UGE):
+                res_val = (lhs >= rhs)
+            elif predicate in (Predicate.ICMP_SLT, Predicate.ICMP_ULT, Predicate.FCMP_OLT, Predicate.FCMP_ULT):
+                res_val = (lhs < rhs)
+            elif predicate in (Predicate.ICMP_SLE, Predicate.ICMP_ULE, Predicate.FCMP_OLE, Predicate.FCMP_ULE):
+                res_val = (lhs <= rhs)
+            elif predicate == Predicate.FCMP_FALSE:
+                res_val = IntervalValue(0,0)
+            elif predicate == Predicate.FCMP_TRUE:
+                res_val = IntervalValue(1,1)
+            abstract_state[res] = AbstractValue(res_val)
+        # Both operands must be addresses (the second test used to re-check op0).
+        if abstract_state.getVar(op0.getId()).isAddr() and abstract_state.getVar(op1.getId()).isAddr():
+            res_val = None
+            lhs = abstract_state[op0.getId()]
+            rhs = abstract_state[op1.getId()]
+            predicate = cmp.getPredicate()
+            # Ordering predicates are only decided for two singleton address sets.
+            both_single = len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1
+
+            if predicate in [Predicate.ICMP_EQ, Predicate.FCMP_OEQ, Predicate.FCMP_UEQ]:
+                if both_single:
+                    res_val = IntervalValue(lhs.equals(rhs))
+                else:
+                    if lhs.getAddrs().hasIntersect(rhs.getAddrs()):
+                        res_val = IntervalValue.top()
+                    else:
+                        res_val = IntervalValue(0)
+
+            elif predicate in [Predicate.ICMP_NE, Predicate.FCMP_ONE, Predicate.FCMP_UNE]:
+                if both_single:
+                    res_val = IntervalValue(not lhs.equals(rhs))
+                else:
+                    if lhs.getAddrs().hasIntersect(rhs.getAddrs()):
+                        res_val = IntervalValue.top()
+                    else:
+                        res_val = IntervalValue(1)
+
+            elif predicate in [Predicate.ICMP_UGT, Predicate.ICMP_SGT, Predicate.FCMP_OGT, Predicate.FCMP_UGT]:
+                if both_single:
+                    res_val = IntervalValue(next(iter(lhs.getAddrs())) > next(iter(rhs.getAddrs())))
+                else:
+                    res_val = IntervalValue.top()
+
+            elif predicate in [Predicate.ICMP_UGE, Predicate.ICMP_SGE, Predicate.FCMP_OGE, Predicate.FCMP_UGE]:
+                if both_single:
+                    res_val = IntervalValue(next(iter(lhs.getAddrs())) >= next(iter(rhs.getAddrs())))
+                else:
+                    res_val = IntervalValue.top()
+
+            elif predicate in [Predicate.ICMP_ULT, Predicate.ICMP_SLT, Predicate.FCMP_OLT, Predicate.FCMP_ULT]:
+                if both_single:
+                    res_val = IntervalValue(next(iter(lhs.getAddrs())) < next(iter(rhs.getAddrs())))
+                else:
+                    res_val = IntervalValue.top()
+
+            elif predicate in [Predicate.ICMP_ULE, Predicate.ICMP_SLE, Predicate.FCMP_OLE, Predicate.FCMP_ULE]:
+                if both_single:
+                    res_val = IntervalValue(next(iter(lhs.getAddrs())) <= next(iter(rhs.getAddrs())))
+                else:
+                    res_val = IntervalValue.top()
+
+            elif predicate == Predicate.FCMP_FALSE:
+                res_val = IntervalValue(0, 0)
+
+            elif predicate == Predicate.FCMP_TRUE:
+                res_val = IntervalValue(1, 1)
+
+            else:
+                assert False, "undefined compare"
+
+            abstract_state[res] = res_val
+
+
+
+    def updateStateOnCall(self, call: pysvf.CallPE):
+        """Copy the RHS value of the call parameter edge to its LHS variable."""
+        node = call.getICFGNode()
+        abstract_state = self.post_abs_trace[node]
+        lhs = call.getLHSVarID()
+        rhs = call.getRHSVarID()
+        abstract_state[lhs] = abstract_state[rhs]
+
+
+    def updateStateOnRet(self, ret: pysvf.RetPE):
+        """Copy the RHS value of the return parameter edge to its LHS variable."""
+        node = ret.getICFGNode()
+        abstract_state = self.post_abs_trace[node]
+        abstract_state[ret.getLHSVarID()] = abstract_state[ret.getRHSVarID()]
+
+
+
+    def updateStateOnSelect(self, select: pysvf.SelectStmt):
+        node = select.getICFGNode()
+        abstract_state = self.post_abs_trace[node]
+        assert isinstance(abstract_state, AbstractState)
+        res = select.get_res_id()
+        tval = select.get_true_value().getId()
+        fval =
select.get_false_value().getId() + cond = select.getCondition().getId() + if abstract_state[cond].getInterval().isInterval(): + if abstract_state[cond].getInterval().is_zero(): + abstract_state[res] = abstract_state[fval] + else: + abstract_state[res] = abstract_state[tval] + else: + abstract_state[res].joinWith(abstract_state[tval]) + abstract_state[res].joinWith(abstract_state[fval]) + + + + + # getAccessOffset is a student TODO this year and lives in Assignment_3.py. diff --git a/Assignment-3/Python/AEReporter.py b/Assignment-3/Python/AEReporter.py index 7d3250e..9b1ab07 100644 --- a/Assignment-3/Python/AEReporter.py +++ b/Assignment-3/Python/AEReporter.py @@ -1,219 +1,13 @@ -from abc import abstractmethod +"""Bug-reporter implementations for the Assignment-3 harness. -from pysvf import ICFG, ICFGNode -from typing import List, Dict, Set, Optional -import pysvf -import faulthandler -faulthandler.enable() +Owns the AEReporter class — bug accumulation, assertion-point tracking, +GEP / strlen / memcpy helpers shared by the checkers. Everything else +(AbstractExecution driver, ICFG/WTO traversal, call-site dispatch) lives +in AEHelper.py. 
+""" import pysvf from pysvf import IntervalValue, AddressValue, AbstractValue, AbstractState -import sys -from pysvf.enums import OpCode, Predicate - -class WTOCycleDepth: - def __init__(self): - self._heads: List[ICFGNode] = [] - - def add(self, head: ICFGNode): - self._heads.append(head) - - def __iter__(self): - return iter(self._heads) - - def __str__(self): - return f"{self._heads}" - - def __repr__(self): - return f"{self._heads}" - - def compare(self, other): - if self == other: - return 0 - this_it = iter(self) - other_it = iter(other) - while this_it: - if not other_it: - return 1 - elif this_it == other_it: - this_it = next(this_it) - other_it = next(other_it) - else: - return 2 - if not other_it: - return 0 - else: - return -1 - - def __lt__(self, other): - return self.compare(other) == -1 - - def __le__(self, other): - return self.compare(other) <= 0 - - def __eq__(self, other): - return self.compare(other) == 0 - - def __ge__(self, other): - return self.compare(other) >= 0 - - def __gt__(self, other): - return self.compare(other) == 1 - - - -class ICFGWTOComp: - def __init__(self, node: ICFGNode): - self.node = node - - - def getICFGNode(self) -> ICFGNode: - return self.node - - @abstractmethod - def accept(self, visitor): - pass - - -class ICFGWTONode(ICFGWTOComp): - def __init__(self, node: ICFGNode): - self.node = node - - def accept(self, visitor): - visitor.visitNode(self) - - -class ICFGWTOCycle(ICFGWTOComp): - def __init__(self, head: ICFGWTONode, components: List[ICFGWTOComp]): - self.head = head - self.components = components - - def accept(self, visitor): - visitor.visit(self) - - -class ICFGWTO: - - class WTOCycleDepthBuilder: - def __init__(self, node_to_wto_cycle_depth): - self.wto_cycle_depth = WTOCycleDepth() - self.node_to_wto_cycle_depth = node_to_wto_cycle_depth - - def visit(self, cycle: ICFGWTOCycle): - head = cycle.head.getICFGNode() - previous_cycle_depth = self.wto_cycle_depth - self.node_to_wto_cycle_depth[head] = 
self.wto_cycle_depth - self.wto_cycle_depth = WTOCycleDepth() - self.wto_cycle_depth.add(head) - for component in cycle.components: - component.accept(self) - self.wto_cycle_depth = previous_cycle_depth - - def visitNode(self, node: ICFGWTONode): - self.node_to_wto_cycle_depth[node.getICFGNode()] = self.wto_cycle_depth - - - def __init__(self, graph: ICFG, entry: ICFGNode, scc=None): - self.graph = graph - self.entry = entry - # Interprocedural WTO: `scc` is the set of FunObjVar *ids* in this - # call-graph SCC. Call edges into a callee in the same SCC are then - # followed (becoming back-edges), so a (mutually) recursive function's - # entry shows up as a WTO cycle head -- exactly like the C++ ICFGWTO. - # If no SCC is given, the SCC is the entry's own function. - if scc: - self.scc_fun_ids = set(scc) - else: - self.scc_fun_ids = {entry.getFun().getId()} - self.components: List[ICFGWTOComp] = [] - self.all_components : Set[ICFGWTOComp] = set() - self.head_ref_to_cycle: Dict[ICFGNode, ICFGWTOCycle] = {} - self.node_to_depth: Dict[ICFGNode, int] = {} - - self._num = 0 - self._CDN: Dict[ICFGNode, int] = {} - self._stack: List[ICFGNode] = [] - - def init(self): - self.visit(self.entry, self.components) - self._CDN.clear() - self._stack.clear() - self.build_node_to_depth() - - - def component(self, node: ICFGNode) -> ICFGWTOCycle: - partition = [] - for succ in self.get_successors(node): - if self._CDN.get(succ, 0) == 0: - self.visit(succ, partition) - head = ICFGWTONode(node) - ptr = ICFGWTOCycle(head, partition) - self.head_ref_to_cycle[node] = ptr - return ptr - - - def visit(self, node: ICFGNode, components: List[ICFGWTOComp]): - head = 0 # CycleDepthNumber head(0) - min = 0 # CycleDepthNumber min(0) - loop = False # bool loop - self._stack.append(node) # push(node) - self._num += 1 # _num += CycleDepthNumber(1) - head = self._num # head = _num - self._CDN[node] = head # setCDN(node, head) - for succ in self.get_successors(node): # forEachSuccessor(node, [&](const 
NodeT* succ) - succ_dfn = self._CDN.get(succ, 0) # CycleDepthNumber succ_dfn = getCDN(succ) - if succ_dfn == 0: # if (succ_dfn == CycleDepthNumber(0)) - min = self.visit(succ, components) # min = visit(succ, partition) - else: - min = succ_dfn # min = succ_dfn - if min <= head: # if (min <= head) - head = min # head = min - loop = True # loop = true - - if head == self._CDN[node]: # if (head == getCDN(node)) - self._CDN[node] = 0x7fffffff # setCDN(node, UINT_MAX) - element = self._stack.pop() - if loop: - while element != node: - self._CDN[element] = 0 # setCDN(element, 0) - element = self._stack.pop() # element = pop() - components.insert(0, self.component(node)) # partition.push_front(component(node)) - else: - components.insert(0, ICFGWTONode(node)) # partition.push_front(newNode(node)) - return head - - - def get_successors(self, node: ICFGNode) -> List[ICFGNode]: - # Interprocedural successor relation, mirroring C++ ICFGWTO::getSuccessors. - successors = [] - if isinstance(node, pysvf.CallICFGNode): - for e in node.getOutEdges(): - callee_entry = e.getDstNode() - if callee_entry.getFun().getId() in self.scc_fun_ids: - # caller & callee in the same SCC -> follow the call edge - successors.append(callee_entry) - else: - # different SCC -> shortcut to the local return node - successors.append(node.getRetICFGNode()) - else: - for e in node.getOutEdges(): - succ = e.getDstNode() - # Only stay within the SCC (intra edges, and return edges back - # into an SCC function). - if succ.getFun().getId() in self.scc_fun_ids: - successors.append(succ) - return successors - - - def build_node_to_depth(self): - builder = self.WTOCycleDepthBuilder(self.node_to_depth) - for component in self.components: - component.accept(builder) - - - def __str__(self): - return f"ICFGWTO: {self.components}" - class AEReporter: @@ -222,11 +16,11 @@ class AEReporter: abstract-interpretation harness. 
""" - def __init__(self, svfir: pysvf.SVFIR, svf_state_mgr: 'pysvf.AbstractInterpretation' = None): + def __init__(self, svfir: pysvf.SVFIR, ai: 'pysvf.AbstractInterpretation' = None): # Map ICFGNode -> diagnostic message for each detected bug. self.node_to_bug_info = {} self.svfir = svfir - self.svf_state_mgr = svf_state_mgr + self.ai = ai # Harness bookkeeping: stub call sites the analysis actually reached. self.assert_points = set() @@ -237,11 +31,11 @@ def isAssertionPoint(self, call) -> bool: return call in self.assert_points def getByteOffset(self, abstract_state: pysvf.AbstractState, gep: pysvf.GepStmt) -> pysvf.IntervalValue: - return self.svf_state_mgr.getGepByteOffset(gep) + return self.ai.getGepByteOffset(gep) def getGepObjAddrs(self, abstract_state: pysvf.AbstractState, var_id: int, offset: pysvf.IntervalValue) -> pysvf.AddressValue: pointer = self.svfir.getGNode(var_id) - return self.svf_state_mgr.getGepObjAddrs(pointer, offset) + return self.ai.getGepObjAddrs(pointer, offset) def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): ptr_val = abstract_state[var_id] @@ -255,17 +49,19 @@ def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): return None def getAllocaInstByteSize(self, abstract_state: pysvf.AbstractState, addr: pysvf.AddrStmt) -> int: - return self.svf_state_mgr.getAllocaInstByteSize(addr) + return self.ai.getAllocaInstByteSize(addr) def reportBufOverflow(self, node, msg): self.node_to_bug_info[node] = msg def printReport(self): - if len(self.node_to_bug_info) > 0: - print("######################Buffer Overflow ({} found)######################".format(len(self.node_to_bug_info))) - print("---------------------------------------------") - for node, msg in self.node_to_bug_info.items(): - print(f"{node}: {msg}\n---------------------------------------------") + if not self.node_to_bug_info: + return + print("###################### Bug Reports ({} total) ######################".format( + 
len(self.node_to_bug_info))) + print("---------------------------------------------") + for node, msg in self.node_to_bug_info.items(): + print(f"{node}: {msg}\n---------------------------------------------") def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, src: pysvf.SVFVar, len: pysvf.IntervalValue, start_idx: int): assert isinstance(abstractState, pysvf.AbstractState), "abstractState is not a pysvf.AbstractState" @@ -341,586 +137,3 @@ def getStrlen(self, abstractState, strValue): return pysvf.IntervalValue(0, pysvf.Options.max_field_limit()) else: return pysvf.IntervalValue(length * elem_size) - - -class AbstractExecution: - def __init__(self, pag: pysvf.SVFIR): - self.svfir = pag - self.icfg = pag.getICFG() - self.call_site_stack = [] - self.func_to_wto = {} - self.pre_abs_trace = {} - # Owns the post-trace and is the backing store for AbsExtAPI as well - # as the GEP/load/store helpers (getGepByteOffset etc.). Replaces - # the old `self.post_abs_trace` dict so reads/writes on - # `self.post_abs_trace[node]` go through the mgr's trace. - # AbstractStateManager was folded into AbstractInterpretation upstream - # (the AbstractStateManager.h header was removed). Use the - # AbstractInterpretation singleton; it pulls SVFIR from PAG::getPAG() - # internally and does not need an explicit Andersen instance. - self.svf_state_mgr = pysvf.AbstractInterpretation.getAEInstance() - # Alias preserved so existing call-sites `self.post_abs_trace[node]` - # keep working. The mgr supports __getitem__/__setitem__/__contains__. - self.post_abs_trace = self.svf_state_mgr - self.buf_overflow_helper = AEReporter(self.svfir, self.svf_state_mgr) - - self.widen_delay = 3 - self.addressMask = 0x7f000000 - self.flippedAddressMask = (self.addressMask^0xffffffff) - - - """ - Initialize the interprocedural WTO per call-graph SCC entry. 
- - Each (mutually) recursive function's entry node becomes a WTO cycle head - because intra-SCC call edges are turned into back-edges. The same - widening/narrowing machinery used for loops then drives recursion to a - fixpoint via handleICFGCycle; there is no separate "is recursive?" check. - """ - def initWto(self): - callgraphScc = pysvf.getCallGraphSCC() - self._callgraph_scc = callgraphScc - callgraph = self.svfir.getCallGraph() - self._callgraph = callgraph - - # SCC membership comes from pysvf: CallGraphSCC.subNodes(rep) returns - # the call-graph node IDs in the SCC represented by 'rep'. We only - # need it to feed ICFGWTO so intra-SCC call edges become back-edges. - cgid_to_fun = {} - for node in callgraph.getNodes(): - cgid_to_fun[node.getId()] = node.getFunction() - - # Build one interprocedural WTO per call-graph-SCC entry function. An - # SCC entry is a member with a caller outside the SCC (or no caller). - # Intra-SCC members are reached via the entry's interprocedural WTO. - self.func_to_wto = {} - for node in callgraph.getNodes(): - fun = node.getFunction() - if fun.isDeclaration(): - continue - cgid = node.getId() - rep = callgraphScc.repNode(cgid) - scc_cgids = set(callgraphScc.subNodes(rep)) - - in_edges = list(node.getInEdges()) - is_entry = (len(in_edges) == 0) - for e in in_edges: - if e.getSrcID() not in scc_cgids: # caller outside the SCC - is_entry = True - if not is_entry: - continue - - func_scc_ids = {cgid_to_fun[c].getId() for c in scc_cgids} - wto = ICFGWTO(self.icfg, self.icfg.getFunEntryICFGNode(fun), func_scc_ids) - wto.init() - # Key by function id: pybind FunObjVar wrappers are not guaranteed to - # hash consistently across calls, so don't use the object as a key. - self.func_to_wto[fun.getId()] = wto - - - - """ - Placeholder for additional documentation or functionality. 
- """ - def getVirtualMemAddress(self, idx: int) -> int: - return self.addressMask + idx - - - """ - Handle the global ICFG node by initializing its abstract state and updating it based on its statements. - - This function performs the following steps: - 1. Initializes the abstract state for the global ICFG node in both pre- and post-abstract traces. - 2. Sets the initial value of variable 0 to an address value of 0. - 3. Iterates through all statements in the global ICFG node and updates the abstract state accordingly. - """ - def handleGlobalNode(self): - self.post_abs_trace[self.icfg.getGlobalICFGNode()] = AbstractState() - self.pre_abs_trace[self.icfg.getGlobalICFGNode()] = self.post_abs_trace[self.icfg.getGlobalICFGNode()] - self.post_abs_trace[self.icfg.getGlobalICFGNode()][0] = AbstractValue(AddressValue(set())) - for stmt in self.icfg.getGlobalICFGNode().getSVFStmts(): - self.updateAbsState(stmt) - - - """ - Iterate a function's interprocedural WTO components. - - Singletons are handled directly; cycles (loop heads AND recursive-function - entries) are driven to a fixpoint by handleICFGCycle. Recursive callsites - are filtered out earlier in handleCallSite via ``inSameCallGraphSCC``, so - handleFunction never re-enters itself. - """ - def handleFunction(self, funEntry: pysvf.ICFGNode): - fun = funEntry.getFun() - wto = self.func_to_wto.get(fun.getId()) - if wto is None: - return - for comp in wto.components: - if isinstance(comp, ICFGWTOCycle): - self.handleICFGCycle(comp) - elif isinstance(comp, ICFGWTONode): - self.handleICFGNode(comp.getICFGNode()) - - """ - Handle a singleton WTO - This function handles a node in the ICFG by merging the abstract states of its predecessors, - updating the abstract state based on the node's statements, and handling stub functions. - It also checks if the abstract state has reached a fixpoint and returns the result. 
- Return true means the abstract state has changed - Return false means the abstract state has reached a fixpoint or is infeasible - - """ - def handleICFGNode(self, node: pysvf.ICFGNode): - is_feasible, self.pre_abs_trace[node] = self.mergeStatesFromPredecessors(node) - if not is_feasible: - print(f"Infeasible for node {node.getId()}") - return False - - # Store the last abstract state, used to check if the abstract state has reached a fixpoint - last_as = self.post_abs_trace[node] if node in self.post_abs_trace else None - self.post_abs_trace[node] = self.pre_abs_trace[node] - - for stmt in node.getSVFStmts(): - self.updateAbsState(stmt) - self.bufOverflowDetection(stmt) - - if isinstance(node, pysvf.CallICFGNode): - self.handleCallSite(node) - - # If the abstract state is the same as the last abstract state, return false because we have reached fixpoint - if last_as is not None and self.post_abs_trace[node] == last_as: - return False - - return True - - """ - Handle a call site in the control flow graph - - This function processes a call site by updating the abstract state, handling the called function, - and managing the call stack. It resumes the execution state after the function call. 
- return void - """ - def handleCallSite(self, node: pysvf.CallICFGNode): - fun_name = node.getCalledFunction().getName() - print(fun_name) - if fun_name == "svf_assert" or fun_name == "svf_assert_eq": - self.handleStubFunction(node) - elif fun_name in ("UNSAFE_BUFACCESS", "SAFE_BUFACCESS", - "UNSAFE_PTRDEREF", "SAFE_PTRDEREF"): - self.handleCheckpointStubs(node) - elif fun_name == "nd" or fun_name == "rand": - lhs_id = node.getRetICFGNode().getActualRet().getId() - self.post_abs_trace[node][lhs_id] = AbstractValue(IntervalValue.top()) - elif fun_name == "mem_insert" or fun_name == "str_insert": #isExternalCallForAssignment - self.updateStateOnExtCall(node) - elif pysvf.isExtCall(node.getCalledFunction()): - pass - else: - # Skip recursive callsites (within the same call-graph SCC): the - # interprocedural WTO built in initWto already encoded this as a - # back-edge, so the outer cycle's widen/narrow iteration in - # handleICFGCycle drives the recursion to a fixpoint. Mirrors - # SVF's AbstractInterpretation::skipRecursiveCall. - callee = node.getCalledFunction() - caller = node.getCaller() - if caller is not None and self.inSameCallGraphSCC(caller, callee): - return - self.handleFunction(self.svfir.getICFG().getFunEntryICFGNode(callee)) - - def inSameCallGraphSCC(self, fun1, fun2) -> bool: - scc = getattr(self, "_callgraph_scc", None) - cg = getattr(self, "_callgraph", None) - if scc is None or cg is None: - return False - n1 = cg.getCallGraphNodeByFunObj(fun1) - n2 = cg.getCallGraphNodeByFunObj(fun2) - if n1 is None or n2 is None: - return False - return scc.repNode(n1.getId()) == scc.repNode(n2.getId()) - - - """ - Handle stub functions such as 'svf_assert' and 'OVERFLOW'. - - This function processes specific stub functions in the program's control flow graph (CFG) - to validate assertions or detect buffer overflows. It performs the following tasks: - - 1. For 'svf_assert': - - Adds the call node to the set of assertion points. 
- - Checks the abstract state of the argument to determine if the assertion is valid. - - If the assertion is invalid or unsatisfiable, raises an error. - - 2. For 'OVERFLOW': - - Adds the call node to the set of assertion points. - - Checks if the right-hand side (RHS) value is an address. - - Iterates through the addresses to calculate the access offset and compare it - with the object size to detect buffer overflows. - - If a buffer overflow is detected, records the overflow node and prints a success message. - - If no overflow is detected, raises an error. - - :param call_node: The call node representing the stub function in the CFG. - :type call_node: pysvf.CallICFGNode - """ - def handleStubFunction(self, callNode: pysvf.CallICFGNode): - # Get the callee function associated with the call site - if callNode.getCalledFunction().getName() == "svf_assert": - self.buf_overflow_helper.noteAssertionPoint(callNode) - # If the condition is false, the program is infeasible - arg0 = callNode.getArgument(0).getId() - abstract_state = self.post_abs_trace[callNode] - - # Check if the interval for the argument is infinite - if abstract_state[arg0].getInterval().isTop(): - print(f"svf_assert Fail. {callNode}") - assert False - else: - if (abstract_state[arg0].getInterval().equals(IntervalValue(1, 1)) or - abstract_state[arg0].getInterval().equals(IntervalValue(-1, -1))): - print(f"The assertion ({callNode}) is successfully verified!!") - else: - print(f"The assertion ({callNode}) is unsatisfiable!!") - assert False - - - def handleCheckpointStubs(self, callNode: pysvf.CallICFGNode): - """SAFE_/UNSAFE_ checkpoints: ground-truth bug markers. - - Records the call site in ``assert_points`` so - :py:meth:`ensureAllAssertsValidated` can verify the student's control - flow reached it. The harness reports a bug iff its independent - ground-truth check (bypassing the student's predicates) sees one. 
- """ - self.buf_overflow_helper.noteAssertionPoint(callNode) - fun_name = callNode.getCalledFunction().getName() - abstract_state = self.post_abs_trace[callNode] - if fun_name in ("SAFE_BUFACCESS", "UNSAFE_BUFACCESS"): - if callNode.arg_size() < 2: - return - length = abstract_state[callNode.getArgument(1).getId()].getInterval() - if length.isBottom(): - length = IntervalValue(0) - ptr = callNode.getArgument(0) - if not self._harnessSafeAccess(abstract_state, ptr, length - IntervalValue(1)): - self.buf_overflow_helper.reportBufOverflow( - callNode, f"buffer-overflow at {callNode}") - elif fun_name in ("SAFE_PTRDEREF", "UNSAFE_PTRDEREF"): - if callNode.arg_size() < 1: - return - ptr = callNode.getArgument(0) - if not self._harnessSafeDeref(abstract_state, ptr): - self.buf_overflow_helper.reportBufOverflow( - callNode, f"nullptr-deref at {callNode}") - - def _harnessSafeAccess(self, abstract_state, value, length: IntervalValue) -> bool: - ptr_val = abstract_state[value.getId()] - if not ptr_val.isAddr(): - return True - for addr in ptr_val.getAddrs(): - if pysvf.AbstractState.isBlackHoleObjAddr(addr) or pysvf.AbstractState.isNullMem(addr): - continue - obj_id = abstract_state.getIDFromAddr(addr) - base_obj = self.svfir.getBaseObject(obj_id) - if base_obj is None or base_obj.isBlackHoleObj() or not base_obj.isConstantByteSize(): - continue - size = base_obj.getByteSizeOfObj() - gnode = self.svfir.getGNode(obj_id) - base_offset = IntervalValue(gnode.getConstantFieldIdx()) if isinstance(gnode, pysvf.GepObjVar) else IntervalValue(0) - offset = base_offset + length - if int(offset.ub()) >= size: - return False - return True - - def _harnessSafeDeref(self, abstract_state, value) -> bool: - if value is None or isinstance(value, pysvf.ConstNullPtrValVar): - return False - abs_val = abstract_state[value.getId()] - if not abs_val.isAddr(): - return True - for addr in abs_val.getAddrs(): - if pysvf.AbstractState.isBlackHoleObjAddr(addr): - continue - if 
pysvf.AbstractState.isNullMem(addr): - return False - if abstract_state.isFreedMem(addr): - return False - return True - - - # mergeStatesFromPredecessors is a student TODO this year and lives in - # Assignment_3.py. - - def isBranchFeasible(self, intraEdge: pysvf.IntraCFGEdge, abstractState: pysvf.AbstractState) -> bool : - cmp_var = intraEdge.getCondition() - cmp_in_edges = cmp_var.getInEdges() - if len(cmp_in_edges) == 0: - return pysvf.AbstractState.isSwitchBranchFeasible(self.svfir, cmp_var, intraEdge.getSuccessorCondValue(), abstractState) - else: - cmp = cmp_in_edges[0] - if isinstance(cmp, pysvf.CmpStmt): - return pysvf.AbstractState.isCmpBranchFeasible(self.svfir, cmp, intraEdge.getSuccessorCondValue(), abstractState) - else: - return pysvf.AbstractState.isSwitchBranchFeasible(self.svfir, cmp_var, intraEdge.getSuccessorCondValue(), abstractState) - - - - - - def ensureAllAssertsValidated(self): - """Verify the student's control flow reached every ground-truth stub. - - Recognised stubs: - * ``svf_assert`` / ``svf_assert_eq`` -- abstract-state assertions - * ``UNSAFE_PTRDEREF`` / ``SAFE_PTRDEREF`` -- null-deref ground truth - * ``UNSAFE_BUFACCESS`` / ``SAFE_BUFACCESS`` -- buffer-access ground truth - - A missed stub site means the student's control-flow logic skipped a - place the grader cares about. Additionally requires that the number - of reported bugs is at least the number of ``UNSAFE_*`` stubs. 
- """ - assert_stubs = {"svf_assert", "svf_assert_eq"} - checkpoint_stubs = {"UNSAFE_PTRDEREF", "SAFE_PTRDEREF", - "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"} - unsafe_to_be_verified = 0 - for node in self.svfir.getICFG().getNodes(): - if not isinstance(node, pysvf.CallICFGNode): - continue - called_function = node.getCalledFunction() - if not called_function: - continue - name = called_function.getName() - if name not in assert_stubs and name not in checkpoint_stubs: - continue - if name.startswith("UNSAFE_"): - unsafe_to_be_verified += 1 - if not self.buf_overflow_helper.isAssertionPoint(node): - raise AssertionError( - f"The stub function callsite ({name}) was not reached by " - f"the student's control flow: {node}" - ) - assert unsafe_to_be_verified <= len(self.buf_overflow_helper.node_to_bug_info), \ - "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported" - - - - - """ - Perform the main analysis of the program. - - This function initializes the Weak Topological Order (WTO) for all functions, - processes the global ICFG node, and analyzes the main function if it exists. - It ensures that the abstract states are properly initialized and updated - throughout the analysis. - - Steps: - 1. Initialize the WTO for all functions in the program. - 2. Process the global ICFG node to initialize its abstract state. - 3. If the main function exists: - - Initialize its arguments as top to represent all possible inputs. - - Process its WTO components to analyze its control flow. 
- """ - def analyse(self): - self.initWto() - self.handleGlobalNode() - # # Process the main function if it exists - main_fun = self.svfir.getFunObjVar("main") - if main_fun: - # Arguments of main are initialized as top to represent all possible inputs - for i in range(main_fun.arg_size()): - as_state = self.pre_abs_trace[self.icfg.getGlobalICFGNode()] - as_state[main_fun.getArg(i).getId()] = IntervalValue.top() - - self.handleFunction(self.icfg.getFunEntryICFGNode(main_fun)) - else: - assert False, "Main function not found" - self.ensureAllAssertsValidated() - self.buf_overflow_helper.printReport() - - - # updateAbsState is a student TODO this year and lives in Assignment_3.py. - - """ - Initialize an object variable in the abstract state. - - This function determines the initial abstract value for a given object variable - based on its type and properties. It handles various types of object variables, - including constants, global variables, and complex structures, and assigns - appropriate abstract values such as intervals or addresses. - - Steps: - 1. Retrieve the base object associated with the given object variable. - 2. Check the type of the object variable: - - For constant integer or floating-point variables, return their exact value as an interval. - - For null pointers, return an interval representing zero. - - For global variables, return an address value based on a virtual memory address. - - For constant arrays or structures, return a top interval to represent unknown values. - 3. For other types of object variables, return an address value based on a virtual memory address. - - :param obj_var: The object variable to initialize. - :type obj_var: pysvf.ObjVar - :return: The initialized abstract value for the object variable. 
- :rtype: pysvf.AbstractValue - """ - def initObjVar(self, objVar: pysvf.ObjVar): - var_id = objVar.getId() - obj = self.svfir.getBaseObject(var_id).asBaseObjVar() - if obj.isConstDataObjVar() or obj.isConstantArray() or obj.isConstantStruct(): - if isinstance(objVar, pysvf.ConstIntObjVar): - numeral = objVar.getSExtValue() - return IntervalValue(numeral, numeral) - - elif isinstance(objVar, pysvf.ConstFPObjVar): - return IntervalValue(objVar.getFPValue(), objVar.getFPValue()) - - elif isinstance(objVar, pysvf.ConstNullPtrObjVar): - return IntervalValue(0,0) - - elif isinstance(objVar, pysvf.GlobalObjVar): - return AddressValue(self.getVirtualMemAddress(var_id)) - - elif obj.isConstantArray() or obj.isConstantStruct(): - return IntervalValue.top() - else: - return IntervalValue.top() - else: - return AddressValue(self.getVirtualMemAddress(var_id)) - - - def updateStateOnAddr(self, addr: pysvf.AddrStmt): - node = addr.getICFGNode() - abstract_state = self.post_abs_trace[node] - assert isinstance(abstract_state, AbstractState) - abstract_state[addr.getRHSVarID()] = AbstractValue(self.initObjVar(addr.getRHSVar().asObjVar())) - abstract_state[addr.getLHSVarID()] = abstract_state[addr.getRHSVarID()] - - - - - - - - def updateStateOnCmp(self, cmp: pysvf.CmpStmt): - node = cmp.getICFGNode() - abstract_state = self.post_abs_trace[node] - assert isinstance(abstract_state, AbstractState) - op0 = cmp.getOpVar(0) - op1 = cmp.getOpVar(1) - res = cmp.getResId() - if abstract_state.getVar(op0.getId()).isInterval() and abstract_state.getVar(op0.getId()).isInterval(): - res_val = IntervalValue(0) - lhs = abstract_state[op0.getId()].getInterval() - rhs = abstract_state[op1.getId()].getInterval() - predicate = cmp.getPredicate() - if predicate == Predicate.ICMP_EQ or predicate == Predicate.FCMP_OEQ or predicate == Predicate.FCMP_UEQ: - res_val = lhs.eq_interval(rhs) - elif predicate == Predicate.ICMP_NE or predicate == Predicate.FCMP_ONE or predicate == Predicate.FCMP_UNE: - res_val 
= lhs.ne_interval(rhs) - elif predicate == Predicate.ICMP_SGT or predicate == Predicate.FCMP_UGT or predicate == Predicate.FCMP_OGT or predicate == Predicate.FCMP_UGT: - res_val = (lhs > rhs) - elif predicate == Predicate.ICMP_SGE or predicate == Predicate.FCMP_UGE or predicate == Predicate.FCMP_OGE or predicate == Predicate.FCMP_UGE: - res_val = (lhs >= rhs) - elif predicate == Predicate.ICMP_SLT or predicate == Predicate.ICMP_ULT or predicate == Predicate.FCMP_OLT or predicate == Predicate.FCMP_ULT: - res_val = (lhs < rhs) - elif predicate == Predicate.ICMP_SLE or predicate == Predicate.ICMP_ULE or predicate == Predicate.FCMP_OLE or predicate == Predicate.FCMP_ULE: - res_val = (lhs <= rhs) - elif predicate == Predicate.FCMP_FALSE: - res_val = IntervalValue(0,0) - elif predicate == Predicate.FCMP_TRUE: - res_val = IntervalValue(1,1) - abstract_state[res] = AbstractValue(res_val) - if abstract_state.getVar(op0.getId()).isAddr() and abstract_state.getVar(op0.getId()).isAddr(): - res_val = None - lhs = abstract_state[op0.getId()] - rhs = abstract_state[op1.getId()] - predicate = cmp.getPredicate() - - if predicate in [Predicate.ICMP_EQ, Predicate.FCMP_OEQ, Predicate.FCMP_UEQ]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(lhs.equals(rhs)) - else: - if lhs.getAddrs().hasIntersect(rhs.getAddrs()): - res_val = IntervalValue.top() - else: - res_val = IntervalValue(0) - - elif predicate in [Predicate.ICMP_NE, Predicate.FCMP_ONE, Predicate.FCMP_UNE]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(not lhs.equals(rhs)) - else: - if lhs.getAddrs().hasIntersect(rhs.getAddrs()): - res_val = IntervalValue.top() - else: - res_val = IntervalValue(1) - - elif predicate in [Predicate.ICMP_UGT, Predicate.ICMP_SGT, Predicate.FCMP_OGT, Predicate.FCMP_UGT]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(next(iter(lhs.getAddrs())) > next(iter(rhs.getAddrs()))) - else: - 
res_val = IntervalValue.top() - - elif predicate in [Predicate.ICMP_UGE, Predicate.ICMP_SGE, Predicate.FCMP_OGE, Predicate.FCMP_UGE]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(next(iter(lhs.getAddrs())) >= next(iter(rhs.getAddrs()))) - else: - res_val = IntervalValue.top() - - elif predicate in [Predicate.ICMP_ULT, Predicate.ICMP_SLT, Predicate.FCMP_OLT, Predicate.FCMP_ULT]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(next(iter(lhs.getAddrs())) < next(iter(rhs.getAddrs()))) - else: - res_val = IntervalValue.top() - - elif predicate in [Predicate.ICMP_ULE, Predicate.ICMP_SLE, Predicate.FCMP_OLE, Predicate.FCMP_ULE]: - if len(lhs.getAddrs()) == 1 and len(rhs.getAddrs()) == 1: - res_val = IntervalValue(next(iter(lhs.getAddrs())) <= next(iter(rhs.getAddrs()))) - else: - res_val = IntervalValue.top() - - elif predicate == Predicate.FCMP_FALSE: - res_val = IntervalValue(0, 0) - - elif predicate == Predicate.FCMP_TRUE: - res_val = IntervalValue(1, 1) - - else: - assert False, "undefined compare" - - abstract_state[res] = res_val - - - - def updateStateOnCall(self, call: pysvf.CallPE): - node = call.getICFGNode() - abstract_state = self.post_abs_trace[node] - lhs = call.getLHSVarID() - rhs = call.getRHSVarID() - abstract_state[lhs] = abstract_state[rhs] - - - def updateStateOnRet(self, ret: pysvf.RetPE): - node = ret.getICFGNode() - abstract_state = self.post_abs_trace[node] - abstract_state[ret.getLHSVarID()] = abstract_state[ret.getRHSVarID()] - - - - def updateStateOnSelect(self, select: pysvf.SelectStmt): - node = select.getICFGNode() - abstract_state = self.post_abs_trace[node] - assert isinstance(abstract_state, AbstractState) - res = select.get_res_id() - tval = select.get_true_value().getId() - fval = select.get_false_value().getId() - cond = select.getCondition().getId() - if abstract_state[cond].getInterval().isInterval(): - if abstract_state[cond].getInterval().is_zero(): - 
abstract_state[res] = abstract_state[fval] - else: - abstract_state[res] = abstract_state[tval] - else: - abstract_state[res].joinWith(abstract_state[tval]) - abstract_state[res].joinWith(abstract_state[fval]) - - - - - # getAccessOffset is a student TODO this year and lives in Assignment_3.py. diff --git a/Assignment-3/Python/Assignment_3.py b/Assignment-3/Python/Assignment_3.py index 5e230d0..86e8373 100644 --- a/Assignment-3/Python/Assignment_3.py +++ b/Assignment-3/Python/Assignment_3.py @@ -1,14 +1,36 @@ -from AEReporter import * +from AEHelper import * import pysvf # ============================================================================= -# Student TODOs -# ============================================================================= -# Implement abstract interpretation for verification and bug detection. The -# harness (AEReporter.py) drives the analysis and calls into the entry points -# below (please do not delete); You are free to add any internal classes and -# helper methods you need within Assignment_3.py. +# Student-facing Assignment3 class. +# +# The harness (AEHelper.py / AEReporter.py) provides interprocedural WTO +# construction, stub / checkpoint sub-dispatch, the external-API whitelist, +# the abstract-state helpers wrapping the underlying AbstractInterpretation +# singleton, and the assertion-coverage validator. This file owns the +# analysis driver (analyse / report* forwarders, pre-implemented below) and +# the five student TODOs: +# * the four driver entry points +# handleGlobalNode / handleFunction / handleICFGNode / handleICFGCycle +# * handleCallSite (the call-node dispatcher). 
+# You design the rest of the six tasks (statement transfer, branch +# refinement, external-API summaries, buffer-overflow checker, +# nullptr-dereference checker) and have your handleICFGNode dispatch into +# them however you see fit — override the matching no-op virtuals +# (updateAbsState, mergeStatesFromPredecessors, updateStateOnExtCall, +# bufOverflowDetection, nullptrDerefDetection) on AbstractExecution if you +# want your handleCallSite to call into your code. +# +# General analysis engine +# 1. Statement transfer functions -- typically inside handleICFGNode +# 2. Branch refinement -- typically inside handleICFGNode +# 3. Cycle and recursion fixpoint -- typically inside handleICFGCycle +# 4. External-API value summaries -- typically inside handleCallSite +# +# Bug checkers +# 5. Buffer-overflow checker +# 6. Nullptr-dereference checker # ============================================================================= @@ -16,35 +38,103 @@ class Assignment3(AbstractExecution): def __init__(self, pag: pysvf.SVFIR) -> None: super().__init__(pag) - # Dispatch a single SVF statement to the matching transfer function. - def updateAbsState(self, stmt: pysvf.SVFStmt): - # TODO: dispatch on statement subtype and update the abstract state. - pass + # ========================================================================= + # Analysis driver (pre-implemented). + # ========================================================================= - # Join predecessor post-states (with branch refinement) into the - # current node's pre-state. - def mergeStatesFromPredecessors(self, block: pysvf.ICFGNode, - abstract_state: pysvf.AbstractState) -> bool: - # TODO - return False + # `test-ae.py` calls `ass3.analyse()` directly — there is no separate + # `runOnModule` in the Python port. Builds the WTO, replays the global + # ICFG node, kicks off the analysis at main, validates the assertion + # coverage, and prints the bug-reporter summary. 
+ def analyse(self): + self.initWto() + self.handleGlobalNode() + main_fun = self.svfir.getFunObjVar("main") + if main_fun: + for i in range(main_fun.arg_size()): + as_state = self.pre_abs_trace[self.icfg.getGlobalICFGNode()] + as_state[main_fun.getArg(i).getId()] = IntervalValue.top() + self.handleFunction(self.icfg.getFunEntryICFGNode(main_fun)) + else: + assert False, "Main function not found" + self.ensureAllAssertsValidated() + self.buf_overflow_helper.printReport() - # Iterate the cycle body to a fixpoint (widening optional). - def handleICFGCycle(self, cycle): - # TODO + # Bug-reporter forwarders. Mirrors the C++ AbstractExecution::report* + # helpers; routes through the AEReporter instance owned by the harness. + def reportBufOverflow(self, node, msg=None): + self.buf_overflow_helper.reportBufOverflow( + node, msg if msg is not None else f"buffer-overflow at {node}") + + def reportNullDeref(self, node, msg=None): + self.buf_overflow_helper.reportBufOverflow( + node, msg if msg is not None else f"nullptr-deref at {node}") + + # ========================================================================= + # Student TODOs — driver entry points. + # + # `analyse()` (above) calls `handleGlobalNode()` once for the SVFModule's + # global ICFG node and `handleFunction(main_entry)` to start the + # per-function analysis. A typical layering is: + # handleFunction walks the interprocedural WTO components and + # dispatches singletons to handleICFGNode / cycles to + # handleICFGCycle. + # handleICFGNode merges predecessor states (Task 2), runs the + # per-statement transfer functions (Task 1), routes + # call sites via handleCallSite, and runs the bug + # checkers (Tasks 5 / 6). + # handleICFGCycle iterates the cycle body to a fixpoint with widening + # / narrowing (Task 3). + # You are free to deviate as long as the test driver's expectations + # (covered stubs, reported bugs) hold. 
+ # ========================================================================= + + def handleGlobalNode(self): + # TODO: initialise the global ICFG node's state and replay the global + # statements through your statement transfer functions. pass - # Detect out-of-bounds memory accesses at `node`. - def bufOverflowDetection(self, node: pysvf.ICFGNode): - # TODO + def handleFunction(self, funEntry: pysvf.ICFGNode): + # TODO: walk the function's interprocedural WTO components (singletons + # vs cycles) and dispatch to handleICFGNode / handleICFGCycle. pass - # Model external library calls (memory/string families and - # assignment-specific stubs). - def updateStateOnExtCall(self, call: pysvf.CallICFGNode): - # TODO + def handleICFGNode(self, node: pysvf.ICFGNode): + # TODO: merge predecessor states, run the per-statement transfer + # functions, dispatch call sites via handleCallSite, run the bug + # checkers, and return whether the post-state changed. + return False + + def handleICFGCycle(self, cycle): + # TODO: iterate the cycle body to a fixpoint (widening / narrowing). pass - # Handle a call site in the control-flow graph. + # ========================================================================= + # Student TODO — handleCallSite. + # + # Dispatch a call ICFG node based on its callee: + # + # * svf_assert / svf_assert_eq -> self.handleStubFunction(node) + # * SAFE_/UNSAFE_BUFACCESS, + # SAFE_/UNSAFE_PTRDEREF -> self.handleCheckpointStubs(node) + # * nd / rand -> nondeterministic return: set + # the actual-return variable to + # TOP on the call node's + # post-state. + # * other external callees -> updateStateOnExtCall, then run + # (mem_insert / str_insert / the bug checkers + # pysvf.isExtCall(...)) (nullptrDerefDetection + + # bufOverflowDetection) on the + # call's arguments. 
+ # * non-extern callees -> skip recursive callsites using + # self.inSameCallGraphSCC, then + # inline by calling + # self.handleFunction on the + # callee's entry ICFG node and + # forward the call node's + # post-state to the return ICFG + # node. + # ========================================================================= def handleCallSite(self, node: pysvf.CallICFGNode): # TODO pass