diff --git a/Assignment-3/CPP/Assignment_3_Helper.cpp b/Assignment-3/CPP/AEReporter.cpp similarity index 76% rename from Assignment-3/CPP/Assignment_3_Helper.cpp rename to Assignment-3/CPP/AEReporter.cpp index 2bcb60d..0974f6a 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.cpp +++ b/Assignment-3/CPP/AEReporter.cpp @@ -1,4 +1,4 @@ -//===- Assignment_3_Helper.cpp -- Abstract Interpretation --// +//===- AEReporter.cpp -- Abstract Interpretation harness --// // // SVF: Static Value-Flow Analysis // @@ -133,15 +133,11 @@ void AbstractExecution::runOnModule(SVF::ICFG* _icfg) { svfir = PAG::getPAG(); icfg = _icfg; analyse(); - if (!caseConfig.emitJson) + if (!bugReporter.getCaseConfig().emitJson) bugReporter.printReport(); } -u32_t AbstractExecution::getAnalyzedNodeCount() const { - return static_cast(analyzedNodes.size()); -} - -u32_t AbstractExecution::getTotalNodeCount() const { +u32_t AEReporter::getTotalNodeCount(const ICFG* icfg) const { if (!icfg) return 0; u32_t total = 0; @@ -150,29 +146,29 @@ u32_t AbstractExecution::getTotalNodeCount() const { return total; } -double AbstractExecution::getICFGCoverage() const { - u32_t total = getTotalNodeCount(); +double AEReporter::getICFGCoverage(const ICFG* icfg) const { + u32_t total = getTotalNodeCount(icfg); if (total == 0) return 0.0; return 100.0 * static_cast(getAnalyzedNodeCount()) / static_cast(total); } -bool AbstractExecution::hasTargetReport() const { +bool AEReporter::hasTargetReport() const { if (caseConfig.targetLoc.empty()) return false; - for (const AssignmentBugReport& report : bugReporter.getReports()) { + for (const AssignmentBugReport& report : _reports) { if (ass3ReportMatchesTarget(report, caseConfig.targetLoc)) return true; } return false; } -void AbstractExecution::writeJsonSummary(std::ostream& os, double wallSeconds, - int exitCode, bool assertsValidated) const { - const auto& reports = bugReporter.getReports(); +void AEReporter::writeJsonSummary(std::ostream& os, const ICFG* icfg, + double wallSeconds, int exitCode, + bool assertsValidated) const { const bool targetHit = hasTargetReport(); const u32_t tp = caseConfig.targetLoc.empty() ? 0 : (targetHit ? 1 : 0); - const u32_t fp = reports.size() > tp ? static_cast(reports.size() - tp) : 0; + const u32_t fp = _reports.size() > tp ? static_cast(_reports.size() - tp) : 0; os << "{\n"; os << " \"case_id\": \"" << ass3JsonEscape(caseConfig.caseId) << "\",\n"; @@ -182,21 +178,21 @@ void AbstractExecution::writeJsonSummary(std::ostream& os, double wallSeconds, os << " \"asserts_validated\": " << (assertsValidated ? "true" : "false") << ",\n"; os << " \"tp\": " << tp << ",\n"; os << " \"fp\": " << fp << ",\n"; - os << " \"reports\": " << reports.size() << ",\n"; + os << " \"reports\": " << _reports.size() << ",\n"; os << " \"wall_sec\": " << std::fixed << std::setprecision(3) << wallSeconds << ",\n"; - os << " \"icfg_nodes\": " << getTotalNodeCount() << ",\n"; + os << " \"icfg_nodes\": " << getTotalNodeCount(icfg) << ",\n"; os << " \"analyzed_icfg_nodes\": " << getAnalyzedNodeCount() << ",\n"; - os << " \"icfg_coverage\": " << std::fixed << std::setprecision(2) << getICFGCoverage() << ",\n"; + os << " \"icfg_coverage\": " << std::fixed << std::setprecision(2) << getICFGCoverage(icfg) << ",\n"; os << " \"report_list\": ["; - for (size_t i = 0; i < reports.size(); ++i) { - const AssignmentBugReport& report = reports[i]; + for (size_t i = 0; i < _reports.size(); ++i) { + const AssignmentBugReport& report = _reports[i]; os << (i == 0 ? "\n" : ",\n"); os << " {\"kind\": \"" << ass3JsonEscape(report.kind) << "\", \"node\": " << report.nodeId << ", \"location\": \"" << ass3JsonEscape(report.location) << "\", \"message\": \"" << ass3JsonEscape(report.message) << "\"}"; } - if (!reports.empty()) + if (!_reports.empty()) os << "\n "; os << "]\n"; os << "}\n"; @@ -250,15 +246,6 @@ void AbstractExecution::initWTO() { funcToWTO[fun] = wto; } - // Record every cycle head (loop heads and recursive-function entries) so - // handleFunction can dispatch them to handleICFGCycle. - for (auto fun : funcToWTO) { - for (const ICFGWTOComp* comp : fun.second->getWTOComponents()) { - if (const ICFGCycleWTO* cycle = SVFUtil::dyn_cast(comp)) { - cycleHeadToCycle[cycle->head()->getICFGNode()] = cycle; - } - } - } } // updateGepObjOffsetFromBase / hasGepObjOffsetFromBase / getGepObjOffsetFromBase @@ -271,7 +258,7 @@ void AbstractExecution::initWTO() { void AbstractExecution::handleGlobalNode() { AbstractState as; const ICFGNode* node = icfg->getGlobalICFGNode(); - analyzedNodes.insert(node); + bugReporter.noteAnalyzed(node); postAbsTrace()[node] = preAbsTrace[node]; // The null pointer carries the dedicated null memory address so that // pointer-vs-null comparisons and null dereferences can be detected. @@ -282,36 +269,51 @@ void AbstractExecution::handleGlobalNode() { } } -/// If we have stub calls as ground truths in the program, including svf_assert and OVERFLOW, -/// make sure they are fully verified/checked. +/// Verify that every ground-truth stub call site in the program was reached +/// by the student's analysis (added to `assert_points` via handleCallSite -> +/// handleStubFunctions / handleCheckpointStubs). A missed stub site means +/// the student's control-flow logic skipped a place the grader cares about. +/// +/// Recognised stubs: +/// - svf_assert / svf_assert_eq : abstract-state assertion checks +/// - UNSAFE_PTRDEREF / SAFE_PTRDEREF : null-deref ground truth +/// - UNSAFE_BUFACCESS / SAFE_BUFACCESS : buffer-access ground truth +/// +/// Additionally requires that the number of reported bugs is at least the +/// number of UNSAFE_* stubs in the program. void AbstractExecution::ensureAllAssertsValidated() { - u32_t svf_assert_to_be_verified = 0; - u32_t overflow_assert_to_be_verified = 0; + static const Set kAssertStubs = {"svf_assert", "svf_assert_eq"}; + static const Set kCheckpointStubs = { + "UNSAFE_PTRDEREF", "SAFE_PTRDEREF", + "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"}; + u32_t unsafe_to_be_verified = 0; for (auto it = svfir->getICFG()->begin(); it != svfir->getICFG()->end(); ++it) { const ICFGNode* node = it->second; - if (const CallICFGNode* call = SVFUtil::dyn_cast(node)) { - if (const FunObjVar* fun = call->getCalledFunction()) { - if (fun->getName() == "svf_assert" || fun->getName() == "OVERFLOW") { - if (fun->getName() == "svf_assert") { - svf_assert_to_be_verified++; - } - else { - overflow_assert_to_be_verified++; - } - if (assert_points.find(call) == assert_points.end()) { - std::stringstream ss; - ss << "The stub function calliste (svf_assert or OVERFLOW) has not been checked: " - << call->toString(); - std::cerr << ss.str() << std::endl; - assert(false); - } - } - } + const CallICFGNode* call = SVFUtil::dyn_cast(node); + if (!call) + continue; + const FunObjVar* fun = call->getCalledFunction(); + if (!fun) + continue; + const std::string& name = fun->getName(); + const bool isAssertStub = kAssertStubs.count(name) > 0; + const bool isCheckpointStub = kCheckpointStubs.count(name) > 0; + if (!isAssertStub && !isCheckpointStub) + continue; + if (name.rfind("UNSAFE_", 0) == 0) + unsafe_to_be_verified++; + if (!bugReporter.isAssertionPoint(call)) { + std::stringstream ss; + ss << "The stub function callsite (" << name + << ") was not reached by the student's control flow: " + << call->toString(); + std::cerr << ss.str() << std::endl; + assert(false); } } - assert(overflow_assert_to_be_verified <= bugReporter.getBugReporter().getBugSet().size() && - "The number of stub asserts (ground truth) should <= the number of overflow reported"); + assert(unsafe_to_be_verified <= bugReporter.getBugReporter().getBugSet().size() && + "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported"); } @@ -330,7 +332,6 @@ void AbstractExecution::analyse() { // AbstractInterpretation; it pulls SVFIR from PAG::getPAG() internally and // does not need an explicit Andersen analysis to be passed in. ai = &AbstractInterpretation::getAEInstance(); - svfStateMgr = new Ass3StateManager(ai); // Handle the global node handleGlobalNode(); @@ -368,7 +369,7 @@ bool AbstractExecution::handleICFGNode(const ICFGNode* node) { SVFUtil::errs() << "Infeasible for node " << node->getId() << "\n"; return false; } - analyzedNodes.insert(node); + bugReporter.noteAnalyzed(node); preAbsTrace[node] = tmpEs; // Store the last abstract state, used to check if the abstract state has reached a fixpoint AbstractState last_as = postAbsTrace()[node]; @@ -457,7 +458,7 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { if (!callee) return; std::string fun_name = callee->getName(); - if (fun_name == "OVERFLOW" || fun_name == "svf_assert" || fun_name == "svf_assert_eq") { + if (fun_name == "svf_assert" || fun_name == "svf_assert_eq") { handleStubFunctions(callNode); } else if (fun_name == "SAFE_BUFACCESS" || fun_name == "UNSAFE_BUFACCESS" || @@ -481,9 +482,14 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { bufOverflowDetection(callNode); } else { - // Inline the callee body unconditionally. handleFunction guards - // against re-entering a WTO that is already on the stack, so - // recursive callsites just fall back to the outer WTO cycle. + // Skip recursive callsites (within the same call-graph SCC): the + // interprocedural WTO built in initWTO() already encoded this as a + // back-edge, so the outer cycle's widen/narrow iteration in + // handleICFGCycle drives the recursion to a fixpoint. Mirrors + // SVF's `AbstractInterpretation::skipRecursiveCall`. + const FunObjVar* caller = callNode->getCaller(); + if (caller && ander && ander->inSameCallGraphSCC(caller, callee)) + return; handleFunction(svfir->getICFG()->getFunEntryICFGNode(callee)); const RetICFGNode* retNode = callNode->getRetICFGNode(); if (postAbsTrace().count(callNode)) @@ -500,6 +506,7 @@ void AbstractExecution::handleCallSite(const CallICFGNode* callNode) { * stub verdict cannot be biased by student bugs. */ void AbstractExecution::handleCheckpointStubs(const CallICFGNode* callNode) { + bugReporter.noteAssertionPoint(callNode); const std::string fun_name = callNode->getCalledFunction()->getName(); if (fun_name == "SAFE_BUFACCESS" || fun_name == "UNSAFE_BUFACCESS") { if (callNode->arg_size() < 2) @@ -526,18 +533,12 @@ void AbstractExecution::handleFunction(const ICFGNode* funEntry) { // Iterate the function's interprocedural WTO components in WTO order. // Singletons are handled directly; cycles (loop heads and recursive // function entries) are driven to a fixpoint by handleICFGCycle. - // - // `_funcsInFlight` guards re-entry: if this WTO is already on the call - // stack (i.e. a recursive callsite tried to inline back into us), return - // immediately and let the outer cycle's widen/narrow iteration drive the - // recursion to a fixpoint. This is the only mechanism for handling - // recursion — there is no separate "is recursive callsite?" check. + // Recursive callsites are filtered out earlier in handleCallSite via + // `inSameCallGraphSCC`, so handleFunction never re-enters itself. const FunObjVar* fun = funEntry->getFun(); auto it = funcToWTO.find(fun); if (it == funcToWTO.end()) return; - if (!_funcsInFlight.insert(fun).second) - return; for (const ICFGWTOComp* comp : it->second->getWTOComponents()) { if (const ICFGSingletonWTO* singleton = SVFUtil::dyn_cast(comp)) { handleICFGNode(singleton->getICFGNode()); @@ -546,7 +547,6 @@ void AbstractExecution::handleFunction(const ICFGNode* funEntry) { handleICFGCycle(cycle); } } - _funcsInFlight.erase(fun); } /** @@ -554,7 +554,7 @@ void AbstractExecution::handleFunction(const ICFGNode* funEntry) { * * This function handles specific stub functions (`svf_assert` and `OVERFLOW`) to check whether * the abstract interpretation results are as expected. For `svf_assert(expr)`, the expression must hold true. - * For `OVERFLOW(object, offset_access)`, the size of the object must be less than or equal to the offset access. + * For `svf_assert_eq(a, b)`, the two intervals must be equal. * * @param callnode The call node representing the stub function to be handled */ @@ -562,7 +562,7 @@ void AbstractExecution::handleFunction(const ICFGNode* funEntry) { void AbstractExecution::handleStubFunctions(const SVF::CallICFGNode* callNode) { // Handle the 'svf_assert' stub function if (callNode->getCalledFunction()->getName() == "svf_assert") { - assert_points.insert(callNode); + bugReporter.noteAssertionPoint(callNode); // If the condition is false, the program is infeasible u32_t arg0 = callNode->getArgument(0)->getId(); AbstractState& as = getAbsStateFromTrace(callNode); @@ -604,92 +604,49 @@ void AbstractExecution::handleStubFunctions(const SVF::CallICFGNode* callNode) { } return; } - // Handle the 'OVERFLOW' stub function. Ground truth is computed from SVF - // primitives only — `GepObjVar::getConstantFieldIdx()` gives the accumulated - // offset of the sub-object from its base — so the verdict does not depend - // on the student's gepObjOffsetFromBase map. - else if (callNode->getCalledFunction()->getName() == "OVERFLOW") { - assert_points.insert(callNode); - u32_t arg0 = callNode->getArgument(0)->getId(); - u32_t arg1 = callNode->getArgument(1)->getId(); - - AbstractState& as = getAbsStateFromTrace(callNode); - AbstractValue gepRhsVal = as[arg0]; - - if (gepRhsVal.isAddr()) { - bool overflow = false; - s64_t access_offset = as[arg1].getInterval().ub().getIntNumeral(); - for (const auto& addr : gepRhsVal.getAddrs()) { - NodeID objId = as.getIDFromAddr(addr); - const BaseObjVar* baseObj = svfir->getBaseObject(objId); - if (!baseObj || !baseObj->isConstantByteSize()) - continue; - s64_t size = (s64_t)baseObj->getByteSizeOfObj(); - s64_t baseOffset = 0; - if (auto* gepObj = SVFUtil::dyn_cast(svfir->getGNode(objId))) - baseOffset = (s64_t)gepObj->getConstantFieldIdx(); - if (baseOffset + access_offset >= size) - overflow = true; - } - if (overflow) { - reportBufOverflow(callNode); - std::cerr << "Your implementation successfully detected the buffer overflow\n"; - } - else { - SVFUtil::errs() << "Your implementation failed to detect the buffer overflow!" - << callNode->toString() << "\n"; - assert(false); - } - } - else { - SVFUtil::errs() << "Your implementation failed to detect the buffer overflow!" - << callNode->toString() << "\n"; - assert(false); - } - } } // =========================================================================== -// Ass3StateManager — narrow facade forwarding only the whitelisted state and -// GEP primitives to the underlying AbstractInterpretation. Defined here (not -// in the header) so student code never sees AbstractInterpretation/AbsExtAPI. +// State-manager primitives — thin forwarders to the underlying +// AbstractInterpretation singleton. Defined here (not in the header) so +// student code never sees AbstractInterpretation/AbsExtAPI directly. // =========================================================================== namespace SVF { -const AbstractValue& Ass3StateManager::getAbsValue(const ValVar* var, const ICFGNode* node) { +const AbstractValue& AbstractExecution::getAbsValue(const ValVar* var, const ICFGNode* node) { return ai->getAbsValue(var, node); } -const AbstractValue& Ass3StateManager::getAbsValue(const ObjVar* var, const ICFGNode* node) { +const AbstractValue& AbstractExecution::getAbsValue(const ObjVar* var, const ICFGNode* node) { return ai->getAbsValue(var, node); } -const AbstractValue& Ass3StateManager::getAbsValue(const SVFVar* var, const ICFGNode* node) { +const AbstractValue& AbstractExecution::getAbsValue(const SVFVar* var, const ICFGNode* node) { return ai->getAbsValue(var, node); } -void Ass3StateManager::updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node) { +void AbstractExecution::updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node) { ai->updateAbsValue(var, val, node); } -void Ass3StateManager::updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node) { +void AbstractExecution::updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node) { ai->updateAbsValue(var, val, node); } -void Ass3StateManager::updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node) { +void AbstractExecution::updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node) { ai->updateAbsValue(var, val, node); } -AbstractValue Ass3StateManager::loadValue(const ValVar* pointer, const ICFGNode* node) { +AbstractValue AbstractExecution::loadValue(const ValVar* pointer, const ICFGNode* node) { return ai->loadValue(pointer, node); } -void Ass3StateManager::storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node) { +void AbstractExecution::storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node) { ai->storeValue(pointer, val, node); } -AddressValue Ass3StateManager::getGepObjAddrs(const ValVar* pointer, IntervalValue offset) { +AddressValue AbstractExecution::getGepObjAddrs(const ValVar* pointer, IntervalValue offset) { return ai->getGepObjAddrs(pointer, offset); } -IntervalValue Ass3StateManager::getGepElementIndex(const GepStmt* gep) { +IntervalValue AbstractExecution::getGepElementIndex(const GepStmt* gep) { return ai->getGepElementIndex(gep); } -IntervalValue Ass3StateManager::getGepByteOffset(const GepStmt* gep) { +IntervalValue AbstractExecution::getGepByteOffset(const GepStmt* gep) { return ai->getGepByteOffset(gep); } -u32_t Ass3StateManager::getAllocaInstByteSize(const AddrStmt* addr) { +u32_t AbstractExecution::getAllocaInstByteSize(const AddrStmt* addr) { return ai->getAllocaInstByteSize(addr); } diff --git a/Assignment-3/CPP/Assignment_3_Helper.h b/Assignment-3/CPP/AEReporter.h similarity index 64% rename from Assignment-3/CPP/Assignment_3_Helper.h rename to Assignment-3/CPP/AEReporter.h index 9665e97..4641596 100644 --- a/Assignment-3/CPP/Assignment_3_Helper.h +++ b/Assignment-3/CPP/AEReporter.h @@ -1,4 +1,4 @@ -//===- Assignment-3-Helper.h -- Abstract Interpretation Helper funcs --// +//===- AEReporter.h -- Abstract Interpretation bug reporter --// // // SVF: Static Value-Flow Analysis // @@ -19,11 +19,9 @@ // along with this program. If not, see . // //===----------------------------------------------------------------------===// -/* - * Abstract Interpretation Helper Functions - * - * Created on: Feb 19, 2024 - */ + +#ifndef ASSIGNMENT_3_AE_REPORTER_H +#define ASSIGNMENT_3_AE_REPORTER_H #include "AE/Core/AbstractState.h" #include "AE/Svfexe/AEDetector.h" @@ -34,44 +32,11 @@ #include #include #include -namespace SVF { - class AbstractInterpretation; - - /// Narrow facade over the SVF abstract-interpretation state manager. - /// - /// Students interact with the abstract state *only* through this object - /// (the `svfStateMgr` member of AbstractExecution). It forwards exactly the - /// state read/write and GEP primitives the assignment is allowed to use, and - /// deliberately exposes no path to the SVF external-API modeller - /// (AbsExtAPI / handleExtAPI / getRangeLimitFromType / getUtils), so the - /// memory/string library summaries and the cast-range logic must be written - /// by hand. Method bodies live in Assignment_3_Helper.cpp. - class Ass3StateManager { - public: - explicit Ass3StateManager(AbstractInterpretation* ai = nullptr) : ai(ai) {} - - const AbstractValue& getAbsValue(const ValVar* var, const ICFGNode* node); - const AbstractValue& getAbsValue(const ObjVar* var, const ICFGNode* node); - const AbstractValue& getAbsValue(const SVFVar* var, const ICFGNode* node); - - void updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node); - void updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node); - void updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node); - AbstractValue loadValue(const ValVar* pointer, const ICFGNode* node); - void storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node); - - AddressValue getGepObjAddrs(const ValVar* pointer, IntervalValue offset); - IntervalValue getGepElementIndex(const GepStmt* gep); - IntervalValue getGepByteOffset(const GepStmt* gep); - u32_t getAllocaInstByteSize(const AddrStmt* addr); +namespace SVF { - private: - // harness-only: AbstractExecution reaches the underlying manager for the - // post-trace; never exposed to student code. - friend class AbstractExecution; - AbstractInterpretation* ai; - }; + class ICFG; + class CallICFGNode; struct AssignmentCaseConfig { std::string caseId; @@ -89,8 +54,39 @@ namespace SVF { std::string ass3JsonEscape(const std::string& input); - class AbstractExecutionHelper { + /// Bug reporter and harness bookkeeper for the Assignment-3 + /// abstract-interpretation pipeline. Owns the bug list plus the + /// harness-only state (analysed ICFG nodes, validated assertion + /// call sites, case config) and renders the human / JSON summary + /// consumed by the grader. + class AEReporter { public: + AEReporter() = default; + explicit AEReporter(const AssignmentCaseConfig& config) : caseConfig(config) {} + + const AssignmentCaseConfig& getCaseConfig() const { return caseConfig; } + void setCaseConfig(const AssignmentCaseConfig& config) { caseConfig = config; } + + /// Coverage tracking: harness records every ICFG node it processed. + void noteAnalyzed(const ICFGNode* node) { analyzedNodes.insert(node); } + u32_t getAnalyzedNodeCount() const { + return static_cast(analyzedNodes.size()); + } + u32_t getTotalNodeCount(const ICFG* icfg) const; + double getICFGCoverage(const ICFG* icfg) const; + + /// Assertion-point tracking: harness records every stub call site it + /// actually reached so `ensureAllAssertsValidated` can detect missed + /// ones. + void noteAssertionPoint(const CallICFGNode* call) { assert_points.insert(call); } + bool isAssertionPoint(const CallICFGNode* call) const { + return assert_points.find(call) != assert_points.end(); + } + + bool hasTargetReport() const; + void writeJsonSummary(std::ostream& os, const ICFG* icfg, + double wallSeconds, int exitCode, + bool assertsValidated) const; /// Add a detected bug to the bug reporter and print the report ///@{ @@ -155,5 +151,14 @@ namespace SVF { SVFBugReport _recoder; Map _nodeToBugInfo; std::vector _reports; + + /// Harness bookkeeping (moved here from AbstractExecution so + /// Assignment_3.h stays focused on student-facing surface). + AssignmentCaseConfig caseConfig; + Set analyzedNodes; + Set assert_points; }; -} + +} // namespace SVF + +#endif // ASSIGNMENT_3_AE_REPORTER_H diff --git a/Assignment-3/CPP/Assignment_3.cpp b/Assignment-3/CPP/Assignment_3.cpp index 910509a..51142db 100644 --- a/Assignment-3/CPP/Assignment_3.cpp +++ b/Assignment-3/CPP/Assignment_3.cpp @@ -20,7 +20,7 @@ // //===----------------------------------------------------------------------===// /* - * Abstract Interpretation and buffer overflow detection + * Abstract Interpretation and bug detection * * Created on: Feb 19, 2024 */ @@ -33,9 +33,11 @@ using namespace SVFUtil; // =========================================================================== // Student TODOs // =========================================================================== -// Implement abstract interpretation for the assignment cases. The harness -// (Assignment_3_Helper.cpp) drives the analysis and only calls into the six -// entry points below; design and add any internal helpers you need. +// Implement abstract interpretation for verification and bug detection. The +// harness (AEReporter.cpp) drives the analysis and calls into the six entry +// points below (please do not delete); You are free to add any internal +// classes and helper methods you need within Assignment_3.h and +// Assignment_3.cpp. // =========================================================================== void AbstractExecution::updateAbsState(const SVFStmt* stmt) { diff --git a/Assignment-3/CPP/Assignment_3.h b/Assignment-3/CPP/Assignment_3.h index a4e0231..e2405f5 100644 --- a/Assignment-3/CPP/Assignment_3.h +++ b/Assignment-3/CPP/Assignment_3.h @@ -20,24 +20,29 @@ // //===----------------------------------------------------------------------===// /* - * Abstract Interpretation and buffer overflow detection + * Abstract Interpretation and bug detection * * Created on: Feb 19, 2024 */ -#include "Assignment_3_Helper.h" +#include "AEReporter.h" #include "SVFIR/SVFIR.h" #include namespace SVF { + class AbstractInterpretation; class AndersenWaveDiff; /// Abstract Execution class class AbstractExecution { public: /// Constructor explicit AbstractExecution(const AssignmentCaseConfig& config = AssignmentCaseConfig()) - : caseConfig(config) { + : bugReporter(config) { } + /// Harness reporter accessor (used by test-ae.cpp for JSON summary). + AEReporter& getReporter() { return bugReporter; } + const AEReporter& getReporter() const { return bugReporter; } + virtual void runOnModule(ICFG* icfg); static AbstractExecution& getAEInstance() @@ -68,6 +73,27 @@ namespace SVF { /// External-API value summaries (student TODO). void updateStateOnExtCall(const SVF::CallICFGNode* extCallNode); + /// State-manager primitives (forward to the underlying + /// AbstractInterpretation). Use these from the statement transfer + /// functions and the external-API summaries. + ///@{ + const AbstractValue& getAbsValue(const ValVar* var, const ICFGNode* node); + const AbstractValue& getAbsValue(const ObjVar* var, const ICFGNode* node); + const AbstractValue& getAbsValue(const SVFVar* var, const ICFGNode* node); + + void updateAbsValue(const ValVar* var, const AbstractValue& val, const ICFGNode* node); + void updateAbsValue(const ObjVar* var, const AbstractValue& val, const ICFGNode* node); + void updateAbsValue(const SVFVar* var, const AbstractValue& val, const ICFGNode* node); + + AbstractValue loadValue(const ValVar* pointer, const ICFGNode* node); + void storeValue(const ValVar* pointer, const AbstractValue& val, const ICFGNode* node); + + AddressValue getGepObjAddrs(const ValVar* pointer, IntervalValue offset); + IntervalValue getGepElementIndex(const GepStmt* gep); + IntervalValue getGepByteOffset(const GepStmt* gep); + u32_t getAllocaInstByteSize(const AddrStmt* addr); + ///@} + /// Handle stub functions for verifying abstract interpretation results void handleStubFunctions(const CallICFGNode* call); @@ -95,58 +121,32 @@ namespace SVF { void ensureAllAssertsValidated(); - /// Case-based grading/reporting helpers. These are intentionally - /// end-to-end: the grader should score TP/FP/time/coverage per case, - /// while module tags are only diagnosis hints. - void writeJsonSummary(std::ostream& os, double wallSeconds, int exitCode, - bool assertsValidated) const; - u32_t getAnalyzedNodeCount() const; - u32_t getTotalNodeCount() const; - double getICFGCoverage() const; - bool hasTargetReport() const; - const AssignmentCaseConfig& getCaseConfig() const { - return caseConfig; - } - /// Destructor virtual ~AbstractExecution() { - // svfStateMgr is the AbstractInterpretation singleton; SVF owns its lifetime. + // `ai` is the AbstractInterpretation singleton; SVF owns its lifetime. } protected: /// SVFIR and ICFG SVFIR* svfir; ICFG* icfg; - /// Narrow state-manager facade used by all student code: it forwards only - /// the whitelisted state read/write and GEP primitives and exposes no - /// path to the SVF external-API modeller (see Ass3StateManager). - Ass3StateManager* svfStateMgr = nullptr; /// Andersen pointer analysis (owns the call graph + SCC used to drive /// the interprocedural WTO); created in initWTO(). AndersenWaveDiff* ander = nullptr; /// Map a function to its corresponding WTO Map funcToWTO; - /// Functions whose WTO is currently being iterated; re-entry returns - /// early so the outer cycle drives the recursion to a fixpoint. - Set _funcsInFlight; /// Abstract trace immediately before an ICFGNode. Map preAbsTrace; /// The "post" trace lives inside the manager (defined in the helper). Map& postAbsTrace(); private: - AssignmentCaseConfig caseConfig; - - AbstractExecutionHelper bugReporter; - - Set assert_points; - Set analyzedNodes; - - Map cycleHeadToCycle; + AEReporter bugReporter; - /// harness-only raw handle to the underlying state manager; never used by - /// student code (which only sees the svfStateMgr facade above). + /// Handle to the underlying state manager. Used by the merged + /// `getAbsValue` / `updateAbsValue` / `loadValue` / `storeValue` / GEP + /// primitives above, and by the harness-only post-trace accessors. AbstractInterpretation* ai = nullptr; }; diff --git a/Assignment-3/CPP/test-ae.cpp b/Assignment-3/CPP/test-ae.cpp index e4d12fe..db918a0 100644 --- a/Assignment-3/CPP/test-ae.cpp +++ b/Assignment-3/CPP/test-ae.cpp @@ -81,7 +81,7 @@ int main(int argc, char** argv) { double wallSeconds = std::chrono::duration(finished - started).count(); if (config.emitJson) { std::cout << "ASS3_JSON_BEGIN\n"; - ae.writeJsonSummary(std::cout, wallSeconds, 0, true); + ae.getReporter().writeJsonSummary(std::cout, pag->getICFG(), wallSeconds, 0, true); std::cout << "ASS3_JSON_END\n"; } diff --git a/Assignment-3/Python/Assignment_3_Helper.py b/Assignment-3/Python/AEReporter.py similarity index 80% rename from Assignment-3/Python/Assignment_3_Helper.py rename to Assignment-3/Python/AEReporter.py index 9941695..7d3250e 100644 --- a/Assignment-3/Python/Assignment_3_Helper.py +++ b/Assignment-3/Python/AEReporter.py @@ -216,43 +216,30 @@ def __str__(self): -class AbstractExecutionHelper: +class AEReporter: """ - A helper class for abstract execution, providing functionality for bug reporting, - managing GEP object offsets, and other utilities. + Bug reporter and shared helper utilities for the Assignment-3 + abstract-interpretation harness. """ def __init__(self, svfir: pysvf.SVFIR, svf_state_mgr: 'pysvf.AbstractInterpretation' = None): - """ - Initialize member variables. - """ - # Map to store exception information for each ICFGNode + # Map ICFGNode -> diagnostic message for each detected bug. self.node_to_bug_info = {} self.svfir = svfir - # Optional: if a stateMgr is provided, getByteOffset delegates to its - # getGepByteOffset (the C++ side does the same via svfStateMgr->...). self.svf_state_mgr = svf_state_mgr + # Harness bookkeeping: stub call sites the analysis actually reached. + self.assert_points = set() + + def noteAssertionPoint(self, call): + self.assert_points.add(call) + + def isAssertionPoint(self, call) -> bool: + return call in self.assert_points - # ------------------------------------------------------------------ - # Helpers that used to live as instance methods on `pysvf.AbstractState`. - # Upstream (Semi-Sparse refactor) moved them to `AbstractInterpretation` - # (formerly `AbstractStateManager`, whose public header was removed), - # which requires a sparsity-aware trace we don't keep here. We re-implement - # the dense-mode behavior using only public AbstractState surface so the - # Python side mirrors the C++ side (`AbstractExecutionHelper::getByteOffset`). - # ------------------------------------------------------------------ def getByteOffset(self, abstract_state: pysvf.AbstractState, gep: pysvf.GepStmt) -> pysvf.IntervalValue: - # Delegates to the stateMgr's upstream impl, mirroring the C++ side - # `svfStateMgr->getGepByteOffset(gep)`. The `abstract_state` argument - # is kept in the signature for symmetry with the call-site shape but - # is not consulted here -- the mgr reads non-constant indices from - # its own trace, which is the same trace this helper writes to. return self.svf_state_mgr.getGepByteOffset(gep) def getGepObjAddrs(self, abstract_state: pysvf.AbstractState, var_id: int, offset: pysvf.IntervalValue) -> pysvf.AddressValue: - # Delegates to the stateMgr's upstream impl. mgr.getGepObjAddrs takes - # a ValVar* (and infers the ICFGNode from it), so we look the var up - # by id. Matches the C++ side `svfStateMgr->getGepObjAddrs(...)`. pointer = self.svfir.getGNode(var_id) return self.svf_state_mgr.getGepObjAddrs(pointer, offset) @@ -268,15 +255,9 @@ def getPointeeElement(self, abstract_state: pysvf.AbstractState, var_id: int): return None def getAllocaInstByteSize(self, abstract_state: pysvf.AbstractState, addr: pysvf.AddrStmt) -> int: - # Delegates to the stateMgr's upstream impl. mgr.getAllocaInstByteSize - # takes the AddrStmt directly (it derives node + sizes itself). Matches - # the C++ side `svfStateMgr->getAllocaInstByteSize(addr)`. return self.svf_state_mgr.getAllocaInstByteSize(addr) def reportBufOverflow(self, node, msg): - """ - Record an overflow node and its associated exception. - """ self.node_to_bug_info[node] = msg def printReport(self): @@ -286,14 +267,7 @@ def printReport(self): for node, msg in self.node_to_bug_info.items(): print(f"{node}: {msg}\n---------------------------------------------") - - # GEP-offset tracking (updateGepObjOffsetFromBase / has / get) was removed - # from the helper this year — students implement it in Assignment_3.py. - def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, src: pysvf.SVFVar, len: pysvf.IntervalValue, start_idx: int): - """ - Handle a memcpy operation in the abstract state. - """ assert isinstance(abstractState, pysvf.AbstractState), "abstractState is not a pysvf.AbstractState" assert isinstance(dst, pysvf.SVFVar), "dst is not a pysvf.SVFVar" assert isinstance(src, pysvf.SVFVar), "src is not a pysvf.SVFVar" @@ -314,7 +288,7 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr else: raise AssertionError("Unsupported type") size = len.lb().getNumeral() - range_val = size/elemSize + range_val = size / elemSize if abstractState.inVarToAddrsTable(dstId) and abstractState.inVarToAddrsTable(srcId): for index in range(0, int(range_val)): expr_src = self.getGepObjAddrs(abstractState, srcId, pysvf.IntervalValue(index)) @@ -326,23 +300,12 @@ def handleMemcpy(self, abstractState: pysvf.AbstractState, dst: pysvf.SVFVar, sr lhs = abstractState.load(addr_src) abstractState.store(addr_dst, lhs) - def getStrlen(self, abstractState, strValue): - """ - Calculate the length of a string in the abstract state. - - :param abstractState: The abstract state containing variable information. - :param strValue: The SVF variable representing the string. - :return: An IntervalValue representing the string length. - """ value_id = strValue.getId() dst_size = 0 - - # Determine the size of the destination object for addr in abstractState[value_id].getAddrs(): obj_id = abstractState.getIDFromAddr(addr) base_object = self.svfir.getBaseObject(obj_id) - if base_object.isConstantByteSize(): dst_size = base_object.getByteSizeOfObj() else: @@ -350,25 +313,17 @@ def getStrlen(self, abstractState, strValue): for stmt in icfg_node.getSVFStmts(): if isinstance(stmt, pysvf.AddrStmt): dst_size = self.getAllocaInstByteSize(abstractState, stmt) - length = 0 elem_size = 1 - - # Calculate the string length if abstractState.getVar(value_id).isAddr(): for index in range(dst_size): expr0 = self.getGepObjAddrs(abstractState, value_id, pysvf.IntervalValue(index)) val = pysvf.AbstractValue() - for addr in expr0: val.join_with(abstractState.load(addr)) - if val.isInterval() and chr(val.getInterval().getIntNumeral()) == '\0': break - length += 1 - - # Determine the size of each element in the string if strValue.getType().isArrayTy(): elem_size = strValue.getType().getTypeOfElement().getByteSize() elif strValue.getType().isPointerTy(): @@ -382,17 +337,11 @@ def getStrlen(self, abstractState, strValue): elem_size = 1 else: raise AssertionError("Unsupported type") - - # Return the calculated string length as an IntervalValue if length == 0: return pysvf.IntervalValue(0, pysvf.Options.max_field_limit()) else: return pysvf.IntervalValue(length * elem_size) - # addToGepObjOffsetFromBase / hasGepObjOffsetFromBase / getGepObjOffsetFromBase - # are no longer part of the helper API; students manage their own - # GEP-offset-from-base state in Assignment_3.py. - class AbstractExecution: def __init__(self, pag: pysvf.SVFIR): @@ -400,11 +349,6 @@ def __init__(self, pag: pysvf.SVFIR): self.icfg = pag.getICFG() self.call_site_stack = [] self.func_to_wto = {} - # Functions whose WTO is currently being iterated; re-entry returns - # early so the outer cycle drives the recursion to a fixpoint. This - # is the only mechanism for handling recursion — there is no separate - # "is recursive callsite?" check. - self._funcs_in_flight = set() self.pre_abs_trace = {} # Owns the post-trace and is the backing store for AbsExtAPI as well # as the GEP/load/store helpers (getGepByteOffset etc.). Replaces @@ -418,8 +362,8 @@ def __init__(self, pag: pysvf.SVFIR): # Alias preserved so existing call-sites `self.post_abs_trace[node]` # keep working. The mgr supports __getitem__/__setitem__/__contains__. self.post_abs_trace = self.svf_state_mgr - self.buf_overflow_helper = AbstractExecutionHelper(self.svfir, self.svf_state_mgr) - self.assert_points = set() + self.buf_overflow_helper = AEReporter(self.svfir, self.svf_state_mgr) + self.widen_delay = 3 self.addressMask = 0x7f000000 self.flippedAddressMask = (self.addressMask^0xffffffff) @@ -435,7 +379,9 @@ def __init__(self, pag: pysvf.SVFIR): """ def initWto(self): callgraphScc = pysvf.getCallGraphSCC() + self._callgraph_scc = callgraphScc callgraph = self.svfir.getCallGraph() + self._callgraph = callgraph # SCC membership comes from pysvf: CallGraphSCC.subNodes(rep) returns # the call-graph node IDs in the SCC represented by 'rep'. We only @@ -471,13 +417,6 @@ def initWto(self): # hash consistently across calls, so don't use the object as a key. self.func_to_wto[fun.getId()] = wto - # Build mapping from cycle head nodes to their corresponding cycles - # (loop heads AND recursive-function entries). - self.cycle_head_to_cycle = {} - for wto in self.func_to_wto.values(): - for comp in wto.components: - if isinstance(comp, ICFGWTOCycle): - self.cycle_head_to_cycle[comp.head.node] = comp """ @@ -507,30 +446,20 @@ def handleGlobalNode(self): Iterate a function's interprocedural WTO components. Singletons are handled directly; cycles (loop heads AND recursive-function - entries) are driven to a fixpoint by handleICFGCycle. - - `_funcs_in_flight` guards re-entry: if this WTO is already on the call - stack (i.e. a recursive callsite tried to inline back into us), return - immediately and let the outer cycle's widen/narrow iteration drive the - recursion to a fixpoint. This is the only mechanism for handling - recursion — there is no separate "is recursive callsite?" check. + entries) are driven to a fixpoint by handleICFGCycle. Recursive callsites + are filtered out earlier in handleCallSite via ``inSameCallGraphSCC``, so + handleFunction never re-enters itself. """ def handleFunction(self, funEntry: pysvf.ICFGNode): fun = funEntry.getFun() wto = self.func_to_wto.get(fun.getId()) if wto is None: return - if fun.getId() in self._funcs_in_flight: - return - self._funcs_in_flight.add(fun.getId()) - try: - for comp in wto.components: - if isinstance(comp, ICFGWTOCycle): - self.handleICFGCycle(comp) - elif isinstance(comp, ICFGWTONode): - self.handleICFGNode(comp.getICFGNode()) - finally: - self._funcs_in_flight.discard(fun.getId()) + for comp in wto.components: + if isinstance(comp, ICFGWTOCycle): + self.handleICFGCycle(comp) + elif isinstance(comp, ICFGWTONode): + self.handleICFGNode(comp.getICFGNode()) """ Handle a singleton WTO @@ -574,8 +503,11 @@ def handleICFGNode(self, node: pysvf.ICFGNode): def handleCallSite(self, node: pysvf.CallICFGNode): fun_name = node.getCalledFunction().getName() print(fun_name) - if fun_name == "OVERFLOW" or fun_name == "svf_assert" or fun_name == "svf_assert_eq": + if fun_name == "svf_assert" or fun_name == "svf_assert_eq": self.handleStubFunction(node) + elif fun_name in ("UNSAFE_BUFACCESS", "SAFE_BUFACCESS", + "UNSAFE_PTRDEREF", "SAFE_PTRDEREF"): + self.handleCheckpointStubs(node) elif fun_name == "nd" or fun_name == "rand": lhs_id = node.getRetICFGNode().getActualRet().getId() self.post_abs_trace[node][lhs_id] = AbstractValue(IntervalValue.top()) @@ -584,10 +516,27 @@ def handleCallSite(self, node: pysvf.CallICFGNode): elif pysvf.isExtCall(node.getCalledFunction()): pass else: - # Inline unconditionally; handleFunction's `_funcs_in_flight` - # guard short-circuits recursive re-entry, and the outer WTO - # cycle drives the recursion to a fixpoint. - self.handleFunction(self.svfir.getICFG().getFunEntryICFGNode(node.getCalledFunction())) + # Skip recursive callsites (within the same call-graph SCC): the + # interprocedural WTO built in initWto already encoded this as a + # back-edge, so the outer cycle's widen/narrow iteration in + # handleICFGCycle drives the recursion to a fixpoint. Mirrors + # SVF's AbstractInterpretation::skipRecursiveCall. + callee = node.getCalledFunction() + caller = node.getCaller() + if caller is not None and self.inSameCallGraphSCC(caller, callee): + return + self.handleFunction(self.svfir.getICFG().getFunEntryICFGNode(callee)) + + def inSameCallGraphSCC(self, fun1, fun2) -> bool: + scc = getattr(self, "_callgraph_scc", None) + cg = getattr(self, "_callgraph", None) + if scc is None or cg is None: + return False + n1 = cg.getCallGraphNodeByFunObj(fun1) + n2 = cg.getCallGraphNodeByFunObj(fun2) + if n1 is None or n2 is None: + return False + return scc.repNode(n1.getId()) == scc.repNode(n2.getId()) """ @@ -615,7 +564,7 @@ def handleCallSite(self, node: pysvf.CallICFGNode): def handleStubFunction(self, callNode: pysvf.CallICFGNode): # Get the callee function associated with the call site if callNode.getCalledFunction().getName() == "svf_assert": - self.assert_points.add(callNode) + self.buf_overflow_helper.noteAssertionPoint(callNode) # If the condition is false, the program is infeasible arg0 = callNode.getArgument(0).getId() abstract_state = self.post_abs_trace[callNode] @@ -632,39 +581,69 @@ def handleStubFunction(self, callNode: pysvf.CallICFGNode): print(f"The assertion ({callNode}) is unsatisfiable!!") assert False - elif callNode.getCalledFunction().getName() == "OVERFLOW": - # Harness-only ground truth: read the GepObjVar's accumulated - # offset from base via SVF's native getConstantFieldIdx, so the - # stub verdict does not depend on the student's gep_obj_offset - # tracking. - self.assert_points.add(callNode) - arg0 = callNode.getArgument(0).getId() - arg1 = callNode.getArgument(1).getId() - abstract_state = self.post_abs_trace[callNode] - gep_rhs_val = abstract_state[arg0] - - if gep_rhs_val.isAddr(): - overflow = False - access_offset = int(abstract_state[arg1].getInterval().ub()) - for addr in gep_rhs_val.getAddrs(): - obj_id = abstract_state.getIDFromAddr(addr) - base_obj = self.svfir.getBaseObject(obj_id) - if base_obj is None or not base_obj.isConstantByteSize(): - continue - size = base_obj.getByteSizeOfObj() - gnode = self.svfir.getGNode(obj_id) - base_offset = gnode.getConstantFieldIdx() if isinstance(gnode, pysvf.GepObjVar) else 0 - if base_offset + access_offset >= size: - overflow = True - if overflow: - print("Your implementation successfully detected the buffer overflow") - else: - print(f"Your implementation failed to detect the buffer overflow! {callNode}") - assert False - else: - print(f"Your implementation failed to detect the buffer overflow! {callNode}") - assert False + def handleCheckpointStubs(self, callNode: pysvf.CallICFGNode): + """SAFE_/UNSAFE_ checkpoints: ground-truth bug markers. + + Records the call site in ``assert_points`` so + :py:meth:`ensureAllAssertsValidated` can verify the student's control + flow reached it. The harness reports a bug iff its independent + ground-truth check (bypassing the student's predicates) sees one. + """ + self.buf_overflow_helper.noteAssertionPoint(callNode) + fun_name = callNode.getCalledFunction().getName() + abstract_state = self.post_abs_trace[callNode] + if fun_name in ("SAFE_BUFACCESS", "UNSAFE_BUFACCESS"): + if callNode.arg_size() < 2: + return + length = abstract_state[callNode.getArgument(1).getId()].getInterval() + if length.isBottom(): + length = IntervalValue(0) + ptr = callNode.getArgument(0) + if not self._harnessSafeAccess(abstract_state, ptr, length - IntervalValue(1)): + self.buf_overflow_helper.reportBufOverflow( + callNode, f"buffer-overflow at {callNode}") + elif fun_name in ("SAFE_PTRDEREF", "UNSAFE_PTRDEREF"): + if callNode.arg_size() < 1: + return + ptr = callNode.getArgument(0) + if not self._harnessSafeDeref(abstract_state, ptr): + self.buf_overflow_helper.reportBufOverflow( + callNode, f"nullptr-deref at {callNode}") + + def _harnessSafeAccess(self, abstract_state, value, length: IntervalValue) -> bool: + ptr_val = abstract_state[value.getId()] + if not ptr_val.isAddr(): + return True + for addr in ptr_val.getAddrs(): + if pysvf.AbstractState.isBlackHoleObjAddr(addr) or pysvf.AbstractState.isNullMem(addr): + continue + obj_id = abstract_state.getIDFromAddr(addr) + base_obj = self.svfir.getBaseObject(obj_id) + if base_obj is None or base_obj.isBlackHoleObj() or not base_obj.isConstantByteSize(): + continue + size = base_obj.getByteSizeOfObj() + gnode = self.svfir.getGNode(obj_id) + base_offset = IntervalValue(gnode.getConstantFieldIdx()) if isinstance(gnode, pysvf.GepObjVar) else IntervalValue(0) + offset = base_offset + length + if int(offset.ub()) >= size: + return False + return True + + def _harnessSafeDeref(self, abstract_state, value) -> bool: + if value is None or isinstance(value, pysvf.ConstNullPtrValVar): + return False + abs_val = abstract_state[value.getId()] + if not abs_val.isAddr(): + return True + for addr in abs_val.getAddrs(): + if pysvf.AbstractState.isBlackHoleObjAddr(addr): + continue + if pysvf.AbstractState.isNullMem(addr): + return False + if abstract_state.isFreedMem(addr): + return False + return True # mergeStatesFromPredecessors is a student TODO this year and lives in @@ -687,29 +666,39 @@ def isBranchFeasible(self, intraEdge: pysvf.IntraCFGEdge, abstractState: pysvf. def ensureAllAssertsValidated(self): - svf_assert_to_be_verified = 0 - overflow_assert_to_be_verified = 0 + """Verify the student's control flow reached every ground-truth stub. + + Recognised stubs: + * ``svf_assert`` / ``svf_assert_eq`` -- abstract-state assertions + * ``UNSAFE_PTRDEREF`` / ``SAFE_PTRDEREF`` -- null-deref ground truth + * ``UNSAFE_BUFACCESS`` / ``SAFE_BUFACCESS`` -- buffer-access ground truth + A missed stub site means the student's control-flow logic skipped a + place the grader cares about. Additionally requires that the number + of reported bugs is at least the number of ``UNSAFE_*`` stubs. + """ + assert_stubs = {"svf_assert", "svf_assert_eq"} + checkpoint_stubs = {"UNSAFE_PTRDEREF", "SAFE_PTRDEREF", + "UNSAFE_BUFACCESS", "SAFE_BUFACCESS"} + unsafe_to_be_verified = 0 for node in self.svfir.getICFG().getNodes(): - if isinstance(node, pysvf.CallICFGNode): - called_function = node.getCalledFunction() - if called_function: - function_name = called_function.getName() - if function_name in ["svf_assert", "OVERFLOW"]: - if function_name == "svf_assert": - svf_assert_to_be_verified += 1 - elif function_name == "OVERFLOW": - overflow_assert_to_be_verified += 1 - else: - pass - - if node not in self.assert_points: - raise AssertionError( - f"The stub function callsite (svf_assert or OVERFLOW) has not been checked: {node}" - ) - - assert overflow_assert_to_be_verified <= len(self.buf_overflow_helper.node_to_bug_info), \ - "The number of stub asserts (ground truth) should <= the number of overflow reported" + if not isinstance(node, pysvf.CallICFGNode): + continue + called_function = node.getCalledFunction() + if not called_function: + continue + name = called_function.getName() + if name not in assert_stubs and name not in checkpoint_stubs: + continue + if name.startswith("UNSAFE_"): + unsafe_to_be_verified += 1 + if not self.buf_overflow_helper.isAssertionPoint(node): + raise AssertionError( + f"The stub function callsite ({name}) was not reached by " + f"the student's control flow: {node}" + ) + assert unsafe_to_be_verified <= len(self.buf_overflow_helper.node_to_bug_info), \ + "The number of UNSAFE_* stubs (ground truth) should <= the number of bugs reported" diff --git a/Assignment-3/Python/Assignment_3.py b/Assignment-3/Python/Assignment_3.py index 9178d52..5e230d0 100644 --- a/Assignment-3/Python/Assignment_3.py +++ b/Assignment-3/Python/Assignment_3.py @@ -1,7 +1,17 @@ -from Assignment_3_Helper import * +from AEReporter import * import pysvf +# ============================================================================= +# Student TODOs +# ============================================================================= +# Implement abstract interpretation for verification and bug detection. The +# harness (AEReporter.py) drives the analysis and calls into the entry points +# below (please do not delete); You are free to add any internal classes and +# helper methods you need within Assignment_3.py. +# ============================================================================= + + class Assignment3(AbstractExecution): def __init__(self, pag: pysvf.SVFIR) -> None: super().__init__(pag) diff --git a/Assignment-3/Tests/buf_overflow.c b/Assignment-3/Tests/buf_overflow.c index b38ed5d..d5714c9 100644 --- a/Assignment-3/Tests/buf_overflow.c +++ b/Assignment-3/Tests/buf_overflow.c @@ -1,5 +1,8 @@ +extern void UNSAFE_BUFACCESS(void *, unsigned int); + int main(void) { char buf[4] = {0}; + UNSAFE_BUFACCESS(buf + 4, 1); buf[4] = 'x'; return 0; } diff --git a/Assignment-3/Tests/buf_overflow.ll b/Assignment-3/Tests/buf_overflow.ll index 74ba647..4d62630 100644 --- a/Assignment-3/Tests/buf_overflow.ll +++ b/Assignment-3/Tests/buf_overflow.ll @@ -10,23 +10,29 @@ define dso_local i32 @main() #0 !dbg !10 { store i32 0, ptr %1, align 4 #dbg_declare(ptr %2, !15, !DIExpression(), !20) call void @llvm.memset.p0.i64(ptr align 1 %2, i8 0, i64 4, i1 false), !dbg !20 - %3 = getelementptr inbounds [4 x i8], ptr %2, i64 0, i64 4, !dbg !21 - store i8 120, ptr %3, align 1, !dbg !22 - ret i32 0, !dbg !23 + %3 = getelementptr inbounds [4 x i8], ptr %2, i64 0, i64 0, !dbg !21 + %4 = getelementptr inbounds i8, ptr %3, i64 4, !dbg !22 + call void @UNSAFE_BUFACCESS(ptr noundef %4, i32 noundef 1), !dbg !23 + %5 = getelementptr inbounds [4 x i8], ptr %2, i64 0, i64 4, !dbg !24 + store i8 120, ptr %5, align 1, !dbg !25 + ret i32 0, !dbg !26 } ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #1 +declare void @UNSAFE_BUFACCESS(ptr noundef, i32 noundef) #2 + attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} !llvm.ident = !{!9} -!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 21.1.0 (https://github.com/bjjwwang/LLVM-compile 4f7056e8ada487923d1c8f9bc38df6472008eda3)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "buf_overflow.c", directory: "/mnt/scratch/PAG/Wjw/vibe/ass3-template-wt/Assignment-3/Tests", checksumkind: CSK_MD5, checksum: "6664fbbfa27f2e3eaf102f9e78ad61d1") +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "Homebrew clang version 21.1.8", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "buf_overflow.c", directory: "/Users/z5489735/2023/0603/template-SSA/Assignment-3/Tests", checksumkind: CSK_MD5, checksum: "85ee42df20341852cdffa71c1f7089a7") !2 = !{i32 7, !"Dwarf Version", i32 5} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} @@ -34,18 +40,21 @@ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } !6 = !{i32 7, !"PIE Level", i32 2} !7 = !{i32 7, !"uwtable", i32 2} !8 = !{i32 7, !"frame-pointer", i32 2} -!9 = !{!"clang version 21.1.0 (https://github.com/bjjwwang/LLVM-compile 4f7056e8ada487923d1c8f9bc38df6472008eda3)"} -!10 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14) +!9 = !{!"Homebrew clang version 21.1.8"} +!10 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !11, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !14) !11 = !DISubroutineType(types: !12) !12 = !{!13} !13 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !14 = !{} -!15 = !DILocalVariable(name: "buf", scope: !10, file: !1, line: 2, type: !16) +!15 = !DILocalVariable(name: "buf", scope: !10, file: !1, line: 4, type: !16) !16 = !DICompositeType(tag: DW_TAG_array_type, baseType: !17, size: 32, elements: !18) !17 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) !18 = !{!19} !19 = !DISubrange(count: 4) -!20 = !DILocation(line: 2, column: 10, scope: !10) -!21 = !DILocation(line: 3, column: 5, scope: !10) -!22 = !DILocation(line: 3, column: 12, scope: !10) -!23 = !DILocation(line: 4, column: 5, scope: !10) +!20 = !DILocation(line: 4, column: 10, scope: !10) +!21 = !DILocation(line: 5, column: 22, scope: !10) +!22 = !DILocation(line: 5, column: 26, scope: !10) +!23 = !DILocation(line: 5, column: 5, scope: !10) +!24 = !DILocation(line: 6, column: 5, scope: !10) +!25 = !DILocation(line: 6, column: 12, scope: !10) +!26 = !DILocation(line: 7, column: 5, scope: !10) diff --git a/Assignment-3/Tests/null_deref.c b/Assignment-3/Tests/null_deref.c index a2f01df..5b05662 100644 --- a/Assignment-3/Tests/null_deref.c +++ b/Assignment-3/Tests/null_deref.c @@ -1,7 +1,7 @@ -extern void UNSAFE_LOAD(void *); +extern void UNSAFE_PTRDEREF(void *); int main(void) { int *p = (int *)0; - UNSAFE_LOAD(p); + UNSAFE_PTRDEREF(p); return 0; } diff --git a/Assignment-3/Tests/null_deref.ll b/Assignment-3/Tests/null_deref.ll index 16c5063..da27e16 100644 --- a/Assignment-3/Tests/null_deref.ll +++ b/Assignment-3/Tests/null_deref.ll @@ -11,11 +11,11 @@ define dso_local i32 @main() #0 !dbg !13 { #dbg_declare(ptr %2, !17, !DIExpression(), !18) store ptr null, ptr %2, align 8, !dbg !18 %3 = load ptr, ptr %2, align 8, !dbg !19 - call void @UNSAFE_LOAD(ptr noundef %3), !dbg !20 + call void @UNSAFE_PTRDEREF(ptr noundef %3), !dbg !20 ret i32 0, !dbg !21 } -declare void @UNSAFE_LOAD(ptr noundef) #1 +declare void @UNSAFE_PTRDEREF(ptr noundef) #1 attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } @@ -24,8 +24,8 @@ attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protect !llvm.module.flags = !{!5, !6, !7, !8, !9, !10, !11} !llvm.ident = !{!12} -!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 21.1.0 (https://github.com/bjjwwang/LLVM-compile 4f7056e8ada487923d1c8f9bc38df6472008eda3)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "null_deref.c", directory: "/mnt/scratch/PAG/Wjw/vibe/ass3-template-wt/Assignment-3/Tests", checksumkind: CSK_MD5, checksum: "69381433b6fc6047d3e75f8d46b18ae0") +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "Homebrew clang version 21.1.8", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "null_deref.c", directory: "/Users/z5489735/2023/0603/template-SSA/Assignment-3/Tests", checksumkind: CSK_MD5, checksum: "317a43028d32ffea2a506ef7d909ebfe") !2 = !{!3} !3 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !4, size: 64) !4 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) @@ -36,13 +36,13 @@ attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protect !9 = !{i32 7, !"PIE Level", i32 2} !10 = !{i32 7, !"uwtable", i32 2} !11 = !{i32 7, !"frame-pointer", i32 2} -!12 = !{!"clang version 21.1.0 (https://github.com/bjjwwang/LLVM-compile 4f7056e8ada487923d1c8f9bc38df6472008eda3)"} +!12 = !{!"Homebrew clang version 21.1.8"} !13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !14, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !16) !14 = !DISubroutineType(types: !15) !15 = !{!4} !16 = !{} !17 = !DILocalVariable(name: "p", scope: !13, file: !1, line: 4, type: !3) !18 = !DILocation(line: 4, column: 10, scope: !13) -!19 = !DILocation(line: 5, column: 17, scope: !13) +!19 = !DILocation(line: 5, column: 21, scope: !13) !20 = !DILocation(line: 5, column: 5, scope: !13) !21 = !DILocation(line: 6, column: 5, scope: !13)