#! /usr/bin/python3
from __future__ import annotations
from scripts.Infrastructure.Variables import *
from scripts.Infrastructure.ErrorGenerator import ErrorGenerator
from scripts.Infrastructure.Instruction import Instruction
from scripts.Infrastructure.MPICall import MPICall
from scripts.Infrastructure.MPICallFactory import MPICallFactory, CorrectMPICallFactory
from scripts.Infrastructure.CorrectParameter import CorrectParameterFactory
from scripts.Infrastructure.Template import TemplateManager
from scripts.Infrastructure.TemplateFactory import get_allocated_window, get_rma_call
from scripts.Infrastructure.AllocCall import AllocCall
import itertools
from typing import Tuple, List
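
# Generates "global concurrency" (data race) test cases for MPI RMA windows: pairs of
# accesses to the same window memory from different ranks, emitted once without the
# required synchronization (erroneous) and once with it (correct), across several
# synchronization patterns (fence, lock_all, lock/unlock, request-based RMA, PSCW).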
class GlobalConcurrencyErrorRMA(ErrorGenerator):
def __init__(self):
self.cfmpi = CorrectMPICallFactory()
        # RMA calls whose origin buffer is the local window buffer:
        # MPI_Get writes to it locally, MPI_Put reads from it locally
localbufwrite = CorrectMPICallFactory().mpi_get()
localbufwrite.set_arg(
"origin_addr", CorrectParameterFactory().winbuf_var_name)
localbufwrite.set_rank_executing(1)
localbufwrite.set_arg("origin_count", "1")
localbufwrite.set_arg("target_count", "1")
localbufwrite.set_arg("target_rank", "0")
localbufread = CorrectMPICallFactory().mpi_put()
localbufread.set_arg(
"origin_addr", CorrectParameterFactory().winbuf_var_name)
localbufread.set_rank_executing(1)
localbufread.set_arg("origin_count", "1")
localbufread.set_arg("target_count", "1")
localbufread.set_arg("target_rank", "0")
self.buf_instructions = {
"bufread": Instruction(f'printf("winbuf is %d\\n", {CorrectParameterFactory().winbuf_var_name}[1]);', 1, "bufread"),
"bufwrite": Instruction(f'{CorrectParameterFactory().winbuf_var_name}[1] = 42;', 1, "bufwrite"),
"localbufread": localbufread,
"localbufwrite": localbufwrite
}
def get_feature(self):
return ["RMA"]
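
    # Each sync mode below emits the body of one test case: alloc_inst allocates the
    # local buffer, alloc1/op1 and alloc2/op2 are the two (potentially conflicting)
    # accesses with their allocation instructions, and shouldsync selects whether
    # synchronization is inserted between them. A sync mode returns False when the
    # given combination does not apply to it (the case is then skipped).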
def fence(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
# open access epoch + sync
tm.register_instruction(self.cfmpi.mpi_win_fence())
tm.register_instruction(alloc_inst)
tm.register_instruction(alloc1)
tm.register_instruction(op1, "OP1")
# if accesses should be synced, add fence
if shouldsync:
tm.register_instruction(
self.cfmpi.mpi_win_fence(), rank_to_execute="all")
tm.register_instruction(alloc2)
tm.register_instruction(op2, "OP2")
# finish access epoch + sync
tm.register_instruction(self.cfmpi.mpi_win_fence())
return True
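
    # passive target epoch via MPI_Win_lock_all/unlock_all on all ranks; op1 is always
    # followed by a flush_all, and a barrier separates the accesses only when they
    # should be synchronized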
def lockall(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
# open access epoch + sync
tm.register_instruction(
self.cfmpi.mpi_win_lock_all(), rank_to_execute="all")
tm.register_instruction(alloc_inst)
tm.register_instruction(alloc1)
tm.register_instruction(op1, "OP1")
tm.register_instruction(
self.cfmpi.mpi_win_flush_all(), rank_to_execute="all")
# if accesses should be synced, add barrier
if shouldsync:
tm.register_instruction(
self.cfmpi.mpi_barrier(), rank_to_execute="all")
tm.register_instruction(alloc2)
tm.register_instruction(op2, "OP2")
# finish access epoch + sync
tm.register_instruction(
self.cfmpi.mpi_win_unlock_all(), rank_to_execute="all")
return True
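
    # passive target sync with per-rank MPI_Win_lock/unlock epochs on the target
    # window (rank 1); exclusive locks serialize the two epochs, shared locks do not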
def lockflush(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
lock0 = self.cfmpi.mpi_win_lock()
unlock0 = self.cfmpi.mpi_win_unlock()
lock1 = self.cfmpi.mpi_win_lock()
unlock1 = self.cfmpi.mpi_win_unlock()
lock0.set_arg("rank", "1")
unlock0.set_arg("rank", "1")
lock1.set_arg("rank", "1")
unlock1.set_arg("rank", "1")
if not shouldsync:
# set lock to shared (instead of exclusive) so that no synchronization is ensured
lock0.set_arg("lock_type", "MPI_LOCK_SHARED")
lock1.set_arg("lock_type", "MPI_LOCK_SHARED")
tm.register_instruction(alloc_inst)
tm.register_instruction(lock0, rank_to_execute=0)
tm.register_instruction(alloc1)
tm.register_instruction(op1, "OP1")
tm.register_instruction(unlock0, rank_to_execute=0)
tm.register_instruction(
lock1, rank_to_execute=op2.get_rank_executing())
tm.register_instruction(alloc2)
tm.register_instruction(op2, "OP2")
tm.register_instruction(
unlock1, rank_to_execute=op2.get_rank_executing())
return True
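
    # request-based RMA: an MPI_Wait on op1's request provides the synchronization;
    # only applicable when op1 is a request-based call (e.g. mpi_rget, mpi_rput)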
def request(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
        # only consider combinations where the first operation is a request-based RMA call
if not isinstance(op1, MPICall) or not op1.has_arg("request"):
return False
        # wait for the request of the first (request-based) operation
wait = self.cfmpi.mpi_wait()
wait.set_arg("request", op1.get_arg("request"))
# open access epoch + sync
tm.register_instruction(self.cfmpi.mpi_win_lock_all())
tm.register_instruction(alloc_inst)
tm.register_instruction(alloc1)
tm.register_instruction(op1, "OP1")
# if accesses should be synced, wait for local completion of operation here
if shouldsync:
tm.register_instruction(wait, rank_to_execute=0)
tm.register_instruction(alloc2)
tm.register_instruction(op2, "OP2")
# finish access epoch + sync
tm.register_instruction(self.cfmpi.mpi_win_unlock_all())
return True
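
    # general active target synchronization (post-start-complete-wait): rank 0 opens an
    # access epoch with MPI_Win_start, rank 1 exposes the window with MPI_Win_post/wait;
    # MPI_Win_complete on rank 0 is placed before op2 when the accesses should be
    # synchronized, and after op2 otherwise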
def pscw(self, tm: TemplateManager, alloc_inst: Instruction, alloc1: List[Instruction], op1: Instruction, alloc2: List[Instruction], op2: Instruction, shouldsync: bool):
tm.register_instruction("MPI_Group world_group;")
tm.register_instruction(
"MPI_Comm_group(MPI_COMM_WORLD, &world_group);")
tm.register_instruction(
"int destrank = 1; MPI_Group mpi_group_0; MPI_Group_incl(world_group, 1, &destrank, &mpi_group_0);", rank_to_execute=0)
tm.register_instruction(
self.cfmpi.mpi_win_start(), rank_to_execute=0)
tm.register_instruction(alloc_inst)
tm.register_instruction(alloc1)
tm.register_instruction(op1, "OP1")
# if accesses should be synced, end access epoch here
if shouldsync:
tm.register_instruction(
self.cfmpi.mpi_win_complete(), rank_to_execute=0)
tm.register_instruction(alloc2)
tm.register_instruction(op2, "OP2")
# if accesses should not be synced, end access epoch here
if not shouldsync:
tm.register_instruction(
self.cfmpi.mpi_win_complete(), rank_to_execute=0)
tm.register_instruction(
"int srcrank = 0; MPI_Group mpi_group_0; MPI_Group_incl(world_group, 1, &srcrank, &mpi_group_0);", rank_to_execute=1)
tm.register_instruction(
self.cfmpi.mpi_win_post(), rank_to_execute=1)
tm.register_instruction(
self.cfmpi.mpi_win_wait(), rank_to_execute=1)
return True
    def get_mem_op(self, name: str, rank: int) -> Tuple[List[Instruction], Instruction, Instruction | None]:
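        # returns (allocation instructions, access instruction, optional cleanup/free
        # instruction): an RMA call via get_rma_call for "mpi_*" names, otherwise one
        # of the plain local buffer accesses prepared in __init__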
if name.startswith("mpi"):
return get_rma_call(self.tm, name, rank, name.replace("mpi_", ""))
else:
return ([], self.buf_instructions[name], None)
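
    # main driver: enumerate (access pair, sync mode, shouldsync) combinations, mark
    # the unsynchronized conflicting cases as erroneous, and yield one TemplateManager
    # per generated test case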
def generate(self, generate_level, real_world_score_table):
if generate_level == 1:
# only basic calls
remote_read = ["mpi_get"]
remote_write = ["mpi_put"]
remote_atomic_update = ["mpi_accumulate"]
else:
# everything
remote_read = ["mpi_get", "mpi_rget"]
remote_write = [
"mpi_put",
"mpi_rput",
]
remote_atomic_update = [
"mpi_accumulate",
"mpi_raccumulate",
"mpi_get_accumulate",
"mpi_rget_accumulate",
"mpi_fetch_and_op",
"mpi_compare_and_swap",
]
cf = CorrectParameterFactory()
        # pairs of access classes to combine; the boolean flag marks whether the pair of accesses conflicts (hasconflict)
remote_access_combinations: List[Tuple[List[str], List[str], bool]] = [
(remote_read, remote_read, False),
(remote_read, ["bufread", "localbufread"], False),
(remote_read, ["bufwrite", "localbufwrite"], True),
(remote_read, remote_write, True),
(remote_write, ["bufread", "localbufread"], True),
(remote_write, ["bufwrite", "localbufwrite"], True),
(remote_write, remote_write, True),
# atomics
(remote_atomic_update, remote_atomic_update, False),
(remote_atomic_update, remote_read, True),
(remote_atomic_update, remote_write, True),
(remote_atomic_update, ["bufread", "localbufread"], True),
(remote_atomic_update, ["bufwrite", "localbufwrite"], True),
]
sync_modes = [self.fence, self.lockall, self.lockflush, self.request, self.pscw]
if generate_level <= 2:
            # cover every sync mode, pairing each with one access combination;
            # remaining combinations fall back to the fence sync mode
combos = itertools.zip_longest(
remote_access_combinations, sync_modes, fillvalue=self.fence)
else:
# combine everything (= nested for loop)
combos = itertools.product(remote_access_combinations, sync_modes)
for (ops1, ops2, hasconflict), sync_mode in combos:
for shouldsync in [False, True]:
for (op1, op2) in itertools.product(ops1, ops2):
self.tm = TemplateManager(min_ranks=3)
(win_declare, win_alloc, win_free) = get_allocated_window(
"mpi_win_create", cf.get("win"), cf.winbuf_var_name, "int", "10")
# window allocation boilerplate
self.tm.register_instruction(win_alloc)
                    # local buffer allocation; the buffer can be used by calls on any rank
alloc_inst = AllocCall(
cf.dtype[0], cf.buf_size, cf.buf_var_name, use_malloc=False, identifier="alloc", rank="all")
op1_name = op1.replace("mpi_", "")
op2_name = op2.replace("mpi_", "")
alloc1, inst1, inst1_free = self.get_mem_op(op1, 0)
alloc2, inst2, inst2_free = self.get_mem_op(op2, 2)
                    # a non-conflicting pair is already correct without synchronization, so the synced variant is redundant
if not hasconflict and shouldsync:
continue
# if the operations are conflicting *and* we perform no synchronization between them, we have a race
if hasconflict and not shouldsync:
inst1.set_has_error(True)
inst2.set_has_error(True)
else:
inst1.set_has_error(False)
inst2.set_has_error(False)
# generate code for the given sync_mode
valid_case = sync_mode(self.tm, alloc_inst, alloc1, inst1, alloc2, inst2, shouldsync)
if not valid_case:
                        # this combination is not applicable or redundant for this sync_mode; skip it
continue
# finalize RMA call (if needed)
if inst1_free is not None:
self.tm.register_instruction(inst1_free)
if inst2_free is not None:
self.tm.register_instruction(inst2_free)
# window free boilerplate
self.tm.register_instruction(win_free)
self.tm.set_description(
("GlobalConcurrency" if hasconflict and not shouldsync else "Correct") +
"-"
+ sync_mode.__name__
+ "-"
+ op1_name
+ "_"
+ op2_name,
"full description",
)
yield self.tm
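

if __name__ == "__main__":
    # Minimal smoke-test sketch. Assumptions: the script is run from the repository
    # root so that the scripts.Infrastructure imports above resolve, and the second
    # generate() argument (real_world_score_table, unused by this generator) may be None.
    gen = GlobalConcurrencyErrorRMA()
    cases = list(gen.generate(1, None))
    print(f"generated {len(cases)} level-1 test cases")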