Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 35 additions & 31 deletions examples/deploy/cloudr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,41 @@
int main(int argc, char *argv[])
{
// Instantiating base managers
auto instanceManager = HiCR::backend::mpi::InstanceManager::createDefault(&argc, &argv);
auto communicationManager = HiCR::backend::mpi::CommunicationManager(MPI_COMM_WORLD);
auto memoryManager = HiCR::backend::mpi::MemoryManager();
auto computeManager = HiCR::backend::pthreads::ComputeManager();
auto instanceManager = HiCR::backend::mpi::InstanceManager::createDefault(&argc, &argv);
auto communicationManager = HiCR::backend::mpi::CommunicationManager(MPI_COMM_WORLD);
auto memoryManager = HiCR::backend::mpi::MemoryManager();
auto computeManager = HiCR::backend::pthreads::ComputeManager();

// Getting my base instance id and index
const auto baseInstanceId = instanceManager->getCurrentInstance()->getId();
uint64_t baseInstanceIdx = 0;
for (size_t i = 0; i < instanceManager->getInstances().size(); i++) if (instanceManager->getInstances()[i]->getId() == baseInstanceId) baseInstanceIdx = i;
const auto baseInstanceId = instanceManager->getCurrentInstance()->getId();
uint64_t baseInstanceIdx = 0;
for (size_t i = 0; i < instanceManager->getInstances().size(); i++)
if (instanceManager->getInstances()[i]->getId() == baseInstanceId) baseInstanceIdx = i;

// Checking for parameters
if (argc != 3)
{
{
fprintf(stderr, "Error: You need to pass a deployment.json and a cloudr.json file as parameters.\n");
instanceManager->finalize();
return 0;
return -1;
}

// Reading cloudr config file
std::string cloudrConfigFilePath = std::string(argv[2]);

// Parsing cloudr configuration file contents to a JSON object
std::ifstream ifs(cloudrConfigFilePath);
auto cloudrConfigJs = nlohmann::json::parse(ifs);
auto cloudrConfigJs = nlohmann::json::parse(ifs);

// Make sure we're running the same number of base instances as emulated cloudr instances
if (instanceManager->getInstances().size() != cloudrConfigJs["Topologies"].size())
{
fprintf(stderr, "Error: The number of requested cloudr instances (%lu) is different than the number of instances provided (%lu)\n", cloudrConfigJs["Topologies"].size(), instanceManager->getInstances().size());
{
fprintf(stderr,
"Error: The number of requested cloudr instances (%lu) is different than the number of instances provided (%lu)\n",
cloudrConfigJs["Topologies"].size(),
instanceManager->getInstances().size());
instanceManager->finalize();
return 0;
return -1;
}

// Getting my emulated topology from the cloudr configuration file
Expand All @@ -57,11 +61,11 @@ int main(int argc, char *argv[])
auto hwlocTopologyManager = HiCR::backend::hwloc::TopologyManager(&hwlocTopology);

// Finding the first memory space and compute resource to create our RPC engine
const auto& topology = hwlocTopologyManager.queryTopology();
const auto& firstDevice = topology.getDevices().begin().operator*();
const auto& RPCMemorySpace = firstDevice->getMemorySpaceList().begin().operator*();
const auto& RPCComputeResource = firstDevice->getComputeResourceList().begin().operator*();
const auto &topology = hwlocTopologyManager.queryTopology();
const auto &firstDevice = topology.getDevices().begin().operator*();
const auto &RPCMemorySpace = firstDevice->getMemorySpaceList().begin().operator*();
const auto &RPCComputeResource = firstDevice->getComputeResourceList().begin().operator*();

// Instantiating RPC engine
HiCR::frontend::RPCEngine rpcEngine(communicationManager, *instanceManager, memoryManager, computeManager, RPCMemorySpace, RPCComputeResource);

Expand All @@ -72,14 +76,13 @@ int main(int argc, char *argv[])
deployr::Deployment deployment;

// Instantiating CloudR
HiCR::backend::cloudr::InstanceManager cloudrInstanceManager(&rpcEngine, emulatedTopology, [&]()
{
HiCR::backend::cloudr::InstanceManager cloudrInstanceManager(&rpcEngine, emulatedTopology, [&]() {
// Getting our current cloudr instance
const auto& currentInstance = cloudrInstanceManager.getCurrentInstance();
const auto &currentInstance = cloudrInstanceManager.getCurrentInstance();

// Getting our instance's emulated topology
const auto& emulatedTopology = dynamic_pointer_cast<HiCR::backend::cloudr::Instance>(currentInstance)->getTopology();
const auto &emulatedTopology = dynamic_pointer_cast<HiCR::backend::cloudr::Instance>(currentInstance)->getTopology();

// Creating deployr object
deployr::DeployR deployr(&cloudrInstanceManager, &rpcEngine, emulatedTopology);

Expand All @@ -101,27 +104,27 @@ int main(int argc, char *argv[])

// Parsing deployment file contents to a JSON object
std::ifstream ifs(deploymentFilePath);
auto deploymentJs = nlohmann::json::parse(ifs);
auto deploymentJs = nlohmann::json::parse(ifs);

// Getting requested topologies from the json file
for (size_t i = 0; i < deploymentJs["Runners"].size(); i++)
for (size_t i = 0; i < deploymentJs["Runners"].size(); i++)
{
// Getting runner
const auto& runner = deploymentJs["Runners"][i];
const auto &runner = deploymentJs["Runners"][i];

// Assigning runner topology
const auto runnerTopology = HiCR::Topology(runner["Topology"]);

// Asking cloudr to create new instances based on the topology requirement
const auto instanceTemplate = cloudrInstanceManager.createInstanceTemplate(runnerTopology);
auto instance = cloudrInstanceManager.createInstance(*instanceTemplate);
auto instance = cloudrInstanceManager.createInstance(*instanceTemplate);

// Adding new instances to list of newly created instances
newInstances.push_back(instance);

// Sanity check
if (instance == nullptr)
{
if (instance == nullptr)
{
fprintf(stderr, "Error: Could not create instance with required topology: %s\n", runnerTopology.serialize().dump(2).c_str());
instanceManager->abort(-1);
}
Expand All @@ -132,13 +135,14 @@ int main(int argc, char *argv[])

// Creating deployr object
deployr::DeployR deployr(&cloudrInstanceManager, &rpcEngine, topology);

// Calling main algorithm driver
deploy(deployr, deployment, cloudrInstanceManager.getCurrentInstance()->getId());
}

// Relinquishing newly created instances from cloudr
if (cloudrInstanceManager.getCurrentInstance()->isRootInstance()) for (const auto& instance : newInstances) cloudrInstanceManager.terminateInstance(instance);
if (cloudrInstanceManager.getCurrentInstance()->isRootInstance())
for (const auto &instance : newInstances) cloudrInstanceManager.terminateInstance(instance);

// Finalizing cloudR
cloudrInstanceManager.finalize();
Expand Down
6 changes: 3 additions & 3 deletions examples/deploy/deploy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
// Example function for the leader role (per its name and its "[LeaderFc]" log tag).
// It only prints a greeting to stdout carrying an identifier obtained from the given DeployR object.
//
// deployr: the DeployR object queried for the identifier that gets printed.
//
// NOTE(review): this span comes from a rendered diff, so the printf shows up twice, once calling
// getInstanceId() and once calling getRunnerId(). Presumably only the getRunnerId() line exists in
// the post-change file; confirm against the repository before relying on this text.
void leaderFc(deployr::DeployR &deployr)
{
// Printing a greeting with the identifier reported by deployr
// NOTE(review): "%lu" assumes the returned id is an unsigned long-sized integer; verify the getter's return type
printf("[LeaderFc] Hi, I am instance id: %lu\n", deployr.getInstanceId());
printf("[LeaderFc] Hi, I am instance id: %lu\n", deployr.getRunnerId());
}

// Example function for the worker role (per its name and its "[WorkerFc]" log tag).
// It only prints a greeting to stdout carrying an identifier obtained from the given DeployR object.
//
// deployr: the DeployR object queried for the identifier that gets printed.
//
// NOTE(review): this span comes from a rendered diff, so the printf shows up twice, once calling
// getInstanceId() and once calling getRunnerId(). Presumably only the getRunnerId() line exists in
// the post-change file; confirm against the repository before relying on this text.
void workerFc(deployr::DeployR &deployr)
{
// Printing a greeting with the identifier reported by deployr
// NOTE(review): "%lu" assumes the returned id is an unsigned long-sized integer; verify the getter's return type
printf("[WorkerFc] Hi, I am instance id: %lu\n", deployr.getInstanceId());
printf("[WorkerFc] Hi, I am instance id: %lu\n", deployr.getRunnerId());
}

void deploy(deployr::DeployR &deployr, const deployr::Deployment& deployment, const HiCR::Instance::instanceId_t coordinatorInstanceId)
void deploy(deployr::DeployR &deployr, const deployr::Deployment &deployment, const HiCR::Instance::instanceId_t coordinatorInstanceId)
{
// Initializing DeployR
deployr.initialize();
Expand Down
4 changes: 2 additions & 2 deletions examples/deploy/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ testSuite = [ 'examples', 'deploy' ]
# MPI example: when 'mpi' is among the configured engines, build the 'mpi' executable from mpi.cpp.
# If the 'buildTests' option is set, also register it as a test launched through mpirunExecutable
# with 3 processes ('-np 3', oversubscription allowed) and a 60 second timeout.
# NOTE(review): this span comes from a rendered diff, so the test() call appears twice. The first passes
# request.json and the second passes deployment.json; presumably only the deployment.json variant exists
# in the post-change file. Confirm against the repository.
if 'mpi' in engines
exec = executable('mpi', [ 'mpi.cpp'], dependencies: DeployRBuildDep)
if get_option('buildTests')
test('mpi', mpirunExecutable, args : [ '-np', '3', '--oversubscribe', exec.full_path(), meson.current_source_dir() + '/request.json'], timeout: 60, suite: testSuite )
test('mpi', mpirunExecutable, args : [ '-np', '3', '--oversubscribe', exec.full_path(), meson.current_source_dir() + '/deployment.json'], timeout: 60, suite: testSuite )
endif
endif

# CloudR example: when 'cloudr' is among the configured engines, build the 'cloudr' executable from cloudr.cpp.
# If the 'buildTests' option is set, also register it as a test launched through mpirunExecutable
# with a 60 second timeout.
# NOTE(review): this span comes from a rendered diff, so the test() call appears twice. The first runs
# 3 processes with request.json only; the second runs 5 processes and passes deployment.json followed by
# cloudr.json. Presumably only the second variant exists in the post-change file. Confirm against the repository.
if 'cloudr' in engines
exec = executable('cloudr', [ 'cloudr.cpp' ], dependencies: DeployRBuildDep)
if get_option('buildTests')
test('cloudr', mpirunExecutable, args : [ '-np', '3', '--oversubscribe', exec.full_path(), meson.current_source_dir() + '/request.json'], timeout: 60, suite: testSuite )
test('cloudr', mpirunExecutable, args : [ '-np', '5', '--oversubscribe', exec.full_path(), meson.current_source_dir() + '/deployment.json', meson.current_source_dir() + '/cloudr.json'], timeout: 60, suite: testSuite )
endif
endif
21 changes: 10 additions & 11 deletions examples/deploy/mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ int main(int argc, char *argv[])
auto memoryManager = std::make_shared<HiCR::backend::mpi::MemoryManager>();

// Making sure we instantiated 3 instances, which is all we need for this example
if (instanceManager->getInstances().size() != 3)
if (instanceManager->getInstances().size() != 3)
{
fprintf(stderr, "Error: this example requires three instances to run.\n");
instanceManager->abort(-1);
Expand Down Expand Up @@ -56,8 +56,8 @@ int main(int argc, char *argv[])
HiCR::frontend::RPCEngine rpcEngine(*communicationManager, *instanceManager, *memoryManager, computeManager, bufferMemorySpace, computeResource);

// Gathering instances to run the example with
std::vector<HiCR::Instance*> instances;
for (const auto& instance : instanceManager->getInstances()) instances.push_back(instance.get());
std::vector<HiCR::Instance *> instances;
for (const auto &instance : instanceManager->getInstances()) instances.push_back(instance.get());

// Initialize RPC engine
rpcEngine.initialize();
Expand All @@ -70,9 +70,9 @@ int main(int argc, char *argv[])

// Getting the topology of the other MPI processes
std::vector<HiCR::Instance::instanceId_t> instanceIds;
for (const auto& instance : instanceManager->getInstances()) instanceIds.push_back(instance->getId());
for (const auto &instance : instanceManager->getInstances()) instanceIds.push_back(instance->getId());
const auto globalTopology = deployr.gatherGlobalTopology(instanceManager->getRootInstanceId(), instanceIds);

// Creating deployment object
deployr::Deployment deployment;

Expand All @@ -81,7 +81,7 @@ int main(int argc, char *argv[])
{
// Checking arguments
if (argc != 2)
{
{
fprintf(stderr, "Error: You need to pass a deployment.json file as parameter.\n");
instanceManager->abort(-1);
}
Expand All @@ -91,25 +91,24 @@ int main(int argc, char *argv[])

// Parsing deployment file contents to a JSON object
std::ifstream ifs(deploymentFilePath);
auto deploymentJs = nlohmann::json::parse(ifs);
auto deploymentJs = nlohmann::json::parse(ifs);

// Getting requested topologies from the json file
std::vector<HiCR::Topology> requestedTopologies;
for (const auto& runner : deploymentJs["Runners"]) requestedTopologies.push_back(HiCR::Topology(runner["Topology"]));
for (const auto &runner : deploymentJs["Runners"]) requestedTopologies.push_back(HiCR::Topology(runner["Topology"]));

// Determine best pairing between the detected instances
const auto matching = deployr::DeployR::doBipartiteMatching(requestedTopologies, globalTopology);

// Check matching
if (matching.size() != requestedTopologies.size())
{
{
fprintf(stderr, "Error: The provided instances do not have the sufficient hardware resources to run this job.\n");
instanceManager->abort(-1);
}

// Creating the runner objects
for (size_t i = 0; i < deploymentJs["Runners"].size(); i++)
deployment.addRunner(deployr::Runner(i, deploymentJs["Runners"][i]["Function"].get<std::string>(), matching[i]));
for (size_t i = 0; i < deploymentJs["Runners"].size(); i++) deployment.addRunner(deployr::Runner(i, deploymentJs["Runners"][i]["Function"].get<std::string>(), matching[i]));
}

// Deploying
Expand Down
2 changes: 1 addition & 1 deletion include/deployr/deployment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class Deployment final
/**
* Adds a runner to this deployment's list of runners
*/
__INLINE__ void addRunner(const Runner& runner) { _runners.push_back(runner); }
__INLINE__ void addRunner(const Runner &runner) { _runners.push_back(runner); }

/**
* Gets the instance map
Expand Down
Loading
Loading