Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
- name: Run benchmark
run: ./bench --benchmark_out=benchmark.json --benchmark_out_format=json
- name: Store benchmark result
if: ${{ github.event_name == 'push' }}
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'googlecpp'
Expand Down
14 changes: 11 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,33 @@ name: C++ CI

on:
push:
branches: [main]
pull_request:

jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
arrow: [0, 1]
steps:
- uses: actions/checkout@v3
- name: Update submodules
run: git submodule update --init --recursive
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y ca-certificates lsb-release wget
wget https://packages.apache.org/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
sudo apt-get update
sudo apt-get install -y g++ make clang-tidy clang-format python3 python3-pip
sudo apt-get install -y g++ make clang-tidy clang-format python3 python3-pip libarrow-dev libparquet-dev
pip3 install pandas pyarrow pytest
- name: Build
run: make
run: make USE_ARROW=${{ matrix.arrow }}
- name: Run clang-format
run: make format && git diff --exit-code
- name: Run clang-tidy
run: make tidy
- name: Run test
run: make test
run: make USE_ARROW=${{ matrix.arrow }} test
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@
TARGET = main
BENCH = bench
OBJ = main_bench.o
USE_ARROW ?= 0
ifeq ($(USE_ARROW),1)
CXXFLAGS += $(shell pkg-config --cflags arrow parquet)
LDFLAGS += $(shell pkg-config --libs arrow parquet)
DEFINES += -DUSE_ARROW
endif

$(TARGET): main.cpp
$(CXX) -O3 -std=c++20 -fopenmp -march=native -o $(TARGET) $<
$(CXX) -O3 -std=c++20 -fopenmp -march=native $(DEFINES) $(CXXFLAGS) -o $(TARGET) $< $(LDFLAGS)

$(OBJ): main.cpp
$(CXX) -O3 -std=c++20 -fopenmp -march=native -DBENCH_LIB -c main.cpp -o $(OBJ)
$(CXX) -O3 -std=c++20 -fopenmp -march=native -DBENCH_LIB $(DEFINES) $(CXXFLAGS) -c main.cpp -o $(OBJ)

$(BENCH): $(OBJ) bench.cpp
$(CXX) -O3 -std=c++20 -fopenmp -march=native bench.cpp $(OBJ) -lbenchmark -lpthread -o $(BENCH)
$(CXX) -O3 -std=c++20 -fopenmp -march=native bench.cpp $(OBJ) -lbenchmark -lpthread $(LDFLAGS) -o $(BENCH)

format:
clang-format -i main.cpp
Expand Down
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,20 @@ Use the provided `Makefile`:
make
```

This produces an executable named `main`.
This produces an executable named `main`. To enable Apache Arrow support pass
`USE_ARROW=1` (requires Arrow C++ libraries with `pkg-config` files).

## Running

Run the simulator by providing a connectome CSV file and lists of active and silent neurons:
Run the simulator by providing a connectome CSV file and lists of active and silent neurons. When built with Arrow support you may provide a Parquet file instead:

```bash
./main --csv connectome.csv --active active.txt --silent silent.txt --t 1000
```
To load a Parquet file when Arrow support is enabled use:
```bash
./main --parquet Connectivity_783.parquet --active active.txt --silent silent.txt --t 1000
```

The output spike times are written to `spikes.bin`, and basic statistics are printed to stderr.

Expand Down
3 changes: 2 additions & 1 deletion bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
#include <cstdio>

size_t run_simulation(const std::string &csv,
const std::string &parquet,
const std::string &act,
const std::string &sil,
size_t T);

static void BM_Run(benchmark::State &state) {
for (auto _ : state) {
size_t spikes = run_simulation("test/test_connectome.csv",
size_t spikes = run_simulation("test/test_connectome.csv", "",
"test/active.txt",
"test/silent.txt", 5);
benchmark::DoNotOptimize(spikes);
Expand Down
2 changes: 1 addition & 1 deletion docs/AGENT_PROMPTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ The project simulates spikes in a Drosophila connectome using a leaky integrate-

- Keep the simulator self contained in `main.cpp` unless new modules are justified.
- Unit tests live under `test/` and should be expanded when new features are added.
- Large datasets are provided via the optional `Drosophila_brain_model` submodule and should not be committed directly to the repository.
- Large datasets are provided via the optional `Drosophila_brain_model` submodule and should not be committed directly to the repository. Support for loading Parquet files via Apache Arrow is optional and can be enabled by building with `USE_ARROW=1`.

## Pull Request Guidance

Expand Down
106 changes: 98 additions & 8 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#include <string>
#include <tuple>
#include <vector>
#ifdef USE_ARROW
#include <arrow/api.h>
#include <arrow/io/api.h>
#include <parquet/arrow/reader.h>
#endif

using namespace std;

Expand Down Expand Up @@ -76,6 +81,73 @@ static CSR load_csv_to_csr(const string &csv, size_t N,
partial_sum(g.row.begin(), g.row.end(), g.row.begin());
return g;
}
#ifdef USE_ARROW
/* Scan the Parquet connectome and return the neuron count (max index + 1).
 * Only the two index columns are materialized to keep peak memory low.
 * Throws (via PARQUET_THROW_NOT_OK / PARQUET_ASSIGN_OR_THROW) on I/O or
 * schema errors. */
static size_t count_neurons_parquet(const string &pq) {
  std::shared_ptr<arrow::io::ReadableFile> infile;
  PARQUET_ASSIGN_OR_THROW(infile, arrow::io::ReadableFile::Open(pq));
  std::unique_ptr<parquet::arrow::FileReader> reader;
  PARQUET_THROW_NOT_OK(
      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
  // FileReader::ReadTable selects a column subset by *field index*, not by
  // name, so resolve the indices from the schema first (the previous code
  // passed the column names directly, which matches no ReadTable overload).
  std::shared_ptr<arrow::Schema> schema;
  PARQUET_THROW_NOT_OK(reader->GetSchema(&schema));
  const int pre_idx = schema->GetFieldIndex("Presynaptic_Index");
  const int post_idx = schema->GetFieldIndex("Postsynaptic_Index");
  if (pre_idx < 0 || post_idx < 0)
    PARQUET_THROW_NOT_OK(
        arrow::Status::Invalid("missing index columns in ", pq));
  std::shared_ptr<arrow::Table> table;
  PARQUET_THROW_NOT_OK(reader->ReadTable({pre_idx, post_idx}, &table));
  auto pre_chunks = table->GetColumnByName("Presynaptic_Index")->chunks();
  auto post_chunks = table->GetColumnByName("Postsynaptic_Index")->chunks();
  size_t N = 0;
  // NOTE(review): assumes both columns are physically uint32 and chunked
  // identically (true for a table read in one call) — confirm for the dataset;
  // static_pointer_cast performs no runtime type check.
  for (size_t c = 0; c < pre_chunks.size(); ++c) {
    auto pre = std::static_pointer_cast<arrow::UInt32Array>(pre_chunks[c]);
    auto post = std::static_pointer_cast<arrow::UInt32Array>(post_chunks[c]);
    for (int64_t i = 0; i < pre->length(); ++i) {
      uint32_t a = pre->Value(i);
      uint32_t b = post->Value(i);
      N = max(N, size_t(max(a, b) + 1));
    }
  }
  return N;
}
/* Load the Parquet connectome into a CSR adjacency structure.
 * pq     — path to the Parquet file (columns: Presynaptic_Index,
 *          Postsynaptic_Index, Connectivity).
 * N      — neuron count; sizes the CSR row-offset array (N + 1 entries).
 * silent — per-neuron flag; any edge touching a silenced neuron is dropped.
 * Returns a CSR graph whose rows are ordered by presynaptic id. */
static CSR load_parquet_to_csr(const string &pq, size_t N,
                               const vector<uint8_t> &silent) {
  std::shared_ptr<arrow::io::ReadableFile> infile;
  PARQUET_ASSIGN_OR_THROW(infile, arrow::io::ReadableFile::Open(pq));
  std::unique_ptr<parquet::arrow::FileReader> reader;
  PARQUET_THROW_NOT_OK(
      parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
  std::shared_ptr<arrow::Table> table;
  // Materializes the entire table (all columns) in memory at once.
  PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
  auto pre_chunks = table->GetColumnByName("Presynaptic_Index")->chunks();
  auto post_chunks = table->GetColumnByName("Postsynaptic_Index")->chunks();
  auto w_chunks = table->GetColumnByName("Connectivity")->chunks();
  vector<tuple<uint32_t, uint32_t, float>> edges;
  size_t total = table->num_rows();
  edges.reserve(total);
  // NOTE(review): assumes the three columns are chunked identically and are
  // physically uint32/uint32/float; std::static_pointer_cast does no runtime
  // type check, so a schema mismatch would silently read garbage — confirm
  // against the dataset schema.
  for (size_t c = 0; c < pre_chunks.size(); ++c) {
    auto pre = std::static_pointer_cast<arrow::UInt32Array>(pre_chunks[c]);
    auto post = std::static_pointer_cast<arrow::UInt32Array>(post_chunks[c]);
    auto w = std::static_pointer_cast<arrow::FloatArray>(w_chunks[c]);
    for (int64_t i = 0; i < pre->length(); ++i) {
      uint32_t a = pre->Value(i);
      uint32_t b = post->Value(i);
      // Skip edges involving silenced neurons entirely.
      if (silent[a] || silent[b])
        continue;
      float wt = w->Value(i);
      edges.emplace_back(a, b, wt);
    }
  }
  // Group edges by presynaptic id. std::sort is not stable, so the relative
  // order of one neuron's outgoing edges is unspecified — presumably the
  // simulation is order-insensitive within a row; verify against the CSV path.
  sort(edges.begin(), edges.end(),
       [](auto &x, auto &y) { return get<0>(x) < get<0>(y); });
  CSR g;
  g.row.assign(N + 1, 0); // row[i + 1] accumulates the out-degree of neuron i
  g.col.reserve(edges.size());
  g.w.reserve(edges.size());
  for (auto [pre, post, w] : edges) {
    ++g.row[pre + 1];
    g.col.push_back(post);
    g.w.push_back(w);
  }
  // Prefix sum turns per-row counts into CSR row offsets.
  partial_sum(g.row.begin(), g.row.end(), g.row.begin());
  return g;
}
#endif
/* ---------- simulation ---------- */
struct Simulator {
size_t N;
Expand Down Expand Up @@ -169,10 +241,26 @@ struct Simulator {
}
};
/* ---------- main ---------- */
size_t run_simulation(const string &csv, const string &act, const string &sil,
size_t T) {
/* step 1: count neurons (max id+1) */
size_t run_simulation(const string &csv, const string &pq, const string &act,
const string &sil, size_t T) {
size_t N = 0;
CSR G;
#ifdef USE_ARROW
if (!pq.empty()) {
N = count_neurons_parquet(pq);
vector<uint8_t> silent_vec_tmp(N, 0);
for (auto id : load_list(sil))
if (id < N)
silent_vec_tmp[id] = 1;
G = load_parquet_to_csr(pq, N, silent_vec_tmp);
Params P;
Simulator S(N, G, P, std::move(silent_vec_tmp));
S.run(T, load_list(act));
S.save_bin("spikes.bin");
return S.spikes.size();
}
#endif
/* default CSV loader */
{
ifstream f(csv);
string l;
Expand All @@ -188,7 +276,7 @@ size_t run_simulation(const string &csv, const string &act, const string &sil,
if (id < N)
silent_vec[id] = 1;

CSR G = load_csv_to_csr(csv, N, silent_vec);
G = load_csv_to_csr(csv, N, silent_vec);
Params P;
Simulator S(N, G, P, silent_vec);
S.run(T, load_list(act));
Expand All @@ -198,24 +286,26 @@ size_t run_simulation(const string &csv, const string &act, const string &sil,

#ifndef BENCH_LIB
/* CLI entry point.
 * Flags: --csv <file>, --parquet <file> (Arrow builds only), --active <file>,
 *        --silent <file>, --t <steps> (default 1000).
 * Requires --csv or --parquet; prints the spike count to stderr. */
int main(int argc, char **argv) {
  string csv, pq, act, sil;
  size_t T = 1000;
  for (int i = 1; i < argc; ++i) {
    string a = argv[i];
    const bool takes_value = (a == "--csv" || a == "--parquet" ||
                              a == "--active" || a == "--silent" || a == "--t");
    // Guard against a flag given without a value: the previous code read
    // argv[++i] unchecked, dereferencing argv[argc] (a null pointer).
    if (takes_value && i + 1 >= argc) {
      cerr << "missing value for " << a << "\n";
      return 1;
    }
    if (a == "--csv")
      csv = argv[++i];
    else if (a == "--parquet")
      pq = argv[++i];
    else if (a == "--active")
      act = argv[++i];
    else if (a == "--silent")
      sil = argv[++i];
    else if (a == "--t")
      T = stoul(argv[++i]);
  }
  if (csv.empty() && pq.empty()) {
    cerr << "--csv or --parquet required\n";
    return 1;
  }
  size_t spikes = run_simulation(csv, pq, act, sil, T);
  cerr << "Spikes: " << spikes << "\n";
}
#endif