sequence/test/benchmarks.cpp at main · calum74/sequence · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// Benchmarking Sequence
// This compares the performance of Sequence against hand-written code.
// We also look at the overhead of using `sequence<T>&`, which uses
// virtual function calls.
//
// Benchmark 1: This calculates the sum of the squares of all
// even numbers between 0 and 1_000_000_000.
//
// Benchmark 2: This constructs a string consisting of 1000000 'a's.

#include <sequence.hpp>
#include <chrono>
#include <iostream>

const int N=1000000000;

// This is the canonical C implementation of Benchmark 1.
// This is the fastest implementation.
int benchmark1a()
{
    int sum=0;
    for(int i=0; i<=N; i++)
        if(i%2==0)
            sum += i*i;
    return sum;
}

// This is a Sequence implementation of Benchmark 1.
// It is somewhat slower than the C implementation even though
// it is fully inlined. The compiler was not able to optimize this
// as much as the C implementation, so it is around 3 times slower.
int benchmark1b()
{
    return seq(0, N).
        where([](int n) { return n%2==0; }).
        select([](int n) { return n*n; }).
        sum();
}

int processInts(const sequence<int> & items)
{
    return items.where([](int n) { return n%2==0; }).
        select([](int n) { return n*n; }).
        sum();
}

template<typename Seq>
int processInts2(Seq items)
{
    return items.where([](int n) { return n%2==0; }).
        select([](int n) { return n*n; }).
        sum();
}

// This is the Sequence implementation of Benchmark 1, with the added
// overhead of iterating the sequence using virtual function calls which
// is around 10 times slower than the inline C implementation due to the
// overhead of virtual function calls used internally.
int benchmark1c()
{
    return processInts(seq(0,N));
}

// This is the Sequence implementation of Benchmark 1, using a templated
// function to process the list. This shows the potential speedup of using templates
// if you don't mind all your code in header files.
int benchmark1d()
{
    return processInts2(seq(0,N));
}

const int N2 = 1000000;

// This is the C implementation of Benchmark 2.
// It is the joint-fastest implementation.
int aloop1()
{
    std::string result;
    for(int i=0; i<N2; ++i)
        result += 'a';
    return (int)result.size();
}

// This is the Sequence implementation of Benchmark 2.
// It is joint-fastest with the C implementation.
// It uses `accumulate()` which is faster than `aggregate` in this case.
int aloop2()
{
    return (int)list('a').repeat(N2).accumulate(std::string(), [](std::string & str, char ch) { str+=ch; }).size();
}

// This is the Sequence implementation of Benchmark 2.
// It uses `aggregate` which is significantly slower than `accumulate` in this case.
int aloop3()
{
    return (int)list('a').repeat(N2).aggregate(std::string(), [](const std::string & str, char ch) { return str+ch; }).size();
}

int processAs(const sequence<char> & items)
{
    return (int)items.accumulate(std::string(), [](std::string & str, char ch) { str+=ch; }).size();
}

// This is a Sequence implementation of Benchmark 2, passing
// the sequence as `sequence<char>&` which introduces virtual function calls.
// This is around 13% slower than the inline version that does not use virtual function calls.
int aloop4()
{
    return processAs(list('a').repeat(N2));
}

template<typename Fn>
void benchmark(Fn fn, const char * description)
{
    auto t1 = std::chrono::high_resolution_clock::now();
    int sum = fn();
    auto t2 = std::chrono::high_resolution_clock::now();
    std::cout << description << ", " << std::chrono::duration<double, std::milli>(t2-t1).count() << std::endl;
}

int main()
{
#ifndef NDEBUG
    std::cout << "WARNING!!! Running in a debug build\n";
#endif
    benchmark(benchmark1a, "1a: C implementation");
    benchmark(benchmark1b, "1b: Sequence implementation");
    benchmark(benchmark1c, "1c: Sequence passed as reference");
    benchmark(benchmark1d, "1d: Sequence passed as template");
    benchmark(aloop1,      "2a: C implementation");
    benchmark(aloop2,      "2b: Sequence implementation");
    benchmark(aloop3,      "2c: Sequence using naive aggregate");
    benchmark(aloop4,      "2d: Sequence passed as reference");
    return 0;
}