[DEV] add v1.66.0

This commit is contained in:
2018-01-12 21:47:58 +01:00
parent 87059bb1af
commit a97e9ae7d4
49032 changed files with 7668950 additions and 0 deletions

View File

@@ -0,0 +1,210 @@
# ---------------------------------------------------------------------------
# Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
#
# Distributed under the Boost Software License, Version 1.0
# See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt
#
# ---------------------------------------------------------------------------
# Add ../include to the header search path for all benchmark targets.
include_directories(../include)

# Boost components that every benchmark is linked against.
set(PERF_BOOST_COMPONENTS system timer chrono program_options)

# NOTE: the option variables below are tested by name (if(VAR)) instead of by
# expansion (if(${VAR})). An unset variable then simply evaluates to false
# rather than expanding to nothing and producing a malformed condition.

# filesystem is only required when the offline cache is enabled
if(BOOST_COMPUTE_USE_OFFLINE_CACHE)
  list(APPEND PERF_BOOST_COMPONENTS filesystem)
endif()

# thread is required for thread-safe builds without C++11 ...
if(BOOST_COMPUTE_THREAD_SAFE AND NOT BOOST_COMPUTE_USE_CPP11)
  list(APPEND PERF_BOOST_COMPONENTS thread)
endif()

# ... and, together with date_time, for the Bolt benchmarks (C++11 only).
# The two conditions are mutually exclusive, so thread is added at most once.
if(BOOST_COMPUTE_HAVE_BOLT AND BOOST_COMPUTE_USE_CPP11)
  list(APPEND PERF_BOOST_COMPONENTS thread date_time)
endif()

# the list is never empty here, so REMOVE_DUPLICATES needs no guard
list(REMOVE_DUPLICATES PERF_BOOST_COMPONENTS)

find_package(Boost 1.54 REQUIRED COMPONENTS ${PERF_BOOST_COMPONENTS})
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
# Boost.Compute benchmarks. Every entry NAME is built from perf_NAME.cpp
# into an executable called perf_NAME.
set(BENCHMARKS
accumulate
bernoulli_distribution
binary_find
cart_to_polar
comparison_sort
copy_if
copy_to_device
count
discrete_distribution
erase_remove
exclusive_scan
fill
find
find_end
includes
inner_product
is_permutation
is_sorted
max_element
merge
next_permutation
nth_element
partial_sum
partition
partition_point
prev_permutation
reverse
reverse_copy
rotate
rotate_copy
host_sort
random_number_engine
reduce_by_key
saxpy
search
search_n
set_difference
set_intersection
set_symmetric_difference
set_union
sort
sort_by_key
sort_float
stable_partition
uniform_int_distribution
unique
unique_copy
)
# Create one executable per benchmark and link it against OpenCL and the
# Boost libraries.
foreach(BENCHMARK ${BENCHMARKS})
set(PERF_TARGET perf_${BENCHMARK})
add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${Boost_LIBRARIES})
endforeach()
# stl benchmarks (for comparison)
# Every entry NAME is built from perf_NAME.cpp into an executable perf_NAME.
set(STL_BENCHMARKS
  stl_accumulate
  stl_count
  stl_find
  stl_find_end
  stl_includes
  stl_inner_product
  stl_max_element
  stl_merge
  stl_next_permutation
  stl_partial_sum
  stl_partition
  stl_prev_permutation
  stl_reverse
  stl_reverse_copy
  stl_rotate
  stl_rotate_copy
  stl_saxpy
  stl_search
  stl_search_n
  stl_set_difference
  stl_set_intersection
  stl_set_symmetric_difference
  stl_set_union
  stl_sort
  stl_stable_partition
  stl_unique
  stl_unique_copy
)

# stl benchmarks which require c++11
# (the option is tested by name so that an unset variable evaluates to false)
if(BOOST_COMPUTE_USE_CPP11)
  list(APPEND
    STL_BENCHMARKS
    stl_is_permutation
    stl_partition_point
  )
endif()

# the stl benchmarks run on the host only, so OpenCL is not linked
foreach(BENCHMARK ${STL_BENCHMARKS})
  set(PERF_TARGET perf_${BENCHMARK})
  add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
  target_link_libraries(${PERF_TARGET} ${Boost_LIBRARIES})
endforeach()
# cuda/thrust benchmarks (for comparison)
# Only built when BOOST_COMPUTE_HAVE_CUDA is enabled. The option is tested by
# name so that an unset variable evaluates to false.
if(BOOST_COMPUTE_HAVE_CUDA)
  find_package(CUDA 5.0 REQUIRED)

  # every entry NAME is built from perf_NAME.cu into an executable perf_NAME
  set(CUDA_BENCHMARKS
    thrust_accumulate
    thrust_count
    thrust_exclusive_scan
    thrust_find
    thrust_inner_product
    thrust_merge
    thrust_partial_sum
    thrust_partition
    thrust_reduce_by_key
    thrust_reverse
    thrust_reverse_copy
    thrust_rotate
    thrust_saxpy
    thrust_set_difference
    thrust_sort
    thrust_unique
  )

  foreach(BENCHMARK ${CUDA_BENCHMARKS})
    set(PERF_TARGET perf_${BENCHMARK})
    cuda_add_executable(${PERF_TARGET} perf_${BENCHMARK}.cu)
    target_link_libraries(${PERF_TARGET} ${CUDA_LIBRARIES} ${Boost_LIBRARIES})
  endforeach()
endif()
# intel tbb benchmarks (for comparison)
# Only built when BOOST_COMPUTE_HAVE_TBB is enabled. The option is tested by
# name so that an unset variable evaluates to false.
if(BOOST_COMPUTE_HAVE_TBB)
  find_package(TBB REQUIRED)
  include_directories(SYSTEM ${TBB_INCLUDE_DIRS})

  # every entry NAME is built from perf_NAME.cpp into an executable perf_NAME
  set(TBB_BENCHMARKS
    tbb_accumulate
    tbb_merge
    tbb_sort
  )

  foreach(BENCHMARK ${TBB_BENCHMARKS})
    set(PERF_TARGET perf_${BENCHMARK})
    add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
    target_link_libraries(${PERF_TARGET} ${TBB_LIBRARIES} ${Boost_LIBRARIES})
  endforeach()
endif()
# bolt c++ template lib benchmarks (for comparison)
# Built only when both BOOST_COMPUTE_HAVE_BOLT and BOOST_COMPUTE_USE_CPP11 are
# enabled. The options are tested by name so that an unset variable evaluates
# to false.
if(BOOST_COMPUTE_HAVE_BOLT AND BOOST_COMPUTE_USE_CPP11)
  find_package(Bolt REQUIRED)
  include_directories(SYSTEM ${BOLT_INCLUDE_DIRS})

  # every entry NAME is built from perf_NAME.cpp into an executable perf_NAME
  set(BOLT_BENCHMARKS
    bolt_accumulate
    bolt_count
    bolt_exclusive_scan
    bolt_fill
    bolt_inner_product
    bolt_max_element
    bolt_merge
    bolt_partial_sum
    bolt_reduce_by_key
    bolt_saxpy
    bolt_sort
  )

  foreach(BENCHMARK ${BOLT_BENCHMARKS})
    set(PERF_TARGET perf_${BENCHMARK})
    add_executable(${PERF_TARGET} perf_${BENCHMARK}.cpp)
    target_link_libraries(${PERF_TARGET} ${OpenCL_LIBRARIES} ${BOLT_LIBRARIES} ${Boost_LIBRARIES})
  endforeach()
elseif(BOOST_COMPUTE_HAVE_BOLT)
  # reaching this branch with Bolt enabled implies C++11 is disabled
  message(WARNING "BOOST_COMPUTE_USE_CPP11 must be ON for building Bolt C++ Template Library performance tests.")
endif()

109
libs/compute/perf/perf.hpp Normal file
View File

@@ -0,0 +1,109 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#ifndef PERF_HPP
#define PERF_HPP
// this header contains general purpose functions and variables used by
// the boost.compute performance benchmarks.
#include <vector>
#include <cstdlib>
#include <algorithm>
#include <boost/lexical_cast.hpp>
#include <boost/timer/timer.hpp>
// input size used by the benchmarks (overridable via argv[1])
static size_t PERF_N = 1024;

// number of timed repetitions per benchmark (overridable via argv[2])
static size_t PERF_TRIALS = 3;

// reads the optional positional command line arguments and stores them in
// the perf variables above:
//   argv[1] -> PERF_N
//   argv[2] -> PERF_TRIALS
// arguments which are not given leave the corresponding default untouched.
// conversion is done with boost::lexical_cast.
inline void perf_parse_args(int argc, char *argv[])
{
    if(argc > 1){
        PERF_N = boost::lexical_cast<size_t>(argv[1]);
    }

    if(argc > 2){
        PERF_TRIALS = boost::lexical_cast<size_t>(argv[2]);
    }
}
// generates a vector of random numbers
//
// returns a vector with 'size' elements, each one assigned from a call to
// std::rand() (converted to 'T'). the sequence therefore depends on the
// current std::srand() seed.
template<class T>
std::vector<T> generate_random_vector(const size_t size)
{
    std::vector<T> vector(size);

    // <cstdlib> only guarantees the name in namespace std, so qualify it
    std::generate(vector.begin(), vector.end(), std::rand);

    return vector;
}
// a simple timer wrapper which records multiple time entries
//
// usage: call start() before and stop() after the code under test. every
// stop() appends the measured wall-clock time (in nanoseconds) to 'times'.
// the accessors below report on the recorded entries; when nothing has been
// recorded yet they return 0 instead of reading from an empty vector.
class perf_timer
{
public:
    typedef boost::timer::nanosecond_type nanosecond_type;

    // stops the underlying timer so nothing is measured before start()
    perf_timer()
    {
        timer.stop();
    }

    // begins a new measurement
    void start()
    {
        timer.start();
    }

    // ends the current measurement and records its wall-clock time
    void stop()
    {
        timer.stop();
        times.push_back(timer.elapsed().wall);
    }

    // returns the number of recorded measurements
    size_t trials() const
    {
        return times.size();
    }

    // discards all recorded measurements
    void clear()
    {
        times.clear();
    }

    // returns the most recent measurement (0 if there is none)
    nanosecond_type last_time() const
    {
        if(times.empty()){
            return 0;
        }
        return times.back();
    }

    // returns the shortest measurement (0 if there is none)
    nanosecond_type min_time() const
    {
        if(times.empty()){
            return 0;
        }
        return *std::min_element(times.begin(), times.end());
    }

    // returns the longest measurement (0 if there is none)
    nanosecond_type max_time() const
    {
        if(times.empty()){
            return 0;
        }
        return *std::max_element(times.begin(), times.end());
    }

    boost::timer::cpu_timer timer;
    std::vector<boost::timer::nanosecond_type> times;
};
// computes the throughput, in MB/s (1 MB = 1024 * 1024 bytes), achieved when
// 'count' items of type 'T' are processed in 'time' nanoseconds
template<class T>
double perf_rate(const size_t count, perf_timer::nanosecond_type time)
{
    const size_t total_bytes = count * sizeof(T);
    const double megabytes = double(total_bytes) / 1024 / 1024;
    const double seconds = time / 1e9;

    return megabytes / seconds;
}
#endif // PERF_HPP

238
libs/compute/perf/perf.py Executable file
View File

@@ -0,0 +1,238 @@
#!/usr/bin/python
# Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
# Distributed under the Boost Software License, Version 1.0
# See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt
#
# See http://boostorg.github.com/compute for more information.
# driver script for boost.compute benchmarking. will run a
# benchmark for a given function (e.g. accumulate, sort).
import os
import sys
import subprocess
try:
import pylab
except:
print('pylab not found, no ploting...')
pass
def run_perf_process(name, size, backend = ""):
    """Run a single benchmark executable and return the time it reports.

    The executable is ./perf/perf_<name>, or ./perf/perf_<backend>_<name>
    when a backend is given. It is started with the size (converted to an
    integer) as its only argument, and its output is scanned for lines of
    the form "time: <value> ...".

    Returns the value of the last such line as a float. Returns 0 when the
    executable does not exist, cannot be run or exits with an error, or
    prints no usable "time:" line.
    """
    if not backend:
        proc = "perf_%s" % name
    else:
        proc = "perf_%s_%s" % (backend, name)

    filename = "./perf/" + proc

    if not os.path.isfile(filename):
        # single formatted string: prints the same under python 2 and 3
        print("Error: failed to find %s for running" % filename)
        return 0

    try:
        output = subprocess.check_output([filename, str(int(size))])
    except Exception:
        # a failing benchmark is reported as "no result"; unlike a bare
        # except this still lets KeyboardInterrupt/SystemExit through
        return 0

    t = 0
    for line in output.decode('utf8').split("\n"):
        if line.startswith("time:"):
            try:
                t = float(line.split(":")[1].split()[0])
            except (IndexError, ValueError):
                # malformed "time:" line, keep the previous value
                continue
    return t
class Report:
    """Collects (size, time) samples per implementation for one benchmark.

    'name' is the benchmark name; 'samples' maps an implementation name
    (e.g. "compute") to the list of (size, time) tuples added for it.
    """

    def __init__(self, name):
        self.name = name
        self.samples = {}

    def add_sample(self, name, size, time):
        """Append a (size, time) sample for the implementation 'name'."""
        self.samples.setdefault(name, []).append((size, time))

    def display(self):
        """Print every recorded sample as "size,time" lines per implementation."""
        for name in self.samples.keys():
            print('=== %s with %s ===' % (self.name, name))
            print('size,time (ms)')
            for sample in self.samples[name]:
                print('%d,%f' % sample)

    def _valid_samples(self, name):
        """Return the samples of 'name' whose time is non-zero.

        A time of 0 marks a run without a result; such samples are not
        plotted. Unknown names yield an empty list.
        """
        return [s for s in self.samples.get(name, []) if s[1] != 0]

    def plot_time(self, name):
        """Plot time over size (log-log) for 'name'; no-op without valid samples."""
        valid = self._valid_samples(name)
        if not valid:
            return

        sizes = [s[0] for s in valid]
        times = [s[1] for s in valid]

        pylab.loglog(sizes, times, marker='o', label=name)
        pylab.xlabel("Size")
        pylab.ylabel("Time (ms)")
        pylab.title(self.name)

    def plot_rate(self, name):
        """Plot values per second over size (log-log) for 'name'; no-op without valid samples."""
        valid = self._valid_samples(name)
        if not valid:
            return

        sizes = [s[0] for s in valid]
        # times are in milliseconds, so scale by 1e-3 to get seconds
        rates = [float(s[0]) / (float(s[1]) * 1e-3) for s in valid]

        pylab.loglog(sizes, rates, marker='o', label=name)
        pylab.xlabel("Size")
        pylab.ylabel("Rate (values/s)")
        pylab.title(self.name)
def run_benchmark(name, sizes, vs=()):
    """Run the benchmark 'name' for every size and collect the timings.

    The Boost.Compute implementation is always run and recorded under the
    name "compute". Every entry of 'vs' that is a known competitor and that
    provides a benchmark called 'name' is run for the same sizes and
    recorded under the competitor's name; other entries are ignored.

    Returns a Report holding all samples.
    """
    report = Report(name)

    for size in sizes:
        elapsed = run_perf_process(name, size)
        report.add_sample("compute", size, elapsed)

    # benchmarks available per competitor; kept in sync with the targets
    # listed in the CMake file of this directory
    competitors = {
        "thrust" : [
            "accumulate",
            "count",
            "exclusive_scan",
            "find",
            "inner_product",
            "merge",
            "partial_sum",
            "partition",
            "reduce_by_key",
            "reverse",
            "reverse_copy",
            "rotate",
            "saxpy",
            "set_difference",
            "sort",
            "unique"
        ],
        "bolt" : [
            "accumulate",
            "count",
            "exclusive_scan",
            "fill",
            "inner_product",
            "max_element",
            "merge",
            "partial_sum",
            "reduce_by_key",
            "saxpy",
            "sort"
        ],
        "tbb": [
            "accumulate",
            "merge",
            "sort"
        ],
        "stl": [
            "accumulate",
            "count",
            "find",
            "find_end",
            "includes",
            "inner_product",
            "is_permutation",
            "max_element",
            "merge",
            "next_permutation",
            "nth_element",
            "partial_sum",
            "partition",
            "partition_point",
            "prev_permutation",
            "reverse",
            "reverse_copy",
            "rotate",
            "rotate_copy",
            "saxpy",
            "search",
            "search_n",
            "set_difference",
            "set_intersection",
            "set_symmetric_difference",
            "set_union",
            "sort",
            "stable_partition",
            "unique",
            "unique_copy"
        ]
    }

    for other in vs:
        if other not in competitors:
            continue
        if name not in competitors[other]:
            continue

        for size in sizes:
            elapsed = run_perf_process(name, size, other)
            report.add_sample(other, size, elapsed)

    return report
if __name__ == '__main__':
    # the benchmark name is the first argument that is not a "--" option, so
    # "perf.py --plot-time sort" and "perf.py sort --plot-time" both work
    # (defaults to "sort" when no name is given)
    positional = [arg for arg in sys.argv[1:] if not arg.startswith("--")]
    test = positional[0] if positional else "sort"
    print('running %s perf test' % test)

    plot = None
    if "--plot-time" in sys.argv:
        plot = "time"
    elif "--plot-rate" in sys.argv:
        plot = "rate"

    # pylab is optional (its import at the top of the file is guarded); check
    # before the long benchmark run instead of failing at the end of it
    if plot and "pylab" not in globals():
        print('pylab not available, printing results instead of plotting')
        plot = None

    # powers of two from 2^1 to 2^25 (already in ascending order)
    sizes = [ 2 ** x for x in range(1, 26) ]

    competitors = ["bolt", "tbb", "thrust", "stl"]

    report = run_benchmark(test, sizes, competitors)

    if plot == "time":
        report.plot_time("compute")
        for competitor in competitors:
            report.plot_time(competitor)
    elif plot == "rate":
        report.plot_rate("compute")
        for competitor in competitors:
            report.plot_rate(competitor)

    if plot:
        pylab.legend(loc='upper left')
        pylab.show()
    else:
        report.display()

View File

@@ -0,0 +1,140 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/accumulate.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
namespace po = boost::program_options;
namespace compute = boost::compute;
// returns a pseudo-random integer in [0, 25], derived from rand()
int rand_int()
{
    const double fraction = rand() / double(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
// runs compute::accumulate() over 'data' on 'queue' 'trials' times, waiting
// for the queue to finish after each run, and returns the shortest measured
// time (in nanoseconds, as reported by perf_timer::min_time())
template<class T>
double perf_accumulate(const compute::vector<T>& data,
                       const size_t trials,
                       compute::command_queue& queue)
{
    perf_timer timer;

    for(size_t i = 0; i != trials; ++i){
        timer.start();
        compute::accumulate(data.begin(), data.end(), T(0), queue);
        // include the time until the device has completed the work
        queue.finish();
        timer.stop();
    }

    return timer.min_time();
}
// searches for the fastest "tpb"/"vpt" parameter combination for accumulating
// 'data' on the device of 'queue' and stores it in the device's global
// parameter cache.
//
// every combination of the candidate values below is written to the cache and
// timed with perf_accumulate(); combinations which raise an opencl_error are
// skipped.
//
// NOTE(review): presumably "tpb" is threads-per-block and "vpt" is
// values-per-thread -- confirm against the reduce implementation.
// NOTE(review): if every combination throws, 0 is stored for both parameters.
template<class T>
void tune_accumulate(const compute::vector<T>& data,
                     const size_t trials,
                     compute::command_queue& queue)
{
    // candidate values for the two tuned parameters
    const compute::uint_ tpbs[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
    const compute::uint_ vpts[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    const size_t tpb_count = sizeof(tpbs) / sizeof(*tpbs);
    const size_t vpt_count = sizeof(vpts) / sizeof(*vpts);

    const std::string cache_key =
        std::string("__boost_reduce_on_gpu_") + compute::type_name<T>();

    boost::shared_ptr<compute::detail::parameter_cache> params =
        compute::detail::parameter_cache::get_global_cache(queue.get_device());

    // best combination seen so far
    double fastest = (std::numeric_limits<double>::max)();
    compute::uint_ best_tpb = 0;
    compute::uint_ best_vpt = 0;

    for(size_t i = 0; i < tpb_count; ++i){
        const compute::uint_ tpb = tpbs[i];
        params->set(cache_key, "tpb", tpb);

        for(size_t j = 0; j < vpt_count; ++j){
            const compute::uint_ vpt = vpts[j];
            params->set(cache_key, "vpt", vpt);

            try {
                const double elapsed = perf_accumulate(data, trials, queue);
                if(elapsed < fastest){
                    fastest = elapsed;
                    best_tpb = tpb;
                    best_vpt = vpt;
                }
            }
            catch(compute::opencl_error&){
                // invalid parameters for this device, skip
            }
        }
    }

    // store optimal parameters
    params->set(cache_key, "tpb", best_tpb);
    params->set(cache_key, "vpt", best_vpt);
}
// benchmark driver for compute::accumulate().
//
// options: --help, --size N (also accepted as first positional argument),
// --trials N, --tune. prints the size, the device name and the best time
// in milliseconds.
int main(int argc, char *argv[])
{
    // setup command line arguments
    po::options_description options("options");
    options.add_options()
        ("help", "show usage instructions")
        ("size", po::value<size_t>()->default_value(8192), "input size")
        ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
        ("tune", "run tuning procedure")
    ;
    po::positional_options_description positional_options;
    positional_options.add("size", 1);

    // parse command line
    po::variables_map vm;
    po::store(
        po::command_line_parser(argc, argv)
            .options(options).positional(positional_options).run(),
        vm
    );
    po::notify(vm);

    // --help only prints the usage instructions and does not run anything
    if(vm.count("help")){
        std::cout << options << std::endl;
        return 0;
    }

    const size_t size = vm["size"].as<size_t>();
    const size_t trials = vm["trials"].as<size_t>();
    std::cout << "size: " << size << std::endl;

    // setup context and queue for the default device
    compute::device device = compute::system::default_device();
    compute::context context(device);
    compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_data(size);
    std::generate(host_data.begin(), host_data.end(), rand_int);

    // create vector on the device and copy the data
    compute::vector<int> device_data(
        host_data.begin(), host_data.end(), queue
    );

    // run tuning procedure (if requested)
    if(vm.count("tune")){
        tune_accumulate(device_data, trials, queue);
    }

    // run benchmark (perf_accumulate reports nanoseconds, print milliseconds)
    double t = perf_accumulate(device_data, trials, queue);
    std::cout << "time: " << t / 1e6 << " ms" << std::endl;

    return 0;
}

View File

@@ -0,0 +1,46 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/random/default_random_engine.hpp>
#include <boost/compute/random/bernoulli_distribution.hpp>
#include "perf.hpp"
namespace compute = boost::compute;
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
compute::device device = compute::system::default_device();
compute::context context(device);
compute::command_queue queue(context, device);
compute::vector<bool> vector(PERF_N, context);
compute::default_random_engine rng(queue);
compute::bernoulli_distribution<float> dist(0.5);
perf_timer t;
t.start();
dist.generate(vector.begin(), vector.end(), rng, queue);
queue.finish();
t.stop();
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,71 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/lambda.hpp>
#include <boost/compute/algorithm/detail/binary_find.hpp>
#include <boost/compute/algorithm/partition.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// maps rand() onto the integer range [0, 25]
int rand_int()
{
    const double scaled = (rand() / static_cast<double>(RAND_MAX)) * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
using boost::compute::_1;
boost::compute::partition(
device_vector.begin(), device_vector.end(), _1 < 20, queue
);
// just to be sure everything is finished before measuring execution time
// of binary_find algorithm
queue.finish();
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::detail::binary_find(
device_vector.begin(), device_vector.end(), _1 >= 20, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,51 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include <bolt/cl/reduce.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create host vector
std::vector<int> host_vec = generate_random_vector<int>(PERF_N);
// create device vectors
bolt::cl::device_vector<int> device_vec(PERF_N);
// transfer data to the device
bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin());
int sum = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
sum = bolt::cl::reduce(device_vec.begin(), device_vec.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "sum: " << sum << std::endl;
return 0;
}

View File

@@ -0,0 +1,57 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/count.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
// produces a pseudo-random integer between 0 and 25 (inclusive) from rand()
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    const double scaled = unit * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create vector of random numbers on the host
std::vector<int> h_vec(PERF_N);
std::generate(h_vec.begin(), h_vec.end(), rand_int);
// create device vector
bolt::cl::device_vector<int> d_vec(PERF_N);
// transfer data to the device
bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
size_t count = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
count = bolt::cl::count(ctrl, d_vec.begin(), d_vec.end(), 4);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "count: " << count << std::endl;
return 0;
}

View File

@@ -0,0 +1,52 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/scan.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create vector of random numbers on the host
std::vector<int> h_vec = generate_random_vector<int>(PERF_N);
// create device vector
bolt::cl::device_vector<int> d_vec(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
// transfer data to the device
bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
t.start();
bolt::cl::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/fill.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create device vector (filled with zeros)
bolt::cl::device_vector<int> d_vec(PERF_N, 0);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
bolt::cl::fill(d_vec.begin(), d_vec.end(), int(trial));
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,56 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/inner_product.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create host vectors
std::vector<int> host_x = generate_random_vector<int>(PERF_N);
std::vector<int> host_y = generate_random_vector<int>(PERF_N);
// create device vectors
bolt::cl::device_vector<int> device_x(PERF_N);
bolt::cl::device_vector<int> device_y(PERF_N);
// transfer data to the device
bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin());
bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin());
int product = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
product = bolt::cl::inner_product(
device_x.begin(), device_x.end(), device_y.begin(), 0
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "product: " << product << std::endl;
return 0;
}

View File

@@ -0,0 +1,69 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include <bolt/cl/max_element.h>
#include "perf.hpp"
// returns rand() reduced modulo 10000000
int rand_int()
{
    const int modulus = 10000000;
    return rand() % modulus;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create host vector
std::vector<int> host_vec = generate_random_vector<int>(PERF_N);
// create device vectors
bolt::cl::device_vector<int> device_vec(PERF_N);
// transfer data to the device
bolt::cl::copy(host_vec.begin(), host_vec.end(), device_vec.begin());
bolt::cl::device_vector<int>::iterator max_iter = device_vec.begin();
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
max_iter = bolt::cl::max_element(device_vec.begin(), device_vec.end());
t.stop();
}
int device_max = *max_iter;
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "max: " << device_max << std::endl;
// verify max is correct
int host_max = *std::max_element(host_vec.begin(), host_vec.end());
if(device_max != host_max){
std::cout << "ERROR: "
<< "device_max (" << device_max << ") "
<< "!= "
<< "host_max (" << host_max << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,60 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/merge.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
// benchmark for bolt::cl::merge: merges two sorted inputs holding PERF_N
// elements in total into one output of PERF_N elements and prints the best
// time (in ms)
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    bolt::cl::control ctrl = bolt::cl::control::getDefault();
    ::cl::Device device = ctrl.getDevice();
    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    // split PERF_N between the two inputs: floor(N/2) and ceil(N/2) elements
    const size_t size1 = PERF_N / 2;
    const size_t size2 = PERF_N - size1;

    // create vector of random numbers on the host
    std::vector<int> host_vec1 = generate_random_vector<int>(size1);
    std::vector<int> host_vec2 = generate_random_vector<int>(size2);

    // sort them
    std::sort(host_vec1.begin(), host_vec1.end());
    std::sort(host_vec2.begin(), host_vec2.end());

    // create device vectors. the inputs are sized exactly like the host
    // data: merging over larger, partly unfilled inputs would feed unsorted
    // ranges to merge and write more than PERF_N elements to the output.
    bolt::cl::device_vector<int> device_vec1(size1);
    bolt::cl::device_vector<int> device_vec2(size2);
    bolt::cl::device_vector<int> device_vec3(PERF_N);

    // transfer data to the device
    bolt::cl::copy(host_vec1.begin(), host_vec1.end(), device_vec1.begin());
    bolt::cl::copy(host_vec2.begin(), host_vec2.end(), device_vec2.begin());

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        bolt::cl::merge(
            device_vec1.begin(), device_vec1.end(),
            device_vec2.begin(), device_vec2.end(),
            device_vec3.begin()
        );
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}

View File

@@ -0,0 +1,53 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/scan.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create vector of random numbers on the host
std::vector<int> h_vec = generate_random_vector<int>(PERF_N);
// create device vector
bolt::cl::device_vector<int> d_vec(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
// transfer data to the device
bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
t.start();
bolt::cl::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

View File

@@ -0,0 +1,100 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include <bolt/cl/reduce_by_key.h>
#include "perf.hpp"
// Returns rand() scaled into the integer range [0, 25]
// (25 is produced only when rand() == RAND_MAX).
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Stateful generator of non-decreasing integer keys.
// Every call draws from rand(); with probability of roughly
// 1 / avgValuesNoPerKey the key is advanced by one before it is returned,
// otherwise the previous key is returned again.
struct unique_key {
    int current;            // key returned by the most recent call
    int avgValuesNoPerKey;  // reciprocal of the per-call increment probability

    unique_key() : current(0), avgValuesNoPerKey(512) {}

    int operator()()
    {
        const double threshold = double(1.0) / static_cast<double>(avgValuesNoPerKey);
        const double draw = rand() / double(RAND_MAX);
        if(draw <= threshold){
            ++current;
        }
        return current;
    }
} UniqueKey;
// Benchmark driver: times bolt::cl::reduce_by_key on PERF_N (key, value)
// pairs and verifies the number of reduced keys.
//
// Returns 0 on success and -1 when the number of output keys is wrong.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    bolt::cl::control ctrl = bolt::cl::control::getDefault();
    ::cl::Device device = ctrl.getDevice();
    std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;

    // create vector of keys and random values
    // (keys are non-decreasing and grow in steps of at most one)
    std::vector<int> host_keys(PERF_N);
    std::vector<int> host_values(PERF_N);
    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
    std::generate(host_values.begin(), host_values.end(), rand_int);

    // create device vectors for data
    bolt::cl::device_vector<int> device_keys(PERF_N);
    bolt::cl::device_vector<int> device_values(PERF_N);

    // transfer data to the device
    bolt::cl::copy(host_keys.begin(), host_keys.end(), device_keys.begin());
    bolt::cl::copy(host_values.begin(), host_values.end(), device_values.begin());

    // create device vectors for the results
    bolt::cl::device_vector<int> device_keys_results(PERF_N);
    bolt::cl::device_vector<int> device_values_results(PERF_N);

    typedef bolt::cl::device_vector<int>::iterator iterType;
    bolt::cl::pair<iterType, iterType> result = {
        device_keys_results.begin(),
        device_values_results.begin()
    };

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        result = bolt::cl::reduce_by_key(device_keys.begin(),
                                         device_keys.end(),
                                         device_values.begin(),
                                         device_keys_results.begin(),
                                         device_values_results.begin());
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // The generated keys are consecutive integers, so the number of distinct
    // keys is last - first + 1. The first key is not necessarily 0 because
    // the generator may advance before returning its first value, and there
    // are no keys at all when the input is empty.
    size_t expected_size = 0;
    if(!host_keys.empty()){
        expected_size =
            static_cast<size_t>(host_keys.back() - host_keys.front() + 1);
    }

    size_t result_size = bolt::cl::distance(device_keys_results.begin(), result.first);
    if(result_size != expected_size){
        std::cout << "ERROR: "
                  << "wrong number of keys"
                  << std::endl;
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,76 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <algorithm>
#include <vector>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include <bolt/cl/transform.h>
#include "perf.hpp"
// Binary functor computing a * x + y for a fixed scalar a ("saxpy").
//   _a             - scale factor supplied at construction
//   operator()(x,y) - returns _a * x + y
// The definition is passed through BOLT_FUNCTOR so that it can be used with
// bolt::cl::transform below.
// NOTE(review): presumably the macro also makes the functor's source text
// available to Bolt for device-side compilation — confirm against the Bolt
// documentation before editing the body.
BOLT_FUNCTOR(saxpy_functor,
struct saxpy_functor
{
float _a;
saxpy_functor(float a) : _a(a) {};
float operator() (const float &x, const float &y) const
{
return _a * x + y;
};
};
)
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
bolt::cl::control ctrl = bolt::cl::control::getDefault();
::cl::Device device = ctrl.getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create host vectors
std::vector<float> host_x(PERF_N);
std::vector<float> host_y(PERF_N);
std::generate(host_x.begin(), host_x.end(), rand);
std::generate(host_y.begin(), host_y.end(), rand);
// create device vectors
bolt::cl::device_vector<float> device_x(PERF_N);
bolt::cl::device_vector<float> device_y(PERF_N);
// transfer data to the device
bolt::cl::copy(host_x.begin(), host_x.end(), device_x.begin());
bolt::cl::copy(host_y.begin(), host_y.end(), device_y.begin());
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
bolt::cl::transform(
device_x.begin(), device_x.end(),
device_y.begin(),
device_y.begin(),
saxpy_functor(2.5f)
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
bolt::cl::copy(device_x.begin(), device_x.end(), host_x.begin());
bolt::cl::copy(device_y.begin(), device_y.end(), host_y.begin());
return 0;
}

View File

@@ -0,0 +1,50 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <vector>
#include <bolt/cl/sort.h>
#include <bolt/cl/copy.h>
#include <bolt/cl/device_vector.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
::cl::Device device = bolt::cl::control::getDefault().getDevice();
std::cout << "device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
// create host vector
std::vector<int> h_vec = generate_random_vector<int>(PERF_N);
// create device vector
bolt::cl::device_vector<int> d_vec(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
// transfer data to the device
bolt::cl::copy(h_vec.begin(), h_vec.end(), d_vec.begin());
t.start();
bolt::cl::sort(d_vec.begin(), d_vec.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
bolt::cl::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

View File

@@ -0,0 +1,158 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#define _USE_MATH_DEFINES
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/copy_n.hpp>
#include <boost/compute/algorithm/transform.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
namespace compute = boost::compute;
using compute::float2_;
// Returns rand() scaled to a float in the range [0, 1000].
float rand_float()
{
    const float unit = float(rand()) / float(RAND_MAX);
    return unit * 1000.f;
}
// Host reference implementation of the cartesian -> polar conversion.
//
// input  - n interleaved (x, y) pairs, i.e. 2*n floats
// n      - number of points
// output - receives n interleaved (magnitude, angle) pairs, angle in degrees
//
// Both coordinates of a point are read before its result is stored, so
// output may be the same buffer as input.
void serial_cartesian_to_polar(const float *input, size_t n, float *output)
{
    const float *src = input;
    float *dst = output;

    for(size_t remaining = n; remaining > 0; --remaining){
        const float x = src[0];
        const float y = src[1];

        dst[0] = std::sqrt(x*x + y*y);
        dst[1] = std::atan2(y, x) * 180.f / M_PI;

        src += 2;
        dst += 2;
    }
}
// Host reference implementation of the polar -> cartesian conversion.
//
// input  - n interleaved (magnitude, angle) pairs, i.e. 2*n floats
// n      - number of points
// output - receives n interleaved (x, y) pairs
//
// The angle is passed to cos()/sin() unchanged. Both components of a point
// are read before its result is stored, so output may alias input.
void serial_polar_to_cartesian(const float *input, size_t n, float *output)
{
    const float *src = input;
    float *dst = output;

    for(size_t remaining = n; remaining > 0; --remaining){
        const float magnitude = src[0];
        const float angle = src[1];

        dst[0] = magnitude * cos(angle);
        dst[1] = magnitude * sin(angle);

        src += 2;
        dst += 2;
    }
}
// Device function: converts one point from cartesian coordinates (x, y) to
// polar coordinates (magnitude, angle).
//   p       - input point; p.x and p.y are the cartesian coordinates
//   returns - float2 holding (magnitude, angle), where the angle is
//             atan2(y, x) scaled by 180 / M_PI, i.e. expressed in degrees
// This mirrors the host-side serial_cartesian_to_polar().
BOOST_COMPUTE_FUNCTION(float2_, cartesian_to_polar, (float2_ p),
{
float x = p.x;
float y = p.y;
float magnitude = sqrt(x*x + y*y);
float angle = atan2(y, x) * 180.f / M_PI;
return (float2)(magnitude, angle);
});
// Device function: converts one point from polar coordinates
// (magnitude, angle) to cartesian coordinates (x, y).
//   p       - input point; p.x is the magnitude, p.y the angle
//   returns - float2 holding (x, y)
// The angle is passed to cos()/sin() unchanged.
// NOTE(review): cartesian_to_polar above produces its angle in degrees, so
// the two functions do not look like exact inverses — confirm the intended
// angle unit before chaining them.
BOOST_COMPUTE_FUNCTION(float2_, polar_to_cartesian, (float2_ p),
{
    float magnitude = p.x;
    float angle = p.y;

    float x = magnitude * cos(angle);
    float y = magnitude * sin(angle);

    // the return statement needs its terminating semicolon for the function
    // source to be well-formed
    return (float2)(x, y);
});
// Benchmark driver: times the cartesian_to_polar device function applied
// with compute::transform to PERF_N points, times the serial host version,
// and checks that both produce the same data.
//
// Returns 0 on success and -1 on the first mismatching value.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    compute::device device = compute::system::default_device();
    compute::context context(device);
    compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    // (PERF_N points stored as interleaved x, y floats)
    std::vector<float> host_vector(PERF_N*2);
    std::generate(host_vector.begin(), host_vector.end(), rand_float);

    // create vector on the device and copy the data
    compute::vector<float2_> device_vector(PERF_N, context);
    compute::copy_n(
        reinterpret_cast<float2_ *>(&host_vector[0]),
        PERF_N,
        device_vector.begin(),
        queue
    );

    // the conversion is applied in place, once per trial
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        compute::transform(
            device_vector.begin(),
            device_vector.end(),
            device_vector.begin(),
            cartesian_to_polar,
            queue
        );
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // perform the same conversion on the host; it is also applied in place
    // once per trial, so host and device data go through the same number of
    // conversions and stay comparable
    t.clear();
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        serial_cartesian_to_polar(&host_vector[0], PERF_N, &host_vector[0]);
        t.stop();
    }
    std::cout << "host time: " << t.min_time() / 1e6 << " ms" << std::endl;

    std::vector<float> device_data(PERF_N*2);
    compute::copy(
        device_vector.begin(),
        device_vector.end(),
        reinterpret_cast<float2_ *>(&device_data[0]),
        queue
    );

    // compare every float: each of the PERF_N points contributes two values,
    // so the loop has to cover all PERF_N*2 entries and not just the first
    // PERF_N of them
    for(size_t i = 0; i < device_data.size(); i++){
        float host_value = host_vector[i];
        float device_value = device_data[i];

        if(std::abs(device_value - host_value) > 1e-3){
            std::cout << "ERROR: "
                      << "value at " << i << " "
                      << "device_value (" << device_value << ") "
                      << "!= "
                      << "host_value (" << host_value << ")"
                      << std::endl;
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,86 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
namespace po = boost::program_options;
namespace compute = boost::compute;
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
using boost::compute::int_;
// create vector of random numbers on the host
std::vector<int_> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand);
// create vector on the device and copy the data
boost::compute::vector<int_> device_vector(PERF_N, context);
// less function for float
BOOST_COMPUTE_FUNCTION(bool, comp, (int_ a, int_ b),
{
return a < b;
});
// sort vector
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
queue.finish();
t.start();
boost::compute::sort(
device_vector.begin(),
device_vector.end(),
comp,
queue
);
queue.finish();
t.stop();
};
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify vector is sorted
if(!boost::compute::is_sorted(device_vector.begin(),
device_vector.end(),
comp,
queue)){
std::cout << "ERROR: is_sorted() returned false" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,122 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <boost/compute/core.hpp>
#include <boost/compute/closure.hpp>
#include <boost/compute/algorithm/copy_if.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/random/default_random_engine.hpp>
#include <boost/compute/random/uniform_int_distribution.hpp>
#include <boost/compute/random/uniform_real_distribution.hpp>
#include "perf.hpp"
namespace compute = boost::compute;
// Times compute::copy_if with an "is odd" predicate on PERF_N device-generated
// ints drawn from [1, 10].
//
// queue - command queue used for all device work; its context owns the data
//
// After every trial the fraction of copied elements is computed; for
// PERF_N > 1000 a fraction outside [0.45, 0.55] is reported on std::cerr.
void test_copy_if_odd(compute::command_queue &queue)
{
    typedef compute::vector<int> int_vector;

    // source and destination vectors live in the queue's context
    const compute::context &context = queue.get_context();
    int_vector input(PERF_N, context);
    int_vector output(PERF_N, context);

    // fill the input with random numbers between 1 and 10
    compute::default_random_engine engine(queue);
    compute::uniform_int_distribution<int> distribution(1, 10);
    distribution.generate(input.begin(), input.end(), engine, queue);

    BOOST_COMPUTE_FUNCTION(bool, is_odd, (int x),
    {
        return x & 1;
    });

    perf_timer timer;
    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
        timer.start();
        int_vector::iterator copied_end = compute::copy_if(
            input.begin(), input.end(), output.begin(), is_odd, queue
        );
        queue.finish();
        timer.stop();

        // sanity check on the share of elements that passed the predicate
        float ratio = float(std::distance(output.begin(), copied_end)) / PERF_N;
        if(PERF_N > 1000){
            if(ratio < 0.45f || ratio > 0.55f){
                std::cerr << "error: ratio is " << ratio << std::endl;
                std::cerr << "error: ratio should be around 45-55%" << std::endl;
            }
        }
    }
    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
}
// Times compute::copy_if with a closure predicate that keeps points whose
// length (ignoring the fourth component) is below a fixed radius.
//
// queue - command queue used for all device work; its context owns the data
//
// The points' float components are generated uniformly in [-radius, +radius].
// After every trial the fraction of copied points is computed; for
// PERF_N > 1000 a fraction outside [0.5, 0.6] is reported on std::cerr.
void test_copy_if_in_sphere(compute::command_queue &queue)
{
    using boost::compute::float4_;
    typedef compute::vector<float4_> point_vector;

    // source and destination vectors live in the queue's context
    const compute::context &context = queue.get_context();
    point_vector input_points(PERF_N, context);
    point_vector output_points(PERF_N, context);

    // generate random numbers in a cube; the input buffer is addressed as
    // PERF_N * 4 individual floats for this purpose
    float radius = 5.0f;
    compute::default_random_engine engine(queue);
    compute::uniform_real_distribution<float> distribution(-radius, +radius);
    distribution.generate(
        compute::make_buffer_iterator<float>(input_points.get_buffer(), 0),
        compute::make_buffer_iterator<float>(input_points.get_buffer(), PERF_N * 4),
        engine,
        queue
    );

    // predicate which returns true if the point lies within the sphere;
    // radius is captured from the enclosing scope
    BOOST_COMPUTE_CLOSURE(bool, is_in_sphere, (float4_ point), (radius),
    {
        // ignore fourth component
        point.w = 0;
        return length(point) < radius;
    });

    perf_timer timer;
    for(size_t trial = 0; trial < PERF_TRIALS; ++trial){
        timer.start();
        point_vector::iterator copied_end = compute::copy_if(
            input_points.begin(),
            input_points.end(),
            output_points.begin(),
            is_in_sphere,
            queue
        );
        queue.finish();
        timer.stop();

        // sanity check on the share of points that passed the predicate
        float ratio = float(std::distance(output_points.begin(), copied_end)) / PERF_N;
        if(PERF_N > 1000){
            if(ratio < 0.5f || ratio > 0.6f){
                std::cerr << "error: ratio is " << ratio << std::endl;
                std::cerr << "error: ratio should be around 50-60%" << std::endl;
            }
        }
    }
    std::cout << "time: " << timer.min_time() / 1e6 << " ms" << std::endl;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
test_copy_if_odd(queue);
return 0;
}

View File

@@ -0,0 +1,55 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <cstdlib>
#include <iostream>
#include <boost/compute.hpp>
int main(int argc, char *argv[])
{
size_t size = 1000;
if(argc >= 2){
size = boost::lexical_cast<size_t>(argv[1]);
}
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue::properties
properties = boost::compute::command_queue::enable_profiling;
boost::compute::command_queue queue(context, device, properties);
std::vector<int> host_vector(size);
std::generate(host_vector.begin(), host_vector.end(), rand);
boost::compute::vector<int> device_vector(host_vector.size(), context);
boost::compute::future<void> future =
boost::compute::copy_async(host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue);
// wait for copy to finish
future.wait();
// get elapsed time in nanoseconds
size_t elapsed =
future.get_event().duration<boost::chrono::nanoseconds>().count();
std::cout << "time: " << elapsed / 1e6 << " ms" << std::endl;
float rate = (float(size * sizeof(int)) / elapsed) * 1000.f;
std::cout << "rate: " << rate << " MB/s" << std::endl;
return 0;
}

View File

@@ -0,0 +1,77 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/count.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Maps rand() onto the integer range [0, 25]; the upper bound is reached
// only when rand() returns RAND_MAX.
int rand_int()
{
    const double fraction = rand() / double(RAND_MAX);
    const double scaled = fraction * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
size_t count = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
count = boost::compute::count(
device_vector.begin(), device_vector.end(), 4, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "count: " << count << std::endl;
// verify count is correct
size_t host_count = std::count(host_vector.begin(),
host_vector.end(),
4);
if(count != host_count){
std::cout << "ERROR: "
<< "device_count (" << count << ") "
<< "!= "
<< "host_count (" << host_count << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/random/default_random_engine.hpp>
#include <boost/compute/random/discrete_distribution.hpp>
#include "perf.hpp"
namespace compute = boost::compute;
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
compute::device device = compute::system::default_device();
compute::context context(device);
compute::command_queue queue(context, device);
compute::vector<compute::uint_> vector(PERF_N, context);
int weights[] = {1, 1};
compute::default_random_engine rng(queue);
compute::discrete_distribution<compute::uint_> dist(weights, weights+2);
perf_timer t;
t.start();
dist.generate(vector.begin(), vector.end(), rng, queue);
queue.finish();
t.stop();
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,61 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/remove.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns rand() scaled into the integer range [0, 10]
// (10 is produced only when rand() == RAND_MAX).
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 10.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::vector<int> device_vector(
host_vector.begin(), host_vector.end(), queue
);
t.start();
device_vector.erase(
boost::compute::remove(
device_vector.begin(), device_vector.end(), 4, queue
),
device_vector.end(),
queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,97 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Benoit
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/exclusive_scan.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Produces an int in [0, 25] by scaling rand() / RAND_MAX by 25 and
// truncating towards zero.
int rand_int()
{
    double scaled = rand() / double(RAND_MAX);
    scaled = scaled * 25.0;
    return static_cast<int>(scaled);
}
// Benchmark driver: times boost::compute::exclusive_scan over PERF_N random
// ints and verifies the last output element against a host computation.
//
// Returns 0 on success and -1 when device and host results differ.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);

    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::vector<int> device_res(PERF_N,context);
    boost::compute::copy(
        host_vector.begin(),
        host_vector.end(),
        device_vector.begin(),
        queue
    );

    // sum vector
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        // re-upload the input outside of the timed region
        boost::compute::copy(
            host_vector.begin(),
            host_vector.end(),
            device_vector.begin(),
            queue
        );

        t.start();
        boost::compute::exclusive_scan(
            device_vector.begin(),
            device_vector.end(),
            device_res.begin(),
            queue
        );
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // verify sum is correct
    std::partial_sum(
        host_vector.begin(),
        host_vector.end(),
        host_vector.begin()
    );

    // with an empty input there is no last element to compare, so the
    // verification is skipped instead of reading past the vectors
    if(!host_vector.empty()){
        int device_sum = device_res.back();

        // when scan is exclusive values are shifted by one on the left
        // compared to a inclusive scan; with a single input element there
        // is no second-to-last inclusive value and the expected result is
        // the scan's initial value 0
        int host_sum = 0;
        if(host_vector.size() >= 2){
            host_sum = host_vector[host_vector.size()-2];
        }

        if(device_sum != host_sum){
            std::cout << "ERROR: "
                      << "device_sum (" << device_sum << ") "
                      << "!= "
                      << "host_sum (" << host_sum << ")"
                      << std::endl;
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector on the device (filled with zeros)
boost::compute::vector<int> vec(PERF_N, 0, queue);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::fill(vec.begin(), vec.end(), int(trial), queue);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,88 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/find.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Largest value rand_int() can return.
int rand_int_max = 25;

// Returns rand() scaled into the integer range [0, rand_int_max]; the upper
// bound is reached only when rand() == RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * rand_int_max);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
// trying to find element that isn't in vector (worst-case scenario)
int wanted = rand_int_max + 1;
// device iterator
boost::compute::vector<int>::iterator device_result_it;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
device_result_it = boost::compute::find(device_vector.begin(),
device_vector.end(),
wanted,
queue);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify if found index is correct by comparing it with std::find() result
size_t host_result_index = std::distance(host_vector.begin(),
std::find(host_vector.begin(),
host_vector.end(),
wanted));
size_t device_result_index = device_result_it.get_index();
if(device_result_index != host_result_index){
std::cout << "ERROR: "
<< "device_result_index (" << device_result_index << ") "
<< "!= "
<< "host_result_index (" << host_result_index << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/find_end.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Scales rand() / RAND_MAX by 25 and truncates, yielding an int in [0, 25].
int rand_int()
{
    const double ratio = rand() / double(RAND_MAX);
    const int value = static_cast<int>(ratio * 25.0);
    return value;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int pattern[] = {2, 6, 6, 7, 8, 4};
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
boost::compute::vector<int> pattern_vector(pattern, pattern + 6, queue);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::find_end(
device_vector.begin(), device_vector.end(),
pattern_vector.begin(), pattern_vector.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/timer/timer.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> random_vector(PERF_N);
std::generate(random_vector.begin(), random_vector.end(), rand);
// create input vector for gpu
std::vector<int> gpu_vector = random_vector;
// sort vector on gpu
boost::timer::cpu_timer t;
boost::compute::sort(
gpu_vector.begin(), gpu_vector.end(), queue
);
queue.finish();
std::cout << "time: " << t.elapsed().wall / 1e6 << " ms" << std::endl;
// create input vector for host
std::vector<int> host_vector = random_vector;
// sort vector on host
t.start();
std::sort(host_vector.begin(), host_vector.end());
std::cout << "host time: " << t.elapsed().wall / 1e6 << " ms" << std::endl;
// ensure that both sorted vectors are equal
if(!std::equal(gpu_vector.begin(), gpu_vector.end(), host_vector.begin())){
std::cerr << "ERROR: sorted vectors not the same" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,68 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/includes.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer obtained by scaling rand() into [0, 25]
// and truncating toward zero.
int rand_int()
{
    const double fraction = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vectors of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::sort(host_vector.begin(), host_vector.end());
// create vectors on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
boost::compute::vector<int> device_vector2(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector2.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::includes(
device_vector.begin(), device_vector.end(),
device_vector2.begin(), device_vector2.end(),
queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,74 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/inner_product.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Produces a pseudo-random integer by mapping rand() onto [0, 25] and
// discarding the fractional part.
int rand_int()
{
    const double unit = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    const double scaled = unit * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
std::vector<int> h1(PERF_N);
std::vector<int> h2(PERF_N);
std::generate(h1.begin(), h1.end(), rand_int);
std::generate(h2.begin(), h2.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> d1(PERF_N, context);
boost::compute::vector<int> d2(PERF_N, context);
boost::compute::copy(h1.begin(), h1.end(), d1.begin(), queue);
boost::compute::copy(h2.begin(), h2.end(), d2.begin(), queue);
int product = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
product = boost::compute::inner_product(
d1.begin(), d1.end(), d2.begin(), int(0), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify product is correct
int host_product = std::inner_product(
h1.begin(), h1.end(), h2.begin(), int(0)
);
if(product != host_product){
std::cout << "ERROR: "
<< "device_product (" << product << ") "
<< "!= "
<< "host_product (" << host_product << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,66 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/is_permutation.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns rand() rescaled to the range [0, 25] and truncated to an int.
int rand_int()
{
    const double upper = 25.0;
    const double ratio = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(ratio * upper);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
boost::compute::vector<int> device_vector2(PERF_N, context);
boost::compute::copy(
host_vector.rbegin(), host_vector.rend(), device_vector2.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::is_permutation(
device_vector.begin(), device_vector.end(),
device_vector2.begin(), device_vector2.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,63 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/algorithm/reverse.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
// sort and then reverse the random vector
boost::compute::sort(device_vector.begin(), device_vector.end(), queue);
boost::compute::reverse(device_vector.begin(), device_vector.end(), queue);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
bool sorted = boost::compute::is_sorted(
device_vector.begin(), device_vector.end(), queue
);
queue.finish();
t.stop();
if(sorted){
std::cerr << "ERROR: is_sorted() returned true" << std::endl;
}
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,93 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Rastko Anicic <anicic.rastko@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/max_element.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns rand() reduced modulo 10000000.
int rand_int()
{
    const int raw = rand();
    return raw % 10000000;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
boost::compute::vector<int>::iterator device_max_iter
= device_vector.begin();
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
device_max_iter = boost::compute::max_element(
device_vector.begin(), device_vector.end(), queue
);
queue.finish();
t.stop();
}
int device_max = device_max_iter.read(queue);
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "max: " << device_max << std::endl;
// verify max is correct
std::vector<int>::iterator host_max_iter
= std::max_element(host_vector.begin(), host_vector.end());
int host_max = *host_max_iter;
if(device_max != host_max){
std::cout << "ERROR: "
<< "device_max (" << device_max << ") "
<< "!= "
<< "host_max (" << host_max << ")"
<< std::endl;
return -1;
}
size_t host_max_idx = std::distance(host_vector.begin(), host_max_iter);
size_t device_max_idx = std::distance(device_vector.begin(), device_max_iter);
if(device_max_idx != host_max_idx){
std::cout << "ERROR: "
<< "device_max index (" << device_max_idx << ") "
<< "!= "
<< "host_max index (" << host_max_idx << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,69 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <cmath>
#include <vector>
#include <algorithm>
#include <iostream>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/merge.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
std::vector<int> v1 = generate_random_vector<int>(std::floor(PERF_N / 2.0));
std::vector<int> v2 = generate_random_vector<int>(std::ceil(PERF_N / 2.0));
std::vector<int> v3(PERF_N);
std::sort(v1.begin(), v1.end());
std::sort(v2.begin(), v2.end());
boost::compute::vector<int> gpu_v1(v1.begin(), v1.end(), queue);
boost::compute::vector<int> gpu_v2(v2.begin(), v2.end(), queue);
boost::compute::vector<int> gpu_v3(PERF_N, context);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::merge(gpu_v1.begin(), gpu_v1.end(),
gpu_v2.begin(), gpu_v2.end(),
gpu_v3.begin(),
queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::vector<int> check_v3(PERF_N);
boost::compute::copy(gpu_v3.begin(), gpu_v3.end(), check_v3.begin(), queue);
queue.finish();
std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin());
if(!ok){
std::cerr << "ERROR: merged ranges different" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/next_permutation.hpp>
#include <boost/compute/algorithm/prev_permutation.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer obtained by scaling rand() into [0, 25]
// and truncating toward zero.
int rand_int()
{
    const double fraction = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
// Benchmark: time boost::compute::next_permutation() on a device vector that
// starts out sorted in descending order, and print the minimum trial time.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host, sorted in descending order
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);
    std::sort(host_vector.begin(), host_vector.end(), std::greater<int>());

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
    );

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        boost::compute::next_permutation(
            device_vector.begin(), device_vector.end(), queue
        );
        queue.finish();
        t.stop();

        // step back again outside the timed region (presumably so that every
        // trial sees the same input arrangement)
        boost::compute::prev_permutation(
            device_vector.begin(), device_vector.end(), queue
        );

        // drain the queue here: any restore work still pending would
        // otherwise be charged to the next trial's measurement
        queue.finish();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    return 0;
}

View File

@@ -0,0 +1,60 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/nth_element.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Produces a pseudo-random integer by mapping rand() onto [0, 25] and
// discarding the fractional part.
int rand_int()
{
    const double unit = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    const double scaled = unit * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::nth_element(
device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,97 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/partial_sum.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns rand() rescaled to the range [0, 25] and truncated to an int.
int rand_int()
{
    const double upper = 25.0;
    const double ratio = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(ratio * upper);
}
int main(int argc, char *argv[])
{
using boost::compute::int_;
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int_> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int_> device_vector(PERF_N, context);
boost::compute::vector<int_> device_res(PERF_N,context);
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
// sum vector
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
t.start();
boost::compute::partial_sum(
device_vector.begin(),
device_vector.end(),
device_res.begin(),
queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify sum is correct
std::partial_sum(
host_vector.begin(),
host_vector.end(),
host_vector.begin()
);
int device_sum = device_res.back();
int host_sum = host_vector.back();
if(device_sum != host_sum){
std::cout << "ERROR: "
<< "device_sum (" << device_sum << ") "
<< "!= "
<< "host_sum (" << host_sum << ")"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,66 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/partition.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer obtained by scaling rand() into [0, 25]
// and truncating toward zero.
int rand_int()
{
    const double fraction = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
using boost::compute::_1;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
t.start();
boost::compute::partition(
device_vector.begin(), device_vector.end(), _1 < 10, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,68 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/lambda.hpp>
#include <boost/compute/algorithm/partition.hpp>
#include <boost/compute/algorithm/partition_point.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Produces a pseudo-random integer by mapping rand() onto [0, 25] and
// discarding the fractional part.
int rand_int()
{
    const double unit = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    const double scaled = unit * 25.0;
    return static_cast<int>(scaled);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
using boost::compute::_1;
boost::compute::partition(
device_vector.begin(), device_vector.end(), _1 < 20, queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::partition_point(
device_vector.begin(), device_vector.end(), _1 < 20, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/next_permutation.hpp>
#include <boost/compute/algorithm/prev_permutation.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns rand() rescaled to the range [0, 25] and truncated to an int.
int rand_int()
{
    const double upper = 25.0;
    const double ratio = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(ratio * upper);
}
// Benchmark: time boost::compute::prev_permutation() on a device vector that
// starts out sorted in ascending order, and print the minimum trial time.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host, sorted in ascending order
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);
    std::sort(host_vector.begin(), host_vector.end());

    // create vector on the device and copy the data
    boost::compute::vector<int> device_vector(PERF_N, context);
    boost::compute::copy(
        host_vector.begin(), host_vector.end(), device_vector.begin(), queue
    );

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        boost::compute::prev_permutation(
            device_vector.begin(), device_vector.end(), queue
        );
        queue.finish();
        t.stop();

        // step forward again outside the timed region (presumably so that
        // every trial sees the same input arrangement)
        boost::compute::next_permutation(
            device_vector.begin(), device_vector.end(), queue
        );

        // drain the queue here: any restore work still pending would
        // otherwise be charged to the next trial's measurement
        queue.finish();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    return 0;
}

View File

@@ -0,0 +1,101 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/core.hpp>
#include <boost/compute/random.hpp>
#include "perf.hpp"
namespace compute = boost::compute;
namespace po = boost::program_options;
template<class Engine>
void perf_random_number_engine(const size_t size,
const size_t trials,
compute::command_queue& queue)
{
typedef typename Engine::result_type T;
// create random number engine
Engine engine(queue);
// create vector on the device
std::cout << "size = " << size << std::endl;
compute::vector<T> vector(size, queue.get_context());
// generate random numbers
perf_timer t;
for(size_t i = 0; i < trials; i++){
t.start();
engine.generate(vector.begin(), vector.end(), queue);
queue.finish();
t.stop();
}
// print result
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "rate: " << perf_rate<T>(size, t.min_time()) << " MB/s" << std::endl;
}
int main(int argc, char *argv[])
{
// setup and parse command line options
po::options_description options("options");
options.add_options()
("help", "show usage instructions")
("size", po::value<size_t>()->default_value(8192), "number of values")
("trials", po::value<size_t>()->default_value(3), "number of trials")
("engine", po::value<std::string>()->default_value("default_random_engine"), "random number engine")
;
po::variables_map vm;
po::store(po::parse_command_line(argc, argv, options), vm);
po::notify(vm);
if(vm.count("help")) {
std::cout << options << std::endl;
return 0;
}
// setup context and queue for the default device
compute::device device = compute::system::default_device();
compute::context context(device);
compute::command_queue queue(context, device);
// get command line options
const size_t size = vm["size"].as<size_t>();
const size_t trials = vm["trials"].as<size_t>();
const std::string& engine = vm["engine"].as<std::string>();
// run benchmark
if(engine == "default_random_engine"){
perf_random_number_engine<compute::default_random_engine>(size, trials, queue);
}
else if(engine == "mersenne_twister_engine"){
perf_random_number_engine<compute::mt19937>(size, trials, queue);
}
else if(engine == "linear_congruential_engine"){
perf_random_number_engine<compute::linear_congruential_engine<> >(size, trials, queue);
}
else if(engine == "threefry_engine"){
perf_random_number_engine<compute::threefry_engine<> >(size, trials, queue);
}
else {
std::cerr << "error: unknown random number engine '" << engine << "'" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,114 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/algorithm/reduce_by_key.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer obtained by scaling rand() into [0, 25]
// and truncating toward zero.
int rand_int()
{
    const double fraction = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
// Generator of a non-decreasing key sequence.
//
// Each call draws rand(); with probability of roughly 1/avgValuesNoPerKey the
// key is advanced by one before it is returned, otherwise the current key is
// returned unchanged. Because the very first call may already advance the
// key, the first value produced is either 0 or 1.
struct unique_key {
    int current;            // key returned by the most recent call
    int avgValuesNoPerKey;  // reciprocal of the per-call advance probability

    unique_key()
        : current(0),
          avgValuesNoPerKey(512)
    {
    }

    int operator()()
    {
        const double advance_probability =
            double(1.0) / static_cast<double>(avgValuesNoPerKey);
        const double draw = rand() / double(RAND_MAX);
        if(draw <= advance_probability){
            ++current;
        }
        return current;
    }
} UniqueKey;
// Benchmark: time boost::compute::reduce_by_key() on device vectors of
// non-decreasing keys and random values, then check the number of reduced
// keys. Returns 0 on success and -1 when the key count is wrong.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of keys and random values
    std::vector<int> host_keys(PERF_N);
    std::vector<int> host_values(PERF_N);
    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
    std::generate(host_values.begin(), host_values.end(), rand_int);

    // create vectors for keys and values on the device and copy the data
    boost::compute::vector<int> device_keys(PERF_N, context);
    boost::compute::vector<int> device_values(PERF_N,context);
    boost::compute::copy(
        host_keys.begin(),
        host_keys.end(),
        device_keys.begin(),
        queue
    );
    boost::compute::copy(
        host_values.begin(),
        host_values.end(),
        device_values.begin(),
        queue
    );

    // vectors for the results
    boost::compute::vector<int> device_keys_results(PERF_N, context);
    boost::compute::vector<int> device_values_results(PERF_N,context);

    typedef boost::compute::vector<int>::iterator iterType;
    std::pair<iterType, iterType> result(
        device_keys_results.begin(),
        device_values_results.begin()
    );

    // reduce by key
    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        result = boost::compute::reduce_by_key(device_keys.begin(),
                                               device_keys.end(),
                                               device_values.begin(),
                                               device_keys_results.begin(),
                                               device_values_results.begin(),
                                               queue);
        // wait for all enqueued work before stopping the timer, as the other
        // benchmarks do, so the measurement covers the whole operation
        queue.finish();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // The generated keys never decrease and grow by at most one per element,
    // so the number of distinct keys is (last - first + 1). The first key may
    // be 0 or 1, which is why the first key has to be taken into account.
    // An empty input yields no keys at all.
    size_t expected_size = 0;
    if(!host_keys.empty()){
        expected_size = static_cast<size_t>(host_keys.back() - host_keys.front() + 1);
    }

    size_t result_size = std::distance(device_keys_results.begin(), result.first);
    if(result_size != expected_size){
        std::cout << "ERROR: "
                  << "wrong number of keys: got " << result_size
                  << ", expected " << expected_size
                  << std::endl;
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,60 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/reverse.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::reverse(
device_vector.begin(), device_vector.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/reverse_copy.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
// create vector on the device for reversed data
boost::compute::vector<int> device_reversed_vector(PERF_N, context);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::reverse_copy(
device_vector.begin(), device_vector.end(),
device_reversed_vector.begin(),
queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,60 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/rotate.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::rotate(
device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,62 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/rotate_copy.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
boost::compute::vector<int> device_vector2(PERF_N, context);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::rotate_copy(
device_vector.begin(), device_vector.begin()+(PERF_N/2), device_vector.end(), device_vector2.begin(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,162 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/compute/lambda.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/transform.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
namespace po = boost::program_options;
namespace compute = boost::compute;
// Returns a pseudo-random float in [0, 1000] derived from rand().
float rand_float()
{
    const float scale = 1000.f;
    return static_cast<float>(rand()) / static_cast<float>(RAND_MAX) * scale;
}
// Times the element-wise computation alpha * x + y on the device.
//
// x, y    - input device vectors; the result buffer is sized from x.size()
// alpha   - scalar multiplier applied to every element of x
// trials  - number of timed repetitions
// queue   - command queue used for every operation; its context owns the
//           temporary result buffer
//
// Returns the timer's min_time() over all trials.
template<class T>
double perf_saxpy(const compute::vector<T>& x,
                  const compute::vector<T>& y,
                  const T alpha,
                  const size_t trials,
                  compute::command_queue& queue)
{
    using compute::lambda::_1;
    using compute::lambda::_2;

    // scratch buffer on the device that receives the result
    compute::vector<T> output(x.size(), queue.get_context());

    perf_timer timer;
    for(size_t i = 0; i < trials; ++i){
        // zero the output before starting the timer, so the fill is not timed
        compute::fill(output.begin(), output.end(), T(0), queue);
        queue.finish();

        timer.start();
        compute::transform(
            x.begin(), x.end(), y.begin(), output.begin(), alpha * _1 + _2, queue
        );
        queue.finish();
        timer.stop();
    }

    return timer.min_time();
}
// Exhaustively searches "tpb" x "vpt" parameter pairs for the fastest
// perf_saxpy() run and stores the winning pair in the device's global
// parameter cache.
//
// The cache key is "__boost_copy_kernel_" followed by sizeof(T);
// NOTE(review): presumably compute::transform() is dispatched through the
// copy kernel that reads this key - confirm against the library.
//
// NOTE(review): if every configuration throws opencl_error, the values 0/0
// are written to the cache at the end.
template<class T>
void tune_saxpy(const compute::vector<T>& x,
                const compute::vector<T>& y,
                const T alpha,
                const size_t trials,
                compute::command_queue& queue)
{
    boost::shared_ptr<compute::detail::parameter_cache> cache =
        compute::detail::parameter_cache::get_global_cache(queue.get_device());

    const std::string key =
        std::string("__boost_copy_kernel_") + boost::lexical_cast<std::string>(sizeof(T));

    // candidate values for both tunables
    const compute::uint_ tpb_candidates[] = { 4, 8, 16, 32, 64, 128, 256, 512, 1024 };
    const compute::uint_ vpt_candidates[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    const compute::uint_ *const tpb_end =
        tpb_candidates + sizeof(tpb_candidates) / sizeof(tpb_candidates[0]);
    const compute::uint_ *const vpt_end =
        vpt_candidates + sizeof(vpt_candidates) / sizeof(vpt_candidates[0]);

    double fastest = (std::numeric_limits<double>::max)();
    compute::uint_ chosen_tpb = 0;
    compute::uint_ chosen_vpt = 0;

    for(const compute::uint_ *tpb = tpb_candidates; tpb != tpb_end; ++tpb){
        cache->set(key, "tpb", *tpb);

        for(const compute::uint_ *vpt = vpt_candidates; vpt != vpt_end; ++vpt){
            cache->set(key, "vpt", *vpt);

            try {
                const double elapsed = perf_saxpy(x, y, alpha, trials, queue);
                if(elapsed < fastest){
                    fastest = elapsed;
                    chosen_tpb = *tpb;
                    chosen_vpt = *vpt;
                }
            }
            catch(compute::opencl_error&){
                // this pair is not usable on the device; try the next one
            }
        }
    }

    // publish the best pair found
    cache->set(key, "tpb", chosen_tpb);
    cache->set(key, "vpt", chosen_vpt);
}
int main(int argc, char *argv[])
{
// setup command line arguments
po::options_description options("options");
options.add_options()
("help", "show usage instructions")
("size", po::value<size_t>()->default_value(8192), "input size")
("trials", po::value<size_t>()->default_value(3), "number of trials to run")
("tune", "run tuning procedure")
("alpha", po::value<double>()->default_value(2.5), "saxpy alpha value")
;
po::positional_options_description positional_options;
positional_options.add("size", 1);
// parse command line
po::variables_map vm;
po::store(
po::command_line_parser(argc, argv)
.options(options).positional(positional_options).run(),
vm
);
po::notify(vm);
const size_t size = vm["size"].as<size_t>();
const size_t trials = vm["trials"].as<size_t>();
const float alpha = vm["alpha"].as<double>();
std::cout << "size: " << size << std::endl;
// setup context and queue for the default device
compute::device device = boost::compute::system::default_device();
compute::context context(device);
compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<float> host_x(size);
std::vector<float> host_y(size);
std::generate(host_x.begin(), host_x.end(), rand_float);
std::generate(host_y.begin(), host_y.end(), rand_float);
// create vector on the device and copy the data
compute::vector<float> x(host_x.begin(), host_x.end(), queue);
compute::vector<float> y(host_y.begin(), host_y.end(), queue);
// run tuning proceure (if requested)
if(vm.count("tune")){
tune_saxpy(x, y, alpha, trials, queue);
}
// run benchmark
double t = perf_saxpy(x, y, alpha, trials, queue);
std::cout << "time: " << t / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/search.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int pattern[] = {2, 6, 6, 7, 8, 4};
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
boost::compute::vector<int> pattern_vector(pattern, pattern + 6, queue);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::search(
device_vector.begin(), device_vector.end(),
pattern_vector.begin(), pattern_vector.end(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,61 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/search_n.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::search_n(
device_vector.begin(), device_vector.end(),
5, 2, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/set_difference.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
// Benchmark driver for boost::compute::set_difference(): builds two sorted
// random int ranges whose sizes add up to PERF_N, uploads them to the default
// device, computes their set difference PERF_TRIALS times and prints the
// timer's min_time() scaled by 1e-6 (labelled as milliseconds) followed by
// the size of the last result.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // Split PERF_N into floor/ceil halves with integer arithmetic. This
    // replaces std::floor/std::ceil on PERF_N / 2.0, which depended on
    // <cmath> (never included here) and converted the size through double.
    const size_t first_size = PERF_N / 2;
    const size_t second_size = PERF_N - first_size;

    // create sorted vectors of random numbers on the host
    std::vector<int> v1(first_size);
    std::vector<int> v2(second_size);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // create vectors on the device and copy the data
    boost::compute::vector<int> gpu_v1(first_size, context);
    boost::compute::vector<int> gpu_v2(second_size, context);
    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);

    // output buffer sized PERF_N (the combined size of both inputs)
    boost::compute::vector<int> gpu_v3(PERF_N, context);
    boost::compute::vector<int>::iterator gpu_v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        gpu_v3_end = boost::compute::set_difference(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin(), queue
        );
        queue.finish();
        t.stop();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
    return 0;
}

View File

@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/set_intersection.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
// Benchmark driver for boost::compute::set_intersection(): builds two sorted
// random int ranges whose sizes add up to PERF_N, uploads them to the default
// device, computes their intersection PERF_TRIALS times and prints the
// timer's min_time() scaled by 1e-6 (labelled as milliseconds) followed by
// the size of the last result.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // Split PERF_N into floor/ceil halves with integer arithmetic. This
    // replaces std::floor/std::ceil on PERF_N / 2.0, which depended on
    // <cmath> (never included here) and converted the size through double.
    const size_t first_size = PERF_N / 2;
    const size_t second_size = PERF_N - first_size;

    // create sorted vectors of random numbers on the host
    std::vector<int> v1(first_size);
    std::vector<int> v2(second_size);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // create vectors on the device and copy the data
    boost::compute::vector<int> gpu_v1(first_size, context);
    boost::compute::vector<int> gpu_v2(second_size, context);
    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);

    // output buffer sized PERF_N (the combined size of both inputs)
    boost::compute::vector<int> gpu_v3(PERF_N, context);
    boost::compute::vector<int>::iterator gpu_v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        gpu_v3_end = boost::compute::set_intersection(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin(), queue
        );
        queue.finish();
        t.stop();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
    return 0;
}

View File

@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/set_symmetric_difference.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
// Benchmark driver for boost::compute::set_symmetric_difference(): builds two
// sorted random int ranges whose sizes add up to PERF_N, uploads them to the
// default device, computes their symmetric difference PERF_TRIALS times and
// prints the timer's min_time() scaled by 1e-6 (labelled as milliseconds)
// followed by the size of the last result.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // Split PERF_N into floor/ceil halves with integer arithmetic. This
    // replaces std::floor/std::ceil on PERF_N / 2.0, which depended on
    // <cmath> (never included here) and converted the size through double.
    const size_t first_size = PERF_N / 2;
    const size_t second_size = PERF_N - first_size;

    // create sorted vectors of random numbers on the host
    std::vector<int> v1(first_size);
    std::vector<int> v2(second_size);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // create vectors on the device and copy the data
    boost::compute::vector<int> gpu_v1(first_size, context);
    boost::compute::vector<int> gpu_v2(second_size, context);
    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);

    // output buffer sized PERF_N (the combined size of both inputs)
    boost::compute::vector<int> gpu_v3(PERF_N, context);
    boost::compute::vector<int>::iterator gpu_v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        gpu_v3_end = boost::compute::set_symmetric_difference(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin(), queue
        );
        queue.finish();
        t.stop();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
    return 0;
}

View File

@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/set_union.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from rand().
// The value 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double scale = 25.0;
    return static_cast<int>(rand() / static_cast<double>(RAND_MAX) * scale);
}
// Benchmark driver for boost::compute::set_union(): builds two sorted random
// int ranges whose sizes add up to PERF_N, uploads them to the default
// device, computes their union PERF_TRIALS times and prints the timer's
// min_time() scaled by 1e-6 (labelled as milliseconds) followed by the size
// of the last result.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // setup context and queue for the default device
    boost::compute::device device = boost::compute::system::default_device();
    boost::compute::context context(device);
    boost::compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // Split PERF_N into floor/ceil halves with integer arithmetic. This
    // replaces std::floor/std::ceil on PERF_N / 2.0, which depended on
    // <cmath> (never included here) and converted the size through double.
    const size_t first_size = PERF_N / 2;
    const size_t second_size = PERF_N - first_size;

    // create sorted vectors of random numbers on the host
    std::vector<int> v1(first_size);
    std::vector<int> v2(second_size);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // create vectors on the device and copy the data
    boost::compute::vector<int> gpu_v1(first_size, context);
    boost::compute::vector<int> gpu_v2(second_size, context);
    boost::compute::copy(v1.begin(), v1.end(), gpu_v1.begin(), queue);
    boost::compute::copy(v2.begin(), v2.end(), gpu_v2.begin(), queue);

    // output buffer sized PERF_N (the combined size of both inputs)
    boost::compute::vector<int> gpu_v3(PERF_N, context);
    boost::compute::vector<int>::iterator gpu_v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        gpu_v3_end = boost::compute::set_union(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin(), queue
        );
        queue.finish();
        t.stop();
    }

    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(gpu_v3.begin(), gpu_v3_end) << std::endl;
    return 0;
}

View File

@@ -0,0 +1,130 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
namespace po = boost::program_options;
namespace compute = boost::compute;
template<class T>
double perf_sort(const std::vector<T>& data,
const size_t trials,
compute::command_queue& queue)
{
compute::vector<T> vec(data.size(), queue.get_context());
perf_timer t;
for(size_t trial = 0; trial < trials; trial++){
compute::copy(data.begin(), data.end(), vec.begin(), queue);
t.start();
compute::sort(vec.begin(), vec.end(), queue);
queue.finish();
t.stop();
if(!compute::is_sorted(vec.begin(), vec.end(), queue)){
std::cerr << "ERROR: is_sorted() returned false" << std::endl;
}
}
return t.min_time();
}
template<class T>
void tune_sort(const std::vector<T>& data,
const size_t trials,
compute::command_queue& queue)
{
boost::shared_ptr<compute::detail::parameter_cache>
params = compute::detail::parameter_cache::get_global_cache(queue.get_device());
const std::string cache_key =
std::string("__boost_radix_sort_") + compute::type_name<T>();
const compute::uint_ tpbs[] = { 32, 64, 128, 256, 512, 1024 };
double min_time = (std::numeric_limits<double>::max)();
compute::uint_ best_tpb = 0;
for(size_t i = 0; i < sizeof(tpbs) / sizeof(*tpbs); i++){
params->set(cache_key, "tpb", tpbs[i]);
try {
const double t = perf_sort(data, trials, queue);
if(t < min_time){
best_tpb = tpbs[i];
min_time = t;
}
}
catch(compute::opencl_error&){
// invalid work group size for this device, skip
}
}
// store optimal parameters
params->set(cache_key, "tpb", best_tpb);
}
// Benchmark driver for compute::sort() on unsigned ints.
//
// Command line: [--size N | N] [--trials N] [--tune] [--help]
//   size   - number of elements to sort (default 8192, also positional)
//   trials - number of timed repetitions (default 3)
//   tune   - run tune_sort() before the measurement
//   help   - print the option summary and exit
//
// Prints the input size, the device name and perf_sort()'s result scaled by
// 1e-6 (labelled as milliseconds).
int main(int argc, char *argv[])
{
    // setup command line arguments
    po::options_description options("options");
    options.add_options()
        ("help", "show usage instructions")
        ("size", po::value<size_t>()->default_value(8192), "input size")
        ("trials", po::value<size_t>()->default_value(3), "number of trials to run")
        ("tune", "run tuning procedure")
    ;
    po::positional_options_description positional_options;
    positional_options.add("size", 1);

    // parse command line
    po::variables_map vm;
    po::store(
        po::command_line_parser(argc, argv)
            .options(options).positional(positional_options).run(),
        vm
    );

    // --help was declared but previously ignored; honour it before doing
    // any device work
    if(vm.count("help")){
        std::cout << options << std::endl;
        return 0;
    }

    po::notify(vm);

    const size_t size = vm["size"].as<size_t>();
    const size_t trials = vm["trials"].as<size_t>();

    std::cout << "size: " << size << std::endl;

    // setup context and queue for the default device
    compute::device device = boost::compute::system::default_device();
    compute::context context(device);
    compute::command_queue queue(context, device);
    std::cout << "device: " << device.name() << std::endl;

    // create vector of random numbers on the host
    std::vector<unsigned int> data(size);
    std::generate(data.begin(), data.end(), rand);

    // run tuning procedure (if requested)
    if(vm.count("tune")){
        tune_sort(data, trials, queue);
    }

    // run sort benchmark
    double t = perf_sort(data, trials, queue);
    std::cout << "time: " << t / 1e6 << " ms" << std::endl;

    return 0;
}

View File

@@ -0,0 +1,79 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/sort_by_key.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/types/fundamental.hpp>
#include "perf.hpp"
int main(int argc, char *argv[])
{
using boost::compute::int_;
using boost::compute::long_;
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int_> host_keys(PERF_N);
std::generate(host_keys.begin(), host_keys.end(), rand);
std::vector<long_> host_values(PERF_N);
std::copy(host_keys.begin(), host_keys.end(), host_values.begin());
// create vector on the device and copy the data
boost::compute::vector<int_> device_keys(PERF_N, context);
boost::compute::vector<long_> device_values(PERF_N, context);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
boost::compute::copy(
host_keys.begin(), host_keys.end(), device_keys.begin(), queue
);
boost::compute::copy(
host_values.begin(), host_values.end(), device_values.begin(), queue
);
t.start();
// sort vector
boost::compute::sort_by_key(
device_keys.begin(), device_keys.end(), device_values.begin(), queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify keys are sorted
if(!boost::compute::is_sorted(device_keys.begin(), device_keys.end(), queue)){
std::cout << "ERROR: is_sorted() returned false for the keys" << std::endl;
return -1;
}
// verify values are sorted
if(!boost::compute::is_sorted(device_values.begin(), device_values.end(), queue)){
std::cout << "ERROR: is_sorted() returned false for the values" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,72 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/is_sorted.hpp>
#include <boost/compute/algorithm/sort.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Returns a pseudo-random float in [-50000, 50000] derived from rand().
float rand_float()
{
    const float spread = 100000.0f;
    return (static_cast<float>(rand()) / static_cast<float>(RAND_MAX) - 0.5f) * spread;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<float> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_float);
// create vector on the device and copy the data
boost::compute::vector<float> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(),
host_vector.end(),
device_vector.begin(),
queue
);
// sort vector
perf_timer t;
t.start();
boost::compute::sort(
device_vector.begin(),
device_vector.end(),
queue
);
queue.finish();
t.stop();
std::cout << "time: " << t.last_time() / 1e6 << " ms" << std::endl;
// verify vector is sorted
if(!boost::compute::is_sorted(device_vector.begin(),
device_vector.end(),
queue)){
std::cout << "ERROR: is_sorted() returned false" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,62 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include <boost/compute/algorithm/stable_partition.hpp>
#include <boost/compute/container/vector.hpp>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// setup context and queue for the default device
boost::compute::device device = boost::compute::system::default_device();
boost::compute::context context(device);
boost::compute::command_queue queue(context, device);
std::cout << "device: " << device.name() << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector on the device and copy the data
boost::compute::vector<int> device_vector(PERF_N, context);
boost::compute::copy(
host_vector.begin(), host_vector.end(), device_vector.begin(), queue
);
using boost::compute::_1;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
boost::compute::stable_partition(
device_vector.begin(), device_vector.end(), _1 < 10, queue
);
queue.finish();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int sum = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0));
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "sum: " << sum << std::endl;
return 0;
}

View File

@@ -0,0 +1,45 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// count values equal to four in the vector
size_t count = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
count = std::count(
host_vector.begin(), host_vector.end(), 4
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "count: " << count << std::endl;
return 0;
}

View File

@@ -0,0 +1,58 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include "perf.hpp"
// Upper bound (inclusive) of the values rand_int() can return.
int rand_int_max = 25;

// Scales rand() from [0, RAND_MAX] onto [0, rand_int_max] and truncates
// to int; rand_int_max itself is only produced when rand() == RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * rand_int_max);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// trying to find element that isn't in vector (worst-case scenario)
int wanted = rand_int_max + 1;
// result
std::vector<int>::iterator host_result_it;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
host_result_it = std::find(host_vector.begin(), host_vector.end(), wanted);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify
if(host_result_it != host_vector.end()){
std::cout << "ERROR: "
<< "host_result_iterator != "
<< "host_vector.end()"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,44 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int pattern[] = {2, 6, 6, 7, 8, 4};
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::find_end(host_vector.begin(), host_vector.end(),
pattern, pattern + 6);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
std::vector<int> v1(PERF_N);
std::generate(v1.begin(), v1.end(), rand_int);
std::vector<int> v2(v1);
std::sort(v1.begin(), v1.end());
std::sort(v2.begin(), v2.end());
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::includes(
v1.begin(), v1.end(),
v2.begin(), v2.end()
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,46 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
std::vector<int> h1(PERF_N);
std::vector<int> h2(PERF_N);
std::generate(h1.begin(), h1.end(), rand_int);
std::generate(h2.begin(), h2.end(), rand_int);
int product = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
product = std::inner_product(
h1.begin(), h1.end(), h2.begin(), int(0)
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "product: " << product << std::endl;
return 0;
}

View File

@@ -0,0 +1,45 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::vector<int> host_vector2(PERF_N);
std::copy(host_vector.rbegin(), host_vector.rend(), host_vector2.begin());
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::is_permutation(host_vector.begin(), host_vector.end(),
host_vector2.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Rastko Anicic <anicic.rastko@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include "perf.hpp"
// Returns rand() reduced modulo 10000000, i.e. a value in [0, 9999999].
int rand_int()
{
    const int draw = rand();
    return draw % 10000000;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int max = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
max = *(std::max_element(host_vector.begin(), host_vector.end()));
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "max: " << max << std::endl;
return 0;
}

View File

@@ -0,0 +1,38 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Benchmarks std::merge() of two sorted random vectors whose sizes add up
// to PERF_N. Runs PERF_TRIALS timed passes and prints the timer's
// min_time().
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(N/2) and ceil(N/2) with integer arithmetic.
    // This avoids calling std::floor/std::ceil (this file never includes
    // <cmath>) and the size -> double -> size round trip.
    // NOTE(review): assumes PERF_N is an unsigned integral count -- confirm
    // against perf.hpp.
    const size_t lower_half = PERF_N / 2;
    const size_t upper_half = PERF_N - lower_half;

    std::vector<int> v1 = generate_random_vector<int>(lower_half);
    std::vector<int> v2 = generate_random_vector<int>(upper_half);
    std::vector<int> v3(PERF_N);

    // std::merge requires both inputs to be sorted
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Benchmarks std::next_permutation() on PERF_N random ints that start out
// in descending order. Runs PERF_TRIALS timed passes and prints the
// timer's min_time().
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // Sort into descending order. Sorting through reverse iterators gives
    // the same arrangement as sorting with std::greater<int>, without
    // depending on <functional>, which this file does not include.
    std::sort(host_vector.rbegin(), host_vector.rend());

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        std::next_permutation(host_vector.begin(), host_vector.end());
        t.stop();

        // untimed: step back so each trial starts from the same ordering
        std::prev_permutation(host_vector.begin(), host_vector.end());
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    return 0;
}

View File

@@ -0,0 +1,51 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <boost/compute/system.hpp>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
using boost::compute::int_;
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int_> v(PERF_N);
std::vector<int_> r(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
std::generate(v.begin(), v.end(), rand_int);
t.start();
std::partial_sum(
v.begin(),
v.end(),
r.begin()
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,46 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Predicate: true when `value` is strictly below 10.
bool less_than_10(int value)
{
    const int threshold = 10;
    return threshold > value;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::partition(host_vector.begin(), host_vector.end(), less_than_10);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Predicate: true when `value` is strictly below 20.
bool less_than_20(int value)
{
    const int threshold = 20;
    return threshold > value;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::partition(host_vector.begin(), host_vector.end(),
less_than_20);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::partition_point(host_vector.begin(), host_vector.end(),
less_than_20);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::sort(host_vector.begin(), host_vector.end());
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::prev_permutation(host_vector.begin(), host_vector.end());
t.stop();
std::next_permutation(host_vector.begin(), host_vector.end());
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,41 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::reverse(host_vector.begin(), host_vector.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,45 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
// create vector for reversed data
std::vector<int> host_reversed_vector(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::reverse_copy(host_vector.begin(), host_vector.end(),
host_reversed_vector.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,41 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::rotate(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,43 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
std::vector<int> host_vector2(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::rotate_copy(host_vector.begin(), host_vector.begin()+(PERF_N/2), host_vector.end(), host_vector2.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,52 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include "perf.hpp"
// Maps rand() onto a float in [0, 1000]: the draw is divided by RAND_MAX
// (both converted to float) and scaled by 1000.
float rand_float()
{
    const float draw = float(rand());
    const float unit = draw / float(RAND_MAX);
    return unit * 1000.f;
}
// Serial SAXPY: y <- alpha * x + y, element by element.
//   n     - number of elements to process in both arrays
//   alpha - scalar multiplier applied to each x element
//   x     - input array (read only)
//   y     - array updated in place with the result
void serial_saxpy(size_t n, float alpha, const float *x, float *y)
{
    const float *const x_end = x + n;
    for(; x != x_end; ++x, ++y){
        *y = alpha * (*x) + (*y);
    }
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
float alpha = 2.5f;
std::vector<float> host_x(PERF_N);
std::vector<float> host_y(PERF_N);
std::generate(host_x.begin(), host_x.end(), rand_float);
std::generate(host_y.begin(), host_y.end(), rand_float);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
serial_saxpy(PERF_N, alpha, &host_x[0], &host_y[0]);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,44 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int pattern[] = {2, 6, 6, 7, 8, 4};
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::search(host_vector.begin(), host_vector.end(),
pattern, pattern + 6);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,41 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
std::search_n(host_vector.begin(), host_vector.end(), 5, 2);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,54 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Benchmarks std::set_difference() on two sorted random vectors whose
// sizes add up to PERF_N. Runs PERF_TRIALS timed passes, then prints the
// timer's min_time() and the number of elements written by the last pass.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(N/2) and ceil(N/2) with integer arithmetic.
    // This avoids calling std::floor/std::ceil (this file never includes
    // <cmath>) and the size -> double -> size round trip.
    // NOTE(review): assumes PERF_N is an unsigned integral count -- confirm
    // against perf.hpp.
    const size_t lower_half = PERF_N / 2;
    const size_t upper_half = PERF_N - lower_half;

    std::vector<int> v1(lower_half);
    std::vector<int> v2(upper_half);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);

    // the set algorithms require sorted inputs
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // output buffer sized for the largest possible result
    std::vector<int> v3(PERF_N);
    std::vector<int>::iterator v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        v3_end = std::set_difference(
            v1.begin(), v1.end(),
            v2.begin(), v2.end(),
            v3.begin()
        );
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;

    return 0;
}

View File

@@ -0,0 +1,54 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Benchmarks std::set_intersection() on two sorted random vectors whose
// sizes add up to PERF_N. Runs PERF_TRIALS timed passes, then prints the
// timer's min_time() and the number of elements written by the last pass.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(N/2) and ceil(N/2) with integer arithmetic.
    // This avoids calling std::floor/std::ceil (this file never includes
    // <cmath>) and the size -> double -> size round trip.
    // NOTE(review): assumes PERF_N is an unsigned integral count -- confirm
    // against perf.hpp.
    const size_t lower_half = PERF_N / 2;
    const size_t upper_half = PERF_N - lower_half;

    std::vector<int> v1(lower_half);
    std::vector<int> v2(upper_half);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);

    // the set algorithms require sorted inputs
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // output buffer sized for the largest possible result
    std::vector<int> v3(PERF_N);
    std::vector<int>::iterator v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        v3_end = std::set_intersection(
            v1.begin(), v1.end(),
            v2.begin(), v2.end(),
            v3.begin()
        );
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;

    return 0;
}

View File

@@ -0,0 +1,54 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Scales rand() from [0, RAND_MAX] onto [0, 25] and truncates to int;
// 25 is only produced when rand() returns RAND_MAX.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
// Benchmarks std::set_symmetric_difference() on two sorted random vectors
// whose sizes add up to PERF_N. Runs PERF_TRIALS timed passes, then prints
// the timer's min_time() and the number of elements written by the last
// pass.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(N/2) and ceil(N/2) with integer arithmetic.
    // This avoids calling std::floor/std::ceil (this file never includes
    // <cmath>) and the size -> double -> size round trip.
    // NOTE(review): assumes PERF_N is an unsigned integral count -- confirm
    // against perf.hpp.
    const size_t lower_half = PERF_N / 2;
    const size_t upper_half = PERF_N - lower_half;

    std::vector<int> v1(lower_half);
    std::vector<int> v2(upper_half);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);

    // the set algorithms require sorted inputs
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // output buffer sized for the largest possible result
    std::vector<int> v3(PERF_N);
    std::vector<int>::iterator v3_end;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        v3_end = std::set_symmetric_difference(
            v1.begin(), v1.end(),
            v2.begin(), v2.end(),
            v3.begin()
        );
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;

    return 0;
}

View File

@@ -0,0 +1,54 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <vector>
#include <algorithm>
#include <iostream>
#include "perf.hpp"
// Maps one rand() draw onto the integer range [0, 25] (truncating).
int rand_int()
{
    const double fraction = rand() / static_cast<double>(RAND_MAX);
    const double scaled = fraction * 25.0;
    return static_cast<int>(scaled);
}
// Benchmark driver for std::set_union.
//
// Builds two sorted vectors of random integers whose sizes add up to PERF_N,
// runs the algorithm PERF_TRIALS times, then prints the minimum time reported
// by perf_timer (divided by 1e6 and labelled "ms") and the size of the result.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(PERF_N / 2) and ceil(PERF_N / 2) elements using
    // integer arithmetic. This avoids std::floor/std::ceil (this file does not
    // include <cmath> directly) and the implicit double -> size conversion.
    // NOTE(review): assumes PERF_N is an unsigned integer -- confirm in perf.hpp.
    const size_t half_down = PERF_N / 2;
    const size_t half_up = PERF_N - half_down;
    std::vector<int> v1(half_down);
    std::vector<int> v2(half_up);
    std::generate(v1.begin(), v1.end(), rand_int);
    std::generate(v2.begin(), v2.end(), rand_int);

    // the set algorithms require sorted input ranges
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // the union of both inputs never holds more than PERF_N elements
    std::vector<int> v3(PERF_N);
    // Start at v3.begin() so the size report below stays well defined even if
    // no trial runs (PERF_TRIALS == 0).
    std::vector<int>::iterator v3_end = v3.begin();

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        v3_end = std::set_union(
            v1.begin(), v1.end(),
            v2.begin(), v2.end(),
            v3.begin()
        );
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    std::cout << "size: " << std::distance(v3.begin(), v3_end) << std::endl;
    return 0;
}

View File

@@ -0,0 +1,33 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
std::vector<int> v;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
v = generate_random_vector<int>(PERF_N);
t.start();
std::sort(v.begin(), v.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,47 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Returns rand() rescaled to the interval [0, 25] and truncated to an int.
int rand_int()
{
    double value = rand() / double(RAND_MAX);
    value = value * 25.0;
    return static_cast<int>(value);
}
// Predicate used for partitioning: true for values strictly below 10.
bool less_than_10(int value)
{
    const int limit = 10;
    return limit > value;
}
// Benchmark driver for std::stable_partition.
//
// Partitions PERF_N random integers with the less_than_10 predicate on each
// of PERF_TRIALS runs and prints the minimum time reported by perf_timer
// (divided by 1e6 and labelled "ms").
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of random numbers on the host
    std::vector<int> host_vector(PERF_N);
    std::generate(host_vector.begin(), host_vector.end(), rand_int);

    // Scratch copy that is actually partitioned. std::stable_partition works
    // in place, so without restoring the input every trial after the first
    // would run on already-partitioned data and the reported minimum would
    // describe that case instead of the random input.
    std::vector<int> work(PERF_N);

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        // restore the unpartitioned input outside the timed region
        work = host_vector;

        t.start();
        std::stable_partition(work.begin(), work.end(),
                              less_than_10);
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
    return 0;
}

View File

@@ -0,0 +1,41 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Draws from rand() and rescales the draw to an integer in [0, 25].
int rand_int()
{
    const double max_draw = double(RAND_MAX);
    const double ratio = rand() / max_draw;
    return static_cast<int>(ratio * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
std::generate(host_vector.begin(), host_vector.end(), rand_int);
t.start();
std::unique(host_vector.begin(), host_vector.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,44 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include "perf.hpp"
// Generator for benchmark input: one rand() draw scaled onto [0, 25].
int rand_int()
{
    const int draw = rand();
    return static_cast<int>((draw / double(RAND_MAX)) * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::vector<int> host_vector2(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
std::generate(host_vector.begin(), host_vector.end(), rand_int);
t.start();
std::unique_copy(
host_vector.begin(), host_vector.end(), host_vector2.begin()
);
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>
#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>
#include "perf.hpp"
// Returns a pseudo-random integer in [0, 25] derived from a single rand() call.
int rand_int()
{
    const double normalized = rand() / double(RAND_MAX);
    const int result = static_cast<int>(normalized * 25.0);
    return result;
}
// Reduction body passed to tbb::parallel_reduce by ParallelSum: accumulates
// the elements of a pointer range into `value`.
template<class T>
struct Sum {
    T value; // running (partial) sum held by this body

    Sum() : value(0) {}

    // Splitting constructor: the new body starts its own partial sum at zero
    // and does not read anything from the body it was split from.
    Sum( Sum&, tbb::split ) : value(0) {}

    // Adds every element of the sub-range r to this body's partial sum.
    void operator()( const tbb::blocked_range<T*>& r ) {
        T running = value;
        T* const last = r.end();
        for( T* p = r.begin(); p != last; ++p ) {
            running += *p;
        }
        value = running;
    }

    // Folds the partial sum of another body into this one.
    void join( Sum& rhs ) { value += rhs.value; }
};
template<class T>
T ParallelSum( T array[], size_t n ) {
Sum<T> total;
tbb::parallel_reduce( tbb::blocked_range<T*>( array, array+n ),
total );
return total.value;
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
std::vector<int> host_vector(PERF_N);
std::generate(host_vector.begin(), host_vector.end(), rand_int);
int sum = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
sum = ParallelSum<int>(&host_vector[0], host_vector.size());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "sum: " << sum << std::endl;
int host_sum = std::accumulate(host_vector.begin(), host_vector.end(), int(0));
if(sum != host_sum){
std::cerr << "ERROR: sum (" << sum << ") != (" << host_sum << ")" << std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,95 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <tbb/parallel_for.h>
#include "perf.hpp"
// example from: http://www.threadingbuildingblocks.org/docs/help/reference/algorithms/parallel_for_func.htm
using namespace tbb;
template<typename Iterator>
struct ParallelMergeRange {
static size_t grainsize;
Iterator begin1, end1; // [begin1,end1) is 1st sequence to be merged
Iterator begin2, end2; // [begin2,end2) is 2nd sequence to be merged
Iterator out; // where to put merged sequence
bool empty() const {return (end1-begin1)+(end2-begin2)==0;}
bool is_divisible() const {
return (std::min)( end1-begin1, end2-begin2 ) > grainsize;
}
ParallelMergeRange( ParallelMergeRange& r, split ) {
if( r.end1-r.begin1 < r.end2-r.begin2 ) {
std::swap(r.begin1,r.begin2);
std::swap(r.end1,r.end2);
}
Iterator m1 = r.begin1 + (r.end1-r.begin1)/2;
Iterator m2 = std::lower_bound( r.begin2, r.end2, *m1 );
begin1 = m1;
begin2 = m2;
end1 = r.end1;
end2 = r.end2;
out = r.out + (m1-r.begin1) + (m2-r.begin2);
r.end1 = m1;
r.end2 = m2;
}
ParallelMergeRange( Iterator begin1_, Iterator end1_,
Iterator begin2_, Iterator end2_,
Iterator out_ ) :
begin1(begin1_), end1(end1_),
begin2(begin2_), end2(end2_), out(out_)
{}
};
template<typename Iterator>
size_t ParallelMergeRange<Iterator>::grainsize = 1000;
template<typename Iterator>
struct ParallelMergeBody {
void operator()( ParallelMergeRange<Iterator>& r ) const {
std::merge( r.begin1, r.end1, r.begin2, r.end2, r.out );
}
};
template<typename Iterator>
void ParallelMerge( Iterator begin1, Iterator end1, Iterator begin2, Iterator end2, Iterator out ) {
parallel_for(
ParallelMergeRange<Iterator>(begin1,end1,begin2,end2,out),
ParallelMergeBody<Iterator>(),
simple_partitioner()
);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
std::vector<int> v1 = generate_random_vector<int>(PERF_N / 2);
std::vector<int> v2 = generate_random_vector<int>(PERF_N / 2);
std::vector<int> v3(PERF_N);
std::sort(v1.begin(), v1.end());
std::sort(v2.begin(), v2.end());
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
ParallelMerge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,35 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <vector>
#include <tbb/parallel_sort.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
std::vector<int> v(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
v = generate_random_vector<int>(PERF_N);
t.start();
tbb::parallel_sort(v.begin(), v.end());
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,45 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
// transfer data to the device
thrust::device_vector<int> d_vec = h_vec;
int sum = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
sum = thrust::reduce(d_vec.begin(), d_vec.end());
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "sum: " << sum << std::endl;
return 0;
}

View File

@@ -0,0 +1,49 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <thrust/count.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "perf.hpp"
// Host-side generator: scales one rand() draw onto [0, 25] and truncates.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * 25.0);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
thrust::host_vector<int> host_vector(PERF_N);
thrust::generate(host_vector.begin(), host_vector.end(), rand_int);
thrust::device_vector<int> v = host_vector;
size_t count = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
count = thrust::count(v.begin(), v.end(), 4);
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "count: " << count << std::endl;
return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2014 Benoit
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/scan.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
// transfer data to the device
thrust::device_vector<int> d_vec = h_vec;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
d_vec = h_vec;
t.start();
thrust::exclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin());
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

View File

@@ -0,0 +1,65 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <iostream>
#include <vector>
#include <thrust/find.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "perf.hpp"
// Upper bound of the values produced by rand_int().
int rand_int_max = 25;

// Scales one rand() draw onto [0, rand_int_max] and truncates it to an int.
int rand_int()
{
    const double unit = rand() / double(RAND_MAX);
    return static_cast<int>(unit * rand_int_max);
}
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
// create vector of random numbers on the host
thrust::host_vector<int> host_vector(PERF_N);
thrust::generate(host_vector.begin(), host_vector.end(), rand_int);
thrust::device_vector<int> v = host_vector;
// trying to find element that isn't in vector (worst-case scenario)
int wanted = rand_int_max + 1;
// result
thrust::device_vector<int>::iterator device_result_it;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
device_result_it = thrust::find(v.begin(), v.end(), wanted);
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// verify
if(device_result_it != v.end()){
std::cout << "ERROR: "
<< "device_result_iterator != "
<< "v.end()"
<< std::endl;
return -1;
}
return 0;
}

View File

@@ -0,0 +1,49 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <iterator>
#include <algorithm>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/inner_product.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> host_x(PERF_N);
thrust::host_vector<int> host_y(PERF_N);
std::generate(host_x.begin(), host_x.end(), rand);
std::generate(host_y.begin(), host_y.end(), rand);
// transfer data to the device
thrust::device_vector<int> device_x = host_x;
thrust::device_vector<int> device_y = host_y;
int product = 0;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
t.start();
product = thrust::inner_product(
device_x.begin(), device_x.end(), device_y.begin(), 0
);
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
std::cout << "product: " << product << std::endl;
return 0;
}

View File

@@ -0,0 +1,63 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <iostream>
#include <iterator>
#include <algorithm>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/merge.h>
#include <thrust/sort.h>
#include "perf.hpp"
// Benchmark driver for thrust::merge.
//
// Merges two sorted device vectors whose sizes add up to PERF_N on each of
// PERF_TRIALS runs, prints the minimum time reported by perf_timer (divided
// by 1e6 and labelled "ms") and checks the device result against std::merge
// on the host. Returns -1 on mismatch, 0 otherwise.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // Split PERF_N into floor(PERF_N / 2) and ceil(PERF_N / 2) elements using
    // integer arithmetic. This avoids std::floor/std::ceil (this file does not
    // include <cmath> directly) and the implicit double -> size conversion.
    // NOTE(review): assumes PERF_N is an unsigned integer -- confirm in perf.hpp.
    const size_t half_down = PERF_N / 2;
    const size_t half_up = PERF_N - half_down;
    thrust::host_vector<int> v1(half_down);
    thrust::host_vector<int> v2(half_up);
    std::generate(v1.begin(), v1.end(), rand);
    std::generate(v2.begin(), v2.end(), rand);

    // merge requires sorted input ranges
    std::sort(v1.begin(), v1.end());
    std::sort(v2.begin(), v2.end());

    // transfer data to the device
    thrust::device_vector<int> gpu_v1 = v1;
    thrust::device_vector<int> gpu_v2 = v2;
    thrust::device_vector<int> gpu_v3(PERF_N);

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        thrust::merge(
            gpu_v1.begin(), gpu_v1.end(),
            gpu_v2.begin(), gpu_v2.end(),
            gpu_v3.begin()
        );
        // synchronize with the device before the timer is stopped
        cudaDeviceSynchronize();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    // compare the device result with a host-side reference merge
    thrust::host_vector<int> check_v3 = gpu_v3;
    thrust::host_vector<int> v3(PERF_N);
    std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), v3.begin());
    bool ok = std::equal(check_v3.begin(), check_v3.end(), v3.begin());
    if(!ok){
        std::cerr << "ERROR: merged ranges different" << std::endl;
        return -1;
    }
    return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/scan.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
// transfer data to the device
thrust::device_vector<int> d_vec = h_vec;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
d_vec = h_vec;
t.start();
thrust::inclusive_scan(d_vec.begin(), d_vec.end(), d_vec.begin());
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

View File

@@ -0,0 +1,60 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/partition.h>
#include "perf.hpp"
// Host-side generator returning rand() rescaled to an integer in [0, 25].
int rand_int()
{
    const double fraction = rand() / static_cast<double>(RAND_MAX);
    return static_cast<int>(fraction * 25.0);
}
// Device-side predicate for thrust::partition: true for values below ten.
//
// thrust::unary_function takes <Argument, Result>; the predicate receives an
// int and yields a bool, so the arguments are <int, bool>. The previous
// <bool, int> order declared the argument/result typedefs the wrong way round.
struct less_than_ten : public thrust::unary_function<int, bool>
{
    __device__ bool operator()(int x) const
    {
        return x < 10;
    }
};
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> h_vec(PERF_N);
std::generate(h_vec.begin(), h_vec.end(), rand_int);
thrust::device_vector<int> d_vec(PERF_N);
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
d_vec = h_vec;
t.start();
thrust::partition(
d_vec.begin(), d_vec.end(), less_than_ten()
);
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
return 0;
}

View File

@@ -0,0 +1,92 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>
#include "perf.hpp"
// Value generator: one rand() draw scaled onto [0, 25] and truncated.
int rand_int()
{
    const double ratio = rand() / double(RAND_MAX);
    const double scaled = ratio * 25.0;
    return static_cast<int>(scaled);
}
// Key generator producing a non-decreasing sequence of ints: each call
// advances the current key by one with probability 1 / avgValuesNoPerKey
// (decided by a rand() draw) and returns the current key.
struct unique_key {
    int current;            // key returned by the most recent call
    int avgValuesNoPerKey;  // reciprocal of the per-call advance probability

    unique_key() : current(0), avgValuesNoPerKey(512) {}

    int operator()()
    {
        const double threshold = double(1.0) / static_cast<double>(avgValuesNoPerKey);
        const double draw = rand() / double(RAND_MAX);
        if(draw <= threshold){
            ++current;
        }
        return current;
    }
} UniqueKey;
// Benchmark driver for thrust::reduce_by_key.
//
// Reduces PERF_N random values grouped by a non-decreasing key sequence on
// each of PERF_TRIALS runs, prints the minimum time reported by perf_timer
// (divided by 1e6 and labelled "ms") and checks that the number of output
// keys matches the number of distinct input keys. Returns -1 on mismatch.
int main(int argc, char *argv[])
{
    perf_parse_args(argc, argv);
    std::cout << "size: " << PERF_N << std::endl;

    // create vector of keys and random values
    thrust::host_vector<int> host_keys(PERF_N);
    thrust::host_vector<int> host_values(PERF_N);
    std::generate(host_keys.begin(), host_keys.end(), UniqueKey);
    std::generate(host_values.begin(), host_values.end(), rand_int);

    // transfer data to the device
    thrust::device_vector<int> device_keys = host_keys;
    thrust::device_vector<int> device_values = host_values;

    // create device vectors for the results
    thrust::device_vector<int> device_keys_results(PERF_N);
    thrust::device_vector<int> device_values_results(PERF_N);

    typedef typename thrust::device_vector<int>::iterator iterType;
    thrust::pair<iterType, iterType> result;

    perf_timer t;
    for(size_t trial = 0; trial < PERF_TRIALS; trial++){
        t.start();
        result = thrust::reduce_by_key(device_keys.begin(),
                                       device_keys.end(),
                                       device_values.begin(),
                                       device_keys_results.begin(),
                                       device_values_results.begin());
        // synchronize with the device before the timer is stopped
        cudaDeviceSynchronize();
        t.stop();
    }
    std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;

    size_t result_size = thrust::distance(device_keys_results.begin(), result.first);

    // The generated keys never decrease and grow by at most one per element,
    // so the number of distinct keys is last - first + 1. The first key is not
    // always 0: the generator may already advance on its first call, so
    // assuming a first key of 0 would report a spurious error. An empty input
    // has no keys at all (and must not index element PERF_N - 1).
    size_t expected_size = 0;
    if(PERF_N > 0){
        expected_size = static_cast<size_t>(host_keys[PERF_N-1] - host_keys[0] + 1);
    }

    if(result_size != expected_size){
        std::cout << "ERROR: "
                  << "wrong number of keys"
                  << std::endl;
        return -1;
    }
    return 0;
}

View File

@@ -0,0 +1,48 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/reverse.h>
#include "perf.hpp"
int main(int argc, char *argv[])
{
perf_parse_args(argc, argv);
std::cout << "size: " << PERF_N << std::endl;
thrust::host_vector<int> h_vec = generate_random_vector<int>(PERF_N);
// transfer data to the device
thrust::device_vector<int> d_vec;
perf_timer t;
for(size_t trial = 0; trial < PERF_TRIALS; trial++){
d_vec = h_vec;
t.start();
thrust::reverse(d_vec.begin(), d_vec.end());
cudaDeviceSynchronize();
t.stop();
}
std::cout << "time: " << t.min_time() / 1e6 << " ms" << std::endl;
// transfer data back to host
thrust::copy(d_vec.begin(), d_vec.end(), h_vec.begin());
return 0;
}

Some files were not shown because too many files have changed in this diff Show More