cmdutils & opencl: add -opencl_bench option to test and show available OpenCL devices

Reviewed-by: Wei Gao <highgod0401@gmail.com>
Reviewed-by: Stefano Sabatini <stefasab@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Lenny Wang 2013-12-08 21:01:00 -06:00 committed by Michael Niedermayer
parent 8e702bd3a8
commit 64f73acd1d
11 changed files with 347 additions and 27 deletions

View File

@ -25,6 +25,7 @@ ALLAVPROGS = $(AVBASENAMES:%=%$(PROGSSUF)$(EXESUF))
ALLAVPROGS_G = $(AVBASENAMES:%=%$(PROGSSUF)_g$(EXESUF)) ALLAVPROGS_G = $(AVBASENAMES:%=%$(PROGSSUF)_g$(EXESUF))
$(foreach prog,$(AVBASENAMES),$(eval OBJS-$(prog) += cmdutils.o)) $(foreach prog,$(AVBASENAMES),$(eval OBJS-$(prog) += cmdutils.o))
$(foreach prog,$(AVBASENAMES),$(eval OBJS-$(prog)-$(CONFIG_OPENCL) += cmdutils_opencl.o))
OBJS-ffmpeg += ffmpeg_opt.o ffmpeg_filter.o OBJS-ffmpeg += ffmpeg_opt.o ffmpeg_filter.o
OBJS-ffmpeg-$(HAVE_VDPAU_X11) += ffmpeg_vdpau.o OBJS-ffmpeg-$(HAVE_VDPAU_X11) += ffmpeg_vdpau.o

View File

@ -58,10 +58,6 @@
#include <sys/time.h> #include <sys/time.h>
#include <sys/resource.h> #include <sys/resource.h>
#endif #endif
#if CONFIG_OPENCL
#include "libavutil/opencl.h"
#endif
static int init_report(const char *env); static int init_report(const char *env);
@ -985,26 +981,6 @@ int opt_timelimit(void *optctx, const char *opt, const char *arg)
return 0; return 0;
} }
#if CONFIG_OPENCL
int opt_opencl(void *optctx, const char *opt, const char *arg)
{
char *key, *value;
const char *opts = arg;
int ret = 0;
while (*opts) {
ret = av_opt_get_key_value(&opts, "=", ":", 0, &key, &value);
if (ret < 0)
return ret;
ret = av_opencl_set_option(key, value);
if (ret < 0)
return ret;
if (*opts)
opts++;
}
return ret;
}
#endif
void print_error(const char *filename, int err) void print_error(const char *filename, int err)
{ {
char errbuf[128]; char errbuf[128];

View File

@ -98,8 +98,12 @@ int opt_max_alloc(void *optctx, const char *opt, const char *arg);
int opt_codec_debug(void *optctx, const char *opt, const char *arg); int opt_codec_debug(void *optctx, const char *opt, const char *arg);
#if CONFIG_OPENCL
int opt_opencl(void *optctx, const char *opt, const char *arg); int opt_opencl(void *optctx, const char *opt, const char *arg);
int opt_opencl_bench(void *optctx, const char *opt, const char *arg);
#endif
/** /**
* Limit the execution time. * Limit the execution time.
*/ */

View File

@ -22,5 +22,6 @@
{ "max_alloc" , HAS_ARG, {.func_arg = opt_max_alloc}, "set maximum size of a single allocated block", "bytes" }, { "max_alloc" , HAS_ARG, {.func_arg = opt_max_alloc}, "set maximum size of a single allocated block", "bytes" },
{ "cpuflags" , HAS_ARG | OPT_EXPERT, { .func_arg = opt_cpuflags }, "force specific cpu flags", "flags" }, { "cpuflags" , HAS_ARG | OPT_EXPERT, { .func_arg = opt_cpuflags }, "force specific cpu flags", "flags" },
#if CONFIG_OPENCL #if CONFIG_OPENCL
{ "opencl_bench", OPT_EXIT, {.func_arg = opt_opencl_bench}, "run benchmark on all OpenCL devices and show results" },
{ "opencl_options", HAS_ARG, {.func_arg = opt_opencl}, "set OpenCL environment options" }, { "opencl_options", HAS_ARG, {.func_arg = opt_opencl}, "set OpenCL environment options" },
#endif #endif

274
cmdutils_opencl.c Normal file
View File

@ -0,0 +1,274 @@
/*
* Copyright (C) 2013 Lenny Wang
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "libavutil/log.h"
#include "libavutil/opencl.h"
#include "cmdutils.h"
typedef struct {
int platform_idx;
int device_idx;
char device_name[64];
int64_t runtime;
} OpenCLDeviceBenchmark;
const char *ocl_bench_source = AV_OPENCL_KERNEL(
inline unsigned char clip_uint8(int a)
{
if (a & (~0xFF))
return (-a)>>31;
else
return a;
}
kernel void unsharp_bench(
global unsigned char *src,
global unsigned char *dst,
global int *mask,
int width,
int height)
{
int i, j, local_idx, lc_idx, sum = 0;
int2 thread_idx, block_idx, global_idx, lm_idx;
thread_idx.x = get_local_id(0);
thread_idx.y = get_local_id(1);
block_idx.x = get_group_id(0);
block_idx.y = get_group_id(1);
global_idx.x = get_global_id(0);
global_idx.y = get_global_id(1);
local uchar data[32][32];
local int lc[128];
for (i = 0; i <= 1; i++) {
lm_idx.y = -8 + (block_idx.y + i) * 16 + thread_idx.y;
lm_idx.y = lm_idx.y < 0 ? 0 : lm_idx.y;
lm_idx.y = lm_idx.y >= height ? height - 1: lm_idx.y;
for (j = 0; j <= 1; j++) {
lm_idx.x = -8 + (block_idx.x + j) * 16 + thread_idx.x;
lm_idx.x = lm_idx.x < 0 ? 0 : lm_idx.x;
lm_idx.x = lm_idx.x >= width ? width - 1: lm_idx.x;
data[i*16 + thread_idx.y][j*16 + thread_idx.x] = src[lm_idx.y*width + lm_idx.x];
}
}
local_idx = thread_idx.y*16 + thread_idx.x;
if (local_idx < 128)
lc[local_idx] = mask[local_idx];
barrier(CLK_LOCAL_MEM_FENCE);
\n#pragma unroll\n
for (i = -4; i <= 4; i++) {
lm_idx.y = 8 + i + thread_idx.y;
\n#pragma unroll\n
for (j = -4; j <= 4; j++) {
lm_idx.x = 8 + j + thread_idx.x;
lc_idx = (i + 4)*8 + j + 4;
sum += (int)data[lm_idx.y][lm_idx.x] * lc[lc_idx];
}
}
int temp = (int)data[thread_idx.y + 8][thread_idx.x + 8];
int res = temp + (((temp - (int)((sum + 1<<15) >> 16))) >> 16);
if (global_idx.x < width && global_idx.y < height)
dst[global_idx.x + global_idx.y*width] = clip_uint8(res);
}
);
#define OCLCHECK(method, ... ) \
do { \
status = method(__VA_ARGS__); \
if (status != CL_SUCCESS) { \
av_log(NULL, AV_LOG_ERROR, # method " error '%s'\n", \
av_opencl_errstr(status)); \
ret = AVERROR_EXTERNAL; \
goto end; \
} \
} while (0)
#define CREATEBUF(out, flags, size) \
do { \
out = clCreateBuffer(ext_opencl_env->context, flags, size, NULL, &status); \
if (status != CL_SUCCESS) { \
av_log(NULL, AV_LOG_ERROR, "Could not create OpenCL buffer\n"); \
ret = AVERROR_EXTERNAL; \
goto end; \
} \
} while (0)
static void fill_rand_int(int *data, int n)
{
int i;
srand(av_gettime());
for (i = 0; i < n; i++)
data[i] = rand();
}
#define OPENCL_NB_ITER 5
static int64_t run_opencl_bench(AVOpenCLExternalEnv *ext_opencl_env)
{
int i, arg = 0, width = 1920, height = 1088;
int64_t start, ret = 0;
cl_int status;
size_t kernel_len;
char *inbuf;
int *mask;
int buf_size = width * height * sizeof(char);
int mask_size = sizeof(uint32_t) * 128;
cl_mem cl_mask, cl_inbuf, cl_outbuf;
cl_kernel kernel = NULL;
cl_program program = NULL;
size_t local_work_size_2d[2] = {16, 16};
size_t global_work_size_2d[2] = {(size_t)width, (size_t)height};
if (!(inbuf = av_malloc(buf_size)) || !(mask = av_malloc(mask_size))) {
av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
ret = AVERROR(ENOMEM);
goto end;
}
fill_rand_int((int*)inbuf, buf_size/4);
fill_rand_int(mask, mask_size/4);
CREATEBUF(cl_mask, CL_MEM_READ_ONLY, mask_size);
CREATEBUF(cl_inbuf, CL_MEM_READ_ONLY, buf_size);
CREATEBUF(cl_outbuf, CL_MEM_READ_WRITE, buf_size);
kernel_len = strlen(ocl_bench_source);
program = clCreateProgramWithSource(ext_opencl_env->context, 1, &ocl_bench_source,
&kernel_len, &status);
if (status != CL_SUCCESS || !program) {
av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark program\n");
ret = AVERROR_EXTERNAL;
goto end;
}
status = clBuildProgram(program, 1, &(ext_opencl_env->device_id), NULL, NULL, NULL);
if (status != CL_SUCCESS) {
av_log(NULL, AV_LOG_ERROR, "OpenCL unable to build benchmark program\n");
ret = AVERROR_EXTERNAL;
goto end;
}
kernel = clCreateKernel(program, "unsharp_bench", &status);
if (status != CL_SUCCESS) {
av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark kernel\n");
ret = AVERROR_EXTERNAL;
goto end;
}
OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_inbuf, CL_TRUE, 0,
buf_size, inbuf, 0, NULL, NULL);
OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_mask, CL_TRUE, 0,
mask_size, mask, 0, NULL, NULL);
OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_inbuf);
OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_outbuf);
OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_mask);
OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &width);
OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &height);
start = av_gettime();
for (i = 0; i < OPENCL_NB_ITER; i++)
OCLCHECK(clEnqueueNDRangeKernel, ext_opencl_env->command_queue, kernel, 2, NULL,
global_work_size_2d, local_work_size_2d, 0, NULL, NULL);
clFinish(ext_opencl_env->command_queue);
ret = (av_gettime() - start)/OPENCL_NB_ITER;
end:
if (kernel)
clReleaseKernel(kernel);
if (program)
clReleaseProgram(program);
if (cl_inbuf)
clReleaseMemObject(cl_inbuf);
if (cl_outbuf)
clReleaseMemObject(cl_outbuf);
if (cl_mask)
clReleaseMemObject(cl_mask);
av_free(inbuf);
av_free(mask);
return ret;
}
static int compare_ocl_device_desc(const void *a, const void *b)
{
return ((OpenCLDeviceBenchmark*)a)->runtime - ((OpenCLDeviceBenchmark*)b)->runtime;
}
int opt_opencl_bench(void *optctx, const char *opt, const char *arg)
{
int i, j, nb_devices = 0, count = 0;
int64_t score = 0;
AVOpenCLDeviceList *device_list;
AVOpenCLDeviceNode *device_node = NULL;
OpenCLDeviceBenchmark *devices = NULL;
cl_platform_id platform;
av_opencl_get_device_list(&device_list);
for (i = 0; i < device_list->platform_num; i++)
nb_devices += device_list->platform_node[i]->device_num;
if (!nb_devices) {
av_log(NULL, AV_LOG_ERROR, "No OpenCL device detected!\n");
return AVERROR(EINVAL);
}
if (!(devices = av_malloc(sizeof(OpenCLDeviceBenchmark) * nb_devices))) {
av_log(NULL, AV_LOG_ERROR, "Could not allocate buffer\n");
return AVERROR(ENOMEM);
}
for (i = 0; i < device_list->platform_num; i++) {
for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
device_node = device_list->platform_node[i]->device_node[j];
platform = device_list->platform_node[i]->platform_id;
score = av_opencl_benchmark(device_node, platform, run_opencl_bench);
if (score > 0) {
devices[count].platform_idx = i;
devices[count].device_idx = j;
devices[count].runtime = score;
strcpy(devices[count].device_name, device_node->device_name);
count++;
}
}
}
qsort(devices, count, sizeof(OpenCLDeviceBenchmark), compare_ocl_device_desc);
fprintf(stderr, "platform_idx\tdevice_idx\tdevice_name\truntime\n");
for (i = 0; i < count; i++)
fprintf(stdout, "%d\t%d\t%s\t%"PRId64"\n",
devices[i].platform_idx, devices[i].device_idx,
devices[i].device_name, devices[i].runtime);
av_opencl_free_device_list(&device_list);
av_free(devices);
return 0;
}
int opt_opencl(void *optctx, const char *opt, const char *arg)
{
char *key, *value;
const char *opts = arg;
int ret = 0;
while (*opts) {
ret = av_opt_get_key_value(&opts, "=", ":", 0, &key, &value);
if (ret < 0)
return ret;
ret = av_opencl_set_option(key, value);
if (ret < 0)
return ret;
if (*opts)
opts++;
}
return ret;
}

View File

@ -14,6 +14,8 @@ libavutil: 2012-10-22
API changes, most recent first: API changes, most recent first:
2013-12-xx - xxxxxxx - lavu 52.57.100 - opencl.h
Add av_opencl_benchmark() function.
2013-11-xx - xxxxxxx - lavu 52.56.100 - ffversion.h 2013-11-xx - xxxxxxx - lavu 52.56.100 - ffversion.h
Moves version.h to libavutil/ffversion.h. Moves version.h to libavutil/ffversion.h.

View File

@ -250,6 +250,10 @@ Possible flags for this option are:
@end table @end table
@end table @end table
@item -opencl_bench
Benchmark all available OpenCL devices and show the results. This option
is only available when FFmpeg has been compiled with @code{--enable-opencl}.
@item -opencl_options options (@emph{global}) @item -opencl_options options (@emph{global})
Set OpenCL environment options. This option is only available when Set OpenCL environment options. This option is only available when
FFmpeg has been compiled with @code{--enable-opencl}. FFmpeg has been compiled with @code{--enable-opencl}.

View File

@ -1051,13 +1051,13 @@ See reference "OpenCL Specification Version: 1.2 chapter 5.6.4".
Select the index of the platform to run OpenCL code. Select the index of the platform to run OpenCL code.
The specified index must be one of the indexes in the device list The specified index must be one of the indexes in the device list
which can be obtained with @code{av_opencl_get_device_list()}. which can be obtained with @code{ffmpeg -opencl_bench} or @code{av_opencl_get_device_list()}.
@item device_idx @item device_idx
Select the index of the device used to run OpenCL code. Select the index of the device used to run OpenCL code.
The specifed index must be one of the indexes in the device list which The specifed index must be one of the indexes in the device list which
can be obtained with @code{av_opencl_get_device_list()}. can be obtained with @code{ffmpeg -opencl_bench} or @code{av_opencl_get_device_list()}.
@end table @end table

View File

@ -761,3 +761,45 @@ int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_n
} }
return 0; return 0;
} }
int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform,
int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env))
{
int64_t ret = 0;
cl_int status;
cl_context_properties cps[3];
AVOpenCLExternalEnv *ext_opencl_env = NULL;
ext_opencl_env = av_opencl_alloc_external_env();
ext_opencl_env->device_id = device_node->device_id;
ext_opencl_env->device_type = device_node->device_type;
av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n",
device_node->device_name);
cps[0] = CL_CONTEXT_PLATFORM;
cps[1] = (cl_context_properties)platform;
cps[2] = 0;
ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type,
NULL, NULL, &status);
if (status != CL_SUCCESS || !ext_opencl_env->context) {
ret = AVERROR_EXTERNAL;
goto end;
}
ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context,
ext_opencl_env->device_id, 0, &status);
if (status != CL_SUCCESS || !ext_opencl_env->command_queue) {
ret = AVERROR_EXTERNAL;
goto end;
}
ret = benchmark(ext_opencl_env);
if (ret < 0)
av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n",
device_node->device_name);
end:
if (ext_opencl_env->command_queue)
clReleaseCommandQueue(ext_opencl_env->command_queue);
if (ext_opencl_env->context)
clReleaseContext(ext_opencl_env->context);
av_opencl_free_external_env(&ext_opencl_env);
return ret;
}

View File

@ -310,4 +310,20 @@ void av_opencl_release_kernel(AVOpenCLKernelEnv *env);
*/ */
void av_opencl_uninit(void); void av_opencl_uninit(void);
/**
* Benchmark an OpenCL device with a user defined callback function. This function
* sets up an external OpenCL environment including context and command queue on
* the device then tears it down in the end. The callback function should perform
* the rest of the work.
*
* @param device pointer to the OpenCL device to be used
* @param platform cl_platform_id handle to which the device belongs to
* @param benchmark callback function to perform the benchmark, return a
* negative value in case of failure
* @return the score passed from the callback function, a negative error code in case
* of failure
*/
int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device, cl_platform_id platform,
int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env));
#endif /* LIBAVUTIL_OPENCL_H */ #endif /* LIBAVUTIL_OPENCL_H */

View File

@ -75,7 +75,7 @@
*/ */
#define LIBAVUTIL_VERSION_MAJOR 52 #define LIBAVUTIL_VERSION_MAJOR 52
#define LIBAVUTIL_VERSION_MINOR 56 #define LIBAVUTIL_VERSION_MINOR 57
#define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \