Added implementation of SetTo() without mask
This commit is contained in:
@@ -40,6 +40,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "cuda_shared.hpp"
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
@@ -47,11 +48,30 @@ __constant__ float scalar_d[4];
|
||||
|
||||
namespace mat_operators
|
||||
{
|
||||
|
||||
// Compile-time recursive writer used to emit a fixed number of stores
// without a runtime loop.
//
// Each instantiation stores one element at index i, taking its value from
// scalar_d[i % channels] converted to T, and then forwards index i + 1 to the
// instantiation with count - 1. The recursion stops at the count == 0
// specialization.
//
//   T        - element type of the destination buffer
//   channels - modulus used to select the scalar_d entry
//   count    - number of stores still to perform (defaults to channels)
template <typename T, int channels, int count = channels>
struct unroll
{
    __device__ static void unroll_set(T * mat, size_t i)
    {
        typedef unroll<T, channels, count - 1> next_step;

        // The scalar entry is chosen from the absolute element index.
        const float value = scalar_d[i % channels];
        mat[i] = static_cast<T>(value);

        next_step::unroll_set(mat, i + 1);
    }
};
|
||||
|
||||
// Recursion terminator for unroll: when count reaches 0 there is nothing
// left to store, so unroll_set ignores both arguments and does nothing.
template <typename T, int channels>
struct unroll<T, channels, 0>
{
    __device__ static void unroll_set(T * /*mat*/, size_t /*i*/)
    {
    }
};
|
||||
|
||||
|
||||
|
||||
// Fills a buffer of interleaved channels with the per-channel values held
// in scalar_d.
//
// Each thread handles one group of `channels` consecutive elements: it
// computes the index of the first element of its group and lets unroll<>
// store scalar_d[0 .. channels-1] (converted to T) into that group.
//
//   T        - element type of the buffer
//   channels - number of consecutive elements written per thread
//   mat      - destination buffer, indexed in elements of T
//
// Indexing uses only blockIdx.x / blockDim.x / threadIdx.x, i.e. a 1D launch.
// NOTE(review): there is no bounds guard and no size parameter, so the launch
// must supply exactly one thread per group of `channels` elements - confirm
// that the host launcher sizes the grid that way.
template <typename T, int channels>
__global__ void kernel_set_to_without_mask(T * mat)
{
    // Widen before multiplying so the product cannot wrap in 32 bits.
    const size_t group = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    // mat is a T*, so offsets are counted in elements, not bytes: the stride
    // between neighbouring threads is `channels`, not sizeof(T).
    const size_t first = group * channels;

    unroll<T, channels>::unroll_set(mat, first);
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user