535 lines
14 KiB
Common Lisp
535 lines
14 KiB
Common Lisp
#pragma OPENCL EXTENSION cl_amd_printf : enable
|
|
|
|
#define float3 float4
|
|
#define uint3 uint4
|
|
|
|
#define PARTICLE_RADIUS 0.05;
|
|
|
|
#define width 1280
|
|
#define height 1024
|
|
|
|
#define B 0
|
|
#define T height
|
|
#define L 0
|
|
#define R width
|
|
|
|
#define shiftNumber 4
|
|
#define shiftMask 0xF
|
|
#define shiftValue 16.0f
|
|
#define stride 4
|
|
|
|
#define screenWidth1 width
|
|
#define screenHeight1 height
|
|
#define halfScreenWidth1 screenWidth1/2
|
|
#define halfScreenHeight1 screenHeight1/2
|
|
#define screenWidth1SubOne (screenWidth1-1)
|
|
#define screenHeight1SubOne (screenHeight1-1)
|
|
#define stride screenWidth1
|
|
#define screenPixelNumber screenWidth1*screenHeight1
|
|
#define depthBufferSize screenPixelNumber*depthComplexity
|
|
|
|
#define WGS 1
|
|
|
|
//---------------------------------------------------------------
|
|
|
|
struct __VSSpriteOut
|
|
{
|
|
float4 position;
|
|
float4 particlePosition;
|
|
};
|
|
|
|
typedef struct __VSSpriteout VSSpriteOut;
|
|
|
|
struct __GSSpriteOut
|
|
{
|
|
float4 position;
|
|
float2 textureUV;
|
|
// float4 viewSpacePosition;
|
|
// float4 particlePosition;
|
|
};
|
|
|
|
typedef struct __GSSpriteout GSSpriteOut;
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
__constant float4 g_positions[4] =
|
|
{
|
|
(float4)(-1.0f, 1.0f, 0.0f, 0.0f),
|
|
(float4)( 1.0f, 1.0f, 0.0f, 0.0f),
|
|
(float4)( -1.0f, -1.0f, 0.0f, 0.0f),
|
|
(float4)( 1.0f, -1.0f, 0.0f, 0.0f)
|
|
};
|
|
|
|
__constant float2 g_texcoords[4] =
|
|
{
|
|
(float2)(0.0f,0.0f),
|
|
(float2)(1.0f,0.0f),
|
|
(float2)(0.0f,1.0f),
|
|
(float2)(1.0f,1.0f)
|
|
};
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
void copyMatrix(
|
|
float matrix[16],
|
|
__constant float matrix0[16])
|
|
{
|
|
uint i;
|
|
|
|
for (i = 0; i < 16; i++) {
|
|
matrix[i] = matrix0[i];
|
|
}
|
|
}
|
|
|
|
void matrixMulLoopBody(
|
|
uint i,
|
|
float matrix[16],
|
|
__constant float matrix0[16],
|
|
__constant float matrix1[16])
|
|
{
|
|
matrix[i] = 0.0f;
|
|
matrix[i] += matrix0[(i%4) + (0*4)] * matrix1[(0) + ((i/4)*4)];
|
|
matrix[i] += matrix0[(i%4) + (1*4)] * matrix1[(1) + ((i/4)*4)];
|
|
matrix[i] += matrix0[(i%4) + (2*4)] * matrix1[(2) + ((i/4)*4)];
|
|
matrix[i] += matrix0[(i%4) + (3*4)] * matrix1[(3) + ((i/4)*4)];
|
|
}
|
|
|
|
void matrixMul(
|
|
float matrix[16],
|
|
__constant float matrix0[16],
|
|
__constant float matrix1[16])
|
|
{
|
|
matrixMulLoopBody(0, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(1, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(2, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(3, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(4, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(5, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(6, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(7, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(8, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(9, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(10, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(11, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(12, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(13, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(14, matrix, matrix0, matrix1);
|
|
matrixMulLoopBody(15, matrix, matrix0, matrix1);
|
|
}
|
|
|
|
float4 matrixVectorMul(float matrix[16], float4 vector)
|
|
{
|
|
float4 result;
|
|
|
|
result.x = matrix[0]*vector.x + matrix[4+0]*vector.y + matrix[8+0]*vector.z + matrix[12+0]*vector.w;
|
|
result.y = matrix[1]*vector.x + matrix[4+1]*vector.y + matrix[8+1]*vector.z + matrix[12+1]*vector.w;
|
|
result.z = matrix[2]*vector.x + matrix[4+2]*vector.y + matrix[8+2]*vector.z + matrix[12+2]*vector.w;
|
|
result.w = matrix[3]*vector.x + matrix[4+3]*vector.y + matrix[8+3]*vector.z + matrix[12+3]*vector.w;
|
|
|
|
return result;
|
|
}
|
|
|
|
float3 matrixVector3Mul(__constant float matrix[9], float3 vector)
|
|
{
|
|
float3 result;
|
|
|
|
result.x = matrix[0]*vector.x + matrix[3+0]*vector.y + matrix[6+0]*vector.z;
|
|
result.y = matrix[1]*vector.x + matrix[3+1]*vector.y + matrix[6+1]*vector.z;
|
|
result.z = matrix[2]*vector.x + matrix[3+2]*vector.y + matrix[6+2]*vector.z;
|
|
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
//#define DEVICE_CPU 1
|
|
#if defined(DEVICE_CPU)
|
|
void printMatrix(char * name, __constant float matrix[16])
|
|
{
|
|
printf("%s[0] = %f, %f, %f, %f\n", name, matrix[0], matrix[1], matrix[2], matrix[3]);
|
|
printf("%s[1] = %f, %f, %f, %f\n", name, matrix[4], matrix[5], matrix[6], matrix[7]);
|
|
printf("%s[2] = %f, %f, %f, %f\n", name, matrix[8], matrix[9], matrix[10], matrix[11]);
|
|
printf("%s[3] = %f, %f, %f, %f\n", name, matrix[12], matrix[13], matrix[14], matrix[15]);
|
|
}
|
|
#endif
|
|
|
|
#if 1
|
|
__kernel void vertexShader(
|
|
__constant float modelview[16],
|
|
__constant float projection[16],
|
|
__global float4 * inputPrimitives,
|
|
__global float4 * outputPrimitives)
|
|
{
|
|
float matrix[16];
|
|
float4 gl_Vertex;
|
|
float4 gl_Position;
|
|
|
|
uint id = get_global_id(0);
|
|
|
|
gl_Vertex = inputPrimitives[id];
|
|
|
|
// gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex
|
|
matrixMul(matrix, projection, modelview);
|
|
|
|
gl_Position = matrixVectorMul(matrix, gl_Vertex);
|
|
|
|
outputPrimitives[id] = gl_Position;
|
|
}
|
|
|
|
#else
|
|
|
|
__kernel void vertexShader(
|
|
__constant float modelview[16],
|
|
__constant float projection[16],
|
|
__global float4 * inputPrimitives,
|
|
__global float4 * outputPrimitives)
|
|
{
|
|
uint id = get_global_id(0);
|
|
|
|
outputPrimitives[id] = inputPrimitives[id];
|
|
}
|
|
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------------
|
|
|
|
__kernel void
|
|
clearImage(
|
|
__write_only image2d_t image,
|
|
float4 color)
|
|
{
|
|
|
|
int2 coords = (int2)(get_global_id(0), get_global_id(1));
|
|
write_imagef(image, coords, color);
|
|
}
|
|
|
|
// OpenGL viewport transformation
|
|
// The site http://research.cs.queensu.ca/~jstewart/454/notes/pipeline/
|
|
// contains a description of this process
|
|
void
|
|
viewportTransform(float4 v, __constant int4 viewport[1], float2 * output)
|
|
{
|
|
int4 vp = viewport[0];
|
|
*output
|
|
= 0.5f *
|
|
(float2)(v.x+1,v.y+1) *
|
|
(float2)((vp.s2-vp.s0) + vp.s0,
|
|
(vp.s3-vp.s1) + vp.s1);
|
|
}
|
|
|
|
#define PARTICLE_WIDTH 32.0f
|
|
#define PARTICLE_HEIGHT 32.0f
|
|
|
|
// Unoptimized triangle rasterizer function
|
|
// Details of the algorithm can be found here:
|
|
// http://www.devmaster.net/forums/showthread.php?t=1884
|
|
//
|
|
void
|
|
rasterizerUnOpt(
|
|
__global struct __GSSpriteOut * outputPrimitives,
|
|
// __global float4 * outputPrimitives,
|
|
__constant int4 viewport[1],
|
|
__write_only image2d_t screen,
|
|
__read_only image2d_t particle,
|
|
uint v1Offset,
|
|
uint v2Offset,
|
|
uint v3Offset,
|
|
__global float4 * debugOut1)
|
|
{
|
|
sampler_t sampler =
|
|
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
|
|
|
|
uint id = get_global_id(0);
|
|
|
|
struct __GSSpriteOut output;
|
|
float2 v1, v2, v3;
|
|
float2 uv1, uv2, uv3;
|
|
|
|
output = outputPrimitives[id*4+v1Offset];
|
|
uv1 = output.textureUV;
|
|
viewportTransform(output.position, viewport, &v1);
|
|
|
|
output = outputPrimitives[id*4+v2Offset];
|
|
uv2 = output.textureUV;
|
|
viewportTransform(output.position, viewport, &v2);
|
|
|
|
output = outputPrimitives[id*4+v3Offset];
|
|
uv3 = output.textureUV;
|
|
viewportTransform(output.position, viewport, &v3);
|
|
|
|
// Bounding rectangle
|
|
int2 min_ = convert_int2(min(v1, min(v2, v3)));
|
|
int2 max_ = convert_int2(max(v1, max(v2, v3)));
|
|
|
|
// naive bi-linear interploation for texture coords, note this is
|
|
// broken with respect to OpenGL and needs to be fixed for the
|
|
// general case.
|
|
float p1x = v2.x - v1.x;
|
|
float p1y = v2.y - v1.y;
|
|
|
|
float p2x = v3.x - v1.x;
|
|
float p2y = v3.y - v1.y;
|
|
|
|
// Scan through bounding rectangle
|
|
for(int y = min_.y; y < max_.y; y++) {
|
|
for(int x = min_.x; x < max_.x; x++) {
|
|
// When all half-space functions positive, pixel is in triangle
|
|
if((v1.x - v2.x) * (y - v1.y) - (v1.y - v2.y) * (x - v1.x) > 0 &&
|
|
(v2.x - v3.x) * (y - v2.y) - (v2.y - v3.y) * (x - v2.x) > 0 &&
|
|
(v3.x - v1.x) * (y - v3.y) - (v3.y - v1.y) * (x - v3.x) > 0) {
|
|
|
|
float px = x - v1.x;
|
|
float py = y - v1.y;
|
|
|
|
write_imagef(
|
|
screen,
|
|
(int2)(x,y),
|
|
// texel);
|
|
(float4)(1.0f,1.0f,1.0f,1.0f));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Optimized rasterizer function
|
|
// Details of the algorithm can be found here:
|
|
// http://www.devmaster.net/forums/showthread.php?t=1884
|
|
//
|
|
// Currently has a bug, still work in progess
|
|
__kernel void
|
|
rasterizerXX(
|
|
__global float4 * outputPrimitives,
|
|
__write_only image2d_t screen,
|
|
__global float4 * debugOut1,
|
|
__global int2 * debugOut2)
|
|
{
|
|
uint id = get_global_id(0);
|
|
|
|
// printf("ras\n");
|
|
|
|
float4 v1 = outputPrimitives[id*4+0];
|
|
float4 v2 = outputPrimitives[id*4+1];
|
|
float4 v3 = outputPrimitives[id*4+2];
|
|
|
|
float y1 = 0.5f* (v1.y+1) * (T - B) + B;
|
|
float y2 = 0.5f* (v2.y+1) * (T - B) + B;
|
|
float y3 = 0.5f* (v3.y+1) * (T - B) + B;
|
|
|
|
float x1 = 0.5f * (v1.x+1) * (R - L) + L;
|
|
float x2 = 0.5f * (v2.x+1) * (R - L) + L;
|
|
float x3 = 0.5f * (v3.x+1) * (R - L) + L;
|
|
|
|
const int Y1 = convert_int(shiftValue * y1);
|
|
const int Y2 = convert_int(shiftValue * y2);
|
|
const int Y3 = convert_int(shiftValue * y3);
|
|
|
|
const int X1 = convert_int(shiftValue * x1);
|
|
const int X2 = convert_int(shiftValue * x2);
|
|
const int X3 = convert_int(shiftValue * x3);
|
|
|
|
debugOut1[id*4+0] = v1;
|
|
debugOut1[id*4+1] = v2;
|
|
debugOut1[id*4+2] = v3;
|
|
|
|
debugOut2[id*3+0] = (int2)(X1, Y1);
|
|
debugOut2[id*3+1] = (int2)(X2, Y2);
|
|
debugOut2[id*3+2] = (int2)(X3, Y3);
|
|
|
|
// Deltas
|
|
const int DX12 = X1 - X2;
|
|
const int DX23 = X2 - X3;
|
|
const int DX31 = X3 - X1;
|
|
|
|
const int DY12 = Y1 - Y2;
|
|
const int DY23 = Y2 - Y3;
|
|
const int DY31 = Y3 - Y1;
|
|
|
|
// Fixed-point deltas
|
|
const int FDX12 = DX12 << shiftNumber;
|
|
const int FDX23 = DX23 << shiftNumber;
|
|
const int FDX31 = DX31 << shiftNumber;
|
|
|
|
const int FDY12 = DY12 << shiftNumber;
|
|
const int FDY23 = DY23 << shiftNumber;
|
|
const int FDY31 = DY31 << shiftNumber;
|
|
|
|
// Bounding rectangle
|
|
int minx = (min(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
|
|
//minx = max(0,minx);
|
|
|
|
int maxx = (max(X1, min(X2, X3)) + shiftMask) >> shiftNumber;
|
|
//min(maxx , screenWidth1SubOne);
|
|
|
|
int miny = (min(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
|
|
//max(0,miny);
|
|
|
|
int maxy = (max(Y1, min(Y2, Y3)) + shiftMask) >> shiftNumber;
|
|
//min(maxy , screenHeight1SubOne);
|
|
|
|
//(char*&)colorBuffer += miny * stride;
|
|
int offset = miny * stride;
|
|
|
|
// Half-edge constants
|
|
int C1 = DY12 * X1 - DX12 * Y1;
|
|
int C2 = DY23 * X2 - DX23 * Y2;
|
|
int C3 = DY31 * X3 - DX31 * Y3;
|
|
|
|
// Correct for fill convention
|
|
if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
|
|
if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
|
|
if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
|
|
|
|
int CY1 = C1 + DX12 * (miny << shiftNumber) - DY12 * (minx << shiftNumber);
|
|
int CY2 = C2 + DX23 * (miny << shiftNumber) - DY23 * (minx << shiftNumber);
|
|
int CY3 = C3 + DX31 * (miny << shiftNumber) - DY31 * (minx << shiftNumber);
|
|
|
|
for(int y = miny; y < maxy; y++) {
|
|
int CX1 = CY1;
|
|
int CX2 = CY2;
|
|
int CX3 = CY3;
|
|
|
|
debugOut2[id*3+0] = (int2)(minx, maxx);
|
|
|
|
for(int x = minx; x < maxx; x++) {
|
|
debugOut2[id*3+0] = (int2)(CX1, CX2);
|
|
|
|
if(CX1 > 0 && CX2 > 0 && CX3 > 0) {
|
|
debugOut2[id*3+0] = (int2)(1, 1);
|
|
write_imagef(
|
|
screen,
|
|
(int2)(x,y),
|
|
(float4)(1.0f,1.0f,1.0f,1.0f));
|
|
}
|
|
|
|
CX1 -= FDY12;
|
|
CX2 -= FDY23;
|
|
CX3 -= FDY31;
|
|
}
|
|
|
|
CY1 += FDX12;
|
|
CY2 += FDX23;
|
|
CY3 += FDX31;
|
|
|
|
//(char*&)colorBuffer += stride;
|
|
offset += stride;
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
void geometryShader(
|
|
__constant float modelview[16],
|
|
__constant float projection[16],
|
|
__constant float inverseView[9],
|
|
__constant int4 viewport[1],
|
|
__local struct __VSSpriteOut * vsOutputPrimitives,
|
|
__global struct __GSSpriteOut * outputPrimitives,
|
|
// __global float4 * outputPrimitives,
|
|
__write_only image2d_t screen,
|
|
__read_only image2d_t particle,
|
|
__global float4 * debugOut1,
|
|
__global int * debugOut2)
|
|
{
|
|
float2 texcoords[4] =
|
|
{
|
|
(float2)(0.0f,0.0f),
|
|
(float2)(1.0f,0.0f),
|
|
(float2)(0.0f,1.0f),
|
|
(float2)(1.0f,1.0f)
|
|
};
|
|
|
|
float matrix[16];
|
|
|
|
uint id = get_global_id(0);
|
|
uint lid = get_local_id(0);
|
|
|
|
float4 vsPosition = vsOutputPrimitives[lid].position;
|
|
|
|
matrixMul(matrix, projection, modelview);
|
|
//
|
|
// Emit two new triangles
|
|
//
|
|
for (uint i = 0; i<4; i++) {
|
|
float3 position = g_positions[i] * PARTICLE_RADIUS;
|
|
position = matrixVector3Mul(inverseView, position) + vsPosition;
|
|
float3 particlePosition =
|
|
matrixVector3Mul(
|
|
inverseView,
|
|
(float4)(0.0f,0.0f,0.0f,0.0f)) + vsPosition; // world space
|
|
|
|
// Compute view space position
|
|
position.w = 1.0f;
|
|
position = matrixVectorMul(matrix, position);
|
|
|
|
//perspective division
|
|
position /= position.w;
|
|
|
|
struct __GSSpriteOut output;
|
|
output.position = position;
|
|
//output.textureUV = g_texcoords[i];
|
|
output.textureUV = texcoords[i];
|
|
outputPrimitives[id*4+i] = output;
|
|
}
|
|
|
|
// Render QUAD - Triangle 1
|
|
rasterizerUnOpt(
|
|
outputPrimitives,
|
|
viewport,
|
|
screen,
|
|
particle,
|
|
0,
|
|
1,
|
|
2,
|
|
debugOut1);
|
|
|
|
// Render QUAD - Triangle 2
|
|
rasterizerUnOpt(
|
|
outputPrimitives,
|
|
viewport,
|
|
screen,
|
|
particle,
|
|
2,
|
|
1,
|
|
3,
|
|
debugOut1);
|
|
}
|
|
|
|
__kernel void vertexShaderSprite(
|
|
__constant float modelview[16],
|
|
__constant float projection[16],
|
|
__constant float inverseView[9],
|
|
__constant int4 viewport[1],
|
|
__local struct __VSSpriteOut * vsOutputPrimitives,
|
|
__global float4 * inputPrimitives,
|
|
__global struct __GSSpriteOut * outputPrimitives,
|
|
// __global float4 * outputPrimitives,
|
|
__write_only image2d_t screen,
|
|
__read_only image2d_t particle,
|
|
__global float4 * debugOut1,
|
|
__global int * debugOut2)
|
|
{
|
|
float matrix[16];
|
|
|
|
uint id = get_global_id(0);
|
|
uint lid = get_local_id(0);
|
|
|
|
// gl_ProjectionMatrix * gl_ModelViewMatrix * gl_Vertex
|
|
matrixMul(matrix, projection, modelview);
|
|
|
|
float4 position = inputPrimitives[id];
|
|
vsOutputPrimitives[lid].position = position;
|
|
vsOutputPrimitives[lid].particlePosition =
|
|
matrixVectorMul(matrix, position);
|
|
|
|
geometryShader(
|
|
modelview,
|
|
projection,
|
|
inverseView,
|
|
viewport,
|
|
vsOutputPrimitives,
|
|
outputPrimitives,
|
|
screen,
|
|
particle,
|
|
debugOut1,
|
|
debugOut2);
|
|
} |