// KernelAnalyzer: avg=8 max=255

// Squared 2D distance between `this_` and the (x, y) part of `pt`.
//   this_ : reference position (x, y).
//   pt    : only pt.x and pt.y are read; pt.z and pt.w are ignored here.
// Returns dx*dx + dy*dy, with no square root taken.
float distanceSq(float2 this_, float4 pt) { // 5
    float2 p=(float2)(pt.x-this_.x, pt.y-this_.y);
    //return p.x*p.x + p.y*p.y;
    // Component-wise square, then sum both lanes (same value as the line above).
    p=p*p;
    return p.x+p.y;
}

// Reciprocal squared lengths for two float4 inputs at once.
// Each input is offset by `this_` component-wise and squared; the squares
// are then summed in (x, y) and (z, w) pairs, giving two sums per input.
// NOTE(review): this looks like each float4 packs two 2D points
// (xy and zw) - confirm against the caller.
//   this_    : value subtracted from both pt1 and pt2.
//   pt1, pt2 : inputs to measure against this_.
// Returns native_recip of (pt1 xy sum, pt1 zw sum, pt2 xy sum, pt2 zw sum).
// There is no guard for a zero sum before the reciprocal is taken.
float4 invLengthSq(float4 this_, float4 pt1, float4 pt2) { // 5
    float4 p1=pt1-this_;
    float4 p2=pt2-this_;
    // NOTE(review): the commented-out line below refers to `p`, which does
    // not exist in this function; it appears to be left over from distanceSq.
    //return p.x*p.x + p.y*p.y;
    p1=p1*p1;
    p2=p2*p2;
    return native_recip((float4)(p1.x+p1.y, p1.z+p1.w, p2.x+p2.y, p2.z+p2.w));
}

__kernel void templateKernel(
        __global float *a,
        __constant float4 *points /*__attribute__((max_constant_size (8192)))*/,
        const unsigned int points_length,
        const float step_0,
        const float step_1,
        const unsigned int widthPerKernel,
        const unsigned int width,
        const float top_0,
        const float top_1) {
    // 64.000*FullHD~132.710.400.000/0,3~1/2 TFlOp/s
    //float2 point;
    float4 point4;
    //point.y=get_global_id(0)*step_1;
    // y and w both receive the same value: top_0 plus this work-item's
    // global id in dimension 0 scaled by step_0.
    point4.y=point4.w=top_0+get_global_id(0)*step_0;
    for(int x=0; x