{ // Data allocations are very expensive on GPU. Use a buffer to solve: allocate once reuse later. gpu::GpuMat gI1, gI2, gs, t1,t2; gpu::GpuMat buf; double getPSNR_GPU_optimized(const Mat& I1, const ...