@@ -339,24 +339,22 @@ namespace cv { namespace cuda { namespace device {
339
339
{
340
340
int xstart = __float2int_rd (fx) - 1 ;
341
341
int ystart = __float2int_rd (fy) - 1 ;
342
- int xlimit = xstart + 3 ;
343
- int ylimit = ystart + 3 ;
344
342
int xoffset[4 ];
345
- float xcoeff[4 ];
346
- for (int x = xstart ; x <= xlimit ; ++x)
343
+ W1 xcoeff[4 ];
344
+ for (int x = 0 ; x < 4 ; ++x, ++xstart )
347
345
{
348
- xoffset[x - xstart ] = clamp (x , 0 , col1);
349
- xcoeff[x - xstart ] = cubic.at (x - fx);
346
+ xoffset[x] = clamp (xstart , 0 , col1);
347
+ xcoeff [x ] = cubic.at (xstart - fx);
350
348
}
351
349
W sumval = VecTraits<W>::all (0 );
352
- for (int y = ystart ; y <= ylimit ; ++y)
350
+ for (int y = 0 ; y < 4 ; ++y, ++ystart )
353
351
{
354
- int yoffest = clamp (y , 0 , row1);
352
+ int yoffest = clamp (ystart , 0 , row1);
355
353
T const * S = ptr<T>(src, yoffest);
356
354
W sline = VecTraits<W>::all (0 );
357
355
for (int x = 0 ; x < 4 ; ++x)
358
356
sline += xcoeff[x] * saturate_cast<W>(S[xoffset[x]]);
359
- sumval += sline * cubic.at (y - fy);
357
+ sumval += sline * cubic.at (ystart - fy);
360
358
}
361
359
at<T>(dst, dy, dx) = saturate_cast<T>(sumval);
362
360
}
@@ -376,19 +374,17 @@ namespace cv { namespace cuda { namespace device {
376
374
{
377
375
int xstart = __float2int_rd (fx) - 1 ;
378
376
int ystart = __float2int_rd (fy) - 1 ;
379
- int xlimit = xstart + 3 ;
380
- int ylimit = ystart + 3 ;
381
377
int xoffset[4 ], yoffset[4 ];
382
378
W xcoeff[4 ], ycoeff[4 ];
383
- for (int x = xstart ; x <= xlimit ; ++x)
379
+ for (int x = 0 ; x < 4 ; ++x, ++xstart )
384
380
{
385
- xoffset[x - xstart ] = clamp (x , 0 , col1) * cn;
386
- xcoeff[x - xstart ] = cubic.at (x - fx);
381
+ xoffset[x] = clamp (xstart , 0 , col1) * cn;
382
+ xcoeff [x ] = cubic.at (xstart - fx);
387
383
}
388
- for (int y = ystart ; y <= ylimit ; ++y)
384
+ for (int y = 0 ; y < 4 ; ++y, ++ystart )
389
385
{
390
- yoffset[y - ystart ] = clamp (y , 0 , row1);
391
- ycoeff[y - ystart ] = cubic.at (y - fy);
386
+ yoffset[y] = clamp (ystart , 0 , row1);
387
+ ycoeff [y ] = cubic.at (ystart - fy);
392
388
}
393
389
T* D = ptr<T>(dst, dy) + dx * cn;
394
390
for (int i = 0 ; i < cn; ++i)
@@ -509,15 +505,15 @@ namespace cv { namespace cuda { namespace device {
509
505
dim3 block (32 , 8 );
510
506
dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
511
507
if (cn == 1 )
512
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearVec<T, W, 1 >(src, dst));
508
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearVec<T, W, 1 >(src, dst));
513
509
else if (cn == 2 )
514
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearVec<T, W, 2 >(src, dst));
510
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearVec<T, W, 2 >(src, dst));
515
511
else if (cn == 3 )
516
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearVec<T, W, 3 >(src, dst));
512
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearVec<T, W, 3 >(src, dst));
517
513
else if (cn == 4 )
518
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearVec<T, W, 4 >(src, dst));
514
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearVec<T, W, 4 >(src, dst));
519
515
else
520
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearCn<T, W>(src, dst, cn));
516
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearCn<T, W>(src, dst, cn));
521
517
}
522
518
523
519
template <typename T, typename W>
@@ -527,15 +523,15 @@ namespace cv { namespace cuda { namespace device {
527
523
dim3 block (32 , 8 );
528
524
dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
529
525
if (cn == 1 )
530
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearAntiVec<T, W, 1 >(src, dst, scale, 0 ));
526
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearAntiVec<T, W, 1 >(src, dst, scale, 0 ));
531
527
else if (cn == 2 )
532
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearAntiVec<T, W, 2 >(src, dst, scale, 0 ));
528
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearAntiVec<T, W, 2 >(src, dst, scale, 0 ));
533
529
else if (cn == 3 )
534
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearAntiVec<T, W, 3 >(src, dst, scale, 0 ));
530
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearAntiVec<T, W, 3 >(src, dst, scale, 0 ));
535
531
else if (cn == 4 )
536
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearAntiVec<T, W, 4 >(src, dst, scale, 0 ));
532
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearAntiVec<T, W, 4 >(src, dst, scale, 0 ));
537
533
else
538
- sampleKernel<<<block, grid , 0 , stream>>> (M, LinearAntiCn<T, W>(src, dst, scale, 0 , cn));
534
+ sampleKernel<<<grid, block , 0 , stream>>> (M, LinearAntiCn<T, W>(src, dst, scale, 0 , cn));
539
535
}
540
536
541
537
// ==================== cubic ====================//
@@ -547,15 +543,15 @@ namespace cv { namespace cuda { namespace device {
547
543
dim3 block (32 , 8 );
548
544
dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
549
545
if (cn == 1 )
550
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicVec<T, W, 1 >(src, dst, A));
546
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicVec<T, W, 1 >(src, dst, A));
551
547
else if (cn == 2 )
552
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicVec<T, W, 2 >(src, dst, A));
548
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicVec<T, W, 2 >(src, dst, A));
553
549
else if (cn == 3 )
554
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicVec<T, W, 3 >(src, dst, A));
550
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicVec<T, W, 3 >(src, dst, A));
555
551
else if (cn == 4 )
556
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicVec<T, W, 4 >(src, dst, A));
552
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicVec<T, W, 4 >(src, dst, A));
557
553
else
558
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicCn<T, W>(src, dst, A, cn));
554
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicCn<T, W>(src, dst, A, cn));
559
555
}
560
556
561
557
template <typename T, typename W>
@@ -565,15 +561,15 @@ namespace cv { namespace cuda { namespace device {
565
561
dim3 block (32 , 8 );
566
562
dim3 grid (divUp (dst.cols , block.x ), divUp (dst.rows , block.y ));
567
563
if (cn == 1 )
568
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicAntiVec<T, W, 1 >(src, dst, scale, A));
564
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicAntiVec<T, W, 1 >(src, dst, scale, A));
569
565
else if (cn == 2 )
570
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicAntiVec<T, W, 2 >(src, dst, scale, A));
566
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicAntiVec<T, W, 2 >(src, dst, scale, A));
571
567
else if (cn == 3 )
572
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicAntiVec<T, W, 3 >(src, dst, scale, A));
568
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicAntiVec<T, W, 3 >(src, dst, scale, A));
573
569
else if (cn == 4 )
574
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicAntiVec<T, W, 4 >(src, dst, scale, A));
570
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicAntiVec<T, W, 4 >(src, dst, scale, A));
575
571
else
576
- sampleKernel<<<block, grid , 0 , stream>>> (M, CubicAntiCn<T, W>(src, dst, scale, A, cn));
572
+ sampleKernel<<<grid, block , 0 , stream>>> (M, CubicAntiCn<T, W>(src, dst, scale, A, cn));
577
573
}
578
574
579
575
template <typename T, typename W>
0 commit comments