@@ -386,28 +386,38 @@ namespace
386
386
const int cn = CV_MAT_CN (srcType);
387
387
const int ddepth = CV_MAT_DEPTH (dstType);
388
388
389
- Mat rowKernel = _rowKernel.getMat ();
390
- Mat columnKernel = _columnKernel.getMat ();
389
+ CV_Assert ( _rowKernel.empty () || _rowKernel.isMat () );
390
+ CV_Assert ( _columnKernel.empty () || _columnKernel.isMat () );
391
+ Mat rowKernel = _rowKernel.empty () ? cv::Mat () : _rowKernel.getMat ();
392
+ Mat columnKernel = _columnKernel.empty () ? cv::Mat () : _columnKernel.getMat ();
391
393
392
394
CV_Assert ( sdepth <= CV_64F && cn <= 4 );
393
- CV_Assert ( rowKernel.channels () == 1 );
394
- CV_Assert ( columnKernel.channels () == 1 );
395
+ CV_Assert ( rowKernel.empty () || rowKernel. channels () == 1 );
396
+ CV_Assert ( columnKernel.empty () || columnKernel. channels () == 1 );
395
397
CV_Assert ( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP );
396
398
CV_Assert ( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP );
397
399
398
400
Mat kernel32F;
399
401
400
- rowKernel.convertTo (kernel32F, CV_32F);
401
- rowKernel_.upload (kernel32F.reshape (1 , 1 ));
402
+ if (!rowKernel.empty ())
403
+ {
404
+ rowKernel.convertTo (kernel32F, CV_32F);
405
+ rowKernel_.upload (kernel32F.reshape (1 , 1 ));
406
+ }
402
407
403
- columnKernel.convertTo (kernel32F, CV_32F);
404
- columnKernel_.upload (kernel32F.reshape (1 , 1 ));
408
+ if (!columnKernel.empty ())
409
+ {
410
+ columnKernel.convertTo (kernel32F, CV_32F);
411
+ columnKernel_.upload (kernel32F.reshape (1 , 1 ));
412
+ }
405
413
406
- CV_Assert ( rowKernel_.cols > 0 && rowKernel_.cols <= 32 );
407
- CV_Assert ( columnKernel_.cols > 0 && columnKernel_.cols <= 32 );
414
+ CV_Assert ( rowKernel_.empty () || (rowKernel_. cols > 0 && rowKernel_.cols <= 32 ) );
415
+ CV_Assert ( columnKernel_.empty () || (columnKernel_. cols > 0 && columnKernel_.cols <= 32 ) );
408
416
409
- normalizeAnchor (anchor_.x , rowKernel_.cols );
410
- normalizeAnchor (anchor_.y , columnKernel_.cols );
417
+ if (!rowKernel_.empty ())
418
+ normalizeAnchor (anchor_.x , rowKernel_.cols );
419
+ if (!columnKernel_.empty ())
420
+ normalizeAnchor (anchor_.y , columnKernel_.cols );
411
421
412
422
bufType_ = CV_MAKE_TYPE (CV_32F, cn);
413
423
@@ -426,15 +436,45 @@ namespace
426
436
_dst.create (src.size (), dstType_);
427
437
GpuMat dst = _dst.getGpuMat ();
428
438
429
- ensureSizeIsEnough (src.size (), bufType_, buf_);
439
+ const bool isInPlace = (src.data == dst.data );
440
+ const bool hasRowKernel = !rowKernel_.empty ();
441
+ const bool hasColKernel = !columnKernel_.empty ();
442
+ const bool hasSingleKernel = (hasRowKernel ^ hasColKernel);
443
+ const bool needsSrcAdaptation = !hasRowKernel && hasColKernel && (srcType_ != bufType_);
444
+ const bool needsDstAdaptation = hasRowKernel && !hasColKernel && (dstType_ != bufType_);
445
+ const bool needsBufForIntermediateStorage = (hasRowKernel && hasColKernel) || (hasSingleKernel && isInPlace);
446
+ const bool needsBuf = needsSrcAdaptation || needsDstAdaptation || needsBufForIntermediateStorage;
447
+ if (needsBuf)
448
+ ensureSizeIsEnough (src.size (), bufType_, buf_);
449
+
450
+ if (needsSrcAdaptation)
451
+ src.convertTo (buf_, bufType_, _stream);
452
+ GpuMat& srcAdapted = needsSrcAdaptation ? buf_ : src;
430
453
431
454
DeviceInfo devInfo;
432
455
const int cc = devInfo.majorVersion () * 10 + devInfo.minorVersion ();
433
456
434
457
cudaStream_t stream = StreamAccessor::getStream (_stream);
435
458
436
- rowFilter_ (src, buf_, rowKernel_.ptr <float >(), rowKernel_.cols , anchor_.x , rowBorderMode_, cc, stream);
437
- columnFilter_ (buf_, dst, columnKernel_.ptr <float >(), columnKernel_.cols , anchor_.y , columnBorderMode_, cc, stream);
459
+ if (!hasRowKernel && !hasColKernel && !isInPlace)
460
+ srcAdapted.convertTo (dst, dstType_, _stream);
461
+ else if (hasRowKernel || hasColKernel)
462
+ {
463
+ GpuMat& rowFilterSrc = srcAdapted;
464
+ GpuMat& rowFilterDst = !hasRowKernel ? srcAdapted : needsBuf ? buf_ : dst;
465
+ GpuMat& colFilterSrc = hasColKernel && needsBuf ? buf_ : srcAdapted;
466
+ GpuMat& colFilterTo = dst;
467
+
468
+ if (hasRowKernel)
469
+ rowFilter_ (rowFilterSrc, rowFilterDst, rowKernel_.ptr <float >(), rowKernel_.cols , anchor_.x , rowBorderMode_, cc, stream);
470
+ else if (hasColKernel && (needsBufForIntermediateStorage && !needsSrcAdaptation))
471
+ rowFilterSrc.convertTo (buf_, bufType_, _stream);
472
+
473
+ if (hasColKernel)
474
+ columnFilter_ (colFilterSrc, colFilterTo, columnKernel_.ptr <float >(), columnKernel_.cols , anchor_.y , columnBorderMode_, cc, stream);
475
+ else if (needsBuf)
476
+ buf_.convertTo (dst, dstType_, _stream);
477
+ }
438
478
}
439
479
}
440
480
0 commit comments