~paparazzi-uav/paparazzi/v5.0-manual

SGBM3WayMainLoop(Mat *_buffers, const Mat& _img1, const Mat& _img2, Mat* _dst_disp, const StereoSGBMParams& params, PixType* _clipTab, int _nstripes, int _stripe_overlap);

856

void getRawMatchingCost(CostType* C, CostType* hsumBuf, CostType* pixDiff, PixType* tmpBuf, int y, int src_start_idx) const;

857

void operator () (const Range& range) const;

858

};

859

860

// Stores the shared buffers/images and derives the per-run constants
// (stripe layout, disparity range, window half-sizes, penalties, buffer
// strides) from the image size and the user-supplied parameters.
// Throws (CV_Assert) when the number of disparities is not a multiple of 16.
SGBM3WayMainLoop::SGBM3WayMainLoop(Mat *_buffers, const Mat& _img1, const Mat& _img2, Mat* _dst_disp,
                                   const StereoSGBMParams& params, PixType* _clipTab,
                                   int _nstripes, int _stripe_overlap)
    : buffers(_buffers), img1(&_img1), img2(&_img2), dst_disp(_dst_disp), clipTab(_clipTab)
{
    // stripe layout: the image rows are split into nstripes bands
    nstripes = _nstripes;
    stripe_overlap = _stripe_overlap;
    stripe_sz = (int)ceil(img1->rows/(double)nstripes);

    // image geometry
    width = img1->cols;
    height = img1->rows;

    // disparity search range [minD, maxD) and the columns for which it is defined
    minD = params.minDisparity;
    maxD = minD + params.numDisparities;
    D = maxD - minD;
    minX1 = std::max(maxD, 0);
    maxX1 = width + std::min(minD, 0);
    width1 = maxX1 - minX1;
    CV_Assert( D % 16 == 0 );

    // half-sizes of the matching window; a non-positive window size falls back to 1
    int halfWindow = 1;
    if( params.SADWindowSize > 0 )
        halfWindow = params.SADWindowSize/2;
    SH2 = halfWindow;
    SW2 = halfWindow;

    // smoothness penalties; non-positive values fall back to 2 and 5, and P2 is kept above P1
    if( params.P1 > 0 )
        P1 = params.P1;
    else
        P1 = 2;
    int requestedP2 = 5;
    if( params.P2 > 0 )
        requestedP2 = params.P2;
    P2 = std::max(requestedP2, P1+1);

    // post-filtering thresholds with their fallbacks
    if( params.uniquenessRatio >= 0 )
        uniquenessRatio = params.uniquenessRatio;
    else
        uniquenessRatio = 10;
    if( params.disp12MaxDiff > 0 )
        disp12MaxDiff = params.disp12MaxDiff;
    else
        disp12MaxDiff = 1;

    // strides/sizes used by the raw matching cost computation
    costBufSize = width1*D;
    hsumBufNRows = SH2*2 + 2;
    TAB_OFS = 256*4;
    ftzero = std::max(params.preFilterCap, 15) | 1;
}

883

884

void getBufferPointers(Mat& buffer, int width, int width1, int D, int num_ch, int SH2, int P2,

885

CostType*& curCostVolumeLine, CostType*& hsumBuf, CostType*& pixDiff,

886

PixType*& tmpBuf, CostType*& horPassCostVolume,

887

CostType*& vertPassCostVolume, CostType*& vertPassMin, CostType*& rightPassBuf,

888

CostType*& disp2CostBuf, short*& disp2Buf)

889

{

890

// allocating all the required memory:

891

int costVolumeLineSize = width1*D;

892

int width1_ext = width1+2;

893

int costVolumeLineSize_ext = width1_ext*D;

894

int hsumBufNRows = SH2*2 + 2;

895

896

// main buffer to store matching costs for the current line:

897

int curCostVolumeLineSize = costVolumeLineSize*sizeof(CostType);

898

899

// auxiliary buffers for the raw matching cost computation:

900

int hsumBufSize = costVolumeLineSize*hsumBufNRows*sizeof(CostType);

901

int pixDiffSize = costVolumeLineSize*sizeof(CostType);

902

int tmpBufSize = width*16*num_ch*sizeof(PixType);

903

904

// auxiliary buffers for the matching cost aggregation:

905

int horPassCostVolumeSize = costVolumeLineSize_ext*sizeof(CostType); // buffer for the 2-pass horizontal cost aggregation

906

int vertPassCostVolumeSize = costVolumeLineSize_ext*sizeof(CostType); // buffer for the vertical cost aggregation

907

int vertPassMinSize = width1_ext*sizeof(CostType); // buffer for storing minimum costs from the previous line

908

int rightPassBufSize = D*sizeof(CostType); // additional small buffer for the right-to-left pass

909

910

// buffers for the pseudo-LRC check:

911

int disp2CostBufSize = width*sizeof(CostType);

912

int disp2BufSize = width*sizeof(short);

913

914

// sum up the sizes of all the buffers:

915

size_t totalBufSize = curCostVolumeLineSize +

916

hsumBufSize +

917

pixDiffSize +

918

tmpBufSize +

919

horPassCostVolumeSize +

920

vertPassCostVolumeSize +

921

vertPassMinSize +

922

rightPassBufSize +

923

disp2CostBufSize +

924

disp2BufSize +

925

16; //to compensate for the alignPtr shifts

926

927

if( buffer.empty() || !buffer.isContinuous() || buffer.cols*buffer.rows*buffer.elemSize() < totalBufSize )

928

buffer.create(1, (int)totalBufSize, CV_8U);

929

930

// set up all the pointers:

931

curCostVolumeLine = (CostType*)alignPtr(buffer.ptr(), 16);

932

hsumBuf = curCostVolumeLine + costVolumeLineSize;

933

pixDiff = hsumBuf + costVolumeLineSize*hsumBufNRows;

934

tmpBuf = (PixType*)(pixDiff + costVolumeLineSize);

935

horPassCostVolume = (CostType*)(tmpBuf + width*16*num_ch);

936

vertPassCostVolume = horPassCostVolume + costVolumeLineSize_ext;

937

rightPassBuf = vertPassCostVolume + costVolumeLineSize_ext;

938

vertPassMin = rightPassBuf + D;

939

disp2CostBuf = vertPassMin + width1_ext;

940

disp2Buf = disp2CostBuf + width;

941

942

// initialize memory:

943

memset(buffer.ptr(),0,totalBufSize);

944

for(int i=0;i<costVolumeLineSize;i++)

945

curCostVolumeLine[i] = (CostType)P2; //such initialization simplifies the cost aggregation loops a bit

946

}

947

948

// performing block matching and building raw cost-volume for the current row

949

void SGBM3WayMainLoop::getRawMatchingCost(CostType* C, // target cost-volume row

950

CostType* hsumBuf, CostType* pixDiff, PixType* tmpBuf, //buffers

951

int y, int src_start_idx) const

952

{

953

int x, d;

954

int dy1 = (y == src_start_idx) ? src_start_idx : y + SH2, dy2 = (y == src_start_idx) ? src_start_idx+SH2 : dy1;

955

956

for(int k = dy1; k <= dy2; k++ )

957

{

958

CostType* hsumAdd = hsumBuf + (std::min(k, height-1) % hsumBufNRows)*costBufSize;

959

if( k < height )

960

{

961

calcPixelCostBT( *img1, *img2, k, minD, maxD, pixDiff, tmpBuf, clipTab, TAB_OFS, ftzero );

962

963

memset(hsumAdd, 0, D*sizeof(CostType));

964

for(x = 0; x <= SW2*D; x += D )

965

{

966

int scale = x == 0 ? SW2 + 1 : 1;

967

968

for( d = 0; d < D; d++ )

969

hsumAdd[d] = (CostType)(hsumAdd[d] + pixDiff[x + d]*scale);

970

}

971

972

if( y > src_start_idx )

973

{

974

const CostType* hsumSub = hsumBuf + (std::max(y - SH2 - 1, src_start_idx) % hsumBufNRows)*costBufSize;

975

976

for( x = D; x < width1*D; x += D )

977

{

978

const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);

979

const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

980

981

#if CV_SIMD128

982

v_int16x8 hv_reg;

983

for( d = 0; d < D; d+=8 )

984

{

985

hv_reg = v_load_aligned(hsumAdd+x-D+d) + (v_load_aligned(pixAdd+d) - v_load_aligned(pixSub+d));

986

v_store_aligned(hsumAdd+x+d,hv_reg);

987

v_store_aligned(C+x+d,v_load_aligned(C+x+d)+(hv_reg-v_load_aligned(hsumSub+x+d)));

988

}

989

#else

990

for( d = 0; d < D; d++ )

991

{

992

int hv = hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);

993

C[x + d] = (CostType)(C[x + d] + hv - hsumSub[x + d]);

994

}

995

#endif

996

}

997

}

998

else

999

{

1000

for( x = D; x < width1*D; x += D )

1001

{

1002

const CostType* pixAdd = pixDiff + std::min(x + SW2*D, (width1-1)*D);

1003

const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);

1004

1005

for( d = 0; d < D; d++ )

1006

hsumAdd[x + d] = (CostType)(hsumAdd[x - D + d] + pixAdd[d] - pixSub[d]);

1007

}

1008

}

1009

}

1010

1011

if( y == src_start_idx )

1012

{

1013

int scale = k == src_start_idx ? SH2 + 1 : 1;

1014

for( x = 0; x < width1*D; x++ )

1015

C[x] = (CostType)(C[x] + hsumAdd[x]*scale);

1016

}

1017

}

1018

}

1019

1020

#if CV_SIMD128
// additional horizontal reduce operations on 8x16-bit registers:

// Returns the smallest of the eight lanes of `a`.
inline short min(const v_int16x8& a)
{
    short CV_DECL_ALIGNED(16) lanes[8];
    v_store_aligned(lanes, a);

    short lowest = lanes[0];
    for( int i = 1; i < 8; i++ )
        lowest = std::min(lowest, lanes[i]);
    return lowest;
}

// Returns the lane of `pos` that corresponds to the first strictly smallest lane
// of `val`; returns 0 when no lane of `val` is below SHRT_MAX.
inline short min_pos(const v_int16x8& val,const v_int16x8& pos)
{
    short CV_DECL_ALIGNED(16) val_lanes[8];
    short CV_DECL_ALIGNED(16) pos_lanes[8];
    v_store_aligned(val_lanes, val);
    v_store_aligned(pos_lanes, pos);

    short best_pos = 0;
    short best_val = SHRT_MAX;
    for( int i = 0; i < 8; i++ )
    {
        if( val_lanes[i] < best_val )
        {
            best_val = val_lanes[i];
            best_pos = pos_lanes[i];
        }
    }
    return best_pos;
}
#endif

1052

1053

// performing SGM cost accumulation from left to right (result is stored in leftBuf) and

1054

// in-place cost accumulation from top to bottom (result is stored in topBuf)

1055

inline void accumulateCostsLeftTop(CostType* leftBuf, CostType* leftBuf_prev, CostType* topBuf, CostType* costs,

1056

CostType& leftMinCost, CostType& topMinCost, int D, int P1, int P2)

1057

{

1058

#if CV_SIMD128

1059

v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));

1060

1061

v_int16x8 leftMinCostP2_reg = v_setall_s16(cv::saturate_cast<CostType>(leftMinCost+P2));

1062

v_int16x8 leftMinCost_new_reg = v_setall_s16(SHRT_MAX);

1063

v_int16x8 src0_leftBuf = v_setall_s16(SHRT_MAX);

1064

v_int16x8 src1_leftBuf = v_load_aligned(leftBuf_prev);

1065

1066

v_int16x8 topMinCostP2_reg = v_setall_s16(cv::saturate_cast<CostType>(topMinCost+P2));

1067

v_int16x8 topMinCost_new_reg = v_setall_s16(SHRT_MAX);

1068

v_int16x8 src0_topBuf = v_setall_s16(SHRT_MAX);

1069

v_int16x8 src1_topBuf = v_load_aligned(topBuf);

1070

1071

v_int16x8 src2;

1072

v_int16x8 src_shifted_left,src_shifted_right;

1073

v_int16x8 res;

1074

1075

for(int i=0;i<D-8;i+=8)

1076

{

1077

//process leftBuf:

1078

//lookahead load:

1079

src2 = v_load_aligned(leftBuf_prev+i+8);

1080

1081

//get shifted versions of the current block and add P1:

1082

src_shifted_left = v_extract<7> (src0_leftBuf,src1_leftBuf) + P1_reg;

1083

src_shifted_right = v_extract<1> (src1_leftBuf,src2 ) + P1_reg;

1084

1085

// process and save current block:

1086

res = v_load_aligned(costs+i) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_leftBuf,leftMinCostP2_reg))-leftMinCostP2_reg);

1087

leftMinCost_new_reg = v_min(leftMinCost_new_reg,res);

1088

v_store_aligned(leftBuf+i, res);

1089

1090

//update src buffers:

1091

src0_leftBuf = src1_leftBuf;

1092

src1_leftBuf = src2;

1093

1094

//process topBuf:

1095

//lookahead load:

1096

src2 = v_load_aligned(topBuf+i+8);

1097

1098

//get shifted versions of the current block and add P1:

1099

src_shifted_left = v_extract<7> (src0_topBuf,src1_topBuf) + P1_reg;

1100

src_shifted_right = v_extract<1> (src1_topBuf,src2 ) + P1_reg;

1101

1102

// process and save current block:

1103

res = v_load_aligned(costs+i) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_topBuf,topMinCostP2_reg))-topMinCostP2_reg);

1104

topMinCost_new_reg = v_min(topMinCost_new_reg,res);

1105

v_store_aligned(topBuf+i, res);

1106

1107

//update src buffers:

1108

src0_topBuf = src1_topBuf;

1109

src1_topBuf = src2;

1110

}

1111

1112

// a bit different processing for the last cycle of the loop:

1113

//process leftBuf:

1114

src2 = v_setall_s16(SHRT_MAX);

1115

src_shifted_left = v_extract<7> (src0_leftBuf,src1_leftBuf) + P1_reg;

1116

src_shifted_right = v_extract<1> (src1_leftBuf,src2 ) + P1_reg;

1117

1118

res = v_load_aligned(costs+D-8) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_leftBuf,leftMinCostP2_reg))-leftMinCostP2_reg);

1119

leftMinCost = min(v_min(leftMinCost_new_reg,res));

1120

v_store_aligned(leftBuf+D-8, res);

1121

1122

//process topBuf:

1123

src2 = v_setall_s16(SHRT_MAX);

1124

src_shifted_left = v_extract<7> (src0_topBuf,src1_topBuf) + P1_reg;

1125

src_shifted_right = v_extract<1> (src1_topBuf,src2 ) + P1_reg;

1126

1127

res = v_load_aligned(costs+D-8) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_topBuf,topMinCostP2_reg))-topMinCostP2_reg);

1128

topMinCost = min(v_min(topMinCost_new_reg,res));

1129

v_store_aligned(topBuf+D-8, res);

1130

#else

1131

CostType leftMinCost_new = SHRT_MAX;

1132

CostType topMinCost_new = SHRT_MAX;

1133

int leftMinCost_P2 = leftMinCost + P2;

1134

int topMinCost_P2 = topMinCost + P2;

1135

CostType leftBuf_prev_i_minus_1 = SHRT_MAX;

1136

CostType topBuf_i_minus_1 = SHRT_MAX;

1137

CostType tmp;

1138

1139

for(int i=0;i<D-1;i++)

1140

{

1141

leftBuf[i] = cv::saturate_cast<CostType>(costs[i] + std::min(std::min(leftBuf_prev_i_minus_1+P1,leftBuf_prev[i+1]+P1),std::min((int)leftBuf_prev[i],leftMinCost_P2))-leftMinCost_P2);

1142

leftBuf_prev_i_minus_1 = leftBuf_prev[i];

1143

leftMinCost_new = std::min(leftMinCost_new,leftBuf[i]);

1144

1145

tmp = topBuf[i];

1146

topBuf[i] = cv::saturate_cast<CostType>(costs[i] + std::min(std::min(topBuf_i_minus_1+P1,topBuf[i+1]+P1),std::min((int)topBuf[i],topMinCost_P2))-topMinCost_P2);

1147

topBuf_i_minus_1 = tmp;

1148

topMinCost_new = std::min(topMinCost_new,topBuf[i]);

1149

}

1150

1151

leftBuf[D-1] = cv::saturate_cast<CostType>(costs[D-1] + std::min(leftBuf_prev_i_minus_1+P1,std::min((int)leftBuf_prev[D-1],leftMinCost_P2))-leftMinCost_P2);

1152

leftMinCost = std::min(leftMinCost_new,leftBuf[D-1]);

1153

1154

topBuf[D-1] = cv::saturate_cast<CostType>(costs[D-1] + std::min(topBuf_i_minus_1+P1,std::min((int)topBuf[D-1],topMinCost_P2))-topMinCost_P2);

1155

topMinCost = std::min(topMinCost_new,topBuf[D-1]);

1156

#endif

1157

}

1158

1159

// performing in-place SGM cost accumulation from right to left (the result is stored in rightBuf) and

1160

// summing rightBuf, topBuf, leftBuf together (the result is stored in leftBuf), as well as finding the

1161

// optimal disparity value with minimum accumulated cost

1162

inline void accumulateCostsRight(CostType* rightBuf, CostType* topBuf, CostType* leftBuf, CostType* costs,

1163

CostType& rightMinCost, int D, int P1, int P2, int& optimal_disp, CostType& min_cost)

1164

{

1165

#if CV_SIMD128

1166

v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));

1167

1168

v_int16x8 rightMinCostP2_reg = v_setall_s16(cv::saturate_cast<CostType>(rightMinCost+P2));

1169

v_int16x8 rightMinCost_new_reg = v_setall_s16(SHRT_MAX);

1170

v_int16x8 src0_rightBuf = v_setall_s16(SHRT_MAX);

1171

v_int16x8 src1_rightBuf = v_load(rightBuf);

1172

1173

v_int16x8 src2;

1174

v_int16x8 src_shifted_left,src_shifted_right;

1175

v_int16x8 res;

1176

1177

v_int16x8 min_sum_cost_reg = v_setall_s16(SHRT_MAX);

1178

v_int16x8 min_sum_pos_reg = v_setall_s16(0);

1179

v_int16x8 loop_idx(0,1,2,3,4,5,6,7);

1180

v_int16x8 eight_reg = v_setall_s16(8);

1181

1182

for(int i=0;i<D-8;i+=8)

1183

{

1184

//lookahead load:

1185

src2 = v_load_aligned(rightBuf+i+8);

1186

1187

//get shifted versions of the current block and add P1:

1188

src_shifted_left = v_extract<7> (src0_rightBuf,src1_rightBuf) + P1_reg;

1189

src_shifted_right = v_extract<1> (src1_rightBuf,src2 ) + P1_reg;

1190

1191

// process and save current block:

1192

res = v_load_aligned(costs+i) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_rightBuf,rightMinCostP2_reg))-rightMinCostP2_reg);

1193

rightMinCost_new_reg = v_min(rightMinCost_new_reg,res);

1194

v_store_aligned(rightBuf+i, res);

1195

1196

// compute and save total cost:

1197

res = res + v_load_aligned(leftBuf+i) + v_load_aligned(topBuf+i);

1198

v_store_aligned(leftBuf+i, res);

1199

1200

// track disparity value with the minimum cost:

1201

min_sum_cost_reg = v_min(min_sum_cost_reg,res);

1202

min_sum_pos_reg = min_sum_pos_reg + ((min_sum_cost_reg == res) & (loop_idx - min_sum_pos_reg));

1203

loop_idx = loop_idx+eight_reg;

1204

1205

//update src:

1206

src0_rightBuf = src1_rightBuf;

1207

src1_rightBuf = src2;

1208

}

1209

1210

// a bit different processing for the last cycle of the loop:

1211

src2 = v_setall_s16(SHRT_MAX);

1212

src_shifted_left = v_extract<7> (src0_rightBuf,src1_rightBuf) + P1_reg;

1213

src_shifted_right = v_extract<1> (src1_rightBuf,src2 ) + P1_reg;

1214

1215

res = v_load_aligned(costs+D-8) + (v_min(v_min(src_shifted_left,src_shifted_right),v_min(src1_rightBuf,rightMinCostP2_reg))-rightMinCostP2_reg);

1216

rightMinCost = min(v_min(rightMinCost_new_reg,res));

1217

v_store_aligned(rightBuf+D-8, res);

1218

1219

res = res + v_load_aligned(leftBuf+D-8) + v_load_aligned(topBuf+D-8);

1220

v_store_aligned(leftBuf+D-8, res);

1221

1222

min_sum_cost_reg = v_min(min_sum_cost_reg,res);

1223

min_cost = min(min_sum_cost_reg);

1224

min_sum_pos_reg = min_sum_pos_reg + ((min_sum_cost_reg == res) & (loop_idx - min_sum_pos_reg));

1225

optimal_disp = min_pos(min_sum_cost_reg,min_sum_pos_reg);

1226

#else

1227

CostType rightMinCost_new = SHRT_MAX;

1228

int rightMinCost_P2 = rightMinCost + P2;

1229

CostType rightBuf_i_minus_1 = SHRT_MAX;

1230

CostType tmp;

1231

min_cost = SHRT_MAX;

1232

1233

for(int i=0;i<D-1;i++)

1234

{

1235

tmp = rightBuf[i];

1236

rightBuf[i] = cv::saturate_cast<CostType>(costs[i] + std::min(std::min(rightBuf_i_minus_1+P1,rightBuf[i+1]+P1),std::min((int)rightBuf[i],rightMinCost_P2))-rightMinCost_P2);

1237

rightBuf_i_minus_1 = tmp;

1238

rightMinCost_new = std::min(rightMinCost_new,rightBuf[i]);

1239

leftBuf[i] = cv::saturate_cast<CostType>((int)leftBuf[i]+rightBuf[i]+topBuf[i]);

1240

if(leftBuf[i]<min_cost)

1241

{

1242

optimal_disp = i;

1243

min_cost = leftBuf[i];

1244

}

1245

}

1246

1247

rightBuf[D-1] = cv::saturate_cast<CostType>(costs[D-1] + std::min(rightBuf_i_minus_1+P1,std::min((int)rightBuf[D-1],rightMinCost_P2))-rightMinCost_P2);

1248

rightMinCost = std::min(rightMinCost_new,rightBuf[D-1]);

1249

leftBuf[D-1] = cv::saturate_cast<CostType>((int)leftBuf[D-1]+rightBuf[D-1]+topBuf[D-1]);

1250

if(leftBuf[D-1]<min_cost)

1251

{

1252

optimal_disp = D-1;

1253

min_cost = leftBuf[D-1];

1254

}

1255

#endif

1256

}

1257

1258

void SGBM3WayMainLoop::operator () (const Range& range) const

1259

{

1260

// force separate processing of stripes:

1261

if(range.end>range.start+1)

1262

{

1263

for(int n=range.start;n<range.end;n++)

1264

(*this)(Range(n,n+1));

1265

return;

1266

}

1267

1268

const int DISP_SCALE = (1 << StereoMatcher::DISP_SHIFT);

1269

int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;

1270

1271

// setting up the ranges:

1272

int src_start_idx = std::max(std::min(range.start * stripe_sz - stripe_overlap, height),0);

1273

int src_end_idx = std::min(range.end * stripe_sz, height);

1274

1275

int dst_offset;

1276

if(range.start==0)

1277

dst_offset=stripe_overlap;

1278

else

1279

dst_offset=0;

1280

1281

Mat cur_buffer = buffers [range.start];

1282

Mat cur_disp = dst_disp[range.start];

1283

cur_disp = Scalar(INVALID_DISP_SCALED);

1284

1285

// prepare buffers:

1286

CostType *curCostVolumeLine, *hsumBuf, *pixDiff;

1287

PixType* tmpBuf;

1288

CostType *horPassCostVolume, *vertPassCostVolume, *vertPassMin, *rightPassBuf, *disp2CostBuf;

1289

short* disp2Buf;

1290

getBufferPointers(cur_buffer,width,width1,D,img1->channels(),SH2,P2,

1291

curCostVolumeLine,hsumBuf,pixDiff,tmpBuf,horPassCostVolume,

1292

vertPassCostVolume,vertPassMin,rightPassBuf,disp2CostBuf,disp2Buf);

1293

1294

// start real processing:

1295

for(int y=src_start_idx;y<src_end_idx;y++)

1296

{

1297

getRawMatchingCost(curCostVolumeLine,hsumBuf,pixDiff,tmpBuf,y,src_start_idx);

1298

1299

short* disp_row = (short*)cur_disp.ptr(dst_offset+(y-src_start_idx));

1300

1301

// initialize the auxiliary buffers for the pseudo left-right consistency check:

1302

for(int x=0;x<width;x++)

1303

{

1304

disp2Buf[x] = (short)INVALID_DISP_SCALED;

1305

disp2CostBuf[x] = SHRT_MAX;

1306

}

1307

CostType* C = curCostVolumeLine - D;

1308

CostType prev_min, min_cost;

1309

int d, best_d;

1310

d = best_d = 0;

1311

1312

// forward pass

1313

prev_min=0;

1314

for (int x=D;x<(1+width1)*D;x+=D)

1315

accumulateCostsLeftTop(horPassCostVolume+x,horPassCostVolume+x-D,vertPassCostVolume+x,C+x,prev_min,vertPassMin[x/D],D,P1,P2);

1316

1317

//backward pass

1318

memset(rightPassBuf,0,D*sizeof(CostType));

1319

prev_min=0;

1320

for (int x=width1*D;x>=D;x-=D)

1321

{

1322

accumulateCostsRight(rightPassBuf,vertPassCostVolume+x,horPassCostVolume+x,C+x,prev_min,D,P1,P2,best_d,min_cost);

1323

1324

if(uniquenessRatio>0)

1325

{

1326

#if CV_SIMD128

1327

horPassCostVolume+=x;

1328

int thresh = (100*min_cost)/(100-uniquenessRatio);

1329

v_int16x8 thresh_reg = v_setall_s16((short)(thresh+1));

1330

v_int16x8 d1 = v_setall_s16((short)(best_d-1));

1331

v_int16x8 d2 = v_setall_s16((short)(best_d+1));

1332

v_int16x8 eight_reg = v_setall_s16(8);

1333

v_int16x8 cur_d(0,1,2,3,4,5,6,7);

1334

v_int16x8 mask,cost1,cost2;

1335

1336

for( d = 0; d < D; d+=16 )

1337

{

1338

cost1 = v_load_aligned(horPassCostVolume+d);

1339

cost2 = v_load_aligned(horPassCostVolume+d+8);

1340

1341

mask = cost1 < thresh_reg;

1342

mask = mask & ( (cur_d<d1) | (cur_d>d2) );

1343

if( v_signmask(mask) )

1344

break;

1345

1346

cur_d = cur_d+eight_reg;

1347

1348

mask = cost2 < thresh_reg;

1349

mask = mask & ( (cur_d<d1) | (cur_d>d2) );

1350

if( v_signmask(mask) )

1351

break;

1352

1353

cur_d = cur_d+eight_reg;

1354

}

1355

horPassCostVolume-=x;

1356

#else

1357

for( d = 0; d < D; d++ )

1358

{

1359

if( horPassCostVolume[x+d]*(100 - uniquenessRatio) < min_cost*100 && std::abs(d - best_d) > 1 )

1360

break;

1361

}

1362

#endif

1363

if( d < D )

1364

continue;

1365

}

1366

d = best_d;

1367

1368

int _x2 = x/D - 1 + minX1 - d - minD;

1369

if( _x2>=0 && _x2<width && disp2CostBuf[_x2] > min_cost )

1370

{

1371

disp2CostBuf[_x2] = min_cost;

1372

disp2Buf[_x2] = (short)(d + minD);

1373

}

1374

1375

if( 0 < d && d < D-1 )

1376

{

1377

// do subpixel quadratic interpolation:

1378

// fit parabola into (x1=d-1, y1=Sp[d-1]), (x2=d, y2=Sp[d]), (x3=d+1, y3=Sp[d+1])

1379

// then find minimum of the parabola.

1380

int denom2 = std::max(horPassCostVolume[x+d-1] + horPassCostVolume[x+d+1] - 2*horPassCostVolume[x+d], 1);

1381

d = d*DISP_SCALE + ((horPassCostVolume[x+d-1] - horPassCostVolume[x+d+1])*DISP_SCALE + denom2)/(denom2*2);

1382

}

1383

else

1384

d *= DISP_SCALE;

1385

1386

disp_row[(x/D)-1 + minX1] = (DispType)(d + minD*DISP_SCALE);

1387

}

1388

1389

for(int x = minX1; x < maxX1; x++ )

1390

{

1391

// pseudo LRC consistency check using only one disparity map;

1392

// pixels with difference more than disp12MaxDiff are invalidated

1393

int d1 = disp_row[x];

1394

if( d1 == INVALID_DISP_SCALED )

1395

continue;

1396

int _d = d1 >> StereoMatcher::DISP_SHIFT;

1397

int d_ = (d1 + DISP_SCALE-1) >> StereoMatcher::DISP_SHIFT;

1398

int _x = x - _d, x_ = x - d_;

1399

if( 0 <= _x && _x < width && disp2Buf[_x] >= minD && std::abs(disp2Buf[_x] - _d) > disp12MaxDiff &&

1400

0 <= x_ && x_ < width && disp2Buf[x_] >= minD && std::abs(disp2Buf[x_] - d_) > disp12MaxDiff )

1401

disp_row[x] = (short)INVALID_DISP_SCALED;

1402

}

1403

}

1404

}

1405

1406

static void computeDisparity3WaySGBM( const Mat& img1, const Mat& img2,

1407

Mat& disp1, const StereoSGBMParams& params,

1408

Mat* buffers, int nstripes )

1409

{

1410

// precompute a lookup table for the raw matching cost computation:

1411

const int TAB_OFS = 256*4, TAB_SIZE = 256 + TAB_OFS*2;

1412

PixType* clipTab = new PixType[TAB_SIZE];

1413

int ftzero = std::max(params.preFilterCap, 15) | 1;

1414

for(int k = 0; k < TAB_SIZE; k++ )

1415

clipTab[k] = (PixType)(std::min(std::max(k - TAB_OFS, -ftzero), ftzero) + ftzero);

1416

1417

// allocate separate dst_disp arrays to avoid conflicts due to stripe overlap:

1418

int stripe_sz = (int)ceil(img1.rows/(double)nstripes);

1419

int stripe_overlap = (params.SADWindowSize/2+1) + (int)ceil(0.1*stripe_sz);

1420

Mat* dst_disp = new Mat[nstripes];

1421

for(int i=0;i<nstripes;i++)

1422

dst_disp[i].create(stripe_sz+stripe_overlap,img1.cols,CV_16S);

1423

1424

parallel_for_(Range(0,nstripes),SGBM3WayMainLoop(buffers,img1,img2,dst_disp,params,clipTab,nstripes,stripe_overlap));

1425

1426

//assemble disp1 from dst_disp:

1427

short* dst_row;

1428

short* src_row;

1429

for(int i=0;i<disp1.rows;i++)

1430

{

1431

dst_row = (short*)disp1.ptr(i);

1432

src_row = (short*)dst_disp[i/stripe_sz].ptr(stripe_overlap+i%stripe_sz);

1433

memcpy(dst_row,src_row,disp1.cols*sizeof(short));

1434

}

1435

1436

delete[] clipTab;

1437

delete[] dst_disp;

1438

}

1439

1440

class StereoSGBMImpl : public StereoSGBM

1441

{

1442

public:

1443

StereoSGBMImpl()

1444

{

1445

params = StereoSGBMParams();

1446

}

1447

1448

StereoSGBMImpl( int _minDisparity, int _numDisparities, int _SADWindowSize,

1449

int _P1, int _P2, int _disp12MaxDiff, int _preFilterCap,

1450

int _uniquenessRatio, int _speckleWindowSize, int _speckleRange,

1451

int _mode )

1452

{

1453

params = StereoSGBMParams( _minDisparity, _numDisparities, _SADWindowSize,

1454

_P1, _P2, _disp12MaxDiff, _preFilterCap,

1455

_uniquenessRatio, _speckleWindowSize, _speckleRange,

1456

_mode );

1457

}

1458

1459

void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr )

1460

{

1461

Mat left = leftarr.getMat(), right = rightarr.getMat();

1462

CV_Assert( left.size() == right.size() && left.type() == right.type() &&

1463

left.depth() == CV_8U );

1464

1465

disparr.create( left.size(), CV_16S );

1466

Mat disp = disparr.getMat();

1467

1468

if(params.mode==MODE_SGBM_3WAY)

1469

computeDisparity3WaySGBM( left, right, disp, params, buffers, num_stripes );

1470

else

1471

computeDisparitySGBM( left, right, disp, params, buffer );

1472

1473

medianBlur(disp, disp, 3);

1474

1475

if( params.speckleWindowSize > 0 )

1476

filterSpeckles(disp, (params.minDisparity - 1)*StereoMatcher::DISP_SCALE, params.speckleWindowSize,

1477

StereoMatcher::DISP_SCALE*params.speckleRange, buffer);

1478

}

1479

1480

int getMinDisparity() const { return params.minDisparity; }

1481

void setMinDisparity(int minDisparity) { params.minDisparity = minDisparity; }

1482

1483

int getNumDisparities() const { return params.numDisparities; }

1484

void setNumDisparities(int numDisparities) { params.numDisparities = numDisparities; }

1485

1486

int getBlockSize() const { return params.SADWindowSize; }

1487

void setBlockSize(int blockSize) { params.SADWindowSize = blockSize; }

1488

1489

int getSpeckleWindowSize() const { return params.speckleWindowSize; }

1490

void setSpeckleWindowSize(int speckleWindowSize) { params.speckleWindowSize = speckleWindowSize; }

1491

1492

int getSpeckleRange() const { return params.speckleRange; }

1493

void setSpeckleRange(int speckleRange) { params.speckleRange = speckleRange; }

1494

1495

int getDisp12MaxDiff() const { return params.disp12MaxDiff; }

1496

void setDisp12MaxDiff(int disp12MaxDiff) { params.disp12MaxDiff = disp12MaxDiff; }

1497

1498

int getPreFilterCap() const { return params.preFilterCap; }

1499

void setPreFilterCap(int preFilterCap) { params.preFilterCap = preFilterCap; }

1500

1501

int getUniquenessRatio() const { return params.uniquenessRatio; }

1502

void setUniquenessRatio(int uniquenessRatio) { params.uniquenessRatio = uniquenessRatio; }

1503

1504

int getP1() const { return params.P1; }

1505

void setP1(int P1) { params.P1 = P1; }

1506

1507

int getP2() const { return params.P2; }

1508

void setP2(int P2) { params.P2 = P2; }

1509

1510

int getMode() const { return params.mode; }

1511

void setMode(int mode) { params.mode = mode; }

1512

1513

void write(FileStorage& fs) const

1514

{

1515

fs << "name" << name_

1516

<< "minDisparity" << params.minDisparity

1517

<< "numDisparities" << params.numDisparities

1518

<< "blockSize" << params.SADWindowSize

1519

<< "speckleWindowSize" << params.speckleWindowSize

1520

<< "speckleRange" << params.speckleRange

1521

<< "disp12MaxDiff" << params.disp12MaxDiff

1522

<< "preFilterCap" << params.preFilterCap

1523

<< "uniquenessRatio" << params.uniquenessRatio

1524

<< "P1" << params.P1

1525

<< "P2" << params.P2

1526

<< "mode" << params.mode;

1527

}

1528

1529

void read(const FileNode& fn)

1530

{

1531

FileNode n = fn["name"];

1532

CV_Assert( n.isString() && String(n) == name_ );

1533

params.minDisparity = (int)fn["minDisparity"];

1534

params.numDisparities = (int)fn["numDisparities"];

1535

params.SADWindowSize = (int)fn["blockSize"];

1536

params.speckleWindowSize = (int)fn["speckleWindowSize"];

1537

params.speckleRange = (int)fn["speckleRange"];

1538

params.disp12MaxDiff = (int)fn["disp12MaxDiff"];

1539

params.preFilterCap = (int)fn["preFilterCap"];

1540

params.uniquenessRatio = (int)fn["uniquenessRatio"];

1541

params.P1 = (int)fn["P1"];

1542

params.P2 = (int)fn["P2"];

1543

params.mode = (int)fn["mode"];

1544

}

1545

1546

StereoSGBMParams params;

1547

Mat buffer;

1548

1549

// the number of stripes is fixed, disregarding the number of threads/processors

1550

// to make the results fully reproducible:

1551

static const int num_stripes = 4;

1552

Mat buffers[num_stripes];

1553

1554

static const char* name_;

1555

};

1556

1557

// Algorithm name stored by write() under the "name" key and checked by read().
const char* StereoSGBMImpl::name_ = "StereoMatcher.SGBM";

1558

1559

1560

// Factory: builds a StereoSGBMImpl from the given parameter values and returns
// it through the StereoSGBM interface pointer.
Ptr<StereoSGBM> StereoSGBM::create(int minDisparity, int numDisparities, int SADWindowSize,
                                   int P1, int P2, int disp12MaxDiff,
                                   int preFilterCap, int uniquenessRatio,
                                   int speckleWindowSize, int speckleRange,
                                   int mode)
{
    StereoSGBM* matcher = new StereoSGBMImpl(minDisparity, numDisparities, SADWindowSize,
                                             P1, P2, disp12MaxDiff,
                                             preFilterCap, uniquenessRatio,
                                             speckleWindowSize, speckleRange,
                                             mode);
    // ownership passes to the smart pointer
    return Ptr<StereoSGBM>(matcher);
}

1573

1574

// Returns the rectangle computed from the two input ROIs, shrunk by half the
// matching window on every side and limited horizontally by the disparity
// range [minDisparity, minDisparity + numberOfDisparities - 1].
// An empty Rect is returned when the resulting width or height is not positive.
Rect getValidDisparityROI( Rect roi1, Rect roi2,
                           int minDisparity,
                           int numberOfDisparities,
                           int SADWindowSize )
{
    const int halfWindow = SADWindowSize/2;
    const int lowestDisp = minDisparity;
    const int highestDisp = minDisparity + numberOfDisparities - 1;

    const int left   = std::max(roi1.x, roi2.x + highestDisp) + halfWindow;
    const int right  = std::min(roi1.x + roi1.width, roi2.x + roi2.width - lowestDisp) - halfWindow;
    const int top    = std::max(roi1.y, roi2.y) + halfWindow;
    const int bottom = std::min(roi1.y + roi1.height, roi2.y + roi2.height) - halfWindow;

    const Rect valid(left, top, right - left, bottom - top);

    if( valid.width > 0 && valid.height > 0 )
        return valid;
    return Rect();
}

1591

1592

// 2D point with 16-bit signed coordinates; used for the wavefront entries in filterSpecklesImpl.
typedef cv::Point_<short> Point2s;

1593

1594

template <typename T>

1595

void filterSpecklesImpl(cv::Mat& img, int newVal, int maxSpeckleSize, int maxDiff, cv::Mat& _buf)

1596

{

1597

using namespace cv;

1598

1599

int width = img.cols, height = img.rows, npixels = width*height;

1600

size_t bufSize = npixels*(int)(sizeof(Point2s) + sizeof(int) + sizeof(uchar));

1601

if( !_buf.isContinuous() || _buf.empty() || _buf.cols*_buf.rows*_buf.elemSize() < bufSize )

1602

_buf.create(1, (int)bufSize, CV_8U);

1603

1604

uchar* buf = _buf.ptr();

1605

int i, j, dstep = (int)(img.step/sizeof(T));

1606

int* labels = (int*)buf;

1607

buf += npixels*sizeof(labels[0]);

1608

Point2s* wbuf = (Point2s*)buf;

1609

buf += npixels*sizeof(wbuf[0]);

1610

uchar* rtype = (uchar*)buf;

1611

int curlabel = 0;

1612

1613

// clear out label assignments

1614

memset(labels, 0, npixels*sizeof(labels[0]));

1615

1616

for( i = 0; i < height; i++ )

1617

{

1618

T* ds = img.ptr<T>(i);

1619

int* ls = labels + width*i;

1620

1621

for( j = 0; j < width; j++ )

1622

{

1623

if( ds[j] != newVal ) // not a bad disparity

1624

{

1625

if( ls[j] ) // has a label, check for bad label

1626

{

1627

if( rtype[ls[j]] ) // small region, zero out disparity

1628

ds[j] = (T)newVal;

1629

}

1630

// no label, assign and propagate

1631

else

1632

{

1633

Point2s* ws = wbuf; // initialize wavefront

1634

Point2s p((short)j, (short)i); // current pixel

1635

curlabel++; // next label

1636

int count = 0; // current region size

1637

ls[j] = curlabel;

1638

1639

// wavefront propagation

1640

while( ws >= wbuf ) // wavefront not empty

1641

{

1642

count++;

1643

// put neighbors onto wavefront

1644

T* dpp = &img.at<T>(p.y, p.x);

1645

T dp = *dpp;

1646

int* lpp = labels + width*p.y + p.x;

1647

1648

if( p.y < height-1 && !lpp[+width] && dpp[+dstep] != newVal && std::abs(dp - dpp[+dstep]) <= maxDiff )

1649

{

1650

lpp[+width] = curlabel;

1651

*ws++ = Point2s(p.x, p.y+1);

1652

}

1653

1654

if( p.y > 0 && !lpp[-width] && dpp[-dstep] != newVal && std::abs(dp - dpp[-dstep]) <= maxDiff )

1655

{

1656

lpp[-width] = curlabel;

1657

*ws++ = Point2s(p.x, p.y-1);

1658

}

1659

1660

if( p.x < width-1 && !lpp[+1] && dpp[+1] != newVal && std::abs(dp - dpp[+1]) <= maxDiff )

1661

{

1662

lpp[+1] = curlabel;

1663

*ws++ = Point2s(p.x+1, p.y);

1664

}

1665

1666

if( p.x > 0 && !lpp[-1] && dpp[-1] != newVal && std::abs(dp - dpp[-1]) <= maxDiff )

1667

{

1668

lpp[-1] = curlabel;

1669

*ws++ = Point2s(p.x-1, p.y);

1670

}

1671

1672

// pop most recent and propagate

1673

// NB: could try least recent, maybe better convergence

1674

p = *--ws;

1675

}

1676

1677

// assign label type

1678

if( count <= maxSpeckleSize ) // speckle region

1679

{

1680

rtype[ls[j]] = 1; // small region label

1681

ds[j] = (T)newVal;

1682

}

1683

else

1684

rtype[ls[j]] = 0; // large region label

1685

}

1686

}

1687

}

1688

}

1689

}

1690

1691

#ifdef HAVE_IPP
// Speckle filtering through Intel IPP (ippiMarkSpeckles), applied to img in place.
//
// Returns true when the IPP call reported a non-negative status. Returns false
// when the scratch size query fails, the scratch allocation fails, the IPP call
// fails, or the IPP version is older than 8.1.0 (IPP_VERSION_X100 < 810).
// Any image type other than CV_8UC1 is passed to the 16-bit signed variant.
static bool ipp_filterSpeckles(Mat &img, int maxSpeckleSize, int newVal, int maxDiff)
{
#if IPP_VERSION_X100 >= 810
    const int imgType = img.type();
    const bool is8u = (imgType == CV_8UC1);

    IppiSize roi = { img.cols, img.rows };
    IppDataType ippType = is8u ? ipp8u : ipp16s;

    // ask IPP how much scratch memory the operation needs
    Ipp32s scratchBytes = 0;
    if( ippiMarkSpecklesGetBufferSize(roi, ippType, CV_MAT_CN(imgType), &scratchBytes) < 0 )
        return false;

    Ipp8u* scratch = (Ipp8u*)ippMalloc(scratchBytes);
    if( scratch == NULL && scratchBytes != 0 )
        return false;

    IppStatus rc = is8u
        ? ippiMarkSpeckles_8u_C1IR(img.ptr<Ipp8u>(), (int)img.step, roi,
                                   (Ipp8u)newVal, maxSpeckleSize, (Ipp8u)maxDiff, ippiNormL1, scratch)
        : ippiMarkSpeckles_16s_C1IR(img.ptr<Ipp16s>(), (int)img.step, roi,
                                    (Ipp16s)newVal, maxSpeckleSize, (Ipp16s)maxDiff, ippiNormL1, scratch);

    if( scratch != NULL )
        ippFree(scratch);

    return rc >= 0;
#else
    CV_UNUSED(img); CV_UNUSED(maxSpeckleSize); CV_UNUSED(newVal); CV_UNUSED(maxDiff);
    return false;
#endif
}
#endif

1729

1730

}

1731

1732

// Public entry point for speckle filtering of an 8-bit unsigned or 16-bit signed
// single-channel image (any other type fails the assertion below).
//
// _newval and _maxDiff are rounded to integers with cvRound. When the caller
// supplies __buf it is used (and possibly reallocated) as scratch memory;
// otherwise a local temporary is used. The IPP path is only attempted when no
// scratch buffer was supplied.
void cv::filterSpeckles( InputOutputArray _img, double _newval, int maxSpeckleSize,
                         double _maxDiff, InputOutputArray __buf )
{
    Mat img = _img.getMat();
    const int type = img.type();

    // fall back to a function-local scratch matrix when none was passed in
    Mat localScratch;
    Mat& _buf = __buf.needed() ? __buf.getMatRef() : localScratch;

    CV_Assert( type == CV_8UC1 || type == CV_16SC1 );

    const int newVal = cvRound(_newval);
    const int maxDiff = cvRound(_maxDiff);

    CV_IPP_RUN(IPP_VERSION_X100 >= 810 && !__buf.needed() && (type == CV_8UC1 || type == CV_16SC1), ipp_filterSpeckles(img, maxSpeckleSize, newVal, maxDiff));

    switch( type )
    {
    case CV_8UC1:
        filterSpecklesImpl<uchar>(img, newVal, maxSpeckleSize, maxDiff, _buf);
        break;
    default: // CV_16SC1, guaranteed by the assertion above
        filterSpecklesImpl<short>(img, newVal, maxSpeckleSize, maxDiff, _buf);
        break;
    }
}

1749

1750

// Helper for cv::validateDisparity: projects one row of disparities into the
// second view.
//
// For every x in [minX1, maxX1) with a valid disparity d (fixed point with
// dispShift fractional bits), the target column x2 = x - round(d) receives the
// disparity with the lowest cost seen so far (disp2buf / disp2cost, both with
// cols entries). Targets outside [0, cols) are skipped so that out-of-range
// disparity values cannot write past the row buffers.
template <typename CostT>
static void validateDisparityProjectRow( const short* dptr, const CostT* cptr,
                                         int minX1, int maxX1, int cols,
                                         int invalidDispScaled, int dispShift,
                                         int* disp2buf, int* disp2cost )
{
    const int dispScale = 1 << dispShift;

    for( int x = minX1; x < maxX1; x++ )
    {
        int d = dptr[x], c = cptr[x];

        if( d == invalidDispScaled )
            continue;

        // round the fixed-point disparity to the nearest integer column offset
        int x2 = x - ((d + dispScale/2) >> dispShift);

        if( x2 < 0 || x2 >= cols )
            continue;

        if( disp2cost[x2] > c )
        {
            disp2cost[x2] = c;
            disp2buf[x2] = d;
        }
    }
}

// Left-right consistency check on a fixed-point disparity map, done in place.
//
// _disp               - CV_16S disparity map with 4 fractional bits (DISP_SHIFT);
//                       inconsistent pixels are overwritten with (minDisparity - 1)*16.
// _cost               - matching cost per pixel, CV_16S or CV_32S, same size as _disp.
// minDisparity        - smallest disparity; minDisparity - 1 marks invalid pixels.
// numberOfDisparities - size of the disparity range, must be positive.
// disp12MaxDiff       - largest tolerated difference, in whole disparity units,
//                       between a pixel's disparity and the one projected into
//                       the second view.
//
// Per row, the lowest-cost disparity is projected to every column of the
// second view; a pixel is then invalidated only when both of its candidate
// target columns (disparity rounded down and up) are inside the image, hold a
// projected disparity, and differ from the pixel's disparity by more than
// disp12MaxDiff.
void cv::validateDisparity( InputOutputArray _disp, InputArray _cost, int minDisparity,
                            int numberOfDisparities, int disp12MaxDiff )
{
    Mat disp = _disp.getMat(), cost = _cost.getMat();
    int costType = cost.type();

    // validate the inputs before allocating or touching any pixel data
    CV_Assert( numberOfDisparities > 0 && disp.type() == CV_16S &&
               (costType == CV_16S || costType == CV_32S) &&
               disp.size() == cost.size() );

    int cols = disp.cols, rows = disp.rows;
    int minD = minDisparity, maxD = minDisparity + numberOfDisparities;
    // columns outside [minX1, maxX1) are never examined
    int x, minX1 = std::max(maxD, 0), maxX1 = cols + std::min(minD, 0);
    AutoBuffer<int> _disp2buf(cols*2);
    int* disp2buf = _disp2buf;          // best disparity projected to each second-view column
    int* disp2cost = disp2buf + cols;   // cost of that best disparity
    const int DISP_SHIFT = 4, DISP_SCALE = 1 << DISP_SHIFT;
    int INVALID_DISP = minD - 1, INVALID_DISP_SCALED = INVALID_DISP*DISP_SCALE;

    disp12MaxDiff *= DISP_SCALE; // compare in fixed-point units

    for( int y = 0; y < rows; y++ )
    {
        short* dptr = disp.ptr<short>(y);

        for( x = 0; x < cols; x++ )
        {
            disp2buf[x] = INVALID_DISP_SCALED;
            disp2cost[x] = INT_MAX;
        }

        if( costType == CV_16S )
            validateDisparityProjectRow(dptr, cost.ptr<short>(y), minX1, maxX1, cols,
                                        INVALID_DISP_SCALED, DISP_SHIFT, disp2buf, disp2cost);
        else
            validateDisparityProjectRow(dptr, cost.ptr<int>(y), minX1, maxX1, cols,
                                        INVALID_DISP_SCALED, DISP_SHIFT, disp2buf, disp2cost);

        for( x = minX1; x < maxX1; x++ )
        {
            // we round the computed disparity both towards -inf and +inf and check
            // if either of the corresponding disparities in disp2 is consistent.
            // This is to give the computed disparity a chance to look valid if it is.
            int d = dptr[x];
            if( d == INVALID_DISP_SCALED )
                continue;
            int d0 = d >> DISP_SHIFT;
            int d1 = (d + DISP_SCALE-1) >> DISP_SHIFT;
            int x0 = x - d0, x1 = x - d1;
            if( (0 <= x0 && x0 < cols && disp2buf[x0] > INVALID_DISP_SCALED && std::abs(disp2buf[x0] - d) > disp12MaxDiff) &&
                (0 <= x1 && x1 < cols && disp2buf[x1] > INVALID_DISP_SCALED && std::abs(disp2buf[x1] - d) > disp12MaxDiff) )
                dptr[x] = (short)INVALID_DISP_SCALED;
        }
    }
}

Older »