3
* select3obj.cpp With a calibration chessboard on a table, mark an object in a 3D box and
4
* track that object in all subsequent frames as long as the camera can see
5
* the chessboard. Also segments the object using the box projection. This
6
* program is useful for collecting large datasets of many views of an object
11
#include "opencv2/core.hpp"
12
#include <opencv2/core/utility.hpp>
13
#include "opencv2/imgproc.hpp"
14
#include "opencv2/calib3d.hpp"
15
#include "opencv2/imgcodecs.hpp"
16
#include "opencv2/videoio.hpp"
17
#include "opencv2/highgui.hpp"
26
// Long-form, user-facing description of the program: its purpose, the
// command-line options and the interactive controls.
// Fixes relative to the previous text: the -h option described the board
// height as "width", and several misspellings made the text harder to read.
const char* helphelp =
"\nThis program's purpose is to collect data sets of an object and its segmentation mask.\n"
"It shows how to use a calibrated camera together with a calibration pattern to\n"
"compute the homography of the plane the calibration pattern is on. It also shows grabCut\n"
"select3dobj -w=<board_width> -h=<board_height> [-s=<square_size>]\n"
" -i=<camera_intrinsics_filename> -o=<output_prefix>\n"
" -w=<board_width> Number of chessboard corners wide\n"
" -h=<board_height> Number of chessboard corners high\n"
" [-s=<square_size>] Optional measure of chessboard squares in meters\n"
" -i=<camera_intrinsics_filename> Camera matrix .yml file from calibration.cpp\n"
" -o=<output_prefix> Prefix the output segmentation images with this\n"
" [video_filename/cameraId] If present, read from that video file or that ID\n"
"Using a camera's intrinsics (from calibrating a camera -- see calibration.cpp) and an\n"
"image of the object sitting on a planar surface with a calibration pattern of\n"
"(board_width x board_height) on the surface, we draw a 3D box around the object. From\n"
"then on, we can move a camera and as long as it sees the chessboard calibration pattern,\n"
"it will store a mask of where the object is. We get successive images using <output_prefix>\n"
"of the segmentation mask containing the object. This makes creating training sets easy.\n"
"It is best if the chessboard is odd x even in dimensions to avoid ambiguous poses.\n"
"The actions one can use while the program is running are:\n"
" Select object as 3D box with the mouse.\n"
" First draw one line on the plane to outline the projection of that object on the plane\n"
" Then extend that line into a box to encompass the projection of that object onto the plane\n"
" Then use the mouse again to extend the box upwards from the plane to encase the object.\n"
" Then use the following commands\n"
" ESC - Reset the selection\n"
" SPACE - Skip the frame; move to the next frame (not in video mode)\n"
" ENTER - Confirm the selection. Grab next object in video mode.\n"
" q - Exit the program\n";
72
// Initial state: no event recorded yet (event = -1) and no button flags set.
MouseEvent() { event = -1; buttonState = 0; }
78
// Mouse callback for the selection window: records the cursor position and the
// flags value in the MouseEvent that `userdata` points to.
// NOTE(review): `userdata` is dereferenced without a null check, while main()
// registers this callback once with a null userdata pointer -- confirm that no
// event can be delivered before select3DBox re-registers it.
// NOTE(review): the `event` argument is not stored by the lines of this block,
// although select3DBox reads mouse.event -- confirm.
static void onMouse(int event, int x, int y, int flags, void* userdata)
80
MouseEvent* data = (MouseEvent*)userdata;
82
data->pt = Point(x,y);
83
// Kept as raw flags; select3DBox tests them against EVENT_FLAG_LBUTTON.
data->buttonState = flags;
86
// Reads camera calibration data from the storage file `filename`:
//   calibratedImageSize - from the "image_width" / "image_height" entries
//   distCoeffs          - from "distortion_coefficients"
//   cameraMatrix        - from "camera_matrix"
// Both matrices are converted to double precision if stored as another type.
// NOTE(review): no check that the file could be opened and no return statement
// are present in this block, and main() does not look at the result --
// confirm how a missing or malformed file is meant to be reported.
static bool readCameraMatrix(const string& filename,
87
Mat& cameraMatrix, Mat& distCoeffs,
88
Size& calibratedImageSize )
90
FileStorage fs(filename, FileStorage::READ);
91
fs["image_width"] >> calibratedImageSize.width;
92
fs["image_height"] >> calibratedImageSize.height;
93
fs["distortion_coefficients"] >> distCoeffs;
94
fs["camera_matrix"] >> cameraMatrix;
96
// main() later accesses the camera matrix through at<double>(), so both
// matrices are normalised to CV_64F here.
if( distCoeffs.type() != CV_64F )
97
distCoeffs = Mat_<double>(distCoeffs);
98
if( cameraMatrix.type() != CV_64F )
99
cameraMatrix = Mat_<double>(cameraMatrix);
104
// Appends the 3D positions of the chessboard corners to `corners`, row by row.
// The corner in row i, column j is placed at (j*squareSize, i*squareSize, 0),
// so every point has z = 0.
// NOTE(review): nothing in this block clears `corners` first -- confirm
// whether callers rely on it being empty on entry.
static void calcChessboardCorners(Size boardSize, float squareSize, vector<Point3f>& corners)
108
// Outer loop: rows (board height); inner loop: columns (board width).
for( int i = 0; i < boardSize.height; i++ )
109
for( int j = 0; j < boardSize.width; j++ )
110
corners.push_back(Point3f(float(j*squareSize),
111
float(i*squareSize), 0));
115
// Converts the image point `imgpt` into a 3D point (x, y, Z) by inverting the
// product of the camera matrix with a modified rotation matrix (see below).
// The returned z component is always the Z that was passed in.
// NOTE(review): `R1` is not declared in this block; presumably it is a copy
// of `R` so that the caller's matrix is left untouched -- confirm.
static Point3f image2plane(Point2f imgpt, const Mat& R, const Mat& tvec,
116
const Mat& cameraMatrix, double Z)
119
// Replace the third column by (third column * Z + tvec); cameraMatrix*R1 then
// acts on vectors of the form (x, y, 1).
R1.col(2) = R1.col(2)*Z + tvec;
120
// Apply the inverse of that 3x3 matrix to the homogeneous pixel (u, v, 1).
Mat_<double> v = (cameraMatrix*R1).inv()*(Mat_<double>(3,1) << imgpt.x, imgpt.y, 1);
121
// Reciprocal of the homogeneous scale; falls back to 0 when the scale is
// within DBL_EPSILON of zero, to avoid dividing by it.
double iw = fabs(v(2,0)) > DBL_EPSILON ? 1./v(2,0) : 0;
122
return Point3f((float)(v(0,0)*iw), (float)(v(1,0)*iw), (float)Z);
126
// Projects the user-defined 3D box into the image, optionally draws its outline
// into `shownFrame`, and copies the pixels of `frame` covered by the projected
// box into `selectedObjFrame` (which is zero everywhere else).
//   frame                  - input image
//   shownFrame             - image to draw on; drawing is skipped when it is empty
//   selectedObjFrame       - output: masked copy of `frame`
//   cameraMatrix/rvec/tvec - passed to projectPoints to map box corners to pixels
//   box                    - box[0..2] supply base corners, box[3].z the box height
//   nobjpt                 - selects which outline is drawn (line, base, full box)
//   runExtraSegmentation   - when true the hull mask is refined with grabCut
// NOTE(review): the return statement(s) and any guards on `nobjpt` around the
// corner construction are not present in this block; main() uses the result as
// a ROI, so presumably `roi` is returned -- confirm.
static Rect extract3DBox(const Mat& frame, Mat& shownFrame, Mat& selectedObjFrame,
127
const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec,
128
const vector<Point3f>& box, int nobjpt, bool runExtraSegmentation)
130
// Start from an all-zero output with the same size and type as the input.
selectedObjFrame = Mat::zeros(frame.size(), frame.type());
133
vector<Point3f> objpt;
134
vector<Point2f> imgpt;
136
// Collect the box corners in 3D.
objpt.push_back(box[0]);
138
objpt.push_back(box[1]);
141
objpt.push_back(box[2]);
142
// Fourth base corner: completes the parallelogram spanned by the first three.
objpt.push_back(objpt[2] - objpt[1] + objpt[0]);
145
// Top face: the four base corners with z replaced by the height in box[3].z.
for( int i = 0; i < 4; i++ )
146
objpt.push_back(Point3f(objpt[i].x, objpt[i].y, box[3].z));
148
// Project with an empty distortion-coefficient matrix.
projectPoints(Mat(objpt), rvec, tvec, cameraMatrix, Mat(), imgpt);
150
// Draw only when the caller supplied an image to draw on.
if( !shownFrame.empty() )
153
// Single marker at the first projected corner.
// NOTE(review): the condition selecting this case is not present in this block.
circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA);
154
// Two points: both end markers plus the connecting segment.
else if( nobjpt == 2 )
156
circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA);
157
circle(shownFrame, imgpt[1], 3, Scalar(0,255,0), -1, LINE_AA);
158
line(shownFrame, imgpt[0], imgpt[1], Scalar(0,255,0), 3, LINE_AA);
160
// Three points: the closed base quadrilateral.
else if( nobjpt == 3 )
161
for( int i = 0; i < 4; i++ )
163
circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA);
164
line(shownFrame, imgpt[i], imgpt[(i+1)%4], Scalar(0,255,0), 3, LINE_AA);
167
// All eight corners: edges of both faces plus the vertical edges.
for( int i = 0; i < 8; i++ )
169
circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA);
170
// (i+1)%4 + (i/4)*4 is the next corner on the same face (0-3 or 4-7).
line(shownFrame, imgpt[i], imgpt[(i+1)%4 + (i/4)*4], Scalar(0,255,0), 3, LINE_AA);
171
// i%4 is the base corner with the same x/y as corner i (same point for i < 4).
line(shownFrame, imgpt[i], imgpt[i%4], Scalar(0,255,0), 3, LINE_AA);
178
// Convex hull of the projected corners = outline of the box in the image.
// NOTE(review): `hull` is declared outside the lines of this block.
convexHull(Mat_<Point>(Mat(imgpt)), hull);
179
// 8-bit mask, 255 inside the hull and 0 elsewhere.
Mat selectedObjMask = Mat::zeros(frame.size(), CV_8U);
180
fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(255), 8, 0);
181
// Bounding rectangle of the hull, clipped to the image area.
Rect roi = boundingRect(Mat(hull)) & Rect(Point(), frame.size());
183
if( runExtraSegmentation )
185
// Re-label the mask for grabCut: background everywhere, probable foreground
// inside the hull.
selectedObjMask = Scalar::all(GC_BGD);
186
fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(GC_PR_FGD), 8, 0);
187
Mat bgdModel, fgdModel;
188
// Three grabCut iterations, with both the rectangle and the mask init flags set.
grabCut(frame, selectedObjMask, roi, bgdModel, fgdModel,
189
3, GC_INIT_WITH_RECT + GC_INIT_WITH_MASK);
190
// Keep only the lowest bit of each label; in OpenCV's GrabCut classes the two
// foreground labels are the odd values, so this leaves 1 for foreground pixels.
bitwise_and(selectedObjMask, Scalar::all(1), selectedObjMask);
193
// Copy only the pixels whose mask value is non-zero.
frame.copyTo(selectedObjFrame, selectedObjMask);
198
// Interactive selection of the 3D box. Mouse events of window `windowname` are
// received through onMouse; the current outline is shown in `windowname` and
// the masked object preview in `selWinName`. The selected points are written
// into `box`.
// Return codes visible in this block: -1 when SPACE is pressed and -100 when
// 'q' / 'Q' is pressed.
// NOTE(review): the declarations of `mouse`, `nobjpt`, `npt`, `Z` and `c`, the
// loop structure, several guards and the remaining return statements are not
// present in this block -- confirm against the complete function.
static int select3DBox(const string& windowname, const string& selWinName, const Mat& frame,
199
const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec,
200
vector<Point3f>& box)
202
// Minimum distance between consecutive box points for a release to be accepted.
const float eps = 1e-3f;
205
// Route this window's mouse events into `mouse`.
setMouseCallback(windowname, onMouse, &mouse);
206
vector<Point3f> tempobj(8);
207
vector<Point2f> imgpt(4), tempimg(8);
208
vector<Point> temphull;
210
Mat R, selectedObjMask, selectedObjFrame, shownFrame;
217
// True while the left-button flag is set in the last reported flags.
bool dragging = (mouse.buttonState & EVENT_FLAG_LBUTTON) != 0;
220
// React to press, release or drag as long as fewer than four points exist.
if( (mouse.event == EVENT_LBUTTONDOWN ||
221
mouse.event == EVENT_LBUTTONUP ||
222
dragging) && nobjpt < 4 )
224
Point2f m = mouse.pt;
231
// Find the already placed image point closest to the cursor, starting with
// the most recent one as the initial candidate.
int nearestIdx = npt-1;
235
for( int i = 1; i < npt; i++ )
236
if( norm(m - imgpt[i]) < norm(m - imgpt[nearestIdx]) )
242
// Helper point in the z = 0 plane: the nearest box point offset by the unit
// vector (dy, -dx)/len, i.e. perpendicular to the edge box[0] -> box[1].
float dx = box[1].x - box[0].x, dy = box[1].y - box[0].y;
243
float len = 1.f/std::sqrt(dx*dx+dy*dy);
244
tempobj[0] = Point3f(dy*len + box[nearestIdx].x,
245
-dx*len + box[nearestIdx].y, 0.f);
248
// Helper point with z = 1 at the x/y of the nearest box point.
tempobj[0] = Point3f(box[nearestIdx].x, box[nearestIdx].y, 1.f);
250
// Project the helper point to obtain the allowed drag direction in the image.
projectPoints(Mat(tempobj), rvec, tvec, cameraMatrix, Mat(), tempimg);
252
// a: anchor point, b: projected helper, d1: allowed direction, d2: cursor offset.
Point2f a = imgpt[nearestIdx], b = tempimg[0], d1 = b - a, d2 = m - a;
253
float n1 = (float)norm(d1), n2 = (float)norm(d2);
258
// Projection of the cursor offset onto d1, expressed in multiples of d1 ...
Z = d1.dot(d2)/(n1*n1);
259
// ... which places the new image point on the line through `a` along d1.
imgpt[npt] = d1*Z + a;
262
// Convert to 3D: points 0..2 use height 0, the fourth point uses height Z.
box[npt] = image2plane(imgpt[npt], R, tvec, cameraMatrix, npt<3 ? 0 : Z);
264
// Condition for accepting the point: the first on button press, later ones on
// button release provided they are more than eps away from the previous point.
if( (npt == 0 && mouse.event == EVENT_LBUTTONDOWN) ||
265
(npt > 0 && norm(box[npt] - box[npt-1]) > eps &&
266
mouse.event == EVENT_LBUTTONUP) )
271
// Seed the next point with a copy of the one just before it.
imgpt[nobjpt] = imgpt[nobjpt-1];
272
box[nobjpt] = box[nobjpt-1];
278
//mouse.buttonState = 0;
282
// Redraw the outline and the preview; grabCut refinement is disabled here.
frame.copyTo(shownFrame);
283
extract3DBox(frame, shownFrame, selectedObjFrame,
284
cameraMatrix, rvec, tvec, box, npt, false);
285
imshow(windowname, shownFrame);
286
imshow(selWinName, selectedObjFrame);
289
// Key code 27 (ESC); the handling that follows is not present in this block.
if( (c & 255) == 27 )
293
// 'q' / 'Q' and SPACE both leave the function, with different return codes.
if( c == 'q' || c == 'Q' || c == ' ' )
296
return c == ' ' ? -1 : -100;
298
// ENTER is honoured only for a complete box (four points) with non-zero height.
if( (c == '\r' || c == '\n') && nobjpt == 4 && box[3].z != 0 )
304
// Reads previously stored views from the storage file `filename`. For every
// element of the "views" sequence it appends
//   - the "image" entry (as string) to `imagelist`,
//   - a Rect built from the four values of the "rect" entry to `roiList`,
//   - a Vec6f built from the six values of the "pose" entry to `poseList`.
// NOTE(review): this block reads the rectangle under the key "rect", while
// writeModelViews stores it under "roi" -- the two keys do not match; confirm
// which one is intended.
// NOTE(review): the handling of `box`, the declaration of `n` and the return
// statements are not present in this block.
static bool readModelViews( const string& filename, vector<Point3f>& box,
305
vector<string>& imagelist,
306
vector<Rect>& roiList, vector<Vec6f>& poseList )
313
FileStorage fs(filename, FileStorage::READ);
318
FileNode all = fs["views"];
319
// The "views" entry must be a sequence.
if( all.type() != FileNode::SEQ )
321
FileNodeIterator it = all.begin(), it_end = all.end();
323
for(; it != it_end; ++it)
326
imagelist.push_back((string)n["image"]);
327
FileNode nr = n["rect"];
328
// Four integers passed to the Rect constructor in order.
roiList.push_back(Rect((int)nr[0], (int)nr[1], (int)nr[2], (int)nr[3]));
329
FileNode np = n["pose"];
330
// Six floats; main() stores the rotation vector first, then the translation.
poseList.push_back(Vec6f((float)np[0], (float)np[1], (float)np[2],
331
(float)np[3], (float)np[4], (float)np[5]));
338
// Writes the box and all captured views to the storage file `filename`. Each
// view is written as a map with the entries "image", "roi" (x, y, width,
// height) and "pose" (six values); the maps are collected in the "views"
// sequence.
// The three lists must have the same length; this is enforced with CV_Assert.
// NOTE(review): the rectangle is stored under "roi", whereas readModelViews
// looks it up under "rect" -- confirm which key is intended.
// NOTE(review): the declaration of `r`, the opening of the box sequence and
// the return statements are not present in this block.
static bool writeModelViews(const string& filename, const vector<Point3f>& box,
339
const vector<string>& imagelist,
340
const vector<Rect>& roiList,
341
const vector<Vec6f>& poseList)
343
FileStorage fs(filename, FileStorage::WRITE);
348
// Write the box points, close that sequence, then open the "views" sequence.
fs << box << "]" << "views" << "[";
350
size_t i, nviews = imagelist.size();
352
CV_Assert( nviews == roiList.size() && nviews == poseList.size() );
354
for( i = 0; i < nviews; i++ )
357
Vec6f p = poseList[i];
359
// One map per view; "[:" opens the nested sequences for "roi" and "pose".
fs << "{" << "image" << imagelist[i] <<
360
"roi" << "[:" << r.x << r.y << r.width << r.height << "]" <<
361
"pose" << "[:" << p[0] << p[1] << p[2] << p[3] << p[4] << p[5] << "]" << "}";
369
// Reads a list of strings from the storage file `filename`: takes the first
// top-level node, requires it to be a sequence, and appends every element
// (converted to string) to `l`.
// NOTE(review): the return statements and any check that the file was opened
// are not present in this block.
static bool readStringList( const string& filename, vector<string>& l )
372
FileStorage fs(filename, FileStorage::READ);
375
FileNode n = fs.getFirstTopLevelNode();
376
// Anything other than a sequence is not treated as a string list.
if( n.type() != FileNode::SEQ )
378
FileNodeIterator it = n.begin(), it_end = n.end();
379
for( ; it != it_end; ++it )
380
l.push_back((string)*it);
385
// Program entry point. Parses the command line, loads the camera intrinsics,
// opens the input (image list, video file or camera), and for each frame:
// undistorts it, looks for the chessboard, estimates the board pose, lets the
// user select the 3D box when requested, and stores the masked object image
// together with its ROI and pose. The collected views are written to the
// index file "<output_prefix>_index.yml".
// NOTE(review): many declarations (boardSize, squareSize, cameraId, cmd, path,
// frame0, rvec, tvec, dummy, p, i, frameIdx), the frame loop and several
// branches are not present in this block -- comments below describe only the
// visible statements.
int main(int argc, char** argv)
387
const char* help = "Usage: select3dobj -w=<board_width> -h=<board_height> [-s=<square_size>]\n"
388
"\t-i=<intrinsics_filename> -o=<output_prefix> [video_filename/cameraId]\n";
389
const char* screen_help =
391
"\tSelect object as 3D box with the mouse. That's it\n"
392
"\tESC - Reset the selection\n"
393
"\tSPACE - Skip the frame; move to the next frame (not in video mode)\n"
394
"\tENTER - Confirm the selection. Grab next object in video mode.\n"
395
"\tq - Exit the program\n";
397
// Option keys: -s defaults to 1 and the positional @input defaults to 0.
cv::CommandLineParser parser(argc, argv, "{help h||}{w||}{h||}{s|1|}{i||}{o||}{@input|0|}");
398
if (parser.has("help"))
404
string intrinsicsFilename;
405
string outprefix = "";
406
string inputName = "";
410
vector<string> imageList;
411
intrinsicsFilename = parser.get<string>("i");
412
outprefix = parser.get<string>("o");
413
boardSize.width = parser.get<int>("w");
414
boardSize.height = parser.get<int>("h");
415
squareSize = parser.get<double>("s");
416
// A positional argument consisting of exactly one digit is taken as a camera id.
if ( parser.get<string>("@input").size() == 1 && isdigit(parser.get<string>("@input")[0]) )
417
cameraId = parser.get<int>("@input");
419
// NOTE(review): presumably the else branch -- any other value is kept as an
// input file name.
inputName = parser.get<string>("@input");
423
parser.printErrors();
426
// Validate the numeric options: board dimensions and square size must be positive.
if ( boardSize.width <= 0 )
428
printf("Incorrect -w parameter (must be a positive integer)\n");
432
if ( boardSize.height <= 0 )
434
printf("Incorrect -h parameter (must be a positive integer)\n");
438
if ( squareSize <= 0 )
440
printf("Incorrect -s parameter (must be a positive real number)\n");
444
Mat cameraMatrix, distCoeffs;
445
Size calibratedImageSize;
446
// NOTE(review): the bool result of readCameraMatrix is not examined here.
readCameraMatrix(intrinsicsFilename, cameraMatrix, distCoeffs, calibratedImageSize );
448
VideoCapture capture;
449
if( !inputName.empty() )
451
// Try the input as an image list first; if that fails, as a video file.
if( !readStringList(inputName, imageList) &&
452
!capture.open(inputName))
454
fprintf( stderr, "The input file could not be opened\n" );
459
capture.open(cameraId);
461
// Neither a capture device/file nor an image list is available: give up with -2.
if( !capture.isOpened() && imageList.empty() )
462
return fprintf( stderr, "Could not initialize video capture\n" ), -2;
464
const char* outbarename = 0;
466
// Locate the last path separator in the output prefix; both '/' and '\\' are tried.
outbarename = strrchr(outprefix.c_str(), '/');
467
const char* tmp = strrchr(outprefix.c_str(), '\\');
469
// NOTE(review): the prefix comes from the command line and is formatted into
// `cmd` with sprintf and executed through system(); the size of `cmd` is not
// visible here -- check for buffer overflow and shell-injection exposure.
sprintf(cmd, "mkdir %s", outprefix.c_str());
470
if( tmp && tmp > outbarename )
474
// Cut the command at the separator position: 6 is the length of "mkdir ".
cmd[6 + outbarename - outprefix.c_str()] = '\0';
475
// A non-zero exit status of the command trips the assertion below.
// NOTE(review): mkdir usually fails when the directory already exists, which
// would abort the program on a second run -- confirm.
int result = system(cmd);
476
CV_Assert(result == 0);
480
// Here the whole prefix serves as the bare file name.
outbarename = outprefix.c_str();
483
Mat frame, shownFrame, selectedObjFrame, mapxy;
485
namedWindow("View", 1);
486
namedWindow("Selected Object", 1);
487
// NOTE(review): the callback is registered with a null userdata pointer while
// onMouse dereferences userdata unconditionally -- confirm this is safe.
setMouseCallback("View", onMouse, 0);
488
bool boardFound = false;
490
string indexFilename = format("%s_index.yml", outprefix.c_str());
492
vector<string> capturedImgList;
493
vector<Rect> roiList;
494
vector<Vec6f> poseList;
495
vector<Point3f> box, boardPoints;
497
// Load an existing index from an earlier run, if any (result not examined),
// and compute the 3D chessboard corner positions.
readModelViews(indexFilename, box, capturedImgList, roiList, poseList);
498
calcChessboardCorners(boardSize, (float)squareSize, boardPoints);
500
// With an image list the selection is requested from the start.
bool grabNext = !imageList.empty();
507
if( !imageList.empty() )
509
if( i < (int)imageList.size() )
510
frame0 = imread(string(imageList[i]), 1);
518
// The frame resolution differs from the calibration resolution.
if( frame0.size() != calibratedImageSize )
520
double sx = (double)frame0.cols/calibratedImageSize.width;
521
double sy = (double)frame0.rows/calibratedImageSize.height;
523
// adjust the camera matrix for the new resolution
524
cameraMatrix.at<double>(0,0) *= sx;
525
cameraMatrix.at<double>(0,2) *= sx;
526
cameraMatrix.at<double>(1,1) *= sy;
527
cameraMatrix.at<double>(1,2) *= sy;
530
// Build the undistortion map, then zero the distortion coefficients: frames
// are undistorted by remap() below before the pose is estimated.
// NOTE(review): the guard that decides when this runs is not present here.
initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(),
531
cameraMatrix, frame0.size(),
532
CV_32FC2, mapxy, dummy );
533
distCoeffs = Mat::zeros(5, 1, CV_64F);
535
remap(frame0, frame, mapxy, Mat(), INTER_LINEAR);
536
vector<Point2f> foundBoardCorners;
537
boardFound = findChessboardCorners(frame, boardSize, foundBoardCorners);
541
// Board pose from the 3D corner layout and the detected image corners.
solvePnP(Mat(boardPoints), Mat(foundBoardCorners), cameraMatrix,
542
distCoeffs, rvec, tvec, false);
544
frame.copyTo(shownFrame);
545
drawChessboardCorners(shownFrame, boardSize, Mat(foundBoardCorners), boardFound);
546
selectedObjFrame = Mat::zeros(frame.size(), frame.type());
548
// Interactive box selection requires a detected board and a pending request.
if( boardFound && grabNext )
552
int code = select3DBox("View", "Selected Object", frame,
553
cameraMatrix, rvec, tvec, box);
560
// Extract the object with all four box points and grabCut refinement enabled.
Rect r = extract3DBox(frame, shownFrame, selectedObjFrame,
561
cameraMatrix, rvec, tvec, box, 4, true);
564
// Search for an output file name "<prefix>NNNN.jpg", trying indices below 10000.
const int maxFrameIdx = 10000;
566
for(;frameIdx < maxFrameIdx;frameIdx++)
568
sprintf(path, "%s%04d.jpg", outprefix.c_str(), frameIdx);
569
// Opened for reading; NOTE(review): how `f` is tested and closed is not
// present in this block.
FILE* f = fopen(path, "rb");
574
// The loop ran through every index.
if( frameIdx == maxFrameIdx )
576
printf("Can not save the image as %s<...>.jpg", outprefix.c_str());
579
// Save only the ROI part of the masked object image.
imwrite(path, selectedObjFrame(r));
581
capturedImgList.push_back(string(path));
582
roiList.push_back(r);
585
// RV and TV are Mat headers over p[0..2] and p[3..5]; the pose is converted
// to float into them and then packed as (rotation, translation).
// NOTE(review): presumably convertTo writes into the array `p` because the
// destination headers already have matching size and type -- confirm.
Mat RV(3, 1, CV_32F, p), TV(3, 1, CV_32F, p+3);
586
rvec.convertTo(RV, RV.type());
587
tvec.convertTo(TV, TV.type());
588
poseList.push_back(Vec6f(p[0], p[1], p[2], p[3], p[4], p[5]));
591
// With an image list, request a selection again for the next image.
grabNext = !imageList.empty();
594
imshow("View", shownFrame);
595
imshow("Selected Object", selectedObjFrame);
596
// Wait 30 ms when there is no image list and a box exists, otherwise 300 ms.
int c = waitKey(imageList.empty() && !box.empty() ? 30 : 300);
597
if( c == 'q' || c == 'Q' )
599
if( c == '\r' || c == '\n' )
603
// Persist the box and all captured views.
writeModelViews(indexFilename, box, capturedImgList, roiList, poseList);