@inproceedings{HosseiniJafari2019, title = {{iPose}: Instance-Aware {6D} Pose Estimation of Partly Occluded Objects}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {11363}, year = {2019}, pages = {477--492}, abstract = {We address the task of 6D pose estimation of known rigid objects from single input images in scenarios where the objects are partly occluded. Recent RGB-D-based methods are robust to moderate degrees of occlusion. For RGB inputs, no previous method works well for partly occluded objects. Our main contribution is to present the first deep learning-based system that estimates accurate poses for partly occluded objects from RGB-D and RGB input. We achieve this with a new instance-aware pipeline that decomposes 6D object pose estimation into a sequence of simpler steps, where each step removes specific aspects of the problem. The first step localizes all known objects in the image using an instance segmentation network, and hence eliminates surrounding clutter and occluders. The second step densely maps pixels to 3D object surface positions, so called object coordinates, using an encoder-decoder network, and hence eliminates object appearance. The third, and final, step predicts the 6D pose using geometric optimization. 
We demonstrate that we significantly outperform the state-of-the-art for pose estimation of partly occluded objects for both RGB and RGB-D input.}, isbn = {9783030208929}, issn = {16113349}, doi = {10.1007/978-3-030-20893-6_30}, author = {Hosseini Jafari, Omid and Mustikovela, Siva Karthik and Pertsch, Karl and Brachmann, Eric and Rother, Carsten} } @proceedings{6310, title = {{iPose}: Instance-Aware {6D} Pose Estimation of Partly Occluded Objects}, year = {2018}, author = {Hosseini Jafari, Omid and Mustikovela, Siva Karthik and Pertsch, Karl and Brachmann, Eric and Rother, Carsten} } @inproceedings{Kirillov2017a, title = {{InstanceCut}: From edges to instances with {MultiCut}}, booktitle = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017}, volume = {2017-January}, year = {2017}, pages = {7322--7331}, abstract = {This work addresses the task of instance-aware semantic segmentation. Our key motivation is to design a simple method with a new modelling-paradigm, which therefore has a different trade-off between advantages and disadvantages compared to known approaches. Our approach, we term InstanceCut, represents the problem by two output modalities: (i) an instance-agnostic semantic segmentation and (ii) all instance-boundaries. The former is computed from a standard convolutional neural network for semantic segmentation, and the latter is derived from a new instance-aware edge detection model. To reason globally about the optimal partitioning of an image into instances, we combine these two modalities into a novel MultiCut formulation. We evaluate our approach on the challenging CityScapes dataset. 
Despite the conceptual simplicity of our approach, we achieve the best result among all published methods, and perform particularly well for rare object classes.}, isbn = {9781538604571}, doi = {10.1109/CVPR.2017.774}, author = {Kirillov, Alexander and Levinkov, Evgeny and Andres, Bj{\"o}rn and Savchynskyy, Bogdan and Rother, Carsten} } @inproceedings{Mund2016, title = {Introducing {LiDAR} Point Cloud-based Object Classification for Safer Apron Operations}, booktitle = {International Symposium on Enhanced Solutions for Aircraft and Vehicle Surveillance Applications}, year = {2016}, abstract = {Current procedures for conventional and remote airport ground control still rely on the direct (camera-) view. Despite further support by different Radar applications occasional shortcomings in the awareness of the responsible controllers may occur, particularly under adverse weather conditions, giving rise to capacity backlogs, incidents and accidents. As Laser scanners and computer vision algorithms have reached new performance levels in recent years, we proposed a novel concept for complete and independent airport apron surveillance based on LiDAR 3D point data. In this paper we extend our object detection/segmentation technique by addressing object classification in LiDAR 3D scans. We hereby enable LiDAR{\textquoteright}s unique capability to classify non-cooperative objects by means of a single sensor and learned model knowledge. Our technique was able to classify and to estimate the poses of an Airbus A319-100 and a Boeing B737-700 parked on the airport apron. 
In the future we will enhance our classification technique to a wider range of objects including moving ground vehicles and pedestrians.}, keywords = {lidar, 3d point cloud, aircraft, airport ground surveillance, apron control, classification, laser scanning, pose estimation}, url = {https://goo.gl/28Yoqh}, author = {Mund, Johannes and Michel, Frank and Dieke-Meier, Franziska and Fricke, Hartmut and Meyer, Lothar and Rother, Carsten} } @inproceedings{Kirillov2015a, title = {Inferring {M-best} diverse labelings in a single one}, booktitle = {Proceedings of the IEEE International Conference on Computer Vision}, volume = {2015 International Conference on Computer Vision}, year = {2015}, pages = {1814--1822}, abstract = {We consider the task of finding M-best diverse solutions in a graphical model. In a previous work by Batra et al. an algorithmic approach for finding such solutions was proposed, and its usefulness was shown in numerous applications. Contrary to previous work we propose a novel formulation of the problem in form of a single energy minimization problem in a specially constructed graphical model. We show that the method of Batra et al. can be considered as a greedy approximate algorithm for our model, whereas we introduce an efficient specialized optimization technique for it, based on alpha-expansion. We evaluate our method on two application scenarios, interactive and semantic image segmentation, with binary and multiple labels. In both cases we achieve considerably better error rates than state-of-the-art diversity methods. 
Furthermore, we empirically discover that in the binary label case we were able to reach global optimality for all test instances.}, isbn = {9781467383912}, issn = {15505499}, doi = {10.1109/ICCV.2015.211}, author = {Kirillov, Alexander and Savchynskyy, Bogdan and Schlesinger, Dmitrij and Vetrov, Dmitry and Rother, Carsten} } @inproceedings{Schelten2015, title = {Interleaved regression tree field cascades for blind image deconvolution}, booktitle = {Proceedings - 2015 IEEE Winter Conference on Applications of Computer Vision, WACV 2015}, year = {2015}, pages = {494--501}, abstract = {Image blur from camera shake is a common cause for poor image quality in digital photography, prompting a significant recent interest in image deblurring. The vast majority of work on blind deblurring splits the problem into two subsequent steps: First, the blur process (i.e., blur kernel) is estimated, then the image is restored given the estimated kernel using a non-blind deblurring algorithm. Recent work in non-blind deblurring has shown that discriminative approaches can have clear image quality and runtime benefits over typical generative formulations. In this paper, we propose a cascade for blind deblurring that alternates between kernel estimation and discriminative deblurring using regression tree fields (RTFs). We further contribute a new dataset of realistic image blur kernels from human camera shake, which we use to train the discriminative component. 
Extensive qualitative and quantitative experiments show a clear gain in image quality by interleaving kernel estimation and discriminative deblurring in an iterative cascade.}, isbn = {9781479966820}, doi = {10.1109/WACV.2015.72}, author = {Schelten, Kevin and Nowozin, Sebastian and Jancsary, Jeremy and Rother, Carsten and Roth, Stefan} } @inproceedings{Toppe2011, title = {Image-based {3D} modeling via {Cheeger} sets}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {6492}, number = {PART 1}, year = {2011}, pages = {53--64}, abstract = {We propose a novel variational formulation for generating 3D models of objects from a single view. Based on a few user scribbles in an image, the algorithm automatically extracts the object silhouette and subsequently determines a 3D volume by minimizing the weighted surface area for a fixed user-specified volume. The respective energy can be efficiently minimized by means of convex relaxation techniques, leading to visually pleasing smooth surfaces within a matter of seconds. In contrast to existing techniques for single-view reconstruction, the proposed method is based on an implicit surface representation and a transparent optimality criterion, assuring high-quality 3D models of arbitrary topology with a minimum of user input. {\textcopyright} 2011 Springer-Verlag Berlin Heidelberg.}, isbn = {9783642193149}, issn = {03029743}, doi = {10.1007/978-3-642-19315-6_5}, author = {T{\"o}ppe, Eno and Oswald, Martin R. and Cremers, Daniel and Rother, Carsten} } @article{Rother2011, title = {Interactive foreground extraction using graph cut}, journal = {Advances in Markov Random Fields for Vision and Image Processing}, year = {2011}, pages = {1--20}, abstract = {Note, this is an extended version of chapter 7 from the book: Markov Random Fields for Vision and Image Processing, MIT Press [6]. 
In this Technical Report, references to other chapters are with respect to the book. The differences are, a new section 4.3 and extra details in section 3.2 and 3.3. The topic of interactive image segmentation has received considerable attention in the computer vision community in the last decade. Today, this topic is very mature and commercial products exist which feature advanced research solutions. This means that interactive image segmentation is today probably one of the most used computer vision technologies worldwide. In this chapter we review one class of interactive segmentation techniques, which use discrete optimization and a regional selection interface. We begin the chapter by explaining the seminal work of Boykov and Jolly [9]. After that the GrabCut technique [36] is introduced, which improves on [9]. GrabCut is the underlying algorithm for the Background Removal tool in the Microsoft Office 2010 product. In the third part of the chapter many interesting features and details are explained which are part of the product. In this process several recent research articles are reviewed. Finally, the Background Removal tool, as well as [9, 36], are evaluated in different ways on publicly available databases. This includes static and dynamic user inputs.}, url = {http://research.microsoft.com/pubs/147408/rotheretalmrfbook-grabcut.pdf}, author = {Rother, Carsten and Kolmogorov, Vladimir} } @inproceedings{Lempitsky2009, title = {Image segmentation with a bounding box prior}, booktitle = {Proceedings of the IEEE International Conference on Computer Vision}, year = {2009}, pages = {277--284}, abstract = {User-provided object bounding box is a simple and popular interaction paradigm considered by many existing interactive image segmentation frameworks. However, these frameworks tend to exploit the provided bounding box merely to exclude its exterior from consideration and sometimes to initialize the energy minimization. 
In this paper, we discuss how the bounding box can be further used to impose a powerful topological prior, which prevents the solution from excessive shrinking and ensures that the user-provided box bounds the segmentation in a sufficiently tight way. The prior is expressed using hard constraints incorporated into the global energy minimization framework leading to an NP-hard integer program. We then investigate the possible optimization strategies including linear relaxation as well as a new graph cut algorithm called pinpointing. The latter can be used either as a rounding method for the fractional LP solution, which is provably better than thresholding-based rounding, or as a fast standalone heuristic. We evaluate the proposed algorithms on a publicly available dataset, and demonstrate the practical benefits of the new prior both qualitatively and quantitatively. {\textcopyright}2009 IEEE.}, isbn = {9781424444205}, doi = {10.1109/ICCV.2009.5459262}, author = {Lempitsky, Victor and Kohli, Pushmeet and Rother, Carsten and Sharp, Toby} } @inproceedings{Lempitsky2008a, title = {Image segmentation by branch-and-mincut}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {5305}, number = {PART 4}, year = {2008}, pages = {15--29}, abstract = {Efficient global optimization techniques such as graph cut exist for energies corresponding to binary image segmentation from low-level cues. However, introducing a high-level prior such as a shape prior or a color-distribution prior into the segmentation process typically results in an energy that is much harder to optimize. The main contribution of the paper is a new global optimization framework for a wide class of such energies. The framework is built upon two powerful techniques: graph cut and branch-and-bound. These techniques are unified through the derivation of lower bounds on the energies. 
Being computable via graph cut, these bounds are used to prune branches within a branch-and-bound search. We demonstrate that the new framework can compute globally optimal segmentations for a variety of segmentation scenarios in a reasonable time on a modern CPU. These scenarios include unsupervised segmentation of an object undergoing 3D pose change, category-specific shape segmentation, and the segmentation under intensity/color priors defined by Chan-Vese and GrabCut functionals. {\textcopyright} 2008 Springer Berlin Heidelberg.}, isbn = {3540886923}, issn = {03029743}, doi = {10.1007/978-3-540-88693-8_2}, author = {Lempitsky, Victor and Blake, Andrew and Rother, Carsten} } @inproceedings{Lempitsky2008b, title = {Image segmentation by branch-and-mincut}, booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)}, volume = {5305}, number = {PART 4}, year = {2008}, pages = {15--29}, abstract = {Efficient global optimization techniques such as graph cut exist for energies corresponding to binary image segmentation from low-level cues. However, introducing a high-level prior such as a shape prior or a color-distribution prior into the segmentation process typically results in an energy that is much harder to optimize. The main contribution of the paper is a new global optimization framework for a wide class of such energies. The framework is built upon two powerful techniques: graph cut and branch-and-bound. These techniques are unified through the derivation of lower bounds on the energies. Being computable via graph cut, these bounds are used to prune branches within a branch-and-bound search. We demonstrate that the new framework can compute globally optimal segmentations for a variety of segmentation scenarios in a reasonable time on a modern CPU. 
These scenarios include unsupervised segmentation of an object undergoing 3D pose change, category-specific shape segmentation, and the segmentation under intensity/color priors defined by Chan-Vese and GrabCut functionals. {\textcopyright} 2008 Springer Berlin Heidelberg.}, isbn = {3540886923}, issn = {03029743}, doi = {10.1007/978-3-540-88693-8_2}, author = {Lempitsky, Victor and Blake, Andrew and Rother, Carsten} } @inproceedings{Rhemann2008, title = {Improving color modeling for alpha matting}, booktitle = {BMVC 2008 - Proceedings of the British Machine Vision Conference 2008}, year = {2008}, abstract = {This paper addresses the problem of extracting an alpha matte from a single photograph given a user-defined trimap. A crucial part of this task is the color modeling step where for each pixel the optimal alpha value, together with its confidence, is estimated individually. This forms the data term of the objective function. It comprises of three steps: (i) Collecting a candidate set of potential fore- and background colors; (ii) Selecting high confidence samples from the candidate set; (iii) Estimating a sparsity prior to remove blurry artifacts. We introduce novel ideas for each of these steps and show that our approach considerably improves over state-of-the-art techniques by evaluating it on a large database of 54 images with known high-quality ground truth.}, doi = {10.5244/C.22.115}, author = {Rhemann, Christoph and Rother, Carsten and Gelautz, Margrit} }