@article{Rother2011,
title = {Interactive foreground extraction using graph cut},
journal = {Advances in Markov \ldots},
year = {2011},
pages = {1--20},
abstract = {Note, this is an extended version of chapter 7 from the book: Markov Random Fields for Vision and Image Processing, MIT Press [6]. In this Technical Report, references to other chapters are with respect to the book. The differences are, a new section 4.3 and extra details in section 3.2 and 3.3 The topic of interactive image segmentation has received considerable attention in the computer vision community in the last decade. Today, this topic is very mature and commercial products exist which feature advanced research solutions. This means that interactive image segmentation is today probably one of the most used computer vision technologies worldwide. In this chapter we review one class of interactive segmen-tation techniques, which use discrete optimization and a regional selection interface. We begin the chapter by explaining the seminal work of Boykov and Jolly [9]. After that the GrabCut technique [36] is introduced, which improves on [9]. GrabCut is the underlying algorithm for the Background Removal tool in the Microsoft Office 2010 product. In the third part of the chapter many interesting features and details are explained which are part of the product. In this process several recent research articles are reviewed. Finally, the Background Removal tool, as well as [9, 36], are evaluated in different ways on publicly available databases. This includes static and dynamic user inputs. 1},
url = {http://research.microsoft.com/pubs/147408/rotheretalmrfbook-grabcut.pdf},
internal-note = {NOTE(review): journal field looks like truncated auto-export text; abstract says this is an extended Technical Report version of a book chapter (MIT Press) -- verify venue and consider @techreport},
author = {Rother, Carsten and Kolmogorov, Vladimir}
}
@inproceedings{Vicente2011,
title = {Object cosegmentation},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
year = {2011},
pages = {2217--2224},
abstract = {Cosegmentation is typically defined as the task of jointly segmenting something similar in a given set of images. Existing methods are too generic and so far have not demonstrated competitive results for any specific task. In this paper we overcome this limitation by adding two new aspects to cosegmentation: (1) the "something" has to be an object, and (2) the "similarity" measure is learned. In this way, we are able to achieve excellent results on the recently introduced iCoseg dataset, which contains small sets of images of either the same object instance or similar objects of the same class. The challenge of this dataset lies in the extreme changes in viewpoint, lighting, and object deformations within each set. We are able to considerably outperform several competitors. To achieve this performance, we borrow recent ideas from object recognition: the use of powerful features extracted from a pool of candidate object-like segmentations. We believe that our work will be beneficial to several application areas, such as image retrieval. {\textcopyright} 2011 IEEE.},
isbn = {9781457703942},
issn = {10636919},
doi = {10.1109/CVPR.2011.5995530},
author = {Vicente, Sara and Rother, Carsten and Kolmogorov, Vladimir}
}
@inproceedings{Vicente2010,
title = {Cosegmentation revisited: Models and optimization},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {6312},
number = {PART 2},
year = {2010},
pages = {465--479},
abstract = {The problem of cosegmentation consists of segmenting the same object (or objects of the same class) in two or more distinct images. Recently a number of different models have been proposed for this problem. However, no comparison of such models and corresponding optimization techniques has been done so far. We analyze three existing models: the L1 norm model of Rother et al. [1], the L2 norm model of Mukherjee et al. [2] and the "reward" model of Hochbaum and Singh [3]. We also study a new model, which is a straightforward extension of the Boykov-Jolly model for single image segmentation [4]. In terms of optimization, we use a Dual Decomposition (DD) technique in addition to optimization methods in [1,2]. Experiments show a significant improvement of DD over published methods. Our main conclusion, however, is that the new model is the best overall because it: (i) has fewest parameters; (ii) is most robust in practice, and (iii) can be optimized well with an efficient EM-style procedure. {\textcopyright} 2010 Springer-Verlag.},
isbn = {3642155510},
issn = {16113349},
doi = {10.1007/978-3-642-15552-9_34},
author = {Vicente, Sara and Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Vicente2009,
title = {Joint optimization of segmentation and appearance models},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
year = {2009},
pages = {755--762},
abstract = {Many interactive image segmentation approaches use an objective function which includes appearance models as an unknown variable. Since the resulting optimization problem is NP-hard the segmentation and appearance are typically optimized separately, in an EM-style fashion. One contribution of this paper is to express the objective function purely in terms of the unknown segmentation, using higher-order cliques. This formulation reveals an interesting bias of the model towards balanced segmentations. Furthermore, it enables us to develop a new dual decomposition optimization procedure, which provides additionally a lower bound. Hence, we are able to improve on existing optimizers, and verify that for a considerable number of real world examples we even achieve global optimality. This is important since we are able, for the first time, to analyze the deficiencies of the model. Another contribution is to establish a property of a particular dual decomposition approach which involves convex functions depending on foreground area. As a consequence, we show that the optimal decomposition for our problem can be computed efficiently via a parametric maxflow algorithm. {\textcopyright}2009 IEEE.},
isbn = {9781424444205},
doi = {10.1109/ICCV.2009.5459287},
author = {Vicente, Sara and Kolmogorov, Vladimir and Rother, Carsten}
}
@article{Szeliski2008,
title = {A comparative study of energy minimization methods for Markov random fields with smoothness-based priors},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {30},
number = {6},
year = {2008},
pages = {1068--1080},
publisher = {IEEE},
abstract = {Among the most exciting advances in early vision has been the development of efficient energy minimization algorithms for pixel-labeling tasks such as depth or texture computation. It has been known for decades that such problems can be elegantly expressed as Markov random fields, yet the resulting energy minimization problems have been widely viewed as intractable. Recently, algorithms such as graph cuts and loopy belief propagation (LBP) have proven to be very powerful: for example, such methods form the basis for almost all the top-performing stereo methods. However, the tradeoffs among different energy minimization algorithms are still not well understood. In this paper we describe a set of energy minimization benchmarks and use them to compare the solution quality and running time of several common energy minimization algorithms. We investigate three promising recent methods graph cuts, LBP, and tree-reweighted message passing in addition to the well-known older iterated conditional modes (ICM) algorithm. Our benchmark problems are drawn from published energy functions used for stereo, image stitching, interactive segmentation, and denoising. We also provide a general-purpose software interface that allows vision researchers to easily switch between optimization methods. Benchmarks, code, images, and results are available at http://vision.middlebury.edu/ MRF/. {\textcopyright} 2008 IEEE.},
keywords = {Belief propagation, Global optimization, Graph Cuts, Markov random fields, Performance evaluation},
issn = {01628828},
doi = {10.1109/TPAMI.2007.70844},
url = {http://vision.middlebury.edu/MRF/},
author = {Szeliski, Richard and Zabih, Ramin and Scharstein, Daniel and Veksler, Olga and Kolmogorov, Vladimir and Agarwala, Aseem and Tappen, Marshall and Rother, Carsten}
}
@article{Szeliski2008a,
title = {A comparative study of energy minimization methods for Markov random fields with smoothness-based priors},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {30},
number = {6},
year = {2008},
month = jun,
pages = {1068--1080},
abstract = {Among the most exciting advances in early vision has been the development of efficient energy minimization algorithms for pixel-labeling tasks such as depth or texture computation. It has been known for decades that such problems can be elegantly expressed as Markov random fields, yet the resulting energy minimization problems have been widely viewed as intractable. Recently, algorithms such as graph cuts and loopy belief propagation (LBP) have proven to be very powerful: for example, such methods form the basis for almost all the top-performing stereo methods. However, the tradeoffs among different energy minimization algorithms are still not well understood. In this paper we describe a set of energy minimization benchmarks and use them to compare the solution quality and running time of several common energy minimization algorithms. We investigate three promising recent methods graph cuts, LBP, and tree-reweighted message passing in addition to the well-known older iterated conditional modes (ICM) algorithm. Our benchmark problems are drawn from published energy functions used for stereo, image stitching, interactive segmentation, and denoising. We also provide a general-purpose software interface that allows vision researchers to easily switch between optimization methods. Benchmarks, code, images, and results are available at http://vision.middlebury.edu/ MRF/. {\textcopyright} 2008 IEEE.},
keywords = {Belief propagation, Global optimization, Graph Cuts, Markov random fields, Performance evaluation},
issn = {01628828},
doi = {10.1109/TPAMI.2007.70844},
internal-note = {NOTE(review): duplicate of Szeliski2008 (same DOI) -- consider removing one and keeping a single key},
author = {Szeliski, Richard and Zabih, Ramin and Scharstein, Daniel and Veksler, Olga and Kolmogorov, Vladimir and Agarwala, Aseem and Tappen, Marshall and Rother, Carsten}
}
@inproceedings{Torresani2008,
title = {Feature correspondence via graph matching: Models and global optimization},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {5303},
number = {PART 2},
year = {2008},
pages = {596--609},
abstract = {In this paper we present a new approach for establishing correspondences between sparse image features related by an unknown non-rigid mapping and corrupted by clutter and occlusion, such as points extracted from a pair of images containing a human figure in distinct poses. We formulate this matching task as an energy minimization problem by defining a complex objective function of the appearance and the spatial arrangement of the features. Optimization of this energy is an instance of graph matching, which is in general a NP-hard problem. We describe a novel graph matching optimization technique, which we refer to as dual decomposition (DD), and demonstrate on a variety of examples that this method outperforms existing graph matching algorithms. In the majority of our examples DD is able to find the global minimum within a minute. The ability to globally optimize the objective allows us to accurately learn the parameters of our matching model from training examples. We show on several matching tasks that our learned model yields results superior to those of state-of-the-art methods. {\textcopyright} 2008 Springer Berlin Heidelberg.},
isbn = {3540886850},
issn = {03029743},
doi = {10.1007/978-3-540-88688-4_44},
author = {Torresani, Lorenzo and Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Torresani2008a,
title = {Feature correspondence via graph matching: Models and global optimization},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {5303},
number = {PART 2},
year = {2008},
pages = {596--609},
abstract = {In this paper we present a new approach for establishing correspondences between sparse image features related by an unknown non-rigid mapping and corrupted by clutter and occlusion, such as points extracted from a pair of images containing a human figure in distinct poses. We formulate this matching task as an energy minimization problem by defining a complex objective function of the appearance and the spatial arrangement of the features. Optimization of this energy is an instance of graph matching, which is in general a NP-hard problem. We describe a novel graph matching optimization technique, which we refer to as dual decomposition (DD), and demonstrate on a variety of examples that this method outperforms existing graph matching algorithms. In the majority of our examples DD is able to find the global minimum within a minute. The ability to globally optimize the objective allows us to accurately learn the parameters of our matching model from training examples. We show on several matching tasks that our learned model yields results superior to those of state-of-the-art methods. {\textcopyright} 2008 Springer Berlin Heidelberg.},
isbn = {3540886850},
issn = {03029743},
doi = {10.1007/978-3-540-88688-4_44},
internal-note = {NOTE(review): duplicate of Torresani2008 (same DOI) -- consider removing one and keeping a single key},
author = {Torresani, Lorenzo and Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Vicente2008,
title = {Graph cut based image segmentation with connectivity priors},
booktitle = {26th IEEE Conference on Computer Vision and Pattern Recognition, CVPR},
year = {2008},
abstract = {Graph cut is a popular technique for interactive image segmentation. However, it has certain shortcomings. In particular, graph cut has problems with segmenting thin elongated objects due to the "shrinking bias". To overcome this problem, we propose to impose an additional connectivity prior, which is a very natural assumption about objects. We formulate several versions of the connectivity constraint and show that the corresponding optimization problems are all NP-hard. For some of these versions we propose two optimization algorithms: (i) a practical heuristic technique which we call DijkstraGC, and (ii) a slow method based on problem decomposition which provides a lower bound on the problem. We use the second technique to verify that for some practical examples DijkstraGC is able to find the global minimum. {\textcopyright}2008 IEEE.},
isbn = {9781424422432},
doi = {10.1109/CVPR.2008.4587440},
author = {Vicente, Sara and Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Kohli2008,
title = {On partial optimality in multi-label {MRFs}},
booktitle = {Proceedings of the 25th International Conference on Machine Learning},
year = {2008},
pages = {480--487},
abstract = {We consider the problem of optimizing multilabel MRFs, which is in general NP-hard and ubiquitous in low-level computer vision. One approach for its solution is to formulate it as an integer linear programming and relax the integrality constraints. The approach we consider in this paper is to first convert the multi-label MRF into an equivalent binary-label MRF and then to relax it. The resulting relaxation can be efficiently solved using a maximum flow algorithm. Its solution provides us with a partially optimal labelling of the binary variables. This partial labelling is then easily transferred to the multi-label problem. We study the theoretical properties of the new relaxation and compare it with the standard one. Specifically, we compare tightness, and characterize a subclass of problems where the two relaxations coincide. We propose several combined algorithms based on the technique and demonstrate their performance on challenging computer vision problems. Copyright 2008 by the author(s)/owner(s).},
isbn = {9781605582054},
doi = {10.1145/1390156.1390217},
author = {Kohli, Pushmeet and Shekhovtsov, Alexander and Rother, Carsten and Kolmogorov, Vladimir and Torr, Philip}
}
@inproceedings{Kolmogorov2007b,
title = {Applications of parametric maxflow in computer vision},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
year = {2007},
abstract = {The maximum flow algorithm for minimizing energy functions of binary variables has become a standard tool in computer vision. In many cases, unary costs of the energy depend linearly on parameter λ. In this paper we study vision applications for which it is important to solve the max-flow problem for different λ{\textquoteright}s. An example is a weighting between data and regularization terms in image segmentation or stereo: it is desirable to vary it both during training (to learn λ from ground truth data) and testing (to select best λ using high-knowledge constraints, e.g. user input). We review algorithmic aspects of this parametric maximum flow problem previously unknown in vision, such as the ability to compute all breakpoints of λ and corresponding optimal configurations in finite time. These results allow, in particular, to minimize the ratio of some geometric functionals, such as flux of a vector field over length (or area). Previously, such functionals were tackled with shortest path techniques applicable only in 2D. We give theoretical improvements for "PDE cuts" [5]. We present experimental results for image segmentation, 3D reconstruction, and the cosegmentation problem. {\textcopyright}2007 IEEE.},
doi = {10.1109/ICCV.2007.4408910},
author = {Kolmogorov, Vladimir and Boykov, Yuri and Rother, Carsten}
}
@inproceedings{Kolmogorov2007a,
title = {Applications of parametric maxflow in computer vision},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
year = {2007},
abstract = {The maximum flow algorithm for minimizing energy functions of binary variables has become a standard tool in computer vision. In many cases, unary costs of the energy depend linearly on parameter λ. In this paper we study vision applications for which it is important to solve the max-flow problem for different λ{\textquoteright}s. An example is a weighting between data and regularization terms in image segmentation or stereo: it is desirable to vary it both during training (to learn λ from ground truth data) and testing (to select best λ using high-knowledge constraints, e.g. user input). We review algorithmic aspects of this parametric maximum flow problem previously unknown in vision, such as the ability to compute all breakpoints of λ and corresponding optimal configurations in finite time. These results allow, in particular, to minimize the ratio of some geometric functionals, such as flux of a vector field over length (or area). Previously, such functionals were tackled with shortest path techniques applicable only in 2D. We give theoretical improvements for "PDE cuts" [5]. We present experimental results for image segmentation, 3D reconstruction, and the cosegmentation problem. {\textcopyright}2007 IEEE.},
doi = {10.1109/ICCV.2007.4408910},
internal-note = {NOTE(review): duplicate of Kolmogorov2007b (same DOI) -- consider removing one and keeping a single key},
author = {Kolmogorov, Vladimir and Boykov, Yuri and Rother, Carsten}
}
@article{Kolmogorov2007,
title = {Minimizing nonsubmodular functions with graph cuts - A review},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {29},
number = {7},
year = {2007},
pages = {1274--1279},
abstract = {Optimization techniques based on graph cuts have become a standard tool for many vision applications. These techniques allow to minimize efficiently certain energy functions corresponding to pairwise Markov Random Fields (MRFs). Currently, there is an accepted view within the computer vision community that graph cuts can only be used for optimizing a limited class of MRF energies (e.g., submodular functions). In this survey, we review some results that show that graph cuts can be applied to a much larger class of energy functions (in particular, nonsubmodular functions). While these results are well-known in the optimization community, to our knowledge they were not used in the context of computer vision and MRF optimization. We demonstrate the relevance of these results to vision on the problem of binary texture restoration. {\textcopyright} 2007 IEEE.},
keywords = {energy minimization, Markov random fields, Min cut/max flow, Quadratic pseudo-Boolean optimization, Texture restoration},
issn = {01628828},
doi = {10.1109/TPAMI.2007.1031},
author = {Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Rother2007a,
title = {Optimizing binary {MRFs} via extended roof duality},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
year = {2007},
abstract = {Many computer vision applications rely on the efficient optimization of challenging, so-called non-submodular, binary pairwise MRFs. A promising graph cut based approach for optimizing such MRFs known as "roof duality" was recently introduced into computer vision. We study two methods which extend this approach. First, we discuss an efficient implementation of the "probing" technique introduced recently by Boros et al. [5]. It simplifies the MRF while preserving the global optimum. Our code is 400-700 faster on some graphs than the implementation of [5]. Second, we present a new technique which takes an arbitrary input labeling and tries to improve its energy. We give theoretical characterizations of local minima of this procedure. We applied both techniques to many applications, including image segmentation, new view synthesis, superresolution, diagram recognition, parameter learning, texture restoration, and image deconvolution. For several applications we see that we are able to find the global minimum very efficiently, and considerably outperform the original roof duality approach. In comparison to existing techniques, such as graph cut, TRW, BP, ICM, and simulated annealing, we nearly always find a lower energy. {\textcopyright} 2007 IEEE.},
isbn = {1424411807},
issn = {10636919},
doi = {10.1109/CVPR.2007.383203},
author = {Rother, Carsten and Kolmogorov, Vladimir and Lempitsky, Victor and Szummer, Martin}
}
@techreport{Rother2007,
title = {Optimizing Binary {MRFs} via Extended Roof Duality},
institution = {Microsoft Research},
number = {MSR-TR-2007-46},
year = {2007},
abstract = {Many computer vision applications rely on the efficient optimization of challenging, so-called non-submodular, binary pairwise MRFs. A promising graph cut based approach for optimizing such MRFs known as "roof duality" was recently introduced into computer vision. We study two methods which extend this approach. First, we discuss an efficient implementation of the "probing" technique introduced recently by Boros et al. [8]. It simplifies the MRF while preserving the global optimum. Our code is 400-700 faster on some graphs than the implementation of [8]. Second , we present a new technique which takes an arbitrary input labeling and tries to improve its energy. We give theoretical characterizations of local minima of this procedure. We applied both techniques to many applications, including image segmentation, new view synthesis, super-resolution, diagram recognition, parameter learning, texture restoration, and image deconvolution. For several applications we see that we are able to find the global minimum very efficiently, and considerably outperform the original roof duality approach. In comparison to existing techniques , such as graph cut, TRW, BP, ICM, and simulated annealing, we nearly always find a lower energy.},
url = {http://research.microsoft.com/vision/cambridge/},
author = {Rother, Carsten and Kolmogorov, Vladimir and Lempitsky, Victor and Szummer, Martin}
}
@inproceedings{Kolmogorov2006b,
title = {Comparison of energy minimization algorithms for highly connected graphs},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {3952},
year = {2006},
pages = {1--15},
abstract = {Algorithms for discrete energy minimization play a fundamental role for low-level vision. Known techniques include graph cuts, belief propagation (BP) and recently introduced tree-reweighted message passing (TRW). So far, the standard benchmark for their comparison has been a 4-connected grid-graph arising in pixel-labelling stereo. This minimization problem, however, has been largely solved: recent work shows that for many scenes TRW finds the global optimum. Furthermore, it is known that a 4-connecled grid-graph is a poor stereo model since it does not take occlusions into account. We propose the problem of stereo with occlusions as a new test bed for minimization algorithms. This is a more challenging graph since it has much larger connectivity, and it also serves as a better stereo model. An attractive feature of this problem is that increased connectivity does not result in increased complexity of message passing algorithms. Indeed, one contribution of this paper is to show that sophisticated implementations of BP and TRW have the same time and memory complexity as that of 4-connecled grid-graph stereo. The main conclusion of our experimental study is that for our problem graph cut outperforms both TRW and BP considerably. TRW achieves consistently a lower energy than BP. However, as connectivity increases the speed of convergence of TRW becomes slower. Unlike 4-connected grids, the difference between the energy of the best optimization method and the lower bound of TRW appears significant. This shows the hardness of the problem and motivates future research. {\textcopyright} Springer-Verlag Berlin Heidelberg 2006.},
isbn = {3540338349},
issn = {16113349},
doi = {10.1007/11744047_1},
author = {Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Kolmogorov2006a,
title = {Comparison of energy minimization algorithms for highly connected graphs},
booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
volume = {3952},
year = {2006},
pages = {1--15},
abstract = {Algorithms for discrete energy minimization play a fundamental role for low-level vision. Known techniques include graph cuts, belief propagation (BP) and recently introduced tree-reweighted message passing (TRW). So far, the standard benchmark for their comparison has been a 4-connected grid-graph arising in pixel-labelling stereo. This minimization problem, however, has been largely solved: recent work shows that for many scenes TRW finds the global optimum. Furthermore, it is known that a 4-connecled grid-graph is a poor stereo model since it does not take occlusions into account. We propose the problem of stereo with occlusions as a new test bed for minimization algorithms. This is a more challenging graph since it has much larger connectivity, and it also serves as a better stereo model. An attractive feature of this problem is that increased connectivity does not result in increased complexity of message passing algorithms. Indeed, one contribution of this paper is to show that sophisticated implementations of BP and TRW have the same time and memory complexity as that of 4-connecled grid-graph stereo. The main conclusion of our experimental study is that for our problem graph cut outperforms both TRW and BP considerably. TRW achieves consistently a lower energy than BP. However, as connectivity increases the speed of convergence of TRW becomes slower. Unlike 4-connected grids, the difference between the energy of the best optimization method and the lower bound of TRW appears significant. This shows the hardness of the problem and motivates future research. {\textcopyright} Springer-Verlag Berlin Heidelberg 2006.},
isbn = {3540338349},
issn = {16113349},
doi = {10.1007/11744047_1},
internal-note = {NOTE(review): duplicate of Kolmogorov2006b (same DOI) -- consider removing one and keeping a single key},
author = {Kolmogorov, Vladimir and Rother, Carsten}
}
@inproceedings{Rother2006,
title = {Cosegmentation of image pairs by histogram matching - Incorporating a global constraint into {MRFs}},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
volume = {1},
year = {2006},
pages = {994--1000},
abstract = {We introduce the term cosegmentation which denotes the task of segmenting simultaneously the common parts of an image pair. A generative model for cosegmentation is presented. Inference in the model leads to minimizing an energy with an MRF term encoding spatial coherency and a global constraint which attempts to match the appearance histograms of the common parts. This energy has not been proposed previously and its optimization is challenging and NP-hard. For this problem a novel optimization scheme which we call trust region graph cuts is presented. We demonstrate that this framework has the potential to improve a wide range of research: Object driven image retrieval, video tracking and segmentation, and interactive image editing. The power of the framework lies in its generality, the common part can be a rigid/non-rigid object (or scene), observed from different viewpoints or even similar objects of the same class. {\textcopyright} 2006 IEEE.},
isbn = {0769525970},
issn = {10636919},
doi = {10.1109/CVPR.2006.91},
url = {http://research.microsoft.com/vision/cambridge/},
author = {Rother, Carsten and Kolmogorov, Vladimir and Minka, Tom and Blake, Andrew}
}
@article{Kolmogorov2006,
title = {Probabilistic fusion of stereo with color and contrast for bilayer segmentation},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {28},
number = {9},
year = {2006},
pages = {1480--1492},
abstract = {This paper describes models and algorithms for the real-time segmentation of foreground from background layers in stereo video sequences. Automatic separation of layers from color/contrast or from stereo alone is known to be error-prone. Here, color, contrast, and stereo matching information are fused to infer layers accurately and efficiently. The first algorithm, Layered Dynamic Programming (LDP), solves stereo in an extended six-state space that represents both foreground/background layers and occluded regions. The stereo-match likelihood is then fused with a contrast-sensitive color model that is learned on-the-fly and stereo disparities are obtained by dynamic programming. The second algorithm, Layered Graph Cut (LGC), does not directly solve stereo. Instead, the stereo match likelihood is marginalized over disparities to evaluate foreground and background hypotheses and then fused with a contrast-sensitive color model like the one used in LDP. Segmentation is solved efficiently by ternary graph cut. Both algorithms are evaluated with respect to ground truth data and found to have similar performance, substantially better than either stereo or color/contrast alone. However, their characteristics with respect to computational efficiency are rather different. The algorithms are demonstrated in the application of background substitution and shown to give good quality composite video output. {\textcopyright} 2006 IEEE.},
keywords = {3D/stereo scene analysis, Computer vision, Dynamic programming, Image processing and computer vision, Parameter learning},
issn = {01628828},
doi = {10.1109/TPAMI.2006.193},
url = {http://research.microsoft.com/vision/cambridge},
author = {Kolmogorov, Vladimir and Criminisi, Antonio and Blake, Andrew and Cross, Geoffrey and Rother, Carsten}
}
@inproceedings{Rother2005,
title = {Digital tapestry},
booktitle = {Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer Society Conference on},
volume = {1},
year = {2005},
pages = {589--596},
abstract = {This paper addresses the novel problem of automatically synthesizing an output image from a large collection of different input images. The synthesized image, called a digital tapestry, can be viewed as a visual summary or a virtual {\textquoteright}thumbnail{\textquoteright} of all the images in the input collection. The problem of creating the tapestry is cast as a multi-class labeling problem such that each region in the tapestry is constructed from input image blocks that are salient and such that neighboring blocks satisfy spatial compatibility. This is formulated using a Markov random field and optimized via the graph cut based expansion move algorithm. The standard expansion move algorithm can only handle energies with metric terms, while our energy contains non-metric (soft and hard) constraints. Therefore we propose two novel contributions. First, we extend the expansion move algorithm for energy functions with non-metric hard constraints. Secondly, we modify it for functions with "almost" metric soft terms, and show that it gives good results in practice. The proposed framework was tested on several consumer photograph collections, and the results are presented.},
isbn = {0-7695-2372-2},
doi = {10.1109/CVPR.2005.130},
url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=1467321},
author = {Rother, Carsten and Kumar, Sanjiv and Kolmogorov, Vladimir and Blake, Andrew}
}
@article{Rother2004,
title = {{``GrabCut''} - Interactive foreground extraction using iterated graph cuts},
journal = {ACM Transactions on Graphics},
volume = {23},
number = {3},
year = {2004},
pages = {309--314},
abstract = {The problem of efficient, interactive foreground/background segmentation in still images is of great practical importance in image editing. Classical image segmentation tools use either texture (colour) information, e.g. Magic Wand, or edge (contrast) information, e.g. Intelligent Scissors. Recently, an approach based on optimization by graph-cut has been developed which successfully combines both types of information. In this paper we extend the graph-cut approach in three respects. First, we have developed a more powerful, iterative version of the optimisation. Secondly, the power of the iterative algorithm is used to simplify substantially the user interaction needed for a given quality of result. Thirdly, a robust algorithm for "border matting" has been developed to estimate simultaneously the alpha-matte around an object boundary and the colours of foreground pixels. We show that for moderately difficult examples the proposed method outperforms competitive tools.},
keywords = {Alpha Matting, Foreground extraction, Graph Cuts, Image Editing, Interactive Image Segmentation},
issn = {07300301},
doi = {10.1145/1015706.1015720},
author = {Rother, Carsten and Kolmogorov, Vladimir and Blake, Andrew}
}