- \documentclass[10pt,journal,cspaper,compsoc]{IEEEtran}
- \IEEEoverridecommandlockouts
- % Math related
- \usepackage{amsmath,amssymb}
- \usepackage{bbm}
- % \usepackage{ruler}
- \usepackage{mathtools}
- \DeclarePairedDelimiter\ceil{\lceil}{\rceil}
- \DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
- \DeclareMathOperator*{\argmax}{arg\,max} % Jan Hlavacek
- \newcommand{\rectangle}{{%
- \ooalign{$\sqsubset\mkern3mu$\cr$\mkern3mu\sqsupset$\cr}%
- }}
- % table related
- \usepackage{colortbl}
- \usepackage{makecell}
- % Figure related
- \usepackage{graphicx, overpic, wrapfig, subfigure}
- \usepackage[dvipsnames]{xcolor}
- \definecolor{americanrose}{rgb}{1.0, 0.01, 0.24}
- \definecolor{myred}{rgb}{0.753, 0.314, 0.275}
- \definecolor{myblue}{rgb}{0.0, 0.24, 0.95}
- \definecolor{tbl_gray}{gray}{0.85}
- \newcommand\MYhyperrefoptions{bookmarks=true,bookmarksnumbered=true,
- pdfpagemode={UseOutlines},plainpages=false,pdfpagelabels=true,
- colorlinks=true,linkcolor={americanrose},citecolor={myblue},urlcolor={red},
- pdftitle={Deep Hough Transform for Semantic Line Detection},%<!CHANGE!
- pdfsubject={Typesetting},%<!CHANGE!
- pdfauthor={Kai Zhao et al.},%<!CHANGE!
- pdfkeywords={Straight line detection, Hough transform, CNN}}%<^!CHANGE!
- \usepackage[\MYhyperrefoptions,pdftex]{hyperref}
- % cref must be loaded after hyperref
- \usepackage{cleveref}
- \crefname{equation}{Eq.}{Eqs.}
- \crefname{figure}{Fig.}{Figs.}
- \crefname{table}{Tab.}{Tabs.}
- \crefname{section}{Sec.}{Secs.}
- \newcommand{\revise}[1]{{\textcolor{black}{#1}}}
- \newcommand{\rerevise}[1]{{\textcolor{black}{#1}}}
- %\newcommand{\GramaCheck}{}
- \ifdefined \GramaCheck
- \newcommand{\CheckRmv}[1]{}
- \renewcommand{\eqref}[1]{Equation 1}
- \else
- \newcommand{\CheckRmv}[1]{#1}
- \renewcommand{\eqref}[1]{Equation~(\ref{#1})}
- \fi
- \newcommand{\Arxiv}{}
- \ifdefined \Arxiv
- \newcommand{\ArxivRmv}[1]{}
- \else
- \newcommand{\ArxivRmv}[1]{#1}
- \fi
- \def\etal{\emph{et al.~}}
- \def\ie{\emph{i.e.,~}}
- \def\eg{\emph{e.g.,~}}
- \newcommand{\todo}[1]{{\textcolor{red}{#1}}}%
- \newcommand{\hq}[1]{{\textcolor{blue}{#1}}}
- % table related
- \usepackage{booktabs} % provides toprule
- \usepackage{array}
- \usepackage{diagbox}
- \usepackage{multirow}
- \usepackage{silence}
- \hbadness=10000 \vbadness=10000
- \WarningFilter{latex}{Font shape declaration has incorrect series value}
- \graphicspath{{./figures/}{./figures/photos/}}
- \usepackage[nocompress]{cite}
- \hyphenation{op-tical net-works semi-conduc-tor}
- \begin{document}
- \title{Deep Hough Transform for Semantic Line Detection}
- \author{
- \IEEEauthorblockN{Kai Zhao\thanks{\IEEEauthorrefmark{1} The first two authors contributed equally to this paper.}\IEEEauthorrefmark{1}},
- Qi Han\IEEEauthorrefmark{1},
- Chang-Bin Zhang,
- Jun Xu,
- \IEEEauthorblockN{Ming-Ming Cheng\thanks{\IEEEauthorrefmark{2} M.M. Cheng is the corresponding author (cmm@nankai.edu.cn).}\IEEEauthorrefmark{2}},~\IEEEmembership{Senior Member,~IEEE}
- \IEEEcompsocitemizethanks{
- \IEEEcompsocthanksitem Kai Zhao, Qi Han, Chang-Bin Zhang, and Ming-Ming Cheng are with the TKLNDST,
- College of Computer Science, Nankai University, Tianjin, China, 300350.
- %
- \IEEEcompsocthanksitem Jun Xu is with the School of Statistics and Data Science, Nankai University, Tianjin, China, 300071.
- \IEEEcompsocthanksitem A preliminary version of this work has been presented in \cite{eccv2020line}.
- }\\
- }
- % paper headers
- \markboth{IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. XX, NO. XX, April~2021}%
- {Zhao \MakeLowercase{\textit{et al.}}:
- Deep Hough Transform for Semantic Line Detection}
- %{Shell \MakeLowercase{\textit{et al.}}: Bare Advanced Demo of IEEEtran.cls for IEEE Computer Society Journals}
- \IEEEtitleabstractindextext{%
- \begin{abstract}
- We focus on a fundamental task of detecting meaningful line structures,
- \textsl{a.k.a.}~semantic lines, in natural scenes.
- %
- Many previous methods regard this problem as a special
- case of object detection
- and adjust existing object detectors for semantic line detection.
- %
- However, these methods neglect the inherent characteristics of lines,
- leading to sub-optimal performance.
- %
- Lines have much simpler geometric properties than complex objects
- and thus can be compactly parameterized by a few arguments.
- %
- To better exploit the property of lines, in this paper,
- we incorporate the classical Hough transform technique into
- deeply learned representations and
- propose a one-shot end-to-end learning framework for line detection.
- % Deep Hough Transform (DHT) network
- %
- By parameterizing lines with slopes and biases,
- we perform Hough transform to translate
- deep representations into the parametric domain,
- in which we perform line detection.
- %
- Specifically, we aggregate features along candidate lines on the
- feature map plane
- and then assign the aggregated features to corresponding locations
- in the parametric domain.
- %
- Consequently, the problem of detecting semantic lines in the spatial domain
- is transformed into spotting individual points in the parametric domain,
- making the post-processing steps,
- \ie non-maximal suppression, more efficient.
- %
- Furthermore, our method makes it easy to extract contextual line features
- % \eg features along lines close to a specific line,
- that are critical for accurate line detection.
- %
- In addition to the proposed method, we design an evaluation metric to assess the
- quality of line detection and construct a large-scale dataset for the line detection task.
- %
- Experimental results on our proposed dataset and another public dataset
- demonstrate the advantages of our method over previous state-of-the-art
- alternatives.
- %
- % The \revise{dataset and} source code is available at~\url{https://mmcheng.net/dhtline/}.
- \end{abstract}
- % Note that keywords are not normally used for peerreview papers.
- \begin{IEEEkeywords}
- Semantic line detection, Hough transform, CNN, Deep Learning.
- \end{IEEEkeywords}
- }
- % make the title area
- \maketitle
- \IEEEdisplaynontitleabstractindextext
- \IEEEpeerreviewmaketitle
- \section{Introduction}\label{sec:introduction}
- \IEEEPARstart{D}etecting line structures from digital images has
- a long history in computer vision.
- %
- The organization of line structures is an early yet essential step to transform the visual signal into useful intermediate concepts
- for visual interpretation~\cite{burns1986extracting}.
- % %
- % Previous methods mainly focus on detecting arbitrary line segments using low-level features,
- % e.g., texture and gradient.
- %
- Though many techniques have been proposed to detect salient objects
- \cite{zhao2019optimizing,HouPami19Dss,gao2020sod100k,BorjiCVM2019,wang2021revisiting}
- and areas~\cite{cheng2015global,zhu2014saliency,Fan2020S4Net},
- little work has been devoted to detecting outstanding, structure-revealing line structures.
- %
- A recent study~\cite{lee2017semantic} proposed to detect outstanding straight lines,
- referred to as ``semantic lines'', that outline the conceptual structure of natural images.
- % ~\eg the straight lines that divides different semantic regions or highlight the structure
- % of scenes.
- %
- Identifying these semantic lines is of crucial importance for
- computer graphics and vision applications, such as
- photographic composition~\cite{liu2010optimizing,freeman2007photographer},
- structure-preserving image processing~\cite{TIP20_SP_NPR,hu2013patchnet},
- image aesthetics~\cite{ko2018pac,lee2019property,kong2016photo,mai2016composition},
- lane detection~\cite{fan2019spinnet},
- and artistic creation~\cite{krages2012photography,hu2013inverse,chen2009sketch2photo,zhang2020and}.
- %
- \revise{
- As demonstrated in~\cref{fig:photographic-composition},
- Liu \etal~\cite{liu2010optimizing} proposed to crop images according
- to the golden ratio by using `prominent lines'.
- Detecting such `semantic lines' can help produce images with
- visually pleasing photographic composition.
- }
- %
- The Hough transform~\cite{duda1971use,ballard1981generating} is one
- representative method for line detection,
- which was first proposed to detect straight lines in bubble chamber
- photographs~\cite{hough1962method}.
- % generalized
- Owing to its simplicity and efficiency,
- HT has been employed to detect lines in digital images~\cite{duda1971use},
- and was further extended by~\cite{ballard1981generating} to detect other regular shapes such as circles and rectangles.
- %
- The key idea of the Hough transform is to vote evidence from the image domain to the parametric domain, and
- then detect shapes in the parametric domain by identifying local-maximal responses.
- %
- In the case of line detection, a line in the image domain can be represented
- by its parameters, \eg slope and \revise{offset}, in the parametric space.
- %
- The Hough transform collects evidence along a line in an image
- and accumulates it at a single point in the parameter space.
- %
- Consequently, line detection in the image domain is converted to the problem of detecting peak responses in the
- parametric domain.
- %
- Classical Hough transform based line detectors
- ~\cite{fernandes2008real,yacoub1995hierarchical,princen1990hierarchical,kiryati1991probabilistic}
- usually detect continuous straight edges while neglecting the semantics in line structures.
- %
- Moreover, these methods are sensitive to light changes and occlusion.
- %
- Therefore, the results are often noisy and contain irrelevant lines~\cite{akinlar2011edlines},
- as shown in~\cref{fig:photographic-composition}(d).
- \newcommand{\addImg}[1]{\subfigure[]{\includegraphics[width=.245\linewidth]{figures/#1}}}
- \CheckRmv{
- \begin{figure*}[t]
- \centering
- \hfill
- \addImg{composition_1.pdf}
- \addImg{composition_2.pdf}
- \addImg{composition_3.pdf}
- \addImg{composition_4.pdf}
- \hfill
- \caption{
- Example pictures from~\cite{liu2010optimizing} reveal that semantic lines
- may help in photographic composition.
- (a): a photo taken with an arbitrary pose.
- %
- (b): a photo that fits the golden ratio principle~\cite{caplin2008art,krages2012photography}, which \revise{is} obtained by the method described
- in~\cite{liu2010optimizing} using so-called `prominent lines'
- % and salient objects~\cite{gao2020sod100k,fan2020bbs,fan2019D3Net}
- in the image.
- %
- (c): Our detection results \revise{are} clean and comprise only a few meaningful
- lines that are potentially helpful in photographic composition.
- %
- (d): Results of classical line detection algorithms
- often focus on fine, detailed straight edges.
- %Hough transform is in chaos and full of noisy lines.
- }
- \label{fig:photographic-composition}
- \end{figure*}
- }
- Convolutional Neural Networks (CNNs) have achieved remarkable success in a wide range of computer vision tasks.
- %
- Several recent studies~\cite{lee2017semantic,zhang2019ppgnet} have proposed CNN-based methods for line detection.
- %
- Concretely, they regard line detection as a special case of object detection and employ existing object detectors,
- \eg Faster R-CNN~\cite{ren2015faster} or CornerNet~\cite{law2018cornernet},
- for line detection.
- %
- Limited by the ROI pooling and non-maximal suppression of lines,
- both~\cite{lee2017semantic} and~\cite{zhang2019ppgnet} are less efficient in terms of
- running time.
- %
- % For example,~\cite{lee2017semantic} takes 0.3 seconds to process an image.
- %
- Moreover, ROI pooling~\cite{girshick2015fast} aggregates features only along a
- single line, while many recent studies reveal that richer context information is critical to many
- tasks, \eg video classification~\cite{wang2018non} and semantic segmentation~\cite{huang2019ccnet}.
- %
- This point will be validated in~\cref{sec:ablation}, in which we experimentally verify that aggregating features only along a single line produces sub-optimal results.
- Incorporating powerful CNNs into the Hough transform is a promising direction for
- semantic line detection.
- %
- A simple way of combining CNNs with the Hough transform is to perform edge detection
- with a CNN-based edge detector~\cite{RcfEdgePami2019,xie2015holistically}
- and then apply the standard Hough transform to the edge maps.
- %
- However, the two components have diverse optimization targets, leading to sub-optimal results, as
- evidenced by our experiments.
- %
- In this paper, we propose to incorporate CNNs with the Hough transform in an end-to-end manner
- so that each component of our method shares the same optimization target.
- %
- Our method first extracts pixel-wise representations with a CNN-based encoder
- and then performs the Hough transform on the deep representations to convert them
- from the feature space into the parametric space.
- %
- Then the global line detection problem is converted into detecting peak responses in
- the transformed features, which is much simpler.
- %
- For example, the time-consuming non-maximal suppression (NMS) can be simply
- replaced by calculating the centroids of connected areas in the parametric space,
- making our method efficient enough to detect lines in real time.
- %
- Moreover, in the detection stage, we use several convolutional layers on
- top of the transformed features to aggregate context-aware features of nearby lines.
- %
- Consequently, the final decision is made based not only on the features of a single line,
- but also on information about nearby lines.
- %
- As shown in \cref{fig:photographic-composition}(c),
- our method detects clean, meaningful, and outstanding lines
- that are helpful to photographic composition.
- To better evaluate line detection methods, we introduce a principled metric to assess
- the agreement of a detected line \textsl{w.r.t.} its corresponding ground-truth line.
- %
- Although~\cite{lee2017semantic} has proposed an evaluation metric that uses intersection areas to measure the similarity between a pair of lines,
- this measurement may lead to ambiguous and misleading results.
- %
- Finally, we collect a large-scale dataset with \revise{6,500} carefully annotated images for semantic
- line detection.
- %
- The new dataset, namely \revise{NKL (short for \textbf{N}an\textbf{K}ai \textbf{L}ines),}
- contains images of diverse scenes,
- and its scale is much larger than that of the existing SEL~\cite{lee2017semantic} dataset
- in terms of both images and annotated lines.
- The contributions of this paper are summarized below:
- \begin{itemize}\setlength\itemsep{0.3em}
- \item We propose an end-to-end framework that incorporates the feature learning capacity of CNNs with the Hough transform, resulting in an efficient real-time solution for semantic line detection.
-
- \item To facilitate the research of semantic line detection, we construct
- a new dataset with \revise{6,500} images, which is larger and more diverse than the previous SEL dataset~\cite{lee2017semantic}.
-
- \item We introduce a principled metric that measures the similarity between two lines.
- Compared with the previous IOU-based metric~\cite{lee2017semantic},
- our metric has a straightforward interpretation and \revise{is simple to implement},
- as detailed in~\cref{sec:metric}.
-
- \item Evaluation results on an open benchmark demonstrate that our method
- outperforms prior art by a significant margin.
- \end{itemize}
- A preliminary version of this work was presented in~\cite{eccv2020line}.
- %
- In this extended work, we introduce three major improvements:
- \begin{itemize}\setlength\itemsep{0.3em}
- \item We propose a novel ``edge-guided refinement'' module to adjust line positions and obtain better detection performance with the help of accurate edge information.
- %
- This part is detailed in~\cref{sec:refine}.
- %
- \item We introduce a new large-scale dataset for semantic line detection, as presented in~\cref{sec:nkl-dataset}.
- %
- The new dataset, \revise{namely NKL
- (short for \textbf{N}an\textbf{K}ai \textbf{L}ines),
- contains 6,500 images in total}, and each image is annotated by multiple skilled annotators.
- %
- \item We employ the maximal bipartite graph matching~\cite{kuhn1955hungarian} to match ground-truth and detected lines during evaluation (\cref{sec:protocol}).
- %
- The matching procedure removes redundant true positives so that each ground-truth line is associated with
- at most one detected line and vice versa.
- \end{itemize}
- The rest of this paper is organized as follows:
- %
- \cref{sec:related-work} summarizes the related works.
- %
- \cref{sec:dht} elaborates the proposed Deep Hough transform method.
- %
- \cref{sec:metric} describes the proposed evaluating metric, which is used to assess the similarity between a pair of lines.
- %
- \cref{sec:nkl-dataset} introduces our newly constructed dataset.
- %
- \cref{sec:experiments} presents experimental details and reports comparison results.
- %
- \cref{sec:conclusion} concludes the paper.
- %-----------------------------------------------------------------------------------%
- \section{Related Work}\label{sec:related-work}
- %-----------------------------------------------------------------------------------%
- Research on line detection in digital images dates back to the very early stages of
- computer vision.
- %
- Here, we first briefly review the evolution of the Hough transform (HT)~\cite{duda1971use},
- one of the most fundamental tools for line detection.
- %
- Then we introduce several recent CNN-based methods for line detection.
- %
- Finally, we summarize the methods and datasets for semantic line detection.
- \subsection{Hough transform}
- The Hough transform (HT) was first proposed in~\cite{hough1962method} for machine analysis of bubble chamber photographs.
- %
- It parametrizes straight lines with \revise{slope-offset} parameters, leading to an unbounded transform space (since the slope can be infinite).
- %
- \cite{duda1971use} extended HT by using angle-radius rather than \revise{slope-offset}
- parameters, which is conceptually similar to the two-dimensional Radon transform~\cite{radon20051}.
- %
- Then Ballard \etal \cite{ballard1981generating} generalized the idea of HT to localize arbitrary shapes, \eg ellipses and circles, from digital images.
- %
- For example, by parameterizing lines with angle and radius, line detection can be performed by voting edge evidence and finding peak responses in the finite parametric space.
- %
- Typically, with edge detectors such as Canny~\cite{canny1986computational} and Sobel~\cite{sobel},
- the detected lines correspond to local maxima in the transformed parametric space.
- %
- \revise{The core idea of HT is used in two recent
- works which
- parameterize the outputs of CNNs with offsets and orientations
- to predict surface meshes~\cite{chen2020bsp} or
- convex decomposition~\cite{deng2020cvxnet} of 3D shapes.}
- Despite the success of HT on line detection, it suffers from high computational costs and unstable performance.
- % There are many subsequent studies trying to remedy these shortcomings of the original algorithm.
- %
- To accelerate the voting of HT, Kiryati \etal \cite{kiryati1991probabilistic}
- proposed the ``probabilistic Hough transform'' to randomly pick
- sample points from a line,
- while~\cite{finding1976} used the image gradient direction to decide
- the voting points.
- %
- Meanwhile, the works of \cite{fernandes2008real,limberger2015real}
- employed a kernel-based Hough transform that performs Hough voting with
- an elliptical-Gaussian kernel on collinear pixels to improve the original HT.
- %
- In addition, the works of~\cite{princen1990hierarchical,yacoub1995hierarchical} partitioned the input image into
- hierarchical image patches, and then applied HT independently to these patches.
- %
- Illingworth \etal \cite{illingworth1987adaptive} used a coarse-to-fine accumulation and search
- strategy to identify significant peaks in the Hough parametric space.
- %
- \cite{aggarwal2006line} tackled line detection within a
- regularized framework, to suppress the effect of noise and clutter corresponding to nonlinear image features.
- %
- The Hough voting scheme is also used in many other tasks, such as
- detecting the centroids of 3D shapes in point clouds~\cite{qi2019deep} and finding image correspondences~\cite{min2019hyperpixel}.
- % The research of line detection in digital images dates back to the very early
- % stage of computer vision.
- % %
- % Since the majority of line detection methods are based on the Hough transform~\cite{duda1971use},
- % we first brief the Hough transform, and then
- % summarize several early methods for line detection using Hough transform.
- % %
- % Finally, we describe two recently proposed CNN-based methods for line/segments
- % detection from natural images.
- % \subsection{Hough based line detectors.}
- % Hough transform (HT) is originally devised by Hough~\cite{hough1962method}
- % to detect straight lines from bubble chamber photographs.
- % %
- % The algorithm is then extended~\cite{duda1971use} and generalized
- % ~\cite{ballard1981generating} to localize arbitrary shapes,
- % \eg ellipses and circles, from digital images.
- % %
- % Traditional line detectors start by edge detection in an image, typically
- % with the Canny~\cite{canny1986computational} and Sobel~\cite{sobel} operators.
- % %
- % Then the next step is to apply the Hough transform and finally detect
- % lines by picking peak response in the transformed space.
- % %
- % HT collects edge response alone a line and accumulates them to
- % a single point in the parametric space.
- % There are many variants of Hough transform (HT) trying to remedy different
- % shortcomings of the original algorithm.
- % %
- % The original HT maps each image point to all points in the parameter space,
- % resulting in a many-to-many voting scheme.
- % %
- % Consequently, the original HT presents high computational cost,
- % especially when dealing with large-size images.
- % %
- % Nahum \etal \cite{kiryati1991probabilistic} try to accelerate HT
- % by proposing the `probabilistic Hough transform'
- % that randomly picks sample points from a line.
- % %
- % John \etal \cite{princen1990hierarchical,yacoub1995hierarchical} partition the input image into
- % hierarchical image patches, and then apply HT independently to these patches.
- % %
- % Fernandes \etal \cite{fernandes2008real} use an oriented elliptical-Gaussian kernel
- % to cast votes for only a few lines in the parameter space.
- % %
- % Illingworth \etal \cite{illingworth1987adaptive} use a `coarse to fine' accumulation and search
- % strategy to identify significant peaks in the Hough parametric spaces.
- % %
- % ~\cite{aggarwal2006line} approaches line detection within a
- % regularized framework, to suppress the effect of noise and clutter corresponding
- % to image features which are not linear.
- % %
- % It's worth noting that a clean input edge map is critical to these HT-based detectors.
- % %-----------------------------------------------------------------------------------%
- \subsection{Line Segment Detection}
- Despite its robustness and parallelism, the Hough transform cannot be directly used for line segment detection,
- since it cannot determine the endpoints of line segments.
- %
- Probabilistic Hough transform~\cite{kiryati1991probabilistic} uses random sampling in the voting scheme,
- and reconstructs line segments by localizing the sample locations.
- %
- But this method still prefers long straight lines.
- %
- In addition to Hough transform, many other studies have been developed to detect line segments.
- %
- Burns \etal \cite{burns1986extracting} used the edge orientation as a guide for line segment extraction.
- %
- The main advantage is that the orientation of the gradients can help to discover low-contrast lines
- and endpoints.
- %
- Etemadi \etal \cite{etemadi1992robust} established chains from the given edge map and
- extracted line segments and orientations by walking over these chains.
- % quantized
- Chan \etal \cite{chan1996line} used quantized edge orientations to search for and merge short line segments.
- %
- Von Gioi \etal \cite{von2008lsd} proposed a linear-time line segment detector (LSD) that requires no parameter tuning,
- which has been used in many subsequent studies~\cite{akinlar2011edlines,akinlar2013edcircles,feng2013automatic}.
- \subsection{CNN-based Line Detection}
- % There are two CNN-based line (segment) detectors that are closely related to
- % our method.
- %
- Recently, CNNs have brought remarkable improvements to computer vision tasks and have also been applied to line detection.
- %
- These methods either focus on straight line detection, \eg semantic line detection~\cite{lee2017semantic,ahmad2017comparison}, or line segment detection,
- \eg wireframe parsing~\cite{zhang2019ppgnet,huang2018learning,zhou2019end,xue2020holistically}.
- %
- Lee \etal \cite{lee2017semantic} followed the two-branch pipeline of Faster R-CNN~\cite{ren2015faster}
- and proposed a straight line detection framework to find meaningful semantic straight lines in an image.
- %
- One branch verifies the existence of a line and the other branch further refines the position of the line
- by regression.
- %
- Zhang \etal \cite{zhang2019ppgnet} adopted the concept of CornerNet~\cite{law2018cornernet}
- to extract line segments as pairs of key points in indoor scenes.
- %
- Huang \etal \cite{huang2018learning} proposed a two-head network to predict lines and junction points for wireframe parsing.
- %
- This was extended by Zhou \etal~\cite{zhou2019end}, who added a line proposal sub-network and
- proposed an end-to-end architecture for accurate line segment detection in wireframe parsing.
- All these methods extract line-wise feature vectors by LoI pooling, which aggregates deep features solely along each line,
- leading to inadequate context information.
- \subsection{Semantic Line Detection}
- Meaningful straight lines that help photographic composition were first discussed in~\cite{lee2017semantic}
- and named ``semantic lines''.
- %
- \cite{lee2017semantic} regarded semantic line detection as a special case of object detection.
- %
- It first extracts CNN representations of line proposals using LoI pooling, which bilinearly interpolates the
- features along the entire straight line.
- %
- Then the line representations are verified by a classifier and a regressor, similar to Fast R-CNN~\cite{girshick2015fast}.
- %
- The line proposals consist of all unique candidate lines in an image.
- %
- An intersection-over-union (IoU) metric for straight lines was proposed in~\cite{lee2017semantic} to evaluate the similarity between two straight lines in an image.
- %
- This metric may produce ambiguous definitions in some scenarios, as will be mentioned in~\cref{sec:metric}.
- %
- Besides, Lee \etal \cite{lee2017semantic} collected a semantic line detection dataset which
- contains about 1,700 outdoor images, most of which are natural landscapes.
- % \cite{qi2019deep} proposes to vote 3D objects to their centroid for object detection in cloud points.
- %-----------------------------------------------------------------------------------%
- \section{Approach}\label{sec:dht}
- %-----------------------------------------------------------------------------------%
- In this section, we give the details of the proposed deep Hough transform for semantic line
- detection.
- %
- Our proposed method mainly contains four components:
- 1) a CNN encoder that extracts pixel-wise deep representations;
- 2) the deep Hough transform (DHT) that converts the deep representations from the spatial domain to the parametric domain;
- 3) a line detector that is responsible for detecting lines in the parametric space, and
- 4) a reverse Hough transform (RHT) component that converts the detected lines back to image space.
- %
- All these components are integrated into an end-to-end framework that performs forward inference and backward training within a single step.
- \revise{The pipeline is illustrated in~\cref{fig:pipeline}, and the detailed architecture is shown in the supplementary materials.}
- \CheckRmv{
- \begin{figure*}[tb!]
- \centering
- \begin{overpic}[scale=0.52]{figures/pipeline.pdf}
- \put(31, 28){$X$}
- \put(54, 28){$Y$}
- \put(40.2, 13.9){$\mathcal{H}$}
- \put(40.2, 17.65){$\mathcal{H}$}
- \put(40.2, 21.45){$\mathcal{H}$}
- \put(40.2, 25.35){$\mathcal{H}$}
- \put(83.65, 11.5){$\mathcal{R}$}
- \put(41.7, 1){$\mathcal{H}$}
- \put(55.3, 1){$\mathcal{R}$}
- \put(67.15, 1.2){\scriptsize CTX}
- \put(64.1, 14.2){\scriptsize CTX}
- \put(64.1, 18.4){\scriptsize CTX}
- \put(64.1, 21.5){\scriptsize CTX}
- \put(64.1, 25){\scriptsize CTX}
- \put(88.6, 19){\scriptsize Loss}
- \put(85, 27){\small Training only}
- \put(93.5, 7.5){\small Testing}
- \put(94.5, 5){\small only}
- \put(5, 0.8){Upsample + Concat}
- \put(30, 0.8){Add}
- \put(47, 0.8){DHT}
- \put(60, 0.8){RHT}
- \put(72, 0.8){CTX}
- \end{overpic}
- % \vspace{-8pt}
- \caption{
- \revise{The pipeline of our proposed method. DHT is short for the proposed Deep Hough Transform,
- and RHT represents the Reverse Hough Transform. CTX means the context-aware line detector which contains
- multiple convolutional layers.}
- }\label{fig:pipeline}
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- \subsection{Line Parameterization and Reverse} \label{sec:preliminary}
- \CheckRmv{
- \begin{figure}[!htb]
- \centering
- \begin{overpic}[height=.6\linewidth]{figures/line_param.pdf}
- \put(29, 29){$r_l$}
- \put(57, 55){$\theta_l$}
- \put(100, 50){$x$}
- \put(52, 100){$y$}
- \end{overpic}
- \caption{A line can be parameterized by its distance $r_l$ to the origin and its angle $\theta_l$ w.r.t. the x-axis.
- }\label{fig:param}
- \end{figure}
- }
- %
- As shown in~\cref{fig:param},
- given a 2D image \revise{$I_{H\times W}\in\mathbb{R}^{H\times W}$},
- we set the origin to the center of the image.
- %
- In the 2D plane, a straight line $l$ can be parameterized by two parameters: an orientation parameter $\theta_l\in [0, \pi)$ representing the angle between $l$ and the x-axis and a distance parameter $r_l$, indicating the distance between $l$ and the origin.
- %
- Obviously $\forall \ l \in I, r_l \in [-\sqrt{W^2+H^2}/2, \sqrt{W^2+H^2}/2]$.
- Given any line $l$ on $I$, we can parameterize it with the above formulation,
- and we can also perform a reverse mapping that translates any valid
- $(r, \theta)$ pair into a line instance.
- %
- \revise{We define the line-to-parameters and the inverse mapping
- as:
- \begin{equation}
- \begin{split}
- r_l, \theta_l &\leftarrow P(l), \\
- l &\leftarrow P^{-1}(r_l, \theta_l).
- \label{eq:parameterize}
- \end{split}
- \end{equation}
- Obviously, both $P$ and $P^{-1}$ are bijective mappings.}
- %
- In practice, $r$ and $\theta$ are quantized to discrete bins to be processed by computer programs.
- %
- Suppose the quantization intervals for $r$ and $\theta$ are $\Delta r$ and $\Delta \theta$, respectively.
- %
- Then the quantization can be formulated as below:
- \begin{equation}
- \begin{split}
- \hat{r}_l = \ceil*{\frac{r_l}{\Delta r}}, \
- \hat{\theta}_l = \ceil*{\frac{\theta_l}{\Delta \theta}},
- \end{split}\label{eq:quantization}
- \end{equation}
- where $\hat{r}_l$ and $\hat{\theta_l}$ are the quantized line parameters.
- %
- The numbers of quantization levels, denoted by $\Theta$ and $R$, are:
- \begin{equation}
- \begin{split}
- \Theta = \frac{\pi}{\Delta \theta}, \
- R = \frac{\sqrt{W^2+H^2}}{\Delta r},
- \end{split}\label{eq:grid-size}
- \end{equation}
- as shown in~\cref{fig:DHT}.
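- For concreteness, the following minimal NumPy sketch illustrates the parameterization $P$, its inverse $P^{-1}$, and the quantization in~\cref{eq:quantization} and~\cref{eq:grid-size}; the function names, the endpoint-based line representation, and the index shift that keeps $\hat{r}$ non-negative are illustrative assumptions rather than the released implementation.
- \begin{verbatim}
- import numpy as np
-
- def parameterize(x1, y1, x2, y2):
-     # P(l): angle in [0, pi) between the line and the
-     # x-axis, and signed distance r from the origin
-     # (placed at the image center) to the line.
-     theta = np.arctan2(y2 - y1, x2 - x1) % np.pi
-     r = -x1 * np.sin(theta) + y1 * np.cos(theta)
-     return r, theta
-
- def unparameterize(r, theta, half_len=1e3):
-     # P^{-1}(r, theta): two points on the line.
-     x0, y0 = -r * np.sin(theta), r * np.cos(theta)
-     dx, dy = np.cos(theta), np.sin(theta)
-     return (x0 - half_len * dx, y0 - half_len * dy,
-             x0 + half_len * dx, y0 + half_len * dy)
-
- def quantize(r, theta, H, W, dr=1.0, dth=np.pi / 180):
-     # eq:quantization gives the bin indices,
-     # eq:grid-size gives the grid size (Theta, R).
-     Theta = int(np.ceil(np.pi / dth))
-     R = int(np.ceil(np.sqrt(H ** 2 + W ** 2) / dr))
-     r_hat = int(np.ceil(r / dr)) + R // 2  # shift to non-negative
-     theta_hat = int(np.ceil(theta / dth))
-     return (theta_hat, r_hat), (Theta, R)
- \end{verbatim}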
- %-----------------------------------------------------------------------------------%
- \subsection{Feature Transformation with Deep Hough Transform} \label{sec:dht-dht}
- \CheckRmv{
- \begin{figure*}[tb]
- \centering
- \hfill
- \subfigure[]{
- \label{fig:DHT}
- \begin{overpic}[height=.18\linewidth]{figures/dht.pdf}
- % \put(70.5, 11.5){$\mathcal{T}$}
- \put(-5, 19){$H$}
- \put(15, 33.3){$W$}
- %
- % \put(92, 16){$\Theta$}
- \put(55, 19){$\Theta$}
- \put(75, 33.3){$R$}
- %
- \put(20, 43){$\mathbf{X}$}
- \put(79, 43){$\mathbf{Y}$}
- \put(76, 8){$(\hat{\theta}_l, \hat{r}_l)$}
- \end{overpic}
- }\hfill
- \subfigure[]{
- \label{fig:nonlocal}
- \begin{overpic}[width=0.46\linewidth]{figures/nonlocal.pdf}
- \put(19, 39){$W$}
- \put(-6, 20){$H$}
- \put(80, 39){$\Theta$}
- \put(102, 20){$R$}
- \put(45, 24){DHT}
- \put(45, 11){RHT}
- \put(6, -4){feature space}
- \put(65, -4){parametric space}
- \end{overpic}
- }\hfill
- \caption{
- (a): Features along a line in the feature space (blue, left) are
- accumulated to a point $(\hat{r}_l,\hat{\theta}_l)$ in the parametric space (red, right).
- (b): Illustration of the proposed context-aware feature aggregation.
- %
- Features of nearby lines in the feature space (left)
- are translated into neighbor points in the parametric space (right).
- %
- In the parametric space, a simple $3\times 3$ convolutional operation can
- easily capture contextual information for the central line (orange).
- %
- Best viewed in color.
- }
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- \subsubsection{Deep Hough transform.}
- Given an input image $I$, we first extract deep CNN features
- $\mathbf{X} \in \mathbb{R} ^ {C \times H \times W}$ with the encoder network,
- where $C$ indicates the number of channels and $H$ and $W$ are the spatial size.
- %
- Afterward, the deep Hough transform (DHT) takes $\mathbf{X}$ as input and produces
- the transformed features, $\mathbf{Y} \in \mathbb{R} ^ {C \times \Theta \times R}$.
- %
- The size of transformed features, $\Theta, R$, is determined by the quantization intervals, as
- described in~\cref{eq:grid-size}.
- As shown in~\cref{fig:DHT},
- given an arbitrary line $l$ on the image,
- we aggregate features of all pixels along $l$,
- to $(\hat{\theta}_l, \hat{r}_l)$ in the parametric space $\mathbf{Y}$:
- \begin{equation}
- \mathbf{Y}(\hat{\theta}_l, \hat{r}_l) = \sum_{i\in l} \mathbf{X}(i),
- \label{eq:dht}
- \end{equation}
- where $i$ is the positional index.
- %
- $\hat{\theta}_l$ and $\hat{r}_l$ are determined by the parameters of line $l$, according
- to~\cref{eq:parameterize}, and then quantized into discrete grids, according to~\cref{eq:quantization}.
- \revise{Given the number of quantization levels $\Theta$ and $R$,
- we have $\Theta\cdot R$ unique line candidates.
- %
- Then the DHT is applied to all these candidate lines and their respective features
- are aggregated to the corresponding position in $\mathbf{Y}$.}
- %
- % These lines are obtained by connecting arbitrary pairs of pixels on the edges
- % of an image, and then excluding the duplicated lines.
- %
- It is worth noting that DHT is order-agnostic in both the feature space and the parametric space,
- making it highly parallelizable.
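- To make~\cref{eq:dht} concrete, the following naive, non-vectorized PyTorch-style sketch spells out the voting step; the rasterization helper \texttt{line\_pixels} is a hypothetical function, and in practice the aggregation is parallelized as noted above.
- \begin{verbatim}
- import torch
-
- def deep_hough_transform(X, bins, line_pixels):
-     # X: C x H x W feature map; bins = (Theta, R).
-     # line_pixels(t, r) -> list of (y, x) pixels on the
-     # candidate line with quantized parameters (t, r).
-     C, H, W = X.shape
-     Theta, R = bins
-     Y = X.new_zeros(C, Theta, R)
-     for t in range(Theta):
-         for r in range(R):
-             pix = line_pixels(t, r)
-             if not pix:
-                 continue
-             ys = torch.tensor([p[0] for p in pix])
-             xs = torch.tensor([p[1] for p in pix])
-             # eq:dht -- sum features along the line
-             Y[:, t, r] = X[:, ys, xs].sum(dim=1)
-     return Y
- \end{verbatim}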
- %-----------------------------------------------------------------------------------%
- \subsubsection{Multi-scale DHT with FPN.}\label{sec:ms-dht-fpn}
- Our proposed DHT could be easily applied to arbitrary spatial features.
- %
- We use the feature pyramid network (FPN)~\cite{lin2017feature} as our encoder.
- %
- FPN can help to extract multi-scale and rich semantic features.
- Specifically, the FPN outputs 4 feature maps $X_1, X_2, X_3, X_4$ and their respective
- resolutions are $1/4$, $1/8$, $1/16$, $1/16$ of the input resolution.
- %
- Then each feature map is transformed by a DHT module independently, as shown in~\cref{fig:pipeline}.
- %
- Since these feature maps have different resolutions and we use the same quantization
- intervals in all stages (see~\cref{eq:grid-size} for details),
- the transformed features $Y_1, Y_2, Y_3, Y_4$ also have different sizes.
- %
- To fuse the transformed features, we interpolate $Y_2, Y_3, Y_4$
- to the size of $Y_1$ and then concatenate them.
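- A minimal sketch of this fusion step, assuming four DHT outputs with batch and channel dimensions, is given below.
- \begin{verbatim}
- import torch
- import torch.nn.functional as F
-
- def fuse_parametric_features(Y1, Y2, Y3, Y4):
-     # Y_i: N x C_i x Theta_i x R_i maps from DHT.
-     # Interpolate Y2..Y4 to the size of Y1, then
-     # fuse all four by concatenation.
-     size = Y1.shape[-2:]
-     ups = [F.interpolate(Y, size=size, mode='bilinear',
-                          align_corners=False)
-            for Y in (Y2, Y3, Y4)]
-     return torch.cat([Y1] + ups, dim=1)
- \end{verbatim}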
- %-----------------------------------------------------------------------------------%
- \subsection{Line Detection in the Parametric Space}
- %-----------------------------------------------------------------------------------%
- \subsubsection{Context-aware line detector.}\label{sec:ctx-line-detector}
- % Our detector takes DHT-transformed features as input, and outputs a $\Theta\times R$
- % probability map.
- After the deep Hough transform (DHT), features are translated to the parametric space,
- where each grid location $(\theta, r)$ corresponds to
- the features along an entire line $l=P^{-1}(\theta, r)$ in the feature space.
- %
- An important reason to transform the features into the parametric space
- is that line structures can
- be represented more compactly there.
- %
- As shown in~\cref{fig:nonlocal},
- lines nearby a specific line $l$ are translated to
- surrounding points near $(\theta_l, r_l)$.
- %
- Consequently, features of nearby lines can be efficiently aggregated
- using convolutional layers in the parametric space.
- In each stage of the FPN,
- we use two $3\times 3$ convolutional layers
- to aggregate contextual line features.
- %
- Then we interpolate features
- to match the resolution of features from different stages, as illustrated in
- ~\cref{fig:pipeline},
- and concatenate the interpolated features together.
- %
- Finally, a $1\times 1$ convolutional layer is applied to the concatenated feature maps
- to produce pointwise predictions.
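- The following PyTorch-style sketch summarizes the detector head described above; the channel width and the number of stages are illustrative assumptions rather than the exact configuration used in our experiments.
- \begin{verbatim}
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
- class ContextAwareDetector(nn.Module):
-     def __init__(self, ch=128, stages=4):
-         super().__init__()
-         # two 3x3 convs per stage aggregate features
-         # of nearby lines in the parametric space
-         self.ctx = nn.ModuleList([nn.Sequential(
-             nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU(),
-             nn.Conv2d(ch, ch, 3, padding=1), nn.ReLU())
-             for _ in range(stages)])
-         # 1x1 conv gives point-wise line scores
-         self.predict = nn.Conv2d(ch * stages, 1, 1)
-
-     def forward(self, ys):  # list of N x ch x Theta_i x R_i
-         size = ys[0].shape[-2:]
-         feats = [F.interpolate(c(y), size=size,
-                                mode='bilinear',
-                                align_corners=False)
-                  for c, y in zip(self.ctx, ys)]
-         return self.predict(torch.cat(feats, dim=1))
- \end{verbatim}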
- %-----------------------------------------------------------------------------------%
- \subsubsection{Loss function.}\label{sec:loss-func}
- Since the prediction is directly produced in the parametric space,
- we calculate the loss in the same space as well.
- %
- For a training image $I$, the ground-truth lines are first converted into
- the parametric space with the standard Hough transform.
- %
- Then, to help the network converge faster, we smooth and expand the ground-truth with a
- Gaussian kernel.
- %
- Similar tricks have been used in many other tasks such as
- crowd counting~\cite{liu2019context,cheng2019learning} and road segmentation~\cite{VecRoad_20CVPR}.
- %
- Formally, let $\mathbf{G}$ be the binary ground-truth map in the parametric space,
- where $\mathbf{G}_{i,j} = 1$ indicates that there is a line located at $(i,j)$.
- %
- The expanded ground-truth map is
- $$\hat{\mathbf{G}} = \mathbf{G}\circledast K,$$
- where $K$ is a $5\times 5$ Gaussian kernel and $\circledast$ denotes the convolution operation.
- %
- An example pair of smoothed ground-truth and the predicted map is shown
- in~\cref{fig:pipeline}.
- In the end, we compute the cross-entropy between the smoothed ground-truth
- and the predicted map in the parametric space:
- \begin{equation}
- L = -\sum_i \Big\{ \hat{\mathbf{G}}_i\cdot\log(\mathbf{P}_i) +
- (1-\hat{\mathbf{G}}_i)\cdot\log(1-\mathbf{P}_i)
- \Big\},
- \end{equation}
- where $\mathbf{P}$ denotes the predicted map and $i$ indexes locations in the parametric space.
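- A minimal sketch of the target construction and loss is shown below; the Gaussian standard deviation and the re-normalization of the smoothed map to a peak value of 1 are illustrative assumptions.
- \begin{verbatim}
- import torch
- import torch.nn.functional as F
-
- def gaussian_kernel(size=5, sigma=1.0):
-     ax = torch.arange(size).float() - (size - 1) / 2
-     g = torch.exp(-(ax ** 2) / (2 * sigma ** 2))
-     k = torch.outer(g, g)
-     return (k / k.sum()).view(1, 1, size, size)
-
- def line_loss(pred_logits, G):
-     # G: N x 1 x Theta x R binary ground-truth map.
-     K = gaussian_kernel().to(G.device)
-     G_hat = F.conv2d(G.float(), K, padding=2)
-     G_hat = G_hat / G_hat.max().clamp(min=1e-6)
-     # cross-entropy between smoothed GT and prediction
-     return F.binary_cross_entropy_with_logits(
-         pred_logits, G_hat)
- \end{verbatim}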
- %-----------------------------------------------------------------------------------%
- \subsection{Reverse Mapping}\label{sec:reverse}
- Our detector produces predictions in the parametric space representing
- the probability of the existence of lines.
- %
- The predicted map is then binarized with a threshold (\eg 0.01).
- %
- Then we find the connected areas and calculate their centroids.
- %
- These centroids are regarded as the parameters of detected lines.
- %
- At last, all lines are mapped back to the image space with
- $P^{-1}(\cdot)$, as formulated in~\cref{eq:parameterize}.
- %
- We refer to the ``mapping back'' step as ``Reverse Mapping of Hough Transform (RHT)'',
- as shown in~\cref{fig:pipeline}.
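- A minimal sketch of this decoding step is given below, using SciPy for the connected-component labelling; the conversion from bin indices back to $(r, \theta)$ assumes the same quantization intervals and index shift as in the earlier parameterization sketch.
- \begin{verbatim}
- import numpy as np
- from scipy import ndimage
-
- def decode_lines(prob, thresh=0.01,
-                  dr=1.0, dth=np.pi / 180):
-     # prob: Theta x R map of line probabilities.
-     labels, n = ndimage.label(prob > thresh)
-     cents = ndimage.center_of_mass(
-         prob, labels, range(1, n + 1))
-     lines = []
-     for t_idx, r_idx in cents:
-         theta = t_idx * dth
-         r = (r_idx - prob.shape[1] / 2.0) * dr
-         lines.append((r, theta))  # feed to P^{-1}
-     return lines
- \end{verbatim}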
- %-----------------------------------------------------------------------------------%
- \subsection{Edge-guided Line Refinement}\label{sec:refine}
- Semantic lines are outstanding structures that
- separate different regions in a scene.
- %
- Therefore, edges may serve as indicators for semantic lines.
- %
- We propose to refine the detection results by aligning line positions using
- edge information.
- %
- First, we compute an edge map $E$ using HED~\cite{xie2015holistically}.
- %
- Afterward, given a detected line $l$, the edge density of $l$ is defined as the average
- edge response along $l$:
- \begin{equation}
- \rho(l) = \frac{\sum_{i\in l} E_i}{|l|},
- \label{eq:rho}
- \end{equation}
- where $|l|$ is the number of pixels on $l$.
- %
- For the sake of stability, we widen $l$ by 1 pixel on each side (so the total width is 3 pixels)
- when computing~\cref{eq:rho}.
- Let $\mathcal{L}$ be a set of lines that are close to $l$.
- %
- These lines are obtained by moving the end-points of $l$ by $\delta_r$ pixels clockwise
- and anti-clockwise.
- %
- Since there are two end-points and each one has
- $\delta_r+1$ possible locations, the size of the set is $||\mathcal{L}|| = (\delta_r+1)^2$.
- %
- Then the refinement can be achieved by finding the optimal line $l^*$ from $\mathcal{L}$
- that holds the highest edge density:
- \begin{equation}
- l^* = \argmax_{l\in\mathcal{L}} \ \rho(l).\label{eq:refine-search}
- \end{equation}
- The performance of ``edge-guided line refinement'' with different $\delta_r$
- is recorded in ~\cref{sec:ablation-refinement}.
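- A minimal sketch of the refinement search in~\cref{eq:refine-search} is given below; \texttt{draw\_thick\_line}, which rasterizes a 3-pixel-wide line, and the construction of the candidate set $\mathcal{L}$ are hypothetical helpers.
- \begin{verbatim}
- def edge_density(edge_map, pixels):
-     # eq:rho -- mean edge response along the widened
-     # line; edge_map is a NumPy array (HED output).
-     ys, xs = zip(*pixels)
-     return edge_map[list(ys), list(xs)].mean()
-
- def refine_line(edge_map, line, candidates,
-                 draw_thick_line):
-     # candidates: endpoint pairs forming the set L
-     best, best_rho = line, edge_density(
-         edge_map, draw_thick_line(*line))
-     for cand in candidates:
-         rho = edge_density(edge_map,
-                            draw_thick_line(*cand))
-         if rho > best_rho:  # eq:refine-search
-             best, best_rho = cand, rho
-     return best
- \end{verbatim}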
- % \cref{fig:edge} shows the processing of the refinement.
- % \begin{figure}[!htb]
- % \centering
- % \begin{overpic}[width=1.0\linewidth]{figures/edge_align.pdf}
- % \put(13, -5){(a)}
- % \put(48, -5){(b)}
- % \put(81, -5){(c)}
- % \end{overpic}
- % \vspace{4pt}
- % \caption{
- % Example of refining a line with edge information.
- % (a): The origin detected line (yellow).
- % (b): Density of detected line (yellow area) and a nearby line (green).
- % (c): The detected line is replaced by the nearby line (green) which has a larger density.
- % }
- % \label{fig:edge}
- % \end{figure}
- \CheckRmv{
- \begin{figure*}[tb!]
- \centering
- \subfigure[]{
- \begin{overpic}[width=.19\linewidth]{figures/metric_analysis_1.pdf}
- \put(54,66){m}
- \put(74,32){n}
- \put(10,50){p}
- \put(7,25){q}
- \put(19,10){{IOU(m,n)=0.66}}
- \put(20,0){{IOU(p,q)=0.95}}
- \end{overpic}\label{fig:metric_a}
- }
- \hfill
- \subfigure[]{
- \begin{overpic}[width=.19\linewidth]{figures/metric_analysis_2.pdf}
- \put(18,10){{IOU(red)=0.53}}
- \put(16.5,0){{IOU(blue)=0.46}}
- \end{overpic}\label{fig:metric_b}
- }
- \hfill
- \subfigure[]{
- \begin{overpic}[width=.19\linewidth]{figures/metric_analysis_3.pdf}
- \put(54,66){m}
- \put(74,32){n}
- \put(10,50){p}
- \put(7,25){q}
- %
- \put(22,10){{$\mathcal{S}$(m,n)=0.24}}
- \put(23,0){{$\mathcal{S}$(p,q)=0.34}}
- \end{overpic}\label{fig:metric_c}
- }
- \hfill
- \subfigure[]{
- \begin{overpic}[width=.19\linewidth]{figures/metric_analysis_4.pdf}
- \put(16,10){{$\mathcal{S}_d$=0.54, $\mathcal{S}_\theta$=0.02}}
- \put(28.5,0){{$\mathcal{S}$=0.00}}
- \end{overpic}\label{fig:metric_d}
- }\vspace{-10pt}
- \caption{
- (a): Two pairs of lines with similar relative position could have very
- different IOU scores.
- %
- (b): Even humans cannot determine which area (blue or red) should be considered as
- the intersection in the IOU-based metric~\cite{lee2017semantic}.
- %
- (c) and (d): Our proposed metric considers both Euclidean distance and angular distance
- between a pair of lines, resulting in consistent and reasonable scores.
- %
- Best viewed in color.
- }
- \label{fig:metric_analysis}
- \end{figure*}
- }
- \CheckRmv{
- \begin{figure*}[t!]
- \centering
- \begin{overpic}[width=1.0\linewidth]{figures/sl_scores.pdf}
- \put(6, -1.5){$\mathcal{S}=0.1$}
- \put(26.5, -1.5){$\mathcal{S}=0.3$}
- \put(46.5, -1.5){$\mathcal{S}=0.5$}
- \put(66.5, -1.5){$\mathcal{S}=0.7$}
- \put(86.5, -1.5){$\mathcal{S}=0.9$}
- % \put(7, 0.5){$\mathcal{S}=0.8$}
- % \put(31.5, 0.5){$\mathcal{S}=0.85$}
- % \put(57.5, 0.5){$\mathcal{S}=0.9$}
- % \put(82, 0.5){$\mathcal{S}=0.95$}
- \end{overpic}
- \caption{
- Example lines with various EA-scores ($\mathcal{S}$ in ~\cref{eq:ea-score}).
- \revise{The larger the EA-score is, the more similar the lines are.}
- }\label{fig:metric_show}
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- \section{The Proposed Evaluation Metric}
- \label{sec:metric}
- %-----------------------------------------------------------------------------------%
- In this section, we elaborate on the
- proposed evaluation metric that measures the agreement, or alternatively,
- the similarity, between two lines in an image.
- %
- First, we review several widely used metrics in the computer vision community
- and then explain why these existing metrics are not suitable for our task.
- %
- Finally, we introduce our newly proposed metric, which measures the agreement between two lines considering
- both Euclidean distance and angular distance.
- %
- %-----------------------------------------------------------------------------------%
- \subsection{Review of Existing Metrics}
- The intersection over union (IOU) is widely used in object detection, semantic segmentation,
- and many other tasks to measure the agreement between detected bounding boxes
- (or segments) and the ground-truth.
- %
- Lee \etal \cite{lee2017semantic} adapted the original IOU to line detection,
- and proposed a line-based IOU to evaluate the quality of detected lines.
- %
- Concretely, the similarity between two lines is measured by the area of their intersection region
- divided by the image area.
- %
- Taking~\cref{fig:metric_a} as an example, the similarity between lines $m$ and $n$
- is $\text{IOU}(m,n) = area({{red}})/area(I)$.
- However, we argue that this IOU-based metric is improper and may lead to unreasonable
- or ambiguous results under specific circumstances.
- %
- As illustrated in~\cref{fig:metric_a},
- two pairs of lines (m, n, and p, q) with similar structures could have
- very different IOU scores.
- %
- In~\cref{fig:metric_b}, even humans cannot determine which areas
- ({red} or {blue}) should be used as
- the intersection areas in the line-based IOU.
- \rerevise{There are other metrics,
- \eg the Earth Mover's Distance (EMD)~\cite{rubner2000earth}
- and the Chamfer distance (CD)~\cite{borgefors1986distance},
- that can be used to measure line similarities.
- %
- However, these metrics require rasterizing the lines into pixels
- and then calculating pixel-wise distances, which is less efficient.
- }
- \rerevise{
- To remedy the deficiencies,
- we propose a simple yet effective metric that measures the similarity
- of two lines in the parametric space.
- %
- Our proposed metric is much more efficient than EMD and CD.
- %
- Quantitative comparisons in ~\cref{sec:quantitative} demonstrate that
- our proposed metric presents very similar results to EMD and CD.}
- \CheckRmv{
- \begin{figure*}[!htb]
- \centering
- \begin{overpic}[width=1\linewidth]{figures/dataset_vis.jpg}
- \end{overpic}\vspace{-8pt}
- \caption{
- Example images and annotations (yellow lines) of \revise{NKL}.
- %
- Images of \revise{NKL} present diverse scenes and rich line annotations.
- }
- \label{fig:dataset_vis}
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- \subsection{The Proposed Metric}
- Our proposed metric, \revise{termed \textbf{EA-score}}, considers both
- \textbf{E}uclidean distance and \textbf{A}ngular distance
- between a pair of lines.
- %
- Let $l_i, l_j$ be a pair of lines to be measured,
- the angular distance $\mathcal{S}_\theta$ is defined according to the
- angle between two lines:
- \begin{equation}
- \mathcal{S}_\theta = 1 - \frac{\theta(l_i, l_j)}{\pi/2},
- \label{eq:sa}
- \end{equation}
- where $\theta(l_i, l_j)$ is the angle between $l_i$ and $l_j$.
- %
- The Euclidean distance is defined as:
- %
- \begin{equation}
- \mathcal{S}_d = 1 - D(l_i, l_j),
- \label{eq:sd}
- \end{equation}
- where $D(l_i, l_j)$ is the Euclidean distance between midpoints of $l_i$ and $l_j$.
- %
- % Examples of $\mathcal{S}_\theta$ and $\mathcal{S}_d$ can be found in ~\cref{fig:metric_c} and ~\cref{fig:metric_d}.
- %
- Note that we normalize the image into a unit square before calculating $D(l_i, l_j)$.
- %
- Examples of $\mathcal{S}_d$ and $\mathcal{S}_\theta$ can be found in
- ~\cref{fig:metric_c} and~\cref{fig:metric_d}.
- %
- Finally, our proposed EA-score is:
- \begin{equation}
- \mathcal{S} = (\mathcal{S}_\theta \cdot \mathcal{S}_d)^2.
- \label{eq:ea-score}
- \end{equation}
- The score in~\cref{eq:ea-score} is squared to make it more sensitive and discriminative
- when the values are high.
- Several example line pairs and corresponding EA-scores
- are demonstrated in~\cref{fig:metric_show}.
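- For reference, a minimal NumPy sketch of the EA-score computed from line endpoints in a unit-square-normalized image is given below; clamping $\mathcal{S}_d$ at zero when the midpoint distance exceeds 1 is an illustrative assumption.
- \begin{verbatim}
- import numpy as np
-
- def ea_score(l1, l2):
-     # l1, l2: ((x1, y1), (x2, y2)) endpoints of the
-     # two lines in a unit-square-normalized image.
-     (a1, b1), (a2, b2) = l1
-     (c1, d1), (c2, d2) = l2
-     # eq:sa -- angular similarity
-     t1 = np.arctan2(b2 - b1, a2 - a1) % np.pi
-     t2 = np.arctan2(d2 - d1, c2 - c1) % np.pi
-     dt = abs(t1 - t2)
-     dt = min(dt, np.pi - dt)
-     s_theta = 1.0 - dt / (np.pi / 2)
-     # eq:sd -- midpoint-distance similarity
-     m1 = np.array([(a1 + a2) / 2, (b1 + b2) / 2])
-     m2 = np.array([(c1 + c2) / 2, (d1 + d2) / 2])
-     s_d = max(0.0, 1.0 - np.linalg.norm(m1 - m2))
-     # eq:ea-score
-     return (s_theta * s_d) ** 2
- \end{verbatim}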
- %-----------------------------------------------------------------------------------%
- \section{\revise{NKL: a semantic line detection dataset}}\label{sec:nkl-dataset}
- %-----------------------------------------------------------------------------------%
- To the best of our knowledge, there is only one dataset, SEL~\cite{lee2017semantic},
- specifically for semantic line detection.
- %
- SEL contains 1,715 images, of which 175 are used for testing
- and the others for training.
- %
- To fill the gap between large CNN-based models and \revise{the} scale of the existing
- dataset,
- we collect a new dataset for semantic line detection.
- The new dataset, \revise{namely NKL (short for \textbf{N}an\textbf{K}ai \textbf{L}ines), contains 6,500 images} that present richer
- diversity in terms of both scenes and the number of lines.
- %
- Each image of \revise{NKL} is annotated by multiple skilled human annotators to
- ensure the annotation quality.
- %
- The dataset is open available on our project page.
- \CheckRmv{
- \begin{table}[hbt!]
- \renewcommand{\arraystretch}{1.3}
- \renewcommand\tabcolsep{5pt}
- \caption{Number of images and lines in SEL~\cite{lee2017semantic} and \revise{NKL}.}
- \vspace{-5pt}
- \begin{tabular}{c|c|c|c}
- \toprule
- Dataset &
- \makecell{Total \\ \#images, \#lines} &
- \makecell{Training \\ \#images, \#lines} &
- \makecell{Evaluation \\ \#images, \#lines} \\
- \hline
- SEL~\cite{lee2017semantic} & 1,715, \ 2,791 & 1,541, \ 2,493 & 175, \ 298\\
- \revise{NKL (Ours)} & \revise{6,500}, \ \revise{13,148} & \revise{5,200}, \ \revise{10,498} & \revise{1,300, \ 2,650} \\
- \bottomrule
- \end{tabular}
- \label{tab:number_statistics}
- \end{table}
- }
- % The new dataset is openly available at ~\url{http://kaizhao.net/sl5k}.
- \subsection{Data Collection and Annotation}\label{sec:data-collect-and-anno}
All the images of \revise{NKL} are crawled from the internet
% pixabay\footnote{\url{https://pixabay.com}}
using specific keywords such as sea, grassland, etc.
%
After copyright checking, we carefully retain only images that contain
at least one semantic line.
%
Since the annotation of semantic lines is subjective and depends on annotators,
each image is first annotated by 3 knowledgeable human annotators
and verified by others.
%
A line is regarded as positive only if all 3 annotators agree on it.
%
The inconsistent lines are then reviewed by two additional annotators.
%
In summary, each line is examined by at least 3 and at most 5 annotators,
and a line is regarded as positive only if it is marked as positive by all 3 initial annotators
or, after review, by more than 3 of the 5 annotators.
- \subsection{Dataset Statistics}
- \subsubsection{Number of images and semantic lines}
- % There are 6,500 images in NKL dataset.
- %
- % Images in NKL are randomly split into training and evaluation sets that contain
- % 5,200 and 1,300 images, respectively.
- %
In total, there are \revise{13,148 semantic lines in NKL} and 2,791 semantic lines in
the SEL~\cite{lee2017semantic} dataset.
%
\cref{tab:number_statistics} summarizes the number of images and lines of the two datasets.
\cref{fig:number_statistics} shows the histogram of the per-image number of lines
in the \revise{NKL} and SEL~\cite{lee2017semantic} datasets.
%
More than half \revise{(67\%, 4,356/6,500)} of the images in the \revise{NKL} dataset contain more than 1 semantic line,
while the corresponding percentage for SEL is only 45.5\%.
- \CheckRmv{
- \begin{figure}[!htb]
- \centering
- \begin{overpic}[width=1\linewidth]{figures/line_numbers.pdf}
- \put(15, 26){\rotatebox{30}{\scriptsize 33.0\%}}
- \put(19, 41){\rotatebox{30}{\scriptsize 54.5\%}}
-
- \put(26, 31){\rotatebox{29}{\scriptsize 40.2\%}}
- \put(30, 25.5){\rotatebox{30}{\scriptsize 31.5\%}}
- \put(37, 17.3){\rotatebox{30}{\scriptsize 19.4\%}}
- \put(40, 11.5){\rotatebox{30}{\scriptsize 11.3\%}}
- \put(47, 8.6){\rotatebox{30}{\scriptsize 6.7\%}}
- \put(51, 5){\rotatebox{30}{\scriptsize 2.1\%}}
- \put(58.5, 5.5){\rotatebox{30}{\scriptsize 0.54\%}}
- \put(62.2, 4.5){\rotatebox{30}{\scriptsize 0.41\%}}
- \put(69, 4.8){\rotatebox{30}{\scriptsize 0.01\%}}
- \put(73, 4.2){\rotatebox{30}{\scriptsize 0.01\%}}
- \put(80, 5){\rotatebox{30}{\scriptsize 0.01\%}}
- \put(83, 4){\rotatebox{30}{\scriptsize 0}}
- \put(90.5, 5){\rotatebox{30}{\scriptsize 0.003\%}}
- \put(93.5, 4){\rotatebox{30}{\scriptsize 0}}
- \put(78, 39.6){\scriptsize NKL (Ours)}
- \put(78, 34.4){\scriptsize SEL~\cite{lee2017semantic}}
- \end{overpic}
- \caption{
\revise{Histogram of the per-image number of lines.
%
The lines of our dataset are more evenly distributed than
those of SEL.}
- }
- \label{fig:number_statistics}
- \end{figure}
- }
- \subsubsection{Diversity Analysis}
To analyze the diversity of the SEL and \revise{NKL} datasets,
we feed all the images into a ResNet50~\cite{he2016deep} network pretrained on Places365~\cite{zhou2017places},
and then collect the outputs as category labels.
%
The results are presented in~\cref{fig:dataset_distribution}.
There are 365 categories in total in the Places365~\cite{zhou2017places} dataset,
among which we obtain 167 unique category labels on the SEL dataset and \revise{327 on NKL}.
%
Besides, as shown in~\cref{fig:dataset_distribution}, scene labels on the
\revise{NKL} dataset are more evenly distributed than those on the SEL dataset.
%
For example, in the SEL dataset, the top-3 most populated categories (sky, field, desert) make up more
than a quarter of the total,
while in \revise{NKL}, the top-3 categories make up less than one-fifth of the total.
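A hedged sketch of this labeling procedure is shown below; the checkpoint
filename is a placeholder, and the preprocessing follows common ImageNet-style
settings rather than a specification from the text.
\begin{verbatim}
import torch, torchvision as tv
from PIL import Image

# Classify every image with a Places365-pretrained ResNet50 and count
# the unique predicted scene categories per dataset (sketch only).
model = tv.models.resnet50(num_classes=365)
state = torch.load('resnet50_places365.pth', map_location='cpu')  # assumed file
model.load_state_dict(state)
model.eval()

preprocess = tv.transforms.Compose([
    tv.transforms.Resize((224, 224)),
    tv.transforms.ToTensor(),
    tv.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

def scene_labels(image_paths):
    labels = set()
    with torch.no_grad():
        for path in image_paths:
            x = preprocess(Image.open(path).convert('RGB')).unsqueeze(0)
            labels.add(int(model(x).argmax(dim=1)))
    return labels   # e.g. 167 unique labels on SEL vs. 327 on NKL
\end{verbatim}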
- \CheckRmv{
- \begin{figure*}[tb]
- \centering
- \hfill
- \subfigure[]{
- \begin{overpic}[width=.48\linewidth]{figures/SEL.pdf}
- \put(6, 55){SEL~\cite{lee2017semantic}}
- \end{overpic}
- }\hfill
- \subfigure[]{
- \begin{overpic}[width=0.465\linewidth]{figures/NKL.pdf}
- \put(5, 57){NKL}
- \end{overpic}
- }\hfill
- \vspace{-10pt}
- \caption{
- \revise{
Category distribution of the SEL (a) and NKL (b) datasets.
%
Category labels are obtained through a Places365-pretrained model.
%
There are 327 (out of 365) scene labels present in the NKL dataset,
in contrast to 167 in the SEL dataset.
%
The labels of NKL are also more evenly distributed than
those of SEL.}
- }
- \label{fig:dataset_distribution}
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- %-----------------------------------------------------------------------------------%
- \section{Experiments}\label{sec:experiments}
- %-----------------------------------------------------------------------------------%
In this section, we introduce the implementation details of our system
and report experimental results in comparison with existing methods.
- %-----------------------------------------------------------------------------------%
- \subsection{Implementation Details} \label{sec:protocol}
- %
Our system is implemented with the PyTorch~\cite{paszke2019pytorch} framework,
and a Jittor~\cite{hu2020jittor} implementation is also available.
%
Since the proposed deep Hough transform (DHT) is highly parallelizable,
we implement DHT with native CUDA programming,
while all other parts are implemented with the framework-level Python API.
We use a single RTX 2080 Ti GPU for all experiments.
- %-----------------------------------------------------------------------------------%
- \subsubsection{Network architectures.}
We use two representative network architectures,
ResNet50~\cite{he2016deep} and VGGNet16~\cite{simonyan2014very}, as our backbones,
and FPN~\cite{lin2017feature} to extract multi-scale deep representations.
%
For the ResNet backbone, following the common practice in previous works~\cite{zhao2017pyramid,chen2018encoder,pami20Res2net},
dilated convolution~\cite{yu2015multi} is used in the last stage to increase the resolution of the feature maps. The commonly used batch normalization~\cite{ioffe2015batch} is also adopted in the network.
The dilated convolutions and normalization could be further tuned with~\cite{gao2021rbn,gao2021global2local} in future work.
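For illustration, a hedged torchvision-based sketch of such a backbone
(dilated last stage plus FPN) is given below; it is an approximation for
exposition, not the authors' exact implementation, and the layer names and
channel numbers follow standard torchvision conventions.
\begin{verbatim}
import torch
import torchvision as tv
from torchvision.models._utils import IntermediateLayerGetter
from torchvision.ops import FeaturePyramidNetwork

# ResNet50 with dilation in its last stage, plus an FPN over stages C2-C5.
resnet = tv.models.resnet50(replace_stride_with_dilation=[False, False, True])
body = IntermediateLayerGetter(resnet, return_layers={
    'layer1': 'c2', 'layer2': 'c3', 'layer3': 'c4', 'layer4': 'c5'})
fpn = FeaturePyramidNetwork(in_channels_list=[256, 512, 1024, 2048],
                            out_channels=256)

x = torch.rand(1, 3, 400, 400)
features = fpn(body(x))      # multi-scale features fed to the DHT modules
print([(name, f.shape) for name, f in features.items()])
\end{verbatim}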
- %-----------------------------------------------------------------------------------%
- \subsubsection{Hyper-parameters.}
The size of the Gaussian kernel used in~\cref{sec:loss-func} is $5\times5$.
%
All images are resized to $400\times400$ and then grouped into mini-batches of 8.
%
We train all models for 30 epochs using the Adam optimizer~\cite{kingma2014adam} without weight decay.
%
The learning rate and momentum are set to $2 \times 10^{-4}$ and 0.9, respectively.
%
The quantization intervals $\Delta\theta, \Delta r$ are detailed in
\cref{sec:quant-intervals} and~\cref{eq:intervals}.
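The following is a minimal, self-contained sketch of these optimization
settings; the network, loss, and data below are random stand-ins rather than
the actual DHT model and SEL/NKL data.
\begin{verbatim}
import torch
from torch.utils.data import DataLoader, TensorDataset

model = torch.nn.Conv2d(3, 1, kernel_size=3, padding=1)   # stand-in network
criterion = torch.nn.BCEWithLogitsLoss()                   # stand-in loss
data = TensorDataset(torch.rand(16, 3, 400, 400),          # 400x400 inputs
                     torch.rand(16, 1, 400, 400))          # stand-in targets
loader = DataLoader(data, batch_size=8, shuffle=True)      # mini-batch of 8

optimizer = torch.optim.Adam(model.parameters(), lr=2e-4,
                             betas=(0.9, 0.999),            # "momentum" 0.9
                             weight_decay=0)                # no weight decay
for epoch in range(30):                                     # 30 epochs
    for images, targets in loader:
        optimizer.zero_grad()
        loss = criterion(model(images), targets)
        loss.backward()
        optimizer.step()
\end{verbatim}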
- %-----------------------------------------------------------------------------------%
- \CheckRmv{
- \begin{figure*}[!htb]
- \centering
- \begin{overpic}[width=1\linewidth]{figures/matching.pdf}
- \put(-0.5,10){{\color{BlueGreen}{$p_1$}}}
- \put(-0.5,8){{\color{BlueGreen}{$p_2$}}}
- \put(-0.5,6){{\color{BlueGreen}{$p_3$}}}
- \put(-0.5,4.2){{\color{BlueGreen}{$p_4$}}}
- %
- \put(15.5,11){{\color{orange}{$g_1$}}}
- \put(15.5,7.2){{\color{orange}{$g_2$}}}
- \put(15.5,4.5){{\color{orange}{$g_3$}}}
- %
- \put(26,1.5){$g_1$}
- \put(31.5,1.5){$g_2$}
- \put(37.8,1.5){$g_3$}
- \put(22.5,15.2){$p_1$}
- \put(29,15.2){$p_2$}
- \put(35,15.2){$p_3$}
- \put(41.3,15.2){$p_4$}
- %
- \put(54,1.5){$g_1$}
- \put(61,1.5){$g_2$}
- \put(67,1.5){$g_3$}
- \put(51.5,15.2){$p_1$}
- \put(57.3,15.2){$p_2$}
- \put(63.3,15.2){$p_3$}
- \put(70,15.2){$p_4$}
- %
- \put(95.1,1.5){\scriptsize FN}
- \put(79,15.2){\scriptsize TP}
- \put(86,15.2){\scriptsize TP}
- \put(92,15.2){\scriptsize FP}
- \put(98,15.2){\scriptsize FP}
- %
- \put(20,8){$\rightarrow$}
- \put(47,8){$\rightarrow$}
- \put(75,8){$\rightarrow$}
- %
- \put(8,-1){(a)}
- \put(32,-1){(b)}
- \put(60,-1){(c)}
- \put(87.6,-1){(d)}
- \end{overpic}
- \caption{
- Illustration of the bipartite graph matching in evaluation.
- %
- (a) An example image with 3 ground-truth lines ($g_1, g_2, g_3$)
- and 4 predictions ($p_1, p_2, p_3, p_4$).
- %
(b) The corresponding bipartite graph.
%
The edge between a pair of nodes represents the similarity ($\mathcal{S}$ in~\cref{eq:ea-score}) between the two lines.
%
(c) After maximum matching of the bipartite graph, each node
is connected to at most one node from the other subset.
%
(d) True positives (TP), false positives (FP) and false negatives (FN).
- }\vspace{20pt}
- \label{fig:matching}
- \end{figure*}
- }
- % \begin{equation}
- % \begin{split}
- % \Theta &= 100, \\
- % R &= \sqrt{\frac{W^2+H^2}{2}},
- % \end{split}\label{eq:intervals}
- % \end{equation}
- % where $H, W$ are the size of feature maps to be transformed in DHT.
- \CheckRmv{
- \begin{figure*}[htb!]
- \centering
- \begin{overpic}[width=0.9\linewidth]{figures/search_theta_r_pami.pdf}
- \put(95,1){$\times\pi$}
- \put(45,1){$\times\sqrt{2}$}
- \end{overpic}\vspace{-10pt}
- \caption{
- Left: performance under different distance quantization intervals $\Delta r$ with a fixed
- angular quantization interval $\Delta\theta=\pi/100$.
- %
Larger $\Delta r$ indicates fewer quantization levels $R$.
- Right: performance under different angular quantization intervals $\Delta \theta$ with a fixed
- distance quantization interval $\Delta r=\sqrt{2}$.
- }\label{fig:ablation-interval}
- \end{figure*}
- }
- \subsubsection{Datasets and data augmentation.}
\revise{Our experiments are conducted on the
SEL~\cite{lee2017semantic} dataset and our proposed
NKL dataset.}
%
\revise{The statistics of the two datasets are detailed in~\cref{sec:nkl-dataset}.}
- % which is, to the best of our knowledge, the only dataset for semantic line detection.
- %
- % The SEL dataset is composed of 1715 images, 1541 images for training, and 175 for testing.
- %
- % There are 1.63 lines per image on average, and
- % each image contains 1 line at least and 6 lines at most.
- %
Following the setup in~\cite{lee2017semantic}, we use only left-right flipping
for data augmentation in all our experiments.
- \subsection{Evaluation Protocol}
We measure the quality of detected lines in terms of \emph{precision},
\emph{recall} and \emph{F-measure}.
%
The first step is to match the detected lines with the ground-truth lines.
Let $\mathcal{P}$ and $\mathcal{G}$ be the sets of predicted lines
and ground-truth lines, respectively,
%
and let $p_i$ and $g_j$ denote an individual predicted line and ground-truth line.
%
We first match the lines in $\mathcal{P}$ and $\mathcal{G}$ based on bipartite matching.
%
Let $G = \{V, E\}$ be a bipartite graph%
\footnote{\url{https://en.wikipedia.org/wiki/Bipartite_graph}}.
%
The vertex set $V$ can be divided into two disjoint and independent sets,
in our case, $\mathcal{P}$ and $\mathcal{G}$:
\begin{equation*}
\begin{split}
V &= \mathcal{P} \cup \mathcal{G}, \\
\mathcal{P} \cap \mathcal{G} &= \emptyset.
\end{split}
\end{equation*}
\rerevise{
Each edge in $E$ denotes the similarity between a pair of lines under
a certain similarity measure.
%
Apart from the proposed EA-score, we also use two other popular metrics:
the earth mover's distance (EMD)~\cite{rubner2000earth} and the Chamfer distance (CD)~\cite{borgefors1986distance}, as described in~\cref{sec:metric}.
%
Note that we normalize both EMD and CD by their maximal possible
value to bound the value
within [0, 1] (for both EMD and CD, the maximal distance occurs when
the two lines shrink to two points at opposite corners of the image).}
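For concreteness, a hedged sketch of such a normalized Chamfer distance
between two line segments is given below; the sampling density \texttt{n} and
the normalization constant are illustrative assumptions (for coordinates in
the unit square, the maximal point-to-point distance is $\sqrt{2}$).
\begin{verbatim}
import numpy as np
from scipy.spatial.distance import cdist

def chamfer_distance(l1, l2, n=100):
    """Normalized Chamfer distance sketch: l1, l2 are (x1, y1, x2, y2)
    endpoints with coordinates normalized to the unit square."""
    t = np.linspace(0.0, 1.0, n)[:, None]
    p1 = (1 - t) * np.array(l1[:2]) + t * np.array(l1[2:])  # samples on line 1
    p2 = (1 - t) * np.array(l2[:2]) + t * np.array(l2[2:])  # samples on line 2
    d = cdist(p1, p2)                                        # pairwise distances
    cd = 0.5 * (d.min(axis=1).mean() + d.min(axis=0).mean())
    return cd / np.sqrt(2.0)                                 # bound in [0, 1]
\end{verbatim}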
Given the graph $G = \{V, E\}$, a matching in a bipartite graph is a set of edges chosen in such a way that no two edges share
a common vertex.
%
In our task, given the set of predicted lines $\mathcal{P}$ and the set of
ground-truth lines $\mathcal{G}$, we seek a matching such that
each ground-truth line $g_i$ corresponds to at most one detected line $p_j$
and vice versa.
%
This problem, maximum matching of a bipartite graph, can be easily solved using the
classical Hungarian method~\cite{kuhn1955hungarian} with polynomial time complexity.
After matching $\mathcal{P}$ and $\mathcal{G}$, we can calculate true positives
(TP), false positives (FP) and false negatives (FN) accordingly.
%
As illustrated in~\cref{fig:matching}, predicted lines ($p_1, p_2$) that are paired with
ground-truth lines ($g_2, g_1$) are considered true positives.
%
Predicted lines ($p_3, p_4$) that are not matched with any ground-truth line are
false positives,
and a ground-truth line ($g_3$) without a corresponding predicted line
is a false negative.
- Finally, the \textbf{P}recision, \textbf{R}ecall, and \textbf{F}-measure are:
- \begin{equation}
- P = \frac{TP}{TP+FP}, \
- R = \frac{TP}{TP+FN}, \
- F = \frac{2PR}{P + R}.
- \label{eq:pr}
- \end{equation}
We apply a series of thresholds $\tau = 0.01, 0.02, \ldots, 0.99$ to the matched prediction \& ground-truth pairs.
%
Accordingly, we derive a series of precision, recall, and F-measure scores.
%
Finally, we evaluate the performance in terms of average precision, recall, and
F-measure. \rerevise{We use EMD~\cite{rubner2000earth}, CD~\cite{borgefors1986distance}, and our proposed EA metric
for quantitative comparisons.
%
In the ablation study, we only use the EA metric for simplicity.}
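A simplified sketch of this protocol is given below; SciPy's
\texttt{linear\_sum\_assignment} serves as a stand-in for the Hungarian method,
and \texttt{similarity} can be the EA-score or a similarity derived from the
normalized EMD/CD.
\begin{verbatim}
import numpy as np
from scipy.optimize import linear_sum_assignment

def prf_at_threshold(pred_lines, gt_lines, tau, similarity):
    """Match predictions to ground truth by maximum-weight bipartite
    matching and count TP/FP/FN at similarity threshold tau."""
    if len(pred_lines) == 0 or len(gt_lines) == 0:
        tp = 0
    else:
        S = np.array([[similarity(p, g) for g in gt_lines]
                      for p in pred_lines])
        rows, cols = linear_sum_assignment(-S)     # maximize total similarity
        tp = int(np.sum(S[rows, cols] >= tau))     # matched pairs above tau
    fp, fn = len(pred_lines) - tp, len(gt_lines) - tp
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f

# Averaging over tau = 0.01, ..., 0.99 gives the reported average P/R/F:
# taus = np.arange(0.01, 1.00, 0.01)
\end{verbatim}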
- %-----------------------------------------------------------------------------------%
- \subsection{\revise{Tuning the Quantization Intervals}}\label{sec:quant-intervals}
The quantization intervals $\Delta\theta$ and $\Delta r$ in~\cref{eq:quantization}
are important factors for both performance and runtime efficiency.
%
Larger intervals lead to fewer quantization levels, \ie smaller $\Theta$ and $R$,
and thus a faster model.
%
With smaller intervals, there are more quantization levels,
and the computational overhead is heavier.
\revise{We perform a coordinate descent on the SEL~\cite{lee2017semantic} dataset
to find intervals that are computationally efficient and functionally
effective.
%
Note that we use the EA-score as the line similarity measure due to its simplicity.
- %
In the first round, we fix the angular quantization interval to $\Delta\theta=\pi/100$
and then search for $\Delta r$;
the results are shown in~\cref{fig:ablation-interval}(a).
%
According to~\cref{fig:ablation-interval}(a),
the performance first rises slowly and then drops as $\Delta r$ decreases,
with the turning point near $\Delta r = \sqrt{2}$.
%
In the second round, we fix $\Delta r = \sqrt{2}$ and train with different $\Delta\theta$.
%
Similar to~\cref{fig:ablation-interval}(a),
the results in~\cref{fig:ablation-interval}(b) show that
the performance first increases smoothly as $\Delta \theta$ decreases,
and then quickly drops with fluctuations.
%
Therefore, the turning point $\Delta\theta=\pi/100$ is a proper choice for angular quantization.}
In summary, we use $\Delta\theta=\pi/100$ and $\Delta r = \sqrt{2}$
for quantization, and the corresponding quantization levels are:
\begin{equation}
\Theta = 100, \ R = \sqrt{\frac{W^2+H^2}{2}},
\label{eq:intervals}
\end{equation}
where $H$ and $W$ are the height and width of the feature maps to be transformed by DHT.
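A small sketch of this computation is shown below; rounding to the nearest
integer is an assumption for illustration, and an implementation may instead
take the ceiling.
\begin{verbatim}
import math

# With the chosen intervals, an H x W feature map is transformed into a
# Theta x R parametric grid.
def quantization_levels(H, W, delta_theta=math.pi / 100,
                        delta_r=math.sqrt(2)):
    theta_levels = int(round(math.pi / delta_theta))            # Theta = 100
    r_levels = int(round(math.sqrt(H * H + W * W) / delta_r))   # sqrt((H^2+W^2)/2)
    return theta_levels, r_levels

print(quantization_levels(100, 100))   # -> (100, 100)
\end{verbatim}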
- %-----------------------------------------------------------------------------------%
- \subsection{Quantitative Comparisons} \label{sec:quantitative}
- %-----------------------------------------------------------------------------------%
We compare our proposed method with SLNet~\cite{lee2017semantic}
and the classical Hough line detector~\cite{duda1971use} with HED~\cite{xie2015holistically}
as the edge detector.
%
Note that we train the HED edge detector on the SEL~\cite{lee2017semantic} training set
using the line annotations as edge ground-truth.
\subsubsection{Results on the SEL dataset}
\cref{tab:quantitative} summarizes the results on the SEL dataset~\cite{lee2017semantic}.
%
With either VGG16 or ResNet50 as the backbone, our proposed method consistently outperforms
SLNet and HED+HT by a considerable margin.
%
In addition to~\cref{tab:quantitative}, we plot the F-measure \emph{vs.} threshold and
the precision \emph{vs.} recall curves.
%
\cref{fig:curves} reveals that our method achieves a higher F-measure than the others
over a wide range of thresholds.
- \CheckRmv{
- \begin{table*}[!htb]
- \renewcommand{\arraystretch}{1.3}
- \renewcommand\tabcolsep{8.0pt}
- \centering
- % \color{blue}
- \caption{
Quantitative comparisons
on the SEL~\cite{lee2017semantic} and NKL datasets.
%
On the SEL~\cite{lee2017semantic} dataset, our method (without ER) significantly outperforms the other competitors in terms of
average F-measure.
`CD,' `EMD,' and `EA' are the different evaluation metrics described in \cref{sec:metric}.
- }\vspace{-6pt}
- % \resizebox{0.97\textwidth}{!}{
- \begin{tabular}{l|c|ccc|ccc|ccc}
- \toprule
- \multirow{2}{*}{Dataset} & \multirow{2}{*}{Method} & \multicolumn{3}{c|}{CD} & \multicolumn{3}{c|}{EMD} & \multicolumn{3}{c}{EA} \\
- & & Avg. P & Avg. R & Avg. F & Avg. P & Avg. R & Avg. F & Avg. P & Avg. R & Avg. F \\
- \hline
- \multirow{6}{*}{SEL~\cite{lee2017semantic}} &
- HED~\cite{xie2015holistically} + HT~\cite{duda1971use} & 0.491 & 0.578 & 0.531 & 0.461 & 0.543 & 0.498 & 0.356 & 0.420 & 0.385 \\
- & SLNet-iter1~\cite{lee2017semantic} & 0.740 & \textbf{0.905} & 0.812 & 0.723 & \textbf{0.888} & 0.797 & 0.654 & \textbf{0.803} & 0.721 \\
- % & SLNet-iter3~\cite{lee2017semantic} & & & 0.731 & & & 0.755 & & & 0.743 & 1.92\\
- & SLNet-iter5~\cite{lee2017semantic} & 0.826 & 0.841 & 0.834 & 0.810 & 0.824 & 0.817 & 0.735 & 0.747 & 0.741 \\
- & SLNet-iter10~\cite{lee2017semantic} & 0.858 & 0.821 & 0.839 & 0.840 & 0.804 & 0.822 &0.762 & 0.729 & 0.745 \\
- % \cline{2-12}
- & Ours (VGG16) & 0.841 & 0.835 & 0.838 & 0.830 & 0.824 & 0.827 & 0.756 & 0.774 & 0.765\\
- % \cline{2-12}
- % Ours(FPN-ResNet50) & 0.883 & 0.835 & 0.858 \\
- & Ours (ResNet50) & \textbf{0.886} & 0.815 & \textbf{0.849} & \textbf{0.878} & 0.807 & \textbf{0.841} & \textbf{0.819} & 0.755 & \textbf{0.786} \\
- \hline
- \multirow{3}{*}{NKL} &
- HED~\cite{xie2015holistically} + HT~\cite{duda1971use} & 0.301 & 0.878 & 0.448 & - & - & - & 0.213 & 0.622 & 0.318 \\
- & Ours (VGG16) & 0.750 & 0.864 & 0.803 & 0.726 & 0.837 & 0.778 & 0.659 & 0.759 & 0.706 \\
- & Ours (ResNet50) & 0.766 & 0.864 & 0.812 & 0.743 & 0.839 & 0.789 & 0.679 & 0.766 & 0.719 \\
- \bottomrule
- %-----------------------------------------------%
- \end{tabular}
- \label{tab:quantitative}
- \end{table*}
- }
- \CheckRmv{
- \begin{figure*}[!htb]
- \begin{center}
- \begin{overpic}[width=0.9\linewidth]{figures/fpr-curve-pami.pdf}
- \end{overpic}
- \end{center}\vspace{-10pt}
- \caption{
- Left: F-measure under various thresholds.
- Right: The precision-recall curve.
- %
Our method outperforms SLNet~\cite{lee2017semantic} and the classical
Hough transform~\cite{duda1971use} by a
considerable margin.
- %
- Moreover, even with 10 rounds of location refinement,
- SLNet still presents inferior performance.
- }\vspace{20pt}
- \label{fig:curves}
- \end{figure*}
- }
- \CheckRmv{
- \begin{figure*}[htb!]
- \begin{center}
- \begin{overpic}[width=1.0 \linewidth]{figures/visual.jpg}
- \put(0.5, 5){GT}
- \put(0, 19){Ours}
- \put(-1, 34.5){SLNet}
- \put(-1.3, 32){(iter10)}
- % \put(0, 29){\cite{lee2017semantic}}
- \put(-1, 47){SLNet}
- \put(-0.5, 44.5){(iter1)}
- % \put(0, 41.5){\cite{lee2017semantic}}
- % \put(-1, 59){SLNet}
- % \put(-0.5, 56.5){\scriptsize (iter1)}
- % \put(0, 41.5){\cite{lee2017semantic}}
- \put(-1, 59){HED}
- % \put(0, 58.5){\cite{xie2015holistically}}
- \put(-1, 56.5){+ HT}
- % \put(0.5, 53){\cite{duda1971use}}
- \end{overpic}
- \end{center}
- \vspace{-12pt}
- \caption{
- Example detection results of different methods on the SEL dataset.
- %
- Compared to SLNet~\cite{lee2017semantic}
- and classical Hough transform~\cite{duda1971use},
- our results are more consistent with the ground-truth.
- }
- \label{fig:detections-SEL}
- \end{figure*}
- }
- \subsubsection{Results on the \revise{NKL} dataset}
We report the performance of our method on the newly constructed \revise{NKL} dataset.
%
Since SLNet~\cite{lee2017semantic} did not release its training code,
we only compare our method with HED+HT.
%
\rerevise{As shown in~\cref{tab:quantitative}, our proposed method outperforms
the baseline method (HED edge detector + Hough transform) by a clear margin.}
- \CheckRmv{
- \begin{figure*}[tb!]
- \begin{center}
- \begin{overpic}[width=1.0 \linewidth]{figures/sl6500_results.pdf}
- \end{overpic}
- \end{center}
- \vspace{-12pt}
- \caption{
- Detection results of our method on the \revise{NKL} dataset.
- %
- Our method produces results that are visually compatible with human perception.
- }
- \label{fig:detections-NKL}
- \end{figure*}
- }
- %-----------------------------------------------------------------------------------%
- \subsubsection{Runtime efficiency.}
In this section, we benchmark the runtime of different methods, including SLNet~\cite{lee2017semantic}
with various iteration steps, the classical Hough transform, and our proposed method.
Both SLNet~\cite{lee2017semantic} and HT require the HED~\cite{xie2015holistically}
edge detector as a preprocessing step.
%
The non-maximum suppression (NMS) in SLNet requires edge maps as guidance,
%
and the classical Hough transform takes an edge map as input.
%
Moreover, SLNet uses a refining network to enhance the results iteratively,
so its inference speed depends on the number of iteration steps.
%
In contrast, our method produces results with a single forward pass,
and the NMS is as simple as computing the centroid of each connected area
in the parametric space.
Results in~\cref{tab:speed} show that our method is significantly faster than
all the other competitors.
%
Even with only 1 iteration step, SLNet is still slower than our method.
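A hedged sketch of this simple NMS is given below; it assumes the network
output is a 2D score map over $(\theta, r)$ and uses \texttt{scipy.ndimage}
for the connected-component analysis.
\begin{verbatim}
import numpy as np
from scipy import ndimage

# Threshold the predicted parametric map, then take the centroid of each
# connected region as one detected line (theta, r) in the Hough space.
def parametric_nms(hough_map, threshold=0.5):
    labeled, num = ndimage.label(hough_map > threshold)   # connected regions
    if num == 0:
        return []
    centroids = ndimage.center_of_mass(hough_map, labeled,
                                       index=list(range(1, num + 1)))
    return [(theta, r) for theta, r in centroids]         # one line per region

# Toy example on a 100 x 100 parametric map with two blobs:
toy = np.zeros((100, 100))
toy[20:23, 30:33] = 1.0
toy[60:64, 70:75] = 1.0
print(parametric_nms(toy))   # approximately [(21.0, 31.0), (61.5, 72.0)]
\end{verbatim}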
- \CheckRmv{
- \begin{table}[!htb]
- \renewcommand{\arraystretch}{1.4}
- \renewcommand\tabcolsep{4.4pt}
- \centering
- \caption{
- Quantitative speed comparisons.
- %
Our method \revise{(without ER)} is much faster than the other two competitors in the network forward pass.
%
Furthermore, our method does not require any extra processing, \eg edge detection.
%
As a result, our method runs at 49 FPS, which is remarkably
faster than the other two methods.
- }\vspace{-6pt}
- \label{tab:speed}
- \begin{tabular}{l|c|c|c|c}
- \toprule
- Method & Network forward & NMS & Edge & Total \\
- \hline
- SLNet-iter1~\cite{lee2017semantic} & 0.354\ s & 0.079\ s & 0.014\ s & 0.447\ s \\
- SLNet-iter3~\cite{lee2017semantic} & 0.437\ s & 0.071\ s & 0.014\ s & 0.522\ s \\
- SLNet-iter10~\cite{lee2017semantic} & 0.827\ s & 0.068\ s & 0.014\ s & 0.909\ s \\
- HED~\cite{xie2015holistically} + HT~\cite{duda1971use} & 0.014\ s & 0.117\ s & 0.024\ s & 0.155\ s \\
- \hline
- Ours (VGG16) & 0.03\ s & 0.003\ s & 0 & 0.033\ s \\
- \hline
- Ours (ResNet50) & 0.017\ s & 0.003\ s & 0 & \textbf{0.020\ s} \\
- \bottomrule
- %-----------------------------------------------%
- \end{tabular}
- \end{table}
- }
- %-----------------------------------------------------------------------------------%
- \subsection{Qualitative Comparisons } \label{sec:visual}
Here we give several example results of our proposed method along with SLNet
and HED+HT.
%
As shown in~\cref{fig:detections-SEL}, compared with the other methods, our results are more consistent with
the ground-truth as well as human cognition.
%
In addition to the results in~\cref{fig:detections-SEL}, we provide all the detection results of our method
and SLNet \revise{in the supplementary materials}.
- %-----------------------------------------------------------------------------------%
- \subsection{Ablation Study} \label{sec:ablation}
- In this section, we ablate each of the components in our method.
- \subsubsection{Components in DHT} \label{sec:ablation-dht}
- We first ablate components of ``deep Hough transform''.
- %
- Specifically, they are:
- (a) the Deep Hough transform (DHT) module detailed in~\cref{sec:dht-dht};
- (b) the multi-scale (MS) DHT architecture described in~\cref{sec:ms-dht-fpn};
- (c) the context-aware (CTX) line detector proposed in~\cref{sec:ctx-line-detector}.
- %
- Experimental results are shown in~\cref{tab:ablation}.
- We first construct a baseline model with plain ResNet50 and DHT module.
- %
- % Note that the baseline model achieves 0.845 average F-measure, which
- % has already surpassed the SLNet competitor.
- %
Then we verify the effectiveness of the multi-scale (MS) strategy and the context-aware
line detector (CTX) individually:
%
we append MS and CTX to the baseline model separately and evaluate their performance.
%
Results in~\cref{tab:ablation} indicate that both MS and CTX improve
the performance of the baseline model.
Finally, we combine all the components to form our full method,
which achieves the best performance among all the combinations.
%
Experimental results in this section clearly demonstrate that each component
contributes to the success of our method.
- \CheckRmv{
- \begin{table}[!htb]
- \renewcommand{\arraystretch}{1.3}
- \renewcommand\tabcolsep{1.0pt}
- \newcolumntype{C}{>{\centering\arraybackslash}p{0.11\textwidth}}
- \centering
- \caption{
- Ablation study for each component. MS indicates DHTs with multi-scale features as described in~\cref{sec:ms-dht-fpn},
- and CTX means context-aware aggregation as described in~\cref{sec:ctx-line-detector}.
- }\vspace{-6pt}
- % \resizebox{0.8\textwidth}{!}{
- \begin{tabular}{C|C|C|C}
- \toprule
- DHT & MS & CTX & F-measure \\
- % HED+HT & DHT+RHT & MS & CTX & mean \emph{F-measure} \\
- \hline
- % \checkmark & & & 0.846 \\
- % \checkmark & & & & 0.829 \\
- \checkmark & & & 0.664 \\
- \checkmark & \checkmark & & 0.758 \\
- \checkmark & & \checkmark & 0.771 \\
- % \checkmark & \checkmark & & 0.852 \\
- \checkmark & \checkmark & \checkmark & 0.786 \\
- \bottomrule
- %-----------------------------------------------%
- \end{tabular}
-
- \label{tab:ablation}
- \end{table}
- }
- \subsubsection{Edge-guided Refinement} \label{sec:ablation-refinement}
Here we ablate the ``Edge-guided Refinement'' module \revise{(abbreviated as ER)}.
%
First, we test the performance of DHT+ER using different values of $\delta_r$.
%
The $\delta_r$ parameter controls the size of the search space
in ER ($\mathcal{L}$ in~\cref{eq:refine-search}).
%
This experiment is conducted on the SEL dataset using the ResNet50 backbone.
- \CheckRmv{
- \begin{table}[!htb]
- \renewcommand{\arraystretch}{1.3}
- \newcolumntype{C}{>{\centering\arraybackslash}p{0.08\textwidth}}
- \centering
- \caption{
Performance of DHT+ER with different $\delta_r$.
%
Models are trained and tested on the SEL dataset using the ResNet50 backbone.
%
$\delta_r=0$ represents the vanilla DHT method without ER.
- }\vspace{-6pt}
- \newcommand{\CC}{\cellcolor{gray!20}}
- \begin{tabular}{C|C|C|C}
- \toprule
- $\delta_r$ & Precision & Recall & F-measure\\
- \hline
- \CC 0 & \CC 0.8190 & \CC 0.7530 & \CC 0.7861 \\
- 1 & 0.8199 & 0.7561 & 0.7866 \\
- 3 & 0.8208 & 0.7569 & 0.7874 \\
- 5 & 0.8214 & 0.7574 & 0.7880 \\
- 7 & 0.8213 & 0.7573 & 0.7878 \\
- 9 & 0.8212 & 0.7571 & 0.7877 \\
- \bottomrule
- %-----------------------------------------------%
- \end{tabular}
- \label{tab:ablation-refinement-1}
- \end{table}
- }
Results in~\cref{tab:ablation-refinement-1} show that the performance first increases and
then saturates as $\delta_r$ grows.
%
Since the peak performance occurs at $\delta_r = 5$,
%
we set $\delta_r=5$ in the following experiments.
%
With $\delta_r$ set to 5, we compare the performance of our method with and without
ER, using different backbones \revise{and} datasets.
- \CheckRmv{
- \begin{table}[!htb]
- \renewcommand{\arraystretch}{1.3}
- \centering
- \caption{
- Performance with and without ER ($\delta_r=5$) using different backbones \revise{and} datasets.
- }\vspace{-6pt}
- \newcommand{\CC}{\cellcolor{gray!20}}
- \begin{tabular}{l|c|c|c|c|c|c}
- \toprule
- Dataset & Arch & Edge & P & R & F & F@0.95\\
- \hline
- \multirow{4}{*}{SEL~\cite{lee2017semantic}} & VGG16 & & 0.756 & 0.774 & 0.765 & 0.380\\
- & \CC VGG16 & \CC \checkmark & \CC 0.758 & \CC 0.777 & \CC 0.770 & \CC 0.439 \\
& ResNet50 & & 0.819 & 0.753 & 0.786 & 0.420\\
& \CC ResNet50 & \CC \checkmark & \CC 0.821 & \CC 0.757 & \CC 0.788 & \CC 0.461\\
- \hline
- \multirow{4}{*}{\revise{NKL}} & VGG16 & & \revise{0.659} & \revise{0.759} & \revise{0.706} & \revise{0.434}\\
- & \CC VGG16 & \CC\checkmark & \CC \revise{0.664} & \CC \revise{0.765} & \CC \revise{0.711} & \CC \revise{0.472}\\
& ResNet50 & & \revise{0.679} & \revise{0.766} & \revise{0.719} & \revise{0.459}\\
& \CC ResNet50 & \CC \checkmark & \CC \revise{0.684} & \CC \revise{0.771} & \CC \revise{0.725} & \CC \revise{0.486}\\
- \bottomrule
- %-----------------------------------------------%
- \end{tabular}
- \label{tab:ablation-refinement-2}
- \end{table}
- }
Results in~\cref{tab:ablation-refinement-2} clearly demonstrate that
edge-guided refinement effectively improves detection results regardless
of the backbone architecture and dataset.
- %-----------------------------------------------------------------------------------%
- \section{Conclusions}\label{sec:conclusion}
- %-----------------------------------------------------------------------------------%
In this paper, we proposed a simple yet effective method for semantic line detection in
natural scenes.
%
By incorporating the strong learning ability of CNNs into the classical Hough transform,
our method is able to capture complex textures and rich contextual semantics of lines.
%
To better assess the similarity between a pair of lines,
we designed a new evaluation metric that considers both the Euclidean distance and the angular distance
between lines.
%
Besides, a new dataset for semantic line detection
was constructed to bridge the gap between the scale of existing datasets
and the complexity of modern CNN models.
%
Both quantitative and qualitative results revealed that
our method significantly outperforms previous methods in terms of both detection quality and speed.
- \section*{Acknowledgment}
- This research was supported by the Major Project for New Generation of AI
- under Grant No. 2018AAA0100400, NSFC (61922046,61620106008,62002176),
- S\&T innovation project from Chinese Ministry of Education,
- and Tianjin Natural Science Foundation (17JCJQJC43700).
- \bibliographystyle{IEEEtran}
- \bibliography{line}
- \ifCLASSOPTIONcaptionsoff
- \newpage
- \fi
- \ArxivRmv{
- \newcommand{\AddPhoto}[1]{\includegraphics%
- [width=1in,height=1.25in,clip,keepaspectratio]{figures/photos/#1}}
- \begin{IEEEbiography}[\AddPhoto{kai}]{Kai Zhao}
- received his B.S. and M.S. from Shanghai University.
- He is currently a Ph.D. Candidate with the
- College of Computer Science, Nankai University, under the supervision of
- Prof. Ming-Ming Cheng.
- His research interests include statistical learning and computer vision.
- \end{IEEEbiography}
- \vspace{-.4in}
- \begin{IEEEbiography}[\AddPhoto{hanqi}]{Qi Han}
is a master's student at the College of Computer Science, Nankai University,
under the supervision of Prof. Ming-Ming Cheng.
He received his bachelor's degree from Xidian University in 2019.
- His research interests include deep learning and computer vision.
- \end{IEEEbiography}
- \vspace{-.4in}
- \begin{IEEEbiography}[\AddPhoto{zhangchbin}]{Chang-Bin Zhang}
is a master's student at the College of Computer Science, Nankai University,
under the supervision of Prof. Ming-Ming Cheng.
Before that, he received his bachelor's degree from China University of Mining and Technology in 2019.
- His research interests include deep learning and computer vision.
- \end{IEEEbiography}
- \vspace{-.4in}
- \begin{IEEEbiography}[\AddPhoto{xujun}]{Jun Xu}
received his B.Sc. and M.Sc. degrees from the School of Mathematical
Sciences, Nankai University, in 2011 and 2014, and his Ph.D. degree from the Department of Computing, The Hong Kong Polytechnic University, in 2018. He is a Lecturer with the School of Statistics and Data Science, Nankai University.
- His homepage is \url{https://csjunxu.github.io/}.
- \end{IEEEbiography}
- \vspace{-.4in}
- \begin{IEEEbiography}[\AddPhoto{cmm}]{Ming-Ming Cheng}
received his Ph.D. degree
from Tsinghua University in 2012. He then spent
two years as a research fellow with Prof. Philip Torr at
Oxford. He is now a professor at Nankai University, leading the Media Computing Lab. His
research interests include computer graphics,
computer vision, and image processing. He has received research awards including the ACM China
Rising Star Award, the IBM Global SUR Award, and the
CCF-Intel Young Faculty Researcher Program.
- \end{IEEEbiography}
- }
- \end{document}
|