@TechReport{Starner/Pentland:1995,
  author      = {Thad Starner and Alex Pentland},
  year        = {1995},
  keywords    = {ASL HMM},
  institution = {MIT - The Media Laboratory},
  title       = {Real-Time American Sign Language Recognition from Video Using Hidden Markov Models},
  type        = {MIT Media Laboratory Perceptual Computing Section Technical Report},
  number      = {375},
  annote      = {This paper experimentally demonstrates a real-time Hidden Markov Model (HMM) based system for recognizing American Sign Language (ASL) without explicitly modelling the fingers. It uses a 40-word lexicon. The system uses a color camera to track the hands in real time. The hands are tracked by their color, first using colored gloves and later using natural skin color. The tracking does not attempt a fine description of the human hand; instead it produces only a coarse description of hand shape, orientation, and trajectory. In the tracking process, the subject wears gloves of a fixed color (a different color for each hand) and the image is scanned for a pixel of the appropriate color. Once found, this pixel is used as a seed and the region is grown by checking the eight neighbouring pixels for the appropriate color. Every pixel checked is considered part of the hand, which in effect helps suppress edge and lighting aberrations. Second-moment analysis is performed on the resulting bitmap, and the centroid is stored as the seed for the next frame. Tracking by natural skin color is done in almost the same way, using the fact that all human hands have approximately the same hue and saturation. In both cases the extracted features are input to an HMM, assumed to be a first-order Markov process, for recognition of signed words. Experimental verification shows the error rates to be low. This work is very useful because ASL is the language of choice for most deaf people in the United States. Moreover, changing backgrounds and occlusion are not a problem for the tracking method used. -Soumyadeep Paul 2/98},
}
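The following is a minimal sketch of the seeded region-growing tracking step summarised in the annotation above, assuming an RGB frame held in a NumPy array and a simple box test around a target glove colour. The function names, the colour tolerance, and the use of the blob covariance as the shape/orientation feature are illustrative assumptions, not details taken from the report.

import numpy as np
from collections import deque

def is_glove_color(pixel, target, tol=30):
    # Illustrative colour test: accept pixels inside a box around the glove colour (assumption).
    return bool(np.all(np.abs(pixel.astype(int) - np.asarray(target)) <= tol))

def grow_hand_region(frame, seed, target_color):
    # Grow a coarse hand blob from a seed pixel via its 8-connected neighbours.
    h, w, _ = frame.shape
    mask = np.zeros((h, w), dtype=bool)
    queue = deque([seed])
    while queue:
        y, x = queue.popleft()
        if not (0 <= y < h and 0 <= x < w) or mask[y, x]:
            continue
        # Every pixel that gets checked counts as hand, which smooths edge and lighting aberrations.
        mask[y, x] = True
        if not is_glove_color(frame[y, x], target_color):
            continue  # non-matching pixels are kept but the region is not grown from them
        for dy in (-1, 0, 1):
            for dx in (-1, 0, 1):
                if dy or dx:
                    queue.append((y + dy, x + dx))
    ys, xs = np.nonzero(mask)
    centroid = (int(ys.mean()), int(xs.mean())) if len(ys) else seed
    # Second-moment (covariance) analysis of the blob gives the coarse shape/orientation feature.
    moments = np.cov(np.vstack([ys, xs])) if len(ys) > 1 else np.zeros((2, 2))
    return mask, centroid, moments  # the centroid is reused as the seed for the next frame

In a tracker loop the returned centroid seeds the next frame's call, and in this sketch the centroid trajectory plus the moment features would serve as the per-frame observation passed to the HMM.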
@InProceedings{Rehg/Kanade:1994,
  author      = {James M. Rehg and Takeo Kanade},
  year        = {1994},
  keywords    = {HUMAN MOTION NONRIGID GESTURE RECOGNITION VISUAL TRACKING MODEL-BASED VISION},
  institution = {CMU - CS},
  title       = {DigitEyes: Vision-Based Human Hand Tracking},
  booktitle   = {Proceedings of the European Conference on Computer Vision},
  month       = {May},
  annote      = {This paper describes a model-based hand tracking system, called DigitEyes, that can recover the state of a 27-DOF hand model from gray-scale images at speeds of up to 10 Hz. The Denavit-Hartenberg representation is used, in which each finger link has an attached link coordinate system; the transformations between these frames model the kinematics of the hand. The hand is modelled as a collection of 16 rigid bodies: three links for each finger plus the palm. The four fingers are assumed to undergo planar motion with 4 degrees of freedom each, while the thumb is modelled with 5 DOF. Each finger is attached to the palm at a rigid anchor point; finger links are modelled as cylinders and the tips as hemispheres. Tracking is done using local image-based trackers whose outputs, the hand features, are input to the state estimation algorithm, in which the current state is used to predict the feature locations in the next frame. The state estimation algorithm uses a residual vector, the difference between the measured and predicted features, and the state correction is obtained by minimizing the squared magnitude of this residual; if the residual is zero, the hand model is perfectly aligned with the image data. A modified Gauss-Newton algorithm is used to solve the minimization problem (a sketch of such an update appears at the end of this file). The paper also extends the work to the multiple-camera case. The system is tested as a 3D graphical mouse with a single camera and on whole-hand tracking with two cameras, and it performs well in both cases. The paper is very comprehensive, and since the system extracts features from unmarked and unadorned hands, it is convenient too. The paper made very good reading. -Soumyadeep Paul 2/98},
}

@InProceedings{OHYA/EBIHARA_et_al:1996,
  author      = {Jun Ohya and Kazuyuki Ebihara and Jun Kurumisawa and Ryohei Nakatsu},
  year        = {1996},
  keywords    = {VIRTUAL KABUKI THEATER POSTURE ESTIMATION},
  institution = {ATR - MIC Research Lab.},
  title       = {Virtual Kabuki Theater: Towards the Realisation of Human Metamorphosis Systems},
  booktitle   = {IEEE International Workshop on Robot and Human Communication},
  annote      = {This paper describes the Virtual Kabuki Theater the authors have recently developed, in which people at different locations can perform as Kabuki actors in a Kabuki environment. The system consists of three modules. In the first module, 3D models of Kabuki actors are created in advance as wireframe models onto which color texture is mapped; a model can be deformed according to the facial expression data obtained. In the second module, facial expressions and body motions are detected in real time, the body using an infrared camera and the facial expression using a camera fitted to a helmet. In the third module, these movements are reproduced in the Kabuki actor's model by deforming the wireframe. The human posture is obtained by first detecting significant points such as the tips of the hands and the top of the head; the positions of the elbows and knees are then expressed as linear combinations of the significant points, with the coefficients found using a genetic algorithm (GA). Though the method followed is good, the authors have neglected the movement of the fingers. A good read. -Soumyadeep Paul 4/96},
}
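Below is a minimal sketch of the posture-estimation idea in the Virtual Kabuki Theater entry above: an intermediate joint (an elbow or knee) is expressed as a linear combination of the detected significant points, and the combination coefficients are fit by a toy genetic algorithm. The function names, the fitness function, and all GA parameters are illustrative assumptions rather than details from the paper.

import numpy as np

rng = np.random.default_rng(0)

def estimate_joint(significant_points, coeffs):
    # Joint position as a linear combination of the significant points
    # (hand tips, top of the head, ...); points is a (k, d) array, coeffs a (k,) vector.
    return np.dot(np.asarray(coeffs), np.asarray(significant_points, dtype=float))

def fitness(coeffs, examples):
    # Illustrative fitness: negative mean squared error over labelled training postures,
    # where each example is (significant_points, true_joint_position).
    errors = [np.sum((estimate_joint(pts, coeffs) - np.asarray(joint)) ** 2)
              for pts, joint in examples]
    return -float(np.mean(errors))

def ga_fit_coeffs(examples, k, pop_size=50, generations=200, mutation=0.1):
    # Toy genetic algorithm: keep the fitter half, refill with mutated copies of parents.
    population = rng.normal(size=(pop_size, k))
    for _ in range(generations):
        scores = np.array([fitness(c, examples) for c in population])
        parents = population[np.argsort(scores)[-pop_size // 2:]]
        noise = mutation * rng.normal(size=(pop_size - len(parents), k))
        children = parents[rng.integers(len(parents), size=pop_size - len(parents))] + noise
        population = np.vstack([parents, children])
    scores = np.array([fitness(c, examples) for c in population])
    return population[int(np.argmax(scores))]

In this sketch the GA cost is paid once while fitting the coefficients, and only estimate_joint is evaluated per frame at run time.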
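The following is a minimal sketch of the residual-minimization step described in the DigitEyes entry above. It iterates a damped Gauss-Newton correction that drives the difference between measured and predicted image features toward zero; the predict and jacobian callables are placeholders standing in for the 27-DOF kinematic hand model, and the damping term is an assumption added here for numerical stability, not the paper's specific modification.

import numpy as np

def gauss_newton_update(state, measured, predict, jacobian, iters=10, damping=1e-3):
    # Correct the hand-model state so that the predicted image features match the
    # measured ones, i.e. minimize the squared magnitude of the residual vector.
    x = np.asarray(state, dtype=float)
    for _ in range(iters):
        residual = np.asarray(measured, dtype=float) - predict(x)  # measured minus predicted features
        J = jacobian(x)                                            # d(predicted features) / d(state)
        # Damped normal equations; the damping keeps the step well conditioned.
        lhs = J.T.dot(J) + damping * np.eye(len(x))
        rhs = J.T.dot(residual)
        x = x + np.linalg.solve(lhs, rhs)
    return x

In a tracking loop, the state estimated for one frame predicts the feature locations for the next, mirroring the prediction step summarised in the entry.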