@article{kishore-cssp-2022,
  author    = {R Kishore {Kumar} and K. Sreenivasa {Rao}},
  title     = {Phoneme Segmentation based Unsupervised Pattern Discovery and Clustering of Speech Signals},
  journal   = {Circuits, Systems, and Signal Processing},
  publisher = {Springer},
  year      = {2022},
}

@article{kishore-csl-2022,
  author    = {R Kishore {Kumar} and K. Sreenivasa {Rao}},
  title     = {A Novel approach to Unsupervised Pattern Discovery in Speech using Convolutional Neural Network},
  journal   = {Computer Speech {\&} Language},
  publisher = {Elsevier},
  year      = {2022},
  volume    = {71},
  pages     = {1--13},
}

@article{nirmalya-ijct-2021,
  author    = {Nirmalya {Sen} and Md {Sahidullah} and Hemant {Patil} and Shyamal Kumar {Dasmandal} and K. Sreenivasa {Rao} and Tapan Kumar {Basu}},
  title     = {Utterance partitioning for speaker recognition: an experimental review and analysis with new findings under {GMM-SVM} framework},
  journal   = {International Journal of Speech Technology},
  publisher = {Springer},
  year      = {2021},
}

@article{hareesh-ieeeaccess-2021,
  author    = {Hareesh {Mandalapu} and P N Aravinda {Reddy} and Raghavendra {Ramachandra} and K. Sreenivasa {Rao} and Pabitra {Mitra} and S. R. Mahadeva {Prasanna} and Christoph {Busch}},
  title     = {Audio-Visual Biometric Recognition and Presentation Attack Detection: A Comprehensive Survey},
  journal   = {IEEE Access},
  year      = {2021},
  volume    = {9},
  pages     = {37431--37455},
}

@article{kumud-aihc-2021,
  author    = {Kumud {Tripathi} and K. Sreenivasa {Rao}},
  title     = {{VOP} Detection for Read and Conversation Speech using {CWT} Coefficients and Phone Boundaries},
  journal   = {Journal of Ambient Intelligence and Humanized Computing},
  publisher = {Springer},
  year      = {2021},
}

@article{kumud-mmt-2021,
  author    = {Kumud {Tripathi} and K.
Sreenivasa {Rao}},
  title     = {Robust Vowel Region Detection Method for Multimode Speech},
  journal   = {Multimedia Tools and Applications},
  publisher = {Springer},
  year      = {2021},
}

@article{tanumay-iethcl-2021,
  author    = {Tanumay {Mandal} and Krothapalli Sreenivasa {Rao} and Sanjay K. {Gupta}},
  title     = {Identification of glottal instants using electroglottographic signal for vulnerable cases of voicing},
  journal   = {IET Healthcare Technology Letters},
  year      = {2021},
}

@article{manju-tallip-2021,
  author    = {Manjunath, K. E. and Dinesh Babu {Jayagopi} and K. Sreenivasa {Rao} and Srinivasa {Raghavan} and V. {Ramasubramanian}},
  title     = {Approaches for Multilingual Phone Recognition in Code-Switched and Non-Code-Switched Scenarios using {Indian} Languages},
  journal   = {ACM Transactions on Asian and Low-Resource Language Information Processing},
  year      = {2021},
}

@article{pradeep-cssp-2020b,
  author    = {Pradeep {Rengaswamy} and K. Sreenivasa {Rao} and Pallab {Dasgupta}},
  title     = {{SongF0}: A Spectrum based Fundamental Frequency Estimation for Monophonic Songs},
  journal   = {Circuits, Systems, and Signal Processing},
  publisher = {Springer},
  year      = {2020},
}

@article{debopriyo-acm-2020,
  author    = {Debopriyo {Banerjee} and Krothapalli Sreenivasa {Rao} and Shamik {Sural} and Niloy {Ganguly}},
  title     = {{BOXREC}: Recommending a Box of Preferred Outfits in Online Shopping},
  journal   = {ACM Transactions on Intelligent Systems and Technology},
  year      = {2020},
}

@article{kiran-ieeeaccess-2020,
  author    = {M. Kiran {Reddy} and Paavo {Alku} and K.
Sreenivasa {Rao}},
  title     = {Detection of Specific Language Impairment in Children Using Glottal Source Features},
  journal   = {IEEE Access},
  year      = {2020},
  volume    = {8},
  pages     = {15273--15279},
}

@article{pradeep-cssp-2020a,
  author    = {Pradeep {Rengaswamy} and M. Gurunath {Reddy} and K. Sreenivasa {Rao} and Pallab {Dasgupta}},
  title     = {{hf0}: A hybrid pitch extraction method for multimodal voice},
  journal   = {Circuits, Systems, and Signal Processing},
  year      = {2020},
}

@article{manju-sad-2020,
  author    = {Manjunath, K. E. and Dinesh Babu {Jayagopi} and K. Sreenivasa {Rao} and V. {Ramasubramanian}},
  title     = {Articulatory feature based methods for performance improvement of multilingual phone recognition systems using {Indian} languages},
  journal   = {Sadhana},
  publisher = {Springer, Indian Academy of Sciences},
  year      = {2020},
}

@article{kumud-sc-2020,
  author    = {Kumud {Tripathi} and M. Kiran {Reddy} and K. Sreenivasa {Rao}},
  title     = {Multilingual and multimode phone recognition for {Indian} languages},
  journal   = {Speech Communication},
  year      = {2020},
}

@article{kumud-iete-2020,
  author    = {Kumud {Tripathi} and K. Sreenivasa {Rao}},
  title     = {{VEP} detection for Read, Extempore and Conversation speech},
  journal   = {IETE Journal of Research},
  publisher = {Taylor \& Francis},
  year      = {2020},
}

@article{pradeep-sc-2020,
  author    = {R {Pradeep} and M. Kiran {Reddy} and K. Sreenivasa {Rao} and Pallab {Dasgupta}},
  title     = {Robust f0 extraction from monophonic signals using adaptive sub-band filtering},
  journal   = {Speech Communication},
  year      = {2020},
  volume    = {116},
  pages     = {77--85},
}

@article{kiran-csl-2019,
  author    = {M. Kiran {Reddy} and K.
Sreenivasa {Rao}},
  title     = {Excitation modelling using epoch features for statistical parametric speech synthesis},
  journal   = {Computer Speech {\&} Language},
  year      = {2019},
}

@article{kiran-npl-2019,
  author    = {M. Kiran {Reddy} and K. Sreenivasa {Rao}},
  title     = {{DNN}-based cross-lingual voice conversion using Bottleneck Features},
  journal   = {Neural Processing Letters},
  publisher = {Springer},
  year      = {2019},
}

@article{pradeep-accs-2019,
  author    = {R. {Pradeep} and M. Kiran {Reddy} and K. Sreenivasa {Rao}},
  title     = {{LSTM}-based robust voicing decision applied to {DNN}-based speech synthesis},
  journal   = {Automatic Control and Computer Sciences},
  publisher = {Springer},
  year      = {2019},
  volume    = {53},
  number    = {4},
  pages     = {328--332},
}

@article{hari2019,
  author    = {Hari {Krishna}, D. M. and K. Sreenivasa {Rao}},
  title     = {Children Story Classification in {Indian} Languages using Linguistic and Keyword based Features},
  journal   = {ACM Transactions on Asian and Low-Resource Language Information Processing},
  year      = {2019},
}

@article{8758152,
  author    = {S. {Biswas} and P. {Mitra} and K. Sreenivasa {Rao}},
  title     = {Relation Prediction of Co-morbid Diseases Using Knowledge Graph Completion},
  journal   = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  year      = {2019},
  pages     = {1--1},
  keywords  = {Disease Co-morbidity;knowledge graph;Markov clustering;embedding;protein-protein interaction},
  doi       = {10.1109/TCBB.2019.2927310},
  issn      = {1545-5963},
  url       = {https://ieeexplore.ieee.org/document/8758152},
}

@article{8731683,
  author    = {Y. M. {Keerthana} and M. K. {Reddy} and K.
Sreenivasa {Rao}},
  title     = {{CWT}-Based Approach for Epoch Extraction From Telephone Quality Speech},
  journal   = {IEEE Signal Processing Letters},
  year      = {2019},
  month     = aug,
  volume    = {26},
  number    = {8},
  pages     = {1107--1111},
  keywords  = {speech processing;telephone sets;wavelet transforms;vocal tract system;clean speech signals;fundamental frequency component;epoch extraction method;speech signal;epoch identification rate;CWT-based approach;impulse-like discontinuities;band-limited telephonic quality speech;impulse-like discontinuity extraction;continuous wavelet transform;JMK speakers;BDL speakers;SLT speakers;CMU Arctic database;G.191 software tools;Continuous wavelet transforms;Telephone sets;Speech processing;Band-pass filters;Time-frequency analysis;Smoothing methods;Continuous wavelet transform;Epoch extraction;Telephonic speech;Hilbert transform},
  doi       = {10.1109/LSP.2019.2921229},
  issn      = {1070-9908},
  url       = {https://ieeexplore.ieee.org/document/8731683},
}

@article{Pradeep2019,
  author    = {Pradeep, R. and Rao, K. Sreenivasa},
  title     = {Incorporation of Manner of Articulation Constraint in {LSTM} for Speech Recognition},
  journal   = {Circuits, Systems, and Signal Processing},
  year      = {2019},
  month     = aug,
  day       = {01},
  volume    = {38},
  number    = {8},
  pages     = {3482--3500},
  abstract  = {The variants of recurrent neural networks such as long short-term memory (LSTM) and gated recurrent unit are successful in sequence modelling such as automatic speech recognition. However, the decoded sequence is prune to have false substitutions, insertions and deletions. In our work, we investigate the outcome of the hidden layers in LSTM trained on TIMIT dataset. We found interestingly that the first hidden layer was capturing information related to some broad manners of articulation. The successive hidden layers try to cluster among the broad manners of articulation. We detected two broad manners of articulation, namely sonorants (vowels, semi-vowels, nasals) and obstruents (fricatives, stops, affricates) by exploiting the spectral flatness measure (SFM) on the linear prediction coefficients. We define a additional gate called manner of articulation gate that is high if the broad manners of articulation of $t$th frame are same as that of $(t+1)$th frame. The manner of articulation detection is embedded at the output of the activation gate of LSTM at the first hidden layer. By doing so, the sonorants being substituted as obstruents are minimized at the output layer. The proposed method decreased the phone error rates by 0.7{\%} when evaluated on the core test set of the TIMIT.},
  issn      = {1531-5878},
  doi       = {10.1007/s00034-019-01074-5},
  url       = {https://doi.org/10.1007/s00034-019-01074-5},
}

@article{R2018254,
  author    = {Kumar, R. Kishore and Birla, Lokendra and Rao, K. Sreenivasa},
  title     = {A robust unsupervised pattern discovery and clustering of speech signals},
  journal   = {Pattern Recognition Letters},
  year      = {2018},
  volume    = {116},
  pages     = {254--261},
  keywords  = {Speech processing, Unsupervised pattern discovery, Clustering of speech utterances},
  abstract  = {In this paper, a novel approach to unsupervised pattern discovery for speech signals is proposed. The proposed work deviates from the standard speech recognition task, and aims to cluster the speech utterances based on the vocabulary of a broad topic. It attempts to discover the matched sequence of phonetic units by making use of the repeated patterns between the speech signals. Identification of matched sequence of phonetic patterns helps in clustering the speech signals, automatically. The proposed approach uses the posterior features derived from Gaussian mixture model (GMM) to find the repeated structure between the speech signals. Image processing techniques are used to identify these matched acoustic patterns. An angle histogram-based method is used to extract the desired matched keyword/phrase patterns present in a pair of speech utterances. The performance of the proposed method is evaluated on Hindi and Bengali news speech corpora using standard objective measures, and also compared with state-of-the-art techniques. The matched pairs of speech utterances obtained by the proposed method are grouped into broader classes using an appropriate clustering technique. The final clusters represent the broader classes of information such as politics, sports, and weather.},
  issn      = {0167-8655},
  doi       = {10.1016/j.patrec.2018.10.035},
  url       = {http://www.sciencedirect.com/science/article/pii/S016786551830864X},
}

@article{GurunathReddy2018,
  author    = {Reddy, M. Gurunath and Rao, K. Sreenivasa},
  title     = {Predominant Melody Extraction from Vocal Polyphonic Music Signal by Time-Domain Adaptive Filtering-Based Method},
  journal   = {Circuits, Systems, and Signal Processing},
  year      = {2018},
  month     = jul,
  day       = {01},
  volume    = {37},
  number    = {7},
  pages     = {2911--2933},
  abstract  = {In this paper, a time-domain adaptive filtering-based melody extraction method is proposed. The proposed method works in multiple stages to extract the vocal melody (singer's fundamental frequency) from vocal polyphonic music signals. The vocal and non-vocal regions of the music signal are identified by the strength of excitation of the source signal. The vocal regions are further segmented into the sequence of notes by detecting their onsets in the frequency representation of the composite signal. The melody contour in each of the vocal note segment is obtained by adaptive zero-frequency filtering in the time domain. The performance of the proposed melody extraction method is compared with the current state-of-the-art melody extraction method in respect of voicing recall rate, voicing false alarm rate, raw pitch, and overall accuracy.},
  issn      = {1531-5878},
  doi       = {10.1007/s00034-017-0696-1},
  url       = {https://doi.org/10.1007/s00034-017-0696-1},
}

@article{8370349,
  author    = {M. K. {Reddy} and K. Sreenivasa {Rao}},
  title     = {Inverse filter based excitation model for {HMM}-based speech synthesis system},
  journal   = {IET Signal Processing},
  year      = {2018},
  volume    = {12},
  number    = {4},
  pages     = {544--548},
  keywords  = {speech synthesis;hidden Markov models;filtering theory;inverse filter based excitation model;HMM-based speech synthesis system;hidden Markov model;HTS;excitation signal modelling;residual signal;excitation features;excitation signal synthesis;target source features;performance evaluation},
  doi       = {10.1049/iet-spr.2017.0546},
  issn      = {1751-9675},
  url       = {https://ieeexplore.ieee.org/document/8370349},
}

@article{prasenjit,
  author    = {Prasenjit Dhara and K. Sreenivasa Rao},
  title     = {Automatic Note Transcription System for {Hindustani} Classical Music},
  journal   = {International Journal of Speech Technology},
  year      = {2018},
}

@article{doi:10.1080/09298215.2017.1330351,
  author    = {S. Samsekai Manjabhat and Shashidhar G. Koolagudi and K. S. Rao and Pravin Bhaskar Ramteke},
  title     = {Raga and Tonic Identification in {Carnatic} Music},
  journal   = {Journal of New Music Research},
  year      = {2017},
  volume    = {46},
  number    = {3},
  pages     = {229--245},
  publisher = {Routledge},
  doi       = {10.1080/09298215.2017.1330351},
  url       = {https://doi.org/10.1080/09298215.2017.1330351},
}

@article{DBLP:journals/cssp/ManjunathR18,
  author    = {K. E. Manjunath and K.
Sreenivasa Rao},
  title     = {Improvement of Phone Recognition Accuracy Using Articulatory Features},
  journal   = {Circuits, Systems, and Signal Processing},
  volume    = {37},
  number    = {2},
  pages     = {704--728},
  year      = {2018},
  url       = {https://doi.org/10.1007/s00034-017-0568-8},
  doi       = {10.1007/s00034-017-0568-8},
  biburl    = {https://dblp.org/rec/bib/journals/cssp/ManjunathR18},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{Yadav2018,
  author    = {Yadav, Jainath and Rao, K. Sreenivasa},
  title     = {Neural network and {GMM} based feature mappings for consonant--vowel recognition in emotional environment},
  journal   = {International Journal of Speech Technology},
  year      = {2018},
  month     = sep,
  day       = {01},
  volume    = {21},
  number    = {3},
  pages     = {421--433},
  abstract  = {In this work, we propose a mapping function based feature transformation framework for developing consonant--vowel (CV) recognition system in the emotional environment. An effective way of conveying messages is by expressing emotions during human conversations. The characteristics of CV units differ from one emotion to other emotions. The performance of existing CV recognition systems is degraded in emotional environments. Therefore, we have proposed mapping functions based on artificial neural network and GMM models for increasing the accuracy of CV recognition in the emotional environment. The CV recognition system has been explored to transform emotional features to neutral features using proposed mapping functions at CV and phone levels to minimize mismatch between training and testing environments. Vowel onset and offset points have been used to identify vowel, consonant and transition segments. Transition segments are identified by considering initial 15{\%} speech samples between vowel onset and offset points. The average performance of CV recognition system is increased significantly using feature mapping technique at phone level in three emotional environments (anger, happiness, and sadness).},
  issn      = {1572-8110},
  doi       = {10.1007/s10772-017-9478-1},
  url       = {https://doi.org/10.1007/s10772-017-9478-1},
}

@article{Tripathi2018,
  author    = {Tripathi, Kumud and Rao, K. Sreenivasa},
  title     = {Improvement of phone recognition accuracy using speech mode classification},
  journal   = {International Journal of Speech Technology},
  year      = {2018},
  month     = sep,
  day       = {01},
  volume    = {21},
  number    = {3},
  pages     = {489--500},
  abstract  = {In this work, we have developed a speech mode classification model for improving the performance of phone recognition system (PRS). In this paper, we have explored vocal tract system, excitation source and prosodic features for development of speech mode classification (SMC) model. These features are extracted from voiced regions of a speech signal. In this study, conversation, extempore, and read speech are considered as three different modes of speech. The vocal tract component of speech is extracted using Mel-frequency cepstral coefficients (MFCCs). The excitation source features are captured through Mel power differences of spectrum in sub-bands (MPDSS) and residual Mel-frequency cepstral coefficients (RMFCCs) of the speech signal. The prosody information is extracted from pitch and intensity. Speech mode classification models are developed using above described features independently, and in fusion. The experiments carried out on Bengali speech corpus to analyze the accuracy of the speech mode classification model using the artificial neural network (ANN), naive Bayes, support vector machines (SVMs) and k-nearest neighbor (KNN). We proposed four classification models which are combined using maximum voting approach for optimal performance. From the results, it is observed that speech mode classification model developed using the fusion of vocal tract system, excitation source and prosodic features of speech, yields the best performance of 98{\%}. Finally, the proposed speech mode classifier is integrated to the PRS, and the accuracy of phone recognition system is observed to be improved by 11.08{\%}.},
  issn      = {1572-8110},
  doi       = {10.1007/s10772-017-9483-4},
  url       = {https://doi.org/10.1007/s10772-017-9483-4},
}

@article{Dutta2018,
  author    = {Dutta, Arup Kumar and Rao, K. Sreenivasa},
  title     = {Language identification using phase information},
  journal   = {International Journal of Speech Technology},
  year      = {2018},
  month     = sep,
  day       = {01},
  volume    = {21},
  number    = {3},
  pages     = {509--519},
  abstract  = {The present work investigates the importance of phase in language identification (LID). We have proposed three phase based features for the language recognition task. In this work, auto-regressive model with scale factor error augmentation have been used for better representation of phase based features. We have developed three group delay based systems, namely, normal group delay based system, auto-regressive model group delay based system and auto-regressive group delay with scale factor augmentation based system. As mel-frequency cepstral coefficients (MFCCs) are extracted from the magnitude of the Fourier transform, we have combined this MFCC-based system with our phase-based systems to exploit the complete information contained in a speech signal. In this work, we have used IITKGP-MLILSC speech database and OGI Multi-language Telephone Speech (OGI-MLTS) corpus for our experiments. We have used Gaussian mixture models for building the language models. From the experimental results it is observed that the LID accuracy obtained from our proposed phase based features is comparable with MFCC features. We have also observed some performance improvement in the LID accuracy on combining the proposed phase-based systems with the state of the art MFCC-based system.},
  issn      = {1572-8110},
  doi       = {10.1007/s10772-017-9482-5},
  url       = {https://doi.org/10.1007/s10772-017-9482-5},
}

@comment{A second, field-for-field identical copy of GurunathReddy2018 (already defined earlier in this file) and a stray closing brace were removed here; the repeated key made BibTeX report a repeated entry.}

@article{RAMUREDDY20161323,
  title     = {Prosody modeling for syllable based text-to-speech synthesis using feedforward neural networks},
  journal   = {Neurocomputing},
  volume    = {171},
  pages     = {1323--1334},
  year      = {2016},
  issn      = {0925-2312},
  doi       = {10.1016/j.neucom.2015.07.053},
  url       = {http://www.sciencedirect.com/science/article/pii/S0925231215010395},
  author    = "V. Ramu Reddy and K.
Sreenivasa Rao",
  keywords  = {Prosody, Text-to-speech synthesis, Feed-forward neural networks, Phonological features, Positional and contextual features, Articulatory features},
}

@article{doi:10.1002/acs.2357,
  author    = {Vuppala, Anil Kumar and Rao, K. Sreenivasa},
  title     = {Speaker identification under background noise using features extracted from steady vowel regions},
  journal   = {International Journal of Adaptive Control and Signal Processing},
  year      = {2013},
  volume    = {27},
  number    = {9},
  pages     = {781--792},
  keywords  = {speaker identification, background noise, steady vowel region, vowel onset points, epochs},
  abstract  = {SUMMARY In this paper, we are exploring features extracted from steady vowel segments for improving the performance of speaker identification system under background noise. Steady vowel regions are produced by periodic impulse-like excitation and they contain relatively high signal energy. Hence, speaker specific information present in steady vowel regions may be less affected by the noise. In this work, steady vowel regions are determined by using the knowledge of accurate vowel onset points and epochs. Speaker identification studies are carried out using TIMIT database for white and vehicle noises. Universal background model--Gaussian mixture model-based modeling is explored for developing speaker models. Significant improvement in the performance of speaker identification is observed by using features extracted from steady vowel region in presence of noisy environments. Copyright {\copyright} 2012 John Wiley \& Sons, Ltd.},
  doi       = {10.1002/acs.2357},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/acs.2357},
}

@article{VUPPALA2012697,
  author    = {Anil Kumar Vuppala and K. Sreenivasa Rao and Saswat Chakrabarti},
  title     = {Improved vowel onset point detection using epoch intervals},
  journal   = {AEU - International Journal of Electronics and Communications},
  year      = {2012},
  volume    = {66},
  number    = {8},
  pages     = {697--700},
  keywords  = {Vowel onset point (VOP), Excitation source, Spectral peaks, Modulation spectrum, Epoch locations, Zero frequency filtering, Epoch intervals},
  issn      = {1434-8411},
  doi       = {10.1016/j.aeue.2011.12.013},
  url       = {http://www.sciencedirect.com/science/article/pii/S1434841111003207},
}

@article{doi:10.1002/acs.1286,
  author    = {Vuppala, Anil Kumar and Rao, K. Sreenivasa and Chakrabarti, Saswat},
  title     = {Improved consonant--vowel recognition for low bit-rate coded speech},
  journal   = {International Journal of Adaptive Control and Signal Processing},
  year      = {2012},
  volume    = {26},
  number    = {4},
  pages     = {333--349},
  keywords  = {consonant--vowel (CV) units recognition, speech coding, support vector machine (SVM), hidden Markov model (HMM), vowel onset point (VOP)},
  abstract  = {SUMMARY In this paper, we proposed a method for improving the recognition performance of 145 prominent consonant--vowel (CV) units in Indian languages for low bit-rate coded speech. Proposed CV recognition method is carried out in two levels to reduce the similarity among a large number of CV classes. In the first level, vowel category of CV unit will be recognized, and in the second level, consonant category will be recognized. At each level of the proposed method, complementary evidences from support vector machine and hidden Markov models are combined to enhance the recognition performance. Effectiveness of the proposed two-level CV recognition method is demonstrated by performing the recognition of isolated CV units and CV units collected from the Telugu broadcast news database. In this work, vowel onset point (VOP) is used as an anchor point for extracting accurate features from the CV unit. Therefore, a method is proposed for accurate detection of VOP in clean and coded speech. The proposed VOP detection method is based on the spectral energy in 500--2500 Hz frequency band of the speech segments present in the glottal closure region. Speech coders considered in this work are GSM full rate (ETSI 06.10), CELP (FS-1016), and MELP (TI 2.4 kbps). Significant improvement in CV recognition performance is achieved using the proposed two-level method compared with the existing methods under both clean and coded conditions. Copyright {\copyright} 2011 John Wiley \& Sons, Ltd.},
  doi       = {10.1002/acs.1286},
  url       = {https://onlinelibrary.wiley.com/doi/abs/10.1002/acs.1286},
}

@article{vuppala2013improved,
  author    = {Vuppala, Anil Kumar and Rao, K Sreenivasa and Chakrabarti, Saswat},
  title     = {Improved speaker identification in wireless environment},
  journal   = {International Journal of Signal and Imaging Systems Engineering},
  year      = {2013},
  volume    = {6},
  number    = {3},
  pages     = {130--137},
  publisher = {Inderscience Publishers Ltd},
  doi       = {10.1504/IJSISE.2013.054789},
  url       = {https://www.inderscienceonline.com/doi/abs/10.1504/IJSISE.2013.054789},
}

@article{RAO2011,
  author    = {Rao, K. Sreenivasa},
  title     = {Role of neural network models for developing speech systems},
  journal   = {Sadhana},
  year      = {2011},
  month     = oct,
  day       = {01},
  volume    = {36},
  number    = {5},
  pages     = {783--836},
  abstract  = {This paper discusses the application of neural networks for developing different speech systems. Prosodic parameters of speech at syllable level depend on positional, contextual and phonological features of the syllables. In this paper, neural networks are explored to model the prosodic parameters of the syllables from their positional, contextual and phonological features. The prosodic parameters considered in this work are duration and sequence of pitch ($F_0$) values of the syllables. These prosody models are further examined for applications such as text to speech synthesis, speech recognition, speaker recognition and language identification. Neural network models in voice conversion system are explored for capturing the mapping functions between source and target speakers at source, system and prosodic levels. We have also used neural network models for characterizing the emotions present in speech. For identification of dialects in Hindi, neural network models are used to capture the dialect specific information from spectral and prosodic features of speech.},
  issn      = {0973-7677},
  doi       = {10.1007/s12046-011-0047-z},
  url       = {https://doi.org/10.1007/s12046-011-0047-z},
}

@article{rao2011identification,
  author    = {Rao, K Sreenivasa and Koolagudi, Shashidhar G},
  title     = {Identification of {Hindi} Dialects and Emotions using Spectral and Prosodic features of Speech},
  journal   = {Journal of Systemics, Cybernetics and Informatics},
  year      = {2011},
  volume    = {9},
  number    = {4},
  pages     = {24--33},
  publisher = {Directory of Open Access Journals},
}

@article{DBLP:journals/speech/YadavFR18,
  author    = {Jainath Yadav and Md. S. Fahad and K. Sreenivasa Rao},
  title     = {Epoch detection from emotional speech signal using zero time windowing},
  journal   = {Speech Communication},
  volume    = {96},
  pages     = {142--149},
  year      = {2018},
  url       = {https://doi.org/10.1016/j.specom.2017.12.002},
  doi       = {10.1016/j.specom.2017.12.002},
  biburl    = {https://dblp.org/rec/bib/journals/speech/YadavFR18},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/bspc/KumarMR17,
  author    = {S. B. Sunil Kumar and Tanumay Mandal and K. Sreenivasa Rao},
  title     = {Robust glottal activity detection using the phase of an electroglottographic signal},
  journal   = {Biomedical Signal Processing
and Control},
  volume    = {36},
  pages     = {27--38},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.bspc.2017.03.007},
  doi       = {10.1016/j.bspc.2017.03.007},
  biburl    = {https://dblp.org/rec/bib/journals/bspc/KumarMR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/csl/NandiPR17,
  author    = {Dipanjan Nandi and Debadatta Pati and K. Sreenivasa Rao},
  title     = {Implicit processing of {LP} residual for language identification},
  journal   = {Computer Speech {\&} Language},
  volume    = {41},
  pages     = {68--87},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.csl.2016.06.002},
  doi       = {10.1016/j.csl.2016.06.002},
  biburl    = {https://dblp.org/rec/bib/journals/csl/NandiPR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/csl/NandiPR17a,
  author    = {Dipanjan Nandi and Debadatta Pati and K. Sreenivasa Rao},
  title     = {Parametric representation of excitation source information for language identification},
  journal   = {Computer Speech {\&} Language},
  volume    = {41},
  pages     = {88--115},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.csl.2016.05.001},
  doi       = {10.1016/j.csl.2016.05.001},
  biburl    = {https://dblp.org/rec/bib/journals/csl/NandiPR17a},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/csl/NarendraR17,
  author    = {N. P. Narendra and K. Sreenivasa Rao},
  title     = {Generation of creaky voice for improving the quality of {HMM}-based speech synthesis},
  journal   = {Computer Speech {\&} Language},
  volume    = {42},
  pages     = {38--58},
  year      = {2017},
  url       = {https://doi.org/10.1016/j.csl.2016.08.002},
  doi       = {10.1016/j.csl.2016.08.002},
  biburl    = {https://dblp.org/rec/bib/journals/csl/NarendraR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/cssp/NarendraR17,
  author    = {N. P. Narendra and K.
Sreenivasa Rao},
  title     = {Parameterization of Excitation Signal for Improving the Quality of {HMM}-Based Speech Synthesis System},
  journal   = {Circuits, Systems, and Signal Processing},
  volume    = {36},
  number    = {9},
  pages     = {3650--3673},
  year      = {2017},
  url       = {https://doi.org/10.1007/s00034-016-0476-3},
  doi       = {10.1007/s00034-016-0476-3},
  biburl    = {https://dblp.org/rec/bib/journals/cssp/NarendraR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/ijst/HaqueR17,
  author    = {Arijul Haque and Krothapalli Sreenivasa Rao},
  title     = {Modification of energy spectra, epoch parameters and prosody for emotion conversion in speech},
  journal   = {International Journal of Speech Technology},
  volume    = {20},
  number    = {1},
  pages     = {15--25},
  year      = {2017},
  url       = {https://doi.org/10.1007/s10772-016-9386-9},
  doi       = {10.1007/s10772-016-9386-9},
  biburl    = {https://dblp.org/rec/bib/journals/ijst/HaqueR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/ijst/SarkarR17,
  author    = {Sourjya Sarkar and K. Sreenivasa Rao},
  title     = {Supervector-based approaches in a discriminative framework for speaker verification in noisy environments},
  journal   = {International Journal of Speech Technology},
  volume    = {20},
  number    = {2},
  pages     = {387--416},
  year      = {2017},
  url       = {https://doi.org/10.1007/s10772-017-9410-8},
  doi       = {10.1007/s10772-017-9410-8},
  biburl    = {https://dblp.org/rec/bib/journals/ijst/SarkarR17},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/spl/ReddyR17,
  author    = {M. Kiran Reddy and K. Sreenivasa Rao},
  title     = {Robust Pitch Extraction Method for the {HMM}-Based Speech Synthesis System},
  journal   = {{IEEE} Signal Process.
Lett.}, volume = {24}, number = {8}, pages = {1133--1137}, year = {2017}, url = {https://doi.org/10.1109/LSP.2017.2712646}, doi = {10.1109/LSP.2017.2712646}, biburl = {https://dblp.org/rec/bib/journals/spl/ReddyR17}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/interspeech/KumarRM17, author = {S. B. Sunil Kumar and K. Sreenivasa Rao and Tanumay Mandal}, title = {Accurate Synchronization of Speech and {EGG} Signal Using Phase Information}, booktitle = {Interspeech 2017, 18th Annual Conference of the International Speech Communication Association, Stockholm, Sweden, August 20-24, 2017}, pages = {694--698}, year = {2017}, crossref = {DBLP:conf/interspeech/2017}, url = {http://www.isca-speech.org/archive/Interspeech_2017/abstracts/1374.html}, biburl = {https://dblp.org/rec/bib/conf/interspeech/KumarRM17}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/cssp/YadavR16, author = {Jainath Yadav and K. Sreenivasa Rao}, title = {Prosodic Mapping Using Neural Networks for Emotion Conversion in Hindi Language}, journal = {{CSSP}}, volume = {35}, number = {1}, pages = {139--162}, year = {2016}, url = {https://doi.org/10.1007/s00034-015-0051-3}, doi = {10.1007/s00034-015-0051-3}, biburl = {https://dblp.org/rec/bib/journals/cssp/YadavR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/ManjunathR16, author = {K. Manjunath and K. Sreenivasa Rao}, title = {Articulatory and excitation source features for speech recognition in read, extempore and conversation modes}, journal = {I. J. Speech Technology}, volume = {19}, number = {1}, pages = {121--134}, year = {2016}, url = {https://doi.org/10.1007/s10772-015-9329-x}, doi = {10.1007/s10772-015-9329-x}, biburl = {https://dblp.org/rec/bib/journals/ijst/ManjunathR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/speech/NarendraR16, author = {N. P. 
Narendra and K. Sreenivasa Rao}, title = {Time-domain deterministic plus noise model based hybrid source modeling for statistical parametric speech synthesis}, journal = {Speech Communication}, volume = {77}, pages = {65--83}, year = {2016}, url = {https://doi.org/10.1016/j.specom.2015.12.002}, doi = {10.1016/j.specom.2015.12.002}, biburl = {https://dblp.org/rec/bib/journals/speech/NarendraR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/speech/KumarR16, author = {S. B. Sunil Kumar and K. Sreenivasa Rao}, title = {Voice/non-voice detection using phase of zero frequency filtered speech signal}, journal = {Speech Communication}, volume = {81}, pages = {90--103}, year = {2016}, url = {https://doi.org/10.1016/j.specom.2016.01.008}, doi = {10.1016/j.specom.2016.01.008}, biburl = {https://dblp.org/rec/bib/journals/speech/KumarR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/PradeepR16, author = {R. Pradeep and K. Sreenivasa Rao}, title = {Deep neural networks for kannada phoneme recognition}, booktitle = {Ninth International Conference on Contemporary Computing, {IC3} 2016, Noida, India, August 11-13, 2016}, pages = {1--6}, year = {2016}, crossref = {DBLP:conf/ic3/2016}, url = {https://doi.org/10.1109/IC3.2016.7880202}, doi = {10.1109/IC3.2016.7880202}, biburl = {https://dblp.org/rec/bib/conf/ic3/PradeepR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icacci/DharaRR16, author = {Prasenjit Dhara and Pradeep Rengaswamy and K. 
Sreenivasa Rao}, title = {Designing automatic note transcription system for Hindustani classical music}, booktitle = {2016 International Conference on Advances in Computing, Communications and Informatics, {ICACCI} 2016, Jaipur, India, September 21-24, 2016}, pages = {899--903}, year = {2016}, crossref = {DBLP:conf/icacci/2016}, url = {https://doi.org/10.1109/ICACCI.2016.7732159}, doi = {10.1109/ICACCI.2016.7732159}, biburl = {https://dblp.org/rec/bib/conf/icacci/DharaRR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icacci/RaiNFYR16, author = {Manish Kumar Rai and Neetish and Md. S. Fahad and Jainath Yadav and K. Sreenivasa Rao}, title = {Language identification using {PLDA} based on i-vector in noisy environment}, booktitle = {2016 International Conference on Advances in Computing, Communications and Informatics, {ICACCI} 2016, Jaipur, India, September 21-24, 2016}, pages = {1014--1020}, year = {2016}, crossref = {DBLP:conf/icacci/2016}, url = {https://doi.org/10.1109/ICACCI.2016.7732177}, doi = {10.1109/ICACCI.2016.7732177}, biburl = {https://dblp.org/rec/bib/conf/icacci/RaiNFYR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icassp/ReddyR16, author = {Gurunath Reddy M. and K. Sreenivasa Rao}, title = {Predominant melody extraction from vocal polyphonic music signal by combined spectro-temporal method}, booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016}, pages = {455--459}, year = {2016}, crossref = {DBLP:conf/icassp/2016}, url = {https://doi.org/10.1109/ICASSP.2016.7471716}, doi = {10.1109/ICASSP.2016.7471716}, biburl = {https://dblp.org/rec/bib/conf/icassp/ReddyR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icassp/NarendraR16, author = {N. P. Narendra and K. 
Sreenivasa Rao}, title = {A deterministic plus noise model of excitation signal using principal component analysis for parametric speech synthesis}, booktitle = {2016 {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016}, pages = {5635--5639}, year = {2016}, crossref = {DBLP:conf/icassp/2016}, url = {https://doi.org/10.1109/ICASSP.2016.7472756}, doi = {10.1109/ICASSP.2016.7472756}, biburl = {https://dblp.org/rec/bib/conf/icassp/NarendraR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icon-nlp/TripathiSR16, author = {Kumud Tripathi and Parakrant Sarkar and K. Sreenivasa Rao}, title = {Sentence Based Discourse Classification for Hindi Story Text-to-Speech {(TTS)} System}, booktitle = {Proceedings of the 13th International Conference on Natural Language Processing, {ICON} 2016, Varanasi, India, December 17-20, 2016}, pages = {46--54}, year = {2016}, crossref = {DBLP:conf/icon-nlp/2016}, url = {https://aclanthology.info/papers/W16-6307/w16-6307}, biburl = {https://dblp.org/rec/bib/conf/icon-nlp/TripathiSR16}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/interspeech/RengaswamyMRD16, author = {Pradeep Rengaswamy and Gurunath Reddy M. and K. 
Sreenivasa Rao and Pallab Dasgupta}, title = {A Robust Non-Parametric and Filtering Based Approach for Glottal Closure Instant Detection}, booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016}, pages = {1795--1799}, year = {2016}, crossref = {DBLP:conf/interspeech/2016}, url = {https://doi.org/10.21437/Interspeech.2016-369}, doi = {10.21437/Interspeech.2016-369}, biburl = {https://dblp.org/rec/bib/conf/interspeech/RengaswamyMRD16}, bibsource = {dblp computer science bibliography, https://dblp.org} }

% ---------------------------------------------------------------------------
% Interspeech 2016 paper followed by 2015 journal articles (DBLP export).
% ---------------------------------------------------------------------------

% NOTE(review): "Gurunath Reddy M." is parsed with "M." as the surname;
% confirm the intended family name before normalising this author.
@inproceedings{DBLP:conf/interspeech/MR16,
  author    = {Gurunath Reddy M. and K. Sreenivasa Rao},
  title     = {Enhanced Harmonic Content and Vocal Note Based Predominant Melody Extraction from Vocal Polyphonic Music Signals},
  booktitle = {Interspeech 2016, 17th Annual Conference of the International Speech Communication Association, San Francisco, CA, USA, September 8-12, 2016},
  pages     = {3309--3313},
  year      = {2016},
  crossref  = {DBLP:conf/interspeech/2016},
  url       = {https://doi.org/10.21437/Interspeech.2016-856},
  doi       = {10.21437/Interspeech.2016-856},
  biburl    = {https://dblp.org/rec/bib/conf/interspeech/MR16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% F0 and HMM are braced so that styles which lower-case titles keep them
% in capitals.
@article{DBLP:journals/cssp/NarendraR15,
  author    = {N. P. Narendra and K. Sreenivasa Rao},
  title     = {Robust Voicing Detection and {F0} Estimation for {HMM}-Based Speech Synthesis},
  journal   = {{CSSP}},
  volume    = {34},
  number    = {8},
  pages     = {2597--2619},
  year      = {2015},
  url       = {https://doi.org/10.1007/s00034-015-9977-8},
  doi       = {10.1007/s00034-015-9977-8},
  biburl    = {https://dblp.org/rec/bib/journals/cssp/NarendraR15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/ManjunathR15, author = {K. Manjunath and K. Sreenivasa Rao}, title = {Source and system features for phone recognition}, journal = {I. J.
Speech Technology}, volume = {18}, number = {2}, pages = {257--270}, year = {2015}, url = {https://doi.org/10.1007/s10772-014-9266-0}, doi = {10.1007/s10772-014-9266-0}, biburl = {https://dblp.org/rec/bib/journals/ijst/ManjunathR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/NandiPR15, author = {Dipanjan Nandi and Debadatta Pati and K. Sreenivasa Rao}, title = {Implicit excitation source features for robust language identification}, journal = {I. J. Speech Technology}, volume = {18}, number = {3}, pages = {459--477}, year = {2015}, url = {https://doi.org/10.1007/s10772-015-9288-2}, doi = {10.1007/s10772-015-9288-2}, biburl = {https://dblp.org/rec/bib/journals/ijst/NandiPR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/sivp/RaoK15, author = {K. Sreenivasa Rao and Shashidhar G. Koolagudi}, title = {Recognition of emotions from video using acoustic and facial features}, journal = {Signal, Image and Video Processing}, volume = {9}, number = {5}, pages = {1029--1045}, year = {2015}, url = {https://doi.org/10.1007/s11760-013-0522-6}, doi = {10.1007/s11760-013-0522-6}, biburl = {https://dblp.org/rec/bib/journals/sivp/RaoK15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/BagiYR15, author = {Randheer Bagi and Jainath Yadav and K. Sreenivasa Rao}, title = {Improved recognition rate of language identification system in noisy environment}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {214--219}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346681}, doi = {10.1109/IC3.2015.7346681}, biburl = {https://dblp.org/rec/bib/conf/ic3/BagiYR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/MMR15, author = {Harikrishna D. M. and Gurunath Reddy M. and K. 
Sreenivasa Rao}, title = {Multi-stage children story speech synthesis for Hindi}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {220--224}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346682}, doi = {10.1109/IC3.2015.7346682}, biburl = {https://dblp.org/rec/bib/conf/ic3/MMR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/SarkarR15, author = {Parakrant Sarkar and K. Sreenivasa Rao}, title = {Analysis and modeling pauses for synthesis of storytelling speech based on discourse modes}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {225--230}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346683}, doi = {10.1109/IC3.2015.7346683}, biburl = {https://dblp.org/rec/bib/conf/ic3/SarkarR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/DuttaR15, author = {Arup Kumar Dutta and K. Sreenivasa Rao}, title = {Robust language identification using Power Normalized Cepstral Coefficients}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {253--256}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346688}, doi = {10.1109/IC3.2015.7346688}, biburl = {https://dblp.org/rec/bib/conf/ic3/DuttaR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/HaqueR15, author = {Arijul Haque and K. 
Sreenivasa Rao}, title = {Analysis and modification of spectral energy for neutral to sad emotion conversion}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {263--268}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346690}, doi = {10.1109/IC3.2015.7346690}, biburl = {https://dblp.org/rec/bib/conf/ic3/HaqueR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/MR15, author = {Gurunath Reddy M. and K. Sreenivasa Rao}, title = {Neutral to happy emotion conversion by blending prosody and laughter}, booktitle = {Eighth International Conference on Contemporary Computing, {IC3} 2015, Noida, India, August 20-22, 2015}, pages = {342--347}, year = {2015}, crossref = {DBLP:conf/ic3/2015}, url = {https://doi.org/10.1109/IC3.2015.7346704}, doi = {10.1109/IC3.2015.7346704}, biburl = {https://dblp.org/rec/bib/conf/ic3/MR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icacci/MR15, author = {Harikrishna D. M. and K. Sreenivasa Rao}, title = {Children story classification based on structure of the story}, booktitle = {2015 International Conference on Advances in Computing, Communications and Informatics, {ICACCI} 2015, Kochi, India, August 10-13, 2015}, pages = {1485--1490}, year = {2015}, crossref = {DBLP:conf/icacci/2015}, url = {https://doi.org/10.1109/ICACCI.2015.7275822}, doi = {10.1109/ICACCI.2015.7275822}, biburl = {https://dblp.org/rec/bib/conf/icacci/MR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icacci/PradeepDRD15, author = {R. Pradeep and Prasenjit Dhara and K. 
Sreenivasa Rao and Pallab Dasgupta}, title = {Raga identification based on Normalized Note Histogram features}, booktitle = {2015 International Conference on Advances in Computing, Communications and Informatics, {ICACCI} 2015, Kochi, India, August 10-13, 2015}, pages = {1491--1496}, year = {2015}, crossref = {DBLP:conf/icacci/2015}, url = {https://doi.org/10.1109/ICACCI.2015.7275823}, doi = {10.1109/ICACCI.2015.7275823}, biburl = {https://dblp.org/rec/bib/conf/icacci/PradeepDRD15}, bibsource = {dblp computer science bibliography, https://dblp.org} }

% ---------------------------------------------------------------------------
% ICAPR 2015 papers (DBLP export).
% Proper nouns and acronyms in titles are braced so that styles which
% lower-case titles keep their capitals.
% ---------------------------------------------------------------------------

@inproceedings{DBLP:conf/icapr/KoolagudiBRR15,
  author    = {Shashidhar G. Koolagudi and Shivakranthi B and K. Sreenivasa Rao and Pravin B. Ramteke},
  title     = {Contribution of {Telugu} vowels in identifying emotions},
  booktitle = {Eighth International Conference on Advances in Pattern Recognition, {ICAPR} 2015, Kolkata, India, January 4-7, 2015},
  pages     = {1--6},
  year      = {2015},
  crossref  = {DBLP:conf/icapr/2015},
  url       = {https://doi.org/10.1109/ICAPR.2015.7050666},
  doi       = {10.1109/ICAPR.2015.7050666},
  biburl    = {https://dblp.org/rec/bib/conf/icapr/KoolagudiBRR15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icapr/NarendraR15,
  author    = {N. P. Narendra and K. Sreenivasa Rao},
  title     = {Optimal residual frame based source modeling for {HMM}-based speech synthesis},
  booktitle = {Eighth International Conference on Advances in Pattern Recognition, {ICAPR} 2015, Kolkata, India, January 4-7, 2015},
  pages     = {1--5},
  year      = {2015},
  crossref  = {DBLP:conf/icapr/2015},
  url       = {https://doi.org/10.1109/ICAPR.2015.7050668},
  doi       = {10.1109/ICAPR.2015.7050668},
  biburl    = {https://dblp.org/rec/bib/conf/icapr/NarendraR15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/icapr/VermaSR15, author = {Rashmi Verma and Parakrant Sarkar and K.
Sreenivasa Rao}, title = {Conversion of neutral speech to storytelling style speech}, booktitle = {Eighth International Conference on Advances in Pattern Recognition, {ICAPR} 2015, Kolkata, India, January 4-7, 2015}, pages = {1--6}, year = {2015}, crossref = {DBLP:conf/icapr/2015}, url = {https://doi.org/10.1109/ICAPR.2015.7050705}, doi = {10.1109/ICAPR.2015.7050705}, biburl = {https://dblp.org/rec/bib/conf/icapr/VermaSR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/interspeech/NarendraR15, author = {N. P. Narendra and K. Sreenivasa Rao}, title = {Automatic detection of creaky voice using epoch parameters}, booktitle = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, September 6-10, 2015}, pages = {2347--2351}, year = {2015}, crossref = {DBLP:conf/interspeech/2015}, url = {http://www.isca-speech.org/archive/interspeech_2015/i15_2347.html}, biburl = {https://dblp.org/rec/bib/conf/interspeech/NarendraR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ncc/SarkarR15, author = {Parakrant Sarkar and K. Sreenivasa Rao}, title = {Data-driven pause prediction for speech synthesis in storytelling style speech}, booktitle = {Twenty First National Conference on Communications, {NCC} 2015, Mumbai, India, February 27 - March 1, 2015}, pages = {1--5}, year = {2015}, crossref = {DBLP:conf/ncc/2015}, url = {https://doi.org/10.1109/NCC.2015.7084924}, doi = {10.1109/NCC.2015.7084924}, biburl = {https://dblp.org/rec/bib/conf/ncc/SarkarR15}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/asc/SarkarR14, author = {Sourjya Sarkar and K. Sreenivasa Rao}, title = {Stochastic feature compensation methods for speaker verification in noisy environments}, journal = {Appl. 
Soft Comput.}, volume = {19}, pages = {198--214}, year = {2014}, url = {https://doi.org/10.1016/j.asoc.2014.02.016}, doi = {10.1016/j.asoc.2014.02.016}, biburl = {https://dblp.org/rec/bib/journals/asc/SarkarR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/RaoNK14, author = {K. Sreenivasa Rao and Dipanjan Nandi and Shashidhar G. Koolagudi}, title = {Film segmentation and indexing using autoassociative neural networks}, journal = {I. J. Speech Technology}, volume = {17}, number = {1}, pages = {65--74}, year = {2014}, url = {https://doi.org/10.1007/s10772-013-9206-4}, doi = {10.1007/s10772-013-9206-4}, biburl = {https://dblp.org/rec/bib/journals/ijst/RaoNK14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/RaoP14, author = {K. Sreenivasa Rao and Ketan Pachpande}, title = {Segmentation, indexing and retrieval of {TV} broadcast news bulletins using Gaussian mixture models and vector quantization codebooks}, journal = {I. J. Speech Technology}, volume = {17}, number = {3}, pages = {259--269}, year = {2014}, url = {https://doi.org/10.1007/s10772-014-9229-5}, doi = {10.1007/s10772-014-9229-5}, biburl = {https://dblp.org/rec/bib/journals/ijst/RaoP14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/SarkarHDMMDVPBYR14, author = {Parakrant Sarkar and Arijul Haque and Arup Kumar Dutta and Gurunath Reddy M. and Harikrishna D. M. and Prasenjit Dhara and Rashmi Verma and N. P. Narendra and Sunil Kr. S. B. and Jainath Yadav and K. 
Sreenivasa Rao}, title = {Designing prosody rule-set for converting neutral {TTS} speech to storytelling style speech for Indian languages: Bengali, Hindi and Telugu}, booktitle = {Seventh International Conference on Contemporary Computing, {IC3} 2014, Noida, India, August 7-9, 2014}, pages = {473--477}, year = {2014}, crossref = {DBLP:conf/ic3/2014}, url = {https://doi.org/10.1109/IC3.2014.6897219}, doi = {10.1109/IC3.2014.6897219}, biburl = {https://dblp.org/rec/bib/conf/ic3/SarkarHDMMDVPBYR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/NandiDR14, author = {Dipanjan Nandi and Arup Kumar Dutta and K. Sreenivasa Rao}, title = {Significance of {CV} transition and steady vowel regions for language identification}, booktitle = {Seventh International Conference on Contemporary Computing, {IC3} 2014, Noida, India, August 7-9, 2014}, pages = {513--517}, year = {2014}, crossref = {DBLP:conf/ic3/2014}, url = {https://doi.org/10.1109/IC3.2014.6897226}, doi = {10.1109/IC3.2014.6897226}, biburl = {https://dblp.org/rec/bib/conf/ic3/NandiDR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/icon-nlp/ReddySR14, author = {V. Ramu Reddy and Parakrant Sarkar and K. Sreenivasa Rao}, title = {Duration Modeling by Multi-Models based on Vowel Production characteristics}, booktitle = {Proceedings of the 11th International Conference on Natural Language Processing, {ICON} 2014, Goa, India, December 18-21, 2014}, pages = {39--47}, year = {2014}, crossref = {DBLP:conf/icon-nlp/2014}, url = {https://aclanthology.info/papers/W14-5106/w14-5106}, biburl = {https://dblp.org/rec/bib/conf/icon-nlp/ReddySR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/interspeech/SarkarR14, author = {Sourjya Sarkar and K. 
Sreenivasa Rao}, title = {A novel boosting algorithm for improved i-vector based speaker verification in noisy environments}, booktitle = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech Communication Association, Singapore, September 14-18, 2014}, pages = {671--675}, year = {2014}, crossref = {DBLP:conf/interspeech/2014}, url = {http://www.isca-speech.org/archive/interspeech_2014/i14_0671.html}, biburl = {https://dblp.org/rec/bib/conf/interspeech/SarkarR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ncc/ManjunathR14, author = {K. E. Manjunath and K. Sreenivasa Rao}, title = {Automatic Phonetic Transcription for read, extempore and conversation speech for an Indian language: Bengali}, booktitle = {Twentieth National Conference on Communications, {NCC} 2014, Kanpur, India, February 28 - March 2, 2014}, pages = {1--6}, year = {2014}, crossref = {DBLP:conf/ncc/2014}, url = {https://doi.org/10.1109/NCC.2014.6811347}, doi = {10.1109/NCC.2014.6811347}, biburl = {https://dblp.org/rec/bib/conf/ncc/ManjunathR14}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/asc/NarendraR13, author = {N. P. Narendra and K. Sreenivasa Rao}, title = {Optimal weight tuning method for unit selection cost functions in syllable based text-to-speech synthesis}, journal = {Appl. Soft Comput.}, volume = {13}, number = {2}, pages = {773--781}, year = {2013}, url = {https://doi.org/10.1016/j.asoc.2012.09.023}, doi = {10.1016/j.asoc.2012.09.023}, biburl = {https://dblp.org/rec/bib/journals/asc/NarendraR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/csl/ReddyR13, author = {V. Ramu Reddy and K. 
Sreenivasa Rao}, title = {Two-stage intonation modeling using feedforward neural networks for syllable based text-to-speech synthesis}, journal = {Computer Speech {\&} Language}, volume = {27}, number = {5}, pages = {1105--1126}, year = {2013}, url = {https://doi.org/10.1016/j.csl.2013.02.003}, doi = {10.1016/j.csl.2013.02.003}, biburl = {https://dblp.org/rec/bib/journals/csl/ReddyR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/RaoKR13, author = {K. Sreenivasa Rao and Shashidhar G. Koolagudi and Vempada Ramu Reddy}, title = {Emotion recognition from speech using global and local prosodic features}, journal = {I. J. Speech Technology}, volume = {16}, number = {2}, pages = {143--160}, year = {2013}, url = {https://doi.org/10.1007/s10772-012-9172-2}, doi = {10.1007/s10772-012-9172-2}, biburl = {https://dblp.org/rec/bib/journals/ijst/RaoKR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/RaoK13, author = {Krothapalli Sreenivasa Rao and Shashidhar G. Koolagudi}, title = {Characterization and recognition of emotions from speech using excitation source information}, journal = {I. J. Speech Technology}, volume = {16}, number = {2}, pages = {181--201}, year = {2013}, url = {https://doi.org/10.1007/s10772-012-9175-z}, doi = {10.1007/s10772-012-9175-z}, biburl = {https://dblp.org/rec/bib/journals/ijst/RaoK13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/VuppalaR13, author = {Anil Kumar Vuppala and K. Sreenivasa Rao}, title = {Vowel onset point detection for noisy speech using spectral energy at formant frequencies}, journal = {I. J. 
Speech Technology}, volume = {16}, number = {2}, pages = {229--235}, year = {2013}, url = {https://doi.org/10.1007/s10772-012-9179-8}, doi = {10.1007/s10772-012-9179-8}, biburl = {https://dblp.org/rec/bib/journals/ijst/VuppalaR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/RaoMR13, author = {K. Sreenivasa Rao and Sudhamay Maity and V. Ramu Reddy}, title = {Pitch synchronous and glottal closure based speech analysis for language recognition}, journal = {I. J. Speech Technology}, volume = {16}, number = {4}, pages = {413--430}, year = {2013}, url = {https://doi.org/10.1007/s10772-013-9193-5}, doi = {10.1007/s10772-013-9193-5}, biburl = {https://dblp.org/rec/bib/journals/ijst/RaoMR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/ijst/ReddyMR13, author = {V. Ramu Reddy and Sudhamay Maity and K. Sreenivasa Rao}, title = {Identification of Indian languages using multi-level spectral and prosodic features}, journal = {I. J. Speech Technology}, volume = {16}, number = {4}, pages = {489--511}, year = {2013}, url = {https://doi.org/10.1007/s10772-013-9198-0}, doi = {10.1007/s10772-013-9198-0}, biburl = {https://dblp.org/rec/bib/journals/ijst/ReddyMR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/jois/SinghMRV13, author = {Avinash Kumar Singh and Jayanta Mukhopadhyay and K. Sreenivasa Rao and Kapinaiah Viswanath}, title = {Classification of Infant Cries Using Dynamics of Epoch Features}, journal = {J. Intelligent Systems}, volume = {22}, number = {3}, pages = {351--364}, year = {2013}, url = {https://doi.org/10.1515/jisys-2013-0050}, doi = {10.1515/jisys-2013-0050}, biburl = {https://dblp.org/rec/bib/journals/jois/SinghMRV13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/speech/RaoV13, author = {K. 
Sreenivasa Rao and Anil Kumar Vuppala}, title = {Non-uniform time scale modification using instants of significant excitation and vowel onset points}, journal = {Speech Communication}, volume = {55}, number = {6}, pages = {745--756}, year = {2013}, url = {https://doi.org/10.1016/j.specom.2013.03.002}, doi = {10.1016/j.specom.2013.03.002}, biburl = {https://dblp.org/rec/bib/journals/speech/RaoV13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{DBLP:journals/spl/YadavR13, author = {Jainath Yadav and K. Sreenivasa Rao}, title = {Detection of Vowel Offset Point From Speech Signal}, journal = {{IEEE} Signal Process. Lett.}, volume = {20}, number = {4}, pages = {299--302}, year = {2013}, url = {https://doi.org/10.1109/LSP.2013.2245647}, doi = {10.1109/LSP.2013.2245647}, biburl = {https://dblp.org/rec/bib/journals/spl/YadavR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/ic3/YadavR13, author = {Jainath Yadav and K. Sreenivasa Rao}, title = {Analysis of detection of vowel offset point for coded speech}, booktitle = {Sixth International Conference on Contemporary Computing, {IC3} 2013, Noida, India, August 8-10, 2013}, pages = {485--490}, year = {2013}, crossref = {DBLP:conf/ic3/2013}, url = {https://doi.org/10.1109/IC3.2013.6612244}, doi = {10.1109/IC3.2013.6612244}, biburl = {https://dblp.org/rec/bib/conf/ic3/YadavR13}, bibsource = {dblp computer science bibliography, https://dblp.org} } @inproceedings{DBLP:conf/isda/ReddyR13, author = {V. Ramu Reddy and K. 
Sreenivasa Rao},
  title     = {High quality text-to-speech synthesis system with efficient duration models developed using coding schemes based on vowel production characteristics},
  booktitle = {13th International Conference on Intelligent Systems Design and Applications, {ISDA} 2013, Selangor, Malaysia, December 8-10, 2013},
  pages     = {7--12},
  year      = {2013},
  crossref  = {DBLP:conf/isda/2013},
  url       = {https://doi.org/10.1109/ISDA.2013.6920727},
  doi       = {10.1109/ISDA.2013.6920727},
  biburl    = {https://dblp.org/rec/bib/conf/isda/ReddyR13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% ---------------------------------------------------------------------------
% Books in the series "Springer Briefs in Electrical and Computer
% Engineering" (DBLP export). Each entry carries both the resolver URL and
% the bare DOI, as exported.
% ---------------------------------------------------------------------------

@book{DBLP:series/sbece/Rao12,
  author    = {K. Sreenivasa Rao},
  title     = {Predicting Prosody from Text for Text-to-Speech Synthesis},
  series    = {Springer Briefs in Electrical and Computer Engineering},
  publisher = {Springer},
  year      = {2012},
  url       = {https://doi.org/10.1007/978-1-4614-1338-7},
  doi       = {10.1007/978-1-4614-1338-7},
  isbn      = {978-1-4614-1337-0},
  biburl    = {https://dblp.org/rec/bib/series/sbece/Rao12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@book{DBLP:series/sbece/RaoK13,
  author    = {K. Sreenivasa Rao and Shashidhar G. Koolagudi},
  title     = {Robust Emotion Recognition using Spectral and Prosodic Features},
  series    = {Springer Briefs in Electrical and Computer Engineering},
  publisher = {Springer},
  year      = {2013},
  url       = {https://doi.org/10.1007/978-1-4614-6360-3},
  doi       = {10.1007/978-1-4614-6360-3},
  isbn      = {978-1-4614-6359-7},
  biburl    = {https://dblp.org/rec/bib/series/sbece/RaoK13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@book{DBLP:series/sbece/RaoK13a, author = {K. Sreenivasa Rao and Shashidhar G.
Koolagudi}, title = {Emotion Recognition using Speech Features}, series = {Springer Briefs in Electrical and Computer Engineering}, publisher = {Springer}, year = {2013}, url = {https://doi.org/10.1007/978-1-4614-5143-3}, doi = {10.1007/978-1-4614-5143-3}, isbn = {978-1-4614-5142-6}, biburl = {https://dblp.org/rec/bib/series/sbece/RaoK13a}, bibsource = {dblp computer science bibliography, https://dblp.org} }

% ---------------------------------------------------------------------------
% Springer books (one DBLP export followed by hand-written entries).
%
% Conventions used below:
%   * every citation key is unique; a repeated key makes BibTeX reject the
%     later entry, so it can never be cited;
%   * authors are written "Surname, Given" so the surname is unambiguous;
%   * URL values carry no leading or trailing blanks.
% ---------------------------------------------------------------------------

@book{DBLP:series/sbece/RaoV14,
  author    = {K. Sreenivasa Rao and Anil Kumar Vuppala},
  title     = {Speech Processing in Mobile Environments},
  series    = {Springer Briefs in Electrical and Computer Engineering},
  publisher = {Springer},
  year      = {2014},
  url       = {https://doi.org/10.1007/978-3-319-03116-3},
  doi       = {10.1007/978-3-319-03116-3},
  isbn      = {978-3-319-03115-6},
  biburl    = {https://dblp.org/rec/bib/series/sbece/RaoV14},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@book{rao2014robust,
  author    = {Rao, K. Sreenivasa and Sarkar, Sourjya},
  title     = {Robust speaker recognition in noisy environments},
  publisher = {Springer},
  year      = {2014},
  url       = {https://www.springer.com/in/book/9783319071299},
  doi       = {10.1007/978-3-319-07130-5}
}

@book{rao2015language,
  author    = {Rao, K. Sreenivasa and Reddy, V. Ramu and Maity, Sudhamay},
  title     = {Language Identification Using Spectral and Prosodic Features},
  publisher = {Springer},
  year      = {2015},
  url       = {https://www.springer.com/in/book/9783319171623},
  doi       = {10.1007/978-3-319-17163-0}
}

% This second 2015 language-identification book needs its own key: it
% previously shared "rao2015language" with the entry above, so BibTeX
% discarded it as a repeated entry.
@book{rao2015excitation,
  author    = {Rao, K. Sreenivasa and Nandi, Dipanjan},
  title     = {Language Identification Using Excitation Source Features},
  publisher = {Springer},
  year      = {2015},
  url       = {https://www.springer.com/in/book/9783319177243},
}

% The key is kept as "rao2017language" for existing citations even though the
% book is about speech recognition.
% The second author is written "Manjunath, K. E." so that Manjunath is the
% surname, matching the "K. E. Manjunath" form used by other entries.
@book{rao2017language,
  author    = {Rao, K. Sreenivasa and Manjunath, K. E.},
  title     = {Speech Recognition Using Articulatory and Excitation Source Features},
  publisher = {Springer},
  year      = {2017},
  url       = {https://www.springer.com/in/book/9783319492193},
}

@book{rao2019, title={Source Modeling Techniques for Quality Enhancement in
Statistical Parametric Speech Synthesis},
  author = {Rao, K Sreenivasa and Narendra, N P},
  year = {2019},
  publisher = {Springer},
  url = {https://www.springer.com/gp/book/9783030027582}
}

% Empty placeholder fields (pages, crossref, url, doi, address) removed:
% an empty crossref makes BibTeX look up a parent entry with an empty key.
@inbook{infact2019,
  author = {K. Sreenivasa Rao and A. K. Singh and J. Mukhopadhyay and Siva Ayyappa Kumar and Sunil Kumar S B and Ramu Reddy Vempada},
  chapter = {Infant Cry Recognition using Source, System, Prosody and Epoch features},
  title = {Acoustic analysis of Infant Cries, Toddler Vocalizations, and Young Adult Dysarthria, Speech Technology in Medicine and Health Care},
  year = {2019},
  publisher = {de Gruyter}
}

% publisher was empty although it is required for inbook; the volume is LNCS
% with a 10.1007 DOI, the same as the other Springer LNCS chapters in this file.
@inbook{DBLP:conf/mike/SenPMR13,
  author = {Nirmalya Sen and Hemant A. Patil and Shyamal Kr. Das Mandal and K. Sreenivasa Rao},
  chapter = {Importance of Utterance Partitioning in {SVM} Classifier with {GMM} Supervectors for Text-Independent Speaker Verification},
  title = {Mining Intelligence and Knowledge Exploration (LNCS)},
  pages = {780--789},
  year = {2013},
  crossref = {DBLP:conf/mike/2013},
  url = {https://doi.org/10.1007/978-3-319-03844-5_76},
  doi = {10.1007/978-3-319-03844-5_76},
  publisher = {Springer}
}

@inproceedings{DBLP:conf/ococosda/KumarRP13,
  author = {S. B. Sunil Kumar and K. Sreenivasa Rao and Debadatta Pati},
  title = {Phonetic and Prosodically Rich Transcribed speech corpus in Indian languages: Bengali and Odia},
  booktitle = {2013 International Conference Oriental {COCOSDA} held jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), Gurgaon, India, November 25-27, 2013},
  pages = {1--5},
  year = {2013},
  crossref = {DBLP:conf/ococosda/2013},
  url = {https://doi.org/10.1109/ICSDA.2013.6709901},
  doi = {10.1109/ICSDA.2013.6709901},
  biburl = {https://dblp.org/rec/bib/conf/ococosda/KumarRP13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/ManjunathRP13,
  author = {K. E. Manjunath and K.
Sreenivasa Rao and Debadatta Pati},
  title = {Development of phonetic engine for Indian languages: Bengali and Oriya},
  booktitle = {2013 International Conference Oriental {COCOSDA} held jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), Gurgaon, India, November 25-27, 2013},
  pages = {1--6},
  year = {2013},
  crossref = {DBLP:conf/ococosda/2013},
  url = {https://doi.org/10.1109/ICSDA.2013.6709900},
  doi = {10.1109/ICSDA.2013.6709900},
  biburl = {https://dblp.org/rec/bib/conf/ococosda/ManjunathRP13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/NandiPR13,
  author = {Dipanjan Nandi and Debadatta Pati and K. Sreenivasa Rao},
  title = {Language identification using Hilbert envelope and phase information of linear prediction residual},
  booktitle = {2013 International Conference Oriental {COCOSDA} held jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), Gurgaon, India, November 25-27, 2013},
  pages = {1--6},
  year = {2013},
  crossref = {DBLP:conf/ococosda/2013},
  url = {https://doi.org/10.1109/ICSDA.2013.6709864},
  doi = {10.1109/ICSDA.2013.6709864},
  biburl = {https://dblp.org/rec/bib/conf/ococosda/NandiPR13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/PatilPSSKKNCKRK13,
  author = {Hemant A. Patil and Tanvina B. Patel and Nirmesh J. Shah and Hardik B. Sailor and Raghava Krishnan and G. R. Kasthuri and T. Nagarajan and S. Lilly Christina and Naresh Kumar and Veera Raghavendra and S. P. Kishore and S. R. Mahadeva Prasanna and Nagaraj Adiga and Sanasam Ranbir Singh and Anand Konjengbam and Pranaw Kumar and Bira Chandra Singh and S. L. Binil Kumar and T. G. Bhadran and T. Sajini and Arup Saha and Tulika Basu and K. Sreenivasa Rao and N. P. Narendra and Anil Kumar Sao and Rakesh Kumar and Pranhari Talukdar and Purnendu Acharyaa and Somnath Chandra and Swaran Lata and Hema A.
Murthy},
  title = {A syllable-based framework for unit selection synthesis in 13 Indian languages},
  booktitle = {2013 International Conference Oriental {COCOSDA} held jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), Gurgaon, India, November 25-27, 2013},
  pages = {1--8},
  year = {2013},
  crossref = {DBLP:conf/ococosda/2013},
  url = {https://doi.org/10.1109/ICSDA.2013.6709851},
  doi = {10.1109/ICSDA.2013.6709851},
  biburl = {https://dblp.org/rec/bib/conf/ococosda/PatilPSSKKNCKRK13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/ococosda/SarkarR13,
  author = {Sourjya Sarkar and K. Sreenivasa Rao},
  title = {Significance of utterance partitioning in {GMM-SVM} based speaker verification in varying background environment},
  booktitle = {2013 International Conference Oriental {COCOSDA} held jointly with 2013 Conference on Asian Spoken Language Research and Evaluation (O-COCOSDA/CASLRE), Gurgaon, India, November 25-27, 2013},
  pages = {1--5},
  year = {2013},
  crossref = {DBLP:conf/ococosda/2013},
  url = {https://doi.org/10.1109/ICSDA.2013.6709859},
  doi = {10.1109/ICSDA.2013.6709859},
  biburl = {https://dblp.org/rec/bib/conf/ococosda/SarkarR13},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inbook{DBLP:conf/premi/BhakatNR13,
  author = {Ravi Kalyan Bhakat and N. P.
Narendra and Krothapalli Sreenivasa Rao},
  chapter = {Corpus Based Emotional Speech Synthesis in Hindi},
  title = {Pattern Recognition and Machine Intelligence (LNCS)},
  pages = {390--395},
  year = {2013},
  url = {https://doi.org/10.1007/978-3-642-45062-4_53},
  doi = {10.1007/978-3-642-45062-4_53},
  publisher = {Springer}
}

@inbook{DBLP:conf/premi/ReddyR13,
  author = {Vempada Ramu Reddy and Krothapalli Sreenivasa Rao},
  chapter = {Duration Modeling Using Multi-model Based on Positional Information},
  title = {Pattern Recognition and Machine Intelligence (LNCS)},
  pages = {404--409},
  year = {2013},
  url = {https://doi.org/10.1007/978-3-642-45062-4_55},
  doi = {10.1007/978-3-642-45062-4_55},
  publisher = {Springer},
}

@article{DBLP:journals/asc/LaskarCTRB12,
  author = {Rabul Hussain Laskar and D. Chakrabarty and Fazal Ahmed Talukdar and K. Sreenivasa Rao and Kalyan Banerjee},
  title = {Comparing {ANN} and {GMM} in a voice conversion framework},
  journal = {Appl. Soft Comput.},
  volume = {12},
  number = {11},
  pages = {3332--3342},
  year = {2012},
  url = {https://doi.org/10.1016/j.asoc.2012.05.027},
  doi = {10.1016/j.asoc.2012.05.027},
  biburl = {https://dblp.org/rec/bib/journals/asc/LaskarCTRB12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/cssp/VuppalaRC12,
  author = {Anil Kumar Vuppala and K.
Sreenivasa Rao and Saswat Chakrabarti},
  title = {Spotting and Recognition of Consonant-Vowel Units from Continuous Speech Using Accurate Detection of Vowel Onset Points},
  journal = {{CSSP}},
  volume = {31},
  number = {4},
  pages = {1459--1474},
  year = {2012},
  url = {https://doi.org/10.1007/s00034-012-9391-4},
  doi = {10.1007/s00034-012-9391-4},
  biburl = {https://dblp.org/rec/bib/journals/cssp/VuppalaRC12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/cssp/Rao12,
  author = {Krothapalli Sreenivasa Rao},
  title = {Unconstrained Pitch Contour Modification Using Instants of Significant Excitation},
  journal = {{CSSP}},
  volume = {31},
  number = {6},
  pages = {2133--2152},
  year = {2012},
  url = {https://doi.org/10.1007/s00034-012-9428-8},
  doi = {10.1007/s00034-012-9428-8},
  biburl = {https://dblp.org/rec/bib/journals/cssp/Rao12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/KoolagudiR12,
  author = {Shashidhar G. Koolagudi and K. Sreenivasa Rao},
  title = {Emotion recognition from speech: a review},
  journal = {I. J. Speech Technology},
  volume = {15},
  number = {2},
  pages = {99--117},
  year = {2012},
  url = {https://doi.org/10.1007/s10772-011-9125-1},
  doi = {10.1007/s10772-011-9125-1},
  biburl = {https://dblp.org/rec/bib/journals/ijst/KoolagudiR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/KoolagudiR12a,
  author = {Shashidhar G. Koolagudi and K. Sreenivasa Rao},
  title = {Emotion recognition from speech using source, system, and prosodic features},
  journal = {I. J.
Speech Technology},
  volume = {15},
  number = {2},
  pages = {265--289},
  year = {2012},
  url = {https://doi.org/10.1007/s10772-012-9139-3},
  doi = {10.1007/s10772-012-9139-3},
  biburl = {https://dblp.org/rec/bib/journals/ijst/KoolagudiR12a},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Second author corrected from Jaynath to Jainath Yadav, the spelling used by
% every other entry for this author in the file.
@article{DBLP:journals/ijst/RaoYSKV12,
  author = {Krothapalli Sreenivasa Rao and Jainath Yadav and Sourjya Sarkar and Shashidhar G. Koolagudi and Anil Kumar Vuppala},
  title = {Neural network based feature transformation for emotion independent speaker identification},
  journal = {I. J. Speech Technology},
  volume = {15},
  number = {3},
  pages = {335--349},
  year = {2012},
  url = {https://doi.org/10.1007/s10772-012-9148-2},
  doi = {10.1007/s10772-012-9148-2},
  biburl = {https://dblp.org/rec/bib/journals/ijst/RaoYSKV12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/LaskarBTR12,
  author = {Rabul Hussain Laskar and Kalyan Banerjee and Fazal Ahmed Talukdar and K. Sreenivasa Rao},
  title = {A pitch synchronous approach to design voice conversion system using source-filter correlation},
  journal = {I. J. Speech Technology},
  volume = {15},
  number = {3},
  pages = {419--431},
  year = {2012},
  url = {https://doi.org/10.1007/s10772-012-9164-2},
  doi = {10.1007/s10772-012-9164-2},
  biburl = {https://dblp.org/rec/bib/journals/ijst/LaskarBTR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/KoolagudiR12b,
  author = {Shashidhar G. Koolagudi and Krothapalli Sreenivasa Rao},
  title = {Emotion recognition from speech using sub-syllabic and pitch synchronous spectral features},
  journal = {I. J.
Speech Technology},
  volume = {15},
  number = {4},
  pages = {495--511},
  year = {2012},
  url = {https://doi.org/10.1007/s10772-012-9150-8},
  doi = {10.1007/s10772-012-9150-8},
  biburl = {https://dblp.org/rec/bib/journals/ijst/KoolagudiR12b},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/taslp/VuppalaYCR12,
  author = {Anil Kumar Vuppala and Jainath Yadav and Saswat Chakrabarti and K. Sreenivasa Rao},
  title = {Vowel Onset Point Detection for Low Bit Rate Coded Speech},
  journal = {{IEEE} Trans. Audio, Speech {\&} Language Processing},
  volume = {20},
  number = {6},
  pages = {1894--1903},
  year = {2012},
  url = {https://doi.org/10.1109/TASL.2012.2191284},
  doi = {10.1109/TASL.2012.2191284},
  biburl = {https://dblp.org/rec/bib/journals/taslp/VuppalaYCR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/tslp/NarendraR12,
  author = {N. P. Narendra and K. Sreenivasa Rao},
  title = {Syllable Specific Unit Selection Cost Functions for Text-to-Speech Synthesis},
  journal = {{TSLP}},
  volume = {9},
  number = {3},
  pages = {5:1--5:24},
  year = {2012},
  url = {http://doi.acm.org/10.1145/2382434.2382435},
  doi = {10.1145/2382434.2382435},
  biburl = {https://dblp.org/rec/bib/journals/tslp/NarendraR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/cube/KoolagudiFR12,
  author = {Shashidhar G. Koolagudi and Shan E. Fatima and K.
Sreenivasa Rao},
  title = {Speaker recognition in the case of emotional environment using transformation of speech features},
  booktitle = {{CUBE} International {IT} Conference {\&} Exhibition, {CUBE} '12, Pune, India - September 03 - 06, 2012},
  pages = {118--123},
  year = {2012},
  crossref = {DBLP:conf/cube/2012},
  url = {http://doi.acm.org/10.1145/2381716.2381739},
  doi = {10.1145/2381716.2381739},
  biburl = {https://dblp.org/rec/bib/conf/cube/KoolagudiFR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inproceedings{DBLP:conf/cube/BhartiKRCK12,
  author = {Santosh Kumar Bharti and Shashidhar G. Koolagudi and K. Sreenivasa Rao and Ankur Choudhary and Binod Kumar},
  title = {Voice conversion using linear prediction coefficients and artificial neural network},
  booktitle = {{CUBE} International {IT} Conference {\&} Exhibition, {CUBE} '12, Pune, India - September 03 - 06, 2012},
  pages = {240--245},
  year = {2012},
  crossref = {DBLP:conf/cube/2012},
  url = {http://doi.acm.org/10.1145/2381716.2381762},
  doi = {10.1145/2381716.2381762},
  biburl = {https://dblp.org/rec/bib/conf/cube/BhartiKRCK12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@inbook{DBLP:conf/ic3/ReddyR12,
  author = {V. Ramu Reddy and K. Sreenivasa Rao},
  chapter = {Intensity Modeling for Syllable Based Text-to-Speech Synthesis},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {106--117},
  year = {2012},
  url = {https://doi.org/10.1007/978-3-642-32129-0_16},
  doi = {10.1007/978-3-642-32129-0_16},
  publisher = {Springer},
}

@inbook{10.1007/978-3-642-32112-2_15,
  author = "Koolagudi, Shashidhar G. and Sharma, Kritika and Sreenivasa Rao, K.",
  editor = "Mathew, Jimson and Patra, Priyadarshan and Pradhan, Dhiraj K. and Kuttyamma, A.
J.",
  chapter = "Speaker Recognition in Emotional Environment",
  title = "Eco-friendly Computing and Communication Systems",
  year = "2012",
  publisher = "Springer Berlin Heidelberg",
  address = "Berlin, Heidelberg",
  pages = "117--124",
  abstract = "This paper deals with development of speaker recognition system in emotional environments. In this paper, Mel-frequency cepstral coefficients (MFCC) have been used to represent the speaker specific information. A simulated speech corpus of Hindi language are used to check the performance of speaker recognition in emotional environment. The emotions included in this study are anger, neutral, sad, happy and surprise. Emotion recognition models are developed using Gaussian mixture models. Performance of speaker recognition is studied in emotional environment. The results show that emotions play vital role in speaker recognition.",
  isbn = "978-3-642-32112-2",
  url = "https://link.springer.com/chapter/10.1007/978-3-642-32112-2_15"
}

@inbook{DBLP:conf/ic3/GhoshR12,
  author = {Krishnendu Ghosh and K. Sreenivasa Rao},
  chapter = {Data-Driven Phrase Break Prediction for Bengali Text-to-Speech System},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {118--129},
  year = {2012},
  url = {https://doi.org/10.1007/978-3-642-32129-0_17},
  doi = {10.1007/978-3-642-32129-0_17},
  publisher = {Springer},
}

@inbook{DBLP:conf/ic3/KoolagudiBDR12,
  author = {Shashidhar G. Koolagudi and Anurag Barthwal and Swati Devliyal and K. Sreenivasa Rao},
  chapter = {Real Life Emotion Classification from Speech Using Gaussian Mixture Models},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {250--261},
  year = {2012},
  crossref = {DBLP:conf/ic3/2012},
  url = {https://doi.org/10.1007/978-3-642-32129-0_28},
  doi = {10.1007/978-3-642-32129-0_28},
  publisher = {Springer}
}

@inbook{DBLP:conf/ic3/KoolagudiDBR12,
  author = {Shashidhar G. Koolagudi and Swati Devliyal and Anurag Barthwal and K.
Sreenivasa Rao},
  chapter = {Emotion Recognition from Semi Natural Speech Using Artificial Neural Networks and Excitation Source Features},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {273--282},
  year = {2012},
  crossref = {DBLP:conf/ic3/2012},
  url = {https://doi.org/10.1007/978-3-642-32129-0_30},
  doi = {10.1007/978-3-642-32129-0_30},
  publisher = {Springer}
}

@inbook{DBLP:conf/ic3/KoolagudiRR12,
  author = {Shashidhar G. Koolagudi and Deepika Rastogi and K. Sreenivasa Rao},
  chapter = {Spoken Language Identification Using Spectral Features},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {496--497},
  year = {2012},
  crossref = {DBLP:conf/ic3/2012},
  url = {https://doi.org/10.1007/978-3-642-32129-0_52},
  doi = {10.1007/978-3-642-32129-0_52},
  publisher = {Springer}
}

% publisher was an empty string (a required inbook field) and address was
% blank; the DOI carries the 10.1007 prefix shared by the Springer chapters
% in this file, so publisher is filled in and the blank address dropped.
@inbook{DBLP:conf/mike/NarendraR15,
  author = {N. P. Narendra and K. Sreenivasa Rao},
  chapter = {Hybrid Source Modeling Method Utilizing Optimal Residual Frames for HMM-based Speech Synthesis},
  title = {Mining Intelligence and Knowledge Exploration - Third International Conference, {MIKE} 2015, Hyderabad, India, December 9-11, 2015, Proceedings},
  pages = {277--286},
  year = {2015},
  publisher = {Springer},
  crossref = {DBLP:conf/mike/2015},
  url = {https://doi.org/10.1007/978-3-319-26832-3_27},
  doi = {10.1007/978-3-319-26832-3_27}
}

@inbook{DBLP:conf/ic3/VuppalaRC11,
  author = {Anil Kumar Vuppala and K. Sreenivasa Rao and Saswat Chakrabarti},
  chapter = {Effect of Noise on Recognition of Consonant-Vowel {(CV)} Units},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {191--200},
  year = {2011},
  crossref = {DBLP:conf/ic3/2011},
  url = {https://doi.org/10.1007/978-3-642-22606-9_22},
  doi = {10.1007/978-3-642-22606-9_22},
  publisher = {Springer}
}

@inbook{DBLP:conf/ic3/VuppalaYRC11,
  author = {Anil Kumar Vuppala and Jainath Yadav and K.
Sreenivasa Rao and Saswat Chakrabarti},
  chapter = {Effect of Noise on Vowel Onset Point Detection},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {201--211},
  year = {2011},
  url = {https://doi.org/10.1007/978-3-642-22606-9_23},
  doi = {10.1007/978-3-642-22606-9_23},
  publisher = {Springer}
}

% Stray leading space inside the publisher value removed.
@inbook{DBLP:conf/ic3/ChauhanYKR11,
  author = {Rahul Chauhan and Jainath Yadav and Shashidhar G. Koolagudi and K. Sreenivasa Rao},
  chapter = {Text Independent Emotion Recognition Using Spectral Features},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {359--370},
  year = {2011},
  crossref = {DBLP:conf/ic3/2011},
  url = {https://doi.org/10.1007/978-3-642-22606-9_37},
  doi = {10.1007/978-3-642-22606-9_37},
  publisher = {Springer}
}

@inbook{DBLP:conf/ic3/NarendraR11,
  author = {N. P. Narendra and K. Sreenivasa Rao},
  chapter = {Segment Specific Concatenation Cost for Syllable Based Bengali TTS},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {371--382},
  year = {2011},
  crossref = {DBLP:conf/ic3/2011},
  url = {https://doi.org/10.1007/978-3-642-22606-9_38},
  doi = {10.1007/978-3-642-22606-9_38},
  publisher = {Springer}
}

% Book title spaced as "Machine Intelligence (LNCS)" so it matches the other
% PReMI chapters in this file and renders identically.
@inbook{DBLP:conf/premi/RaoMTK09,
  author = {K. Sreenivasa Rao and Sudhamay Maity and Amol Taru and Shashidhar G. Koolagudi},
  chapter = {Unit Selection Using Linguistic, Prosodic and Spectral distance for developing Text-to-Speech System in Hindi},
  title = {Pattern Recognition and Machine Intelligence (LNCS)},
  pages = {531--536},
  year = {2009},
  url = {https://doi.org/10.1007/978-3-642-11164-8_86},
  doi = {10.1007/978-3-642-11164-8_86},
  publisher = {Springer}
}

@inbook{DBLP:conf/premi/KoolagudiR09,
  author = {Shashidhar G. Koolagudi and K.
Sreenivasa Rao},
  chapter = {Exploring Speech Features for Classifying Emotions along Valence Dimension},
  title = {Pattern Recognition and Machine Intelligence (LNCS)},
  pages = {537--542},
  year = {2009},
  url = {https://doi.org/10.1007/978-3-642-11164-8_87},
  doi = {10.1007/978-3-642-11164-8_87},
  publisher = {Springer}
}
% In the entry closed just above, the book title is now spaced as
% "Machine Intelligence (LNCS)", matching the 2007 and 2013 PReMI chapters.

@inbook{DBLP:conf/ic3/VuppalaCR10,
  author = {Anil Kumar Vuppala and Saswat Chakrabarti and K. Sreenivasa Rao},
  chapter = {Effect of Speech Coding on Recognition of Consonant-Vowel (CV) Units},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {284--294},
  year = {2010},
  url = {https://doi.org/10.1007/978-3-642-14834-7_27},
  doi = {10.1007/978-3-642-14834-7_27},
  publisher = {Springer}
}

@inbook{DBLP:conf/ic3/KoolagudiRR10,
  author = {Shashidhar G. Koolagudi and Sudhin Ray and K. Sreenivasa Rao},
  chapter = {Emotion Classification Based on Speaking Rate},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {316--327},
  year = {2010},
  crossref = {DBLP:conf/ic3/2010-1},
  url = {https://doi.org/10.1007/978-3-642-14834-7_30},
  doi = {10.1007/978-3-642-14834-7_30},
  publisher = {Springer}
}

@inbook{DBLP:conf/iconip/RaoPY04,
  author = {K. Sreenivasa Rao and S. R. Mahadeva Prasanna and B. Yegnanarayana},
  chapter = {Two-Stage Duration Model for Indian Languages Using Neural Networks},
  title = {Neural Information Processing, (LNCS)},
  pages = {1179--1185},
  year = {2004},
  url = {https://doi.org/10.1007/978-3-540-30499-9_183},
  doi = {10.1007/978-3-540-30499-9_183},
  publisher = {Springer}
}

@inbook{10.1007/978-3-319-07353-8_3,
  author = "Singh, Piyush Kumar P. and Manjunath, K. E. and Ravi Kiran, R.
and Yadav, Jainath and Sreenivasa Rao, K.",
  editor = "Kumar Kundu, Malay and Mohapatra, Durga Prasad and Konar, Amit and Chakraborty, Aruna",
  chapter = "Indexing and Retrieval of Speech Documents",
  title = "Advanced Computing, Networking and Informatics- Volume 1",
  year = "2014",
  publisher = "Springer International Publishing",
  address = "Cham",
  pages = "17--24",
  abstract = "In this paper, a speech document indexing system and similarity-based document retrieval method has been proposed. K-d tree is used as the index structure and codebooks derived from speech documents present in the database, are used during retrieval of desired document. Each document is represented as a sequence of codebook indices. The longest common subsequence based approach is proposed for retrieving the documents. Proposed retrieval method is evaluated using a speech database of 3 hours recorded by a male speaker and speech queries from 5 male and 5 female speakers. The accuracy of retrieval is found to be about 88{\%} for the queries given by male speakers.",
  isbn = "978-3-319-07353-8",
  url = "https://link.springer.com/chapter/10.1007/978-3-319-07353-8_3"
}

@inbook{10.1007/978-3-642-32112-2_21,
  author = "Koolagudi, Shashidhar G. and Thakur, Sujata Negi and Barthwal, Anurag and Singh, Manoj Kumar and Rawat, Ramesh and Sreenivasa Rao, K.",
  editor = "Mathew, Jimson and Patra, Priyadarshan and Pradhan, Dhiraj K. and Kuttyamma, A. J.",
  chapter = "Vowel Recognition from Telephonic Speech Using MFCCs and Gaussian Mixture Models",
  title = "Communications in Computer and Information Science (CCIS): Eco-friendly Computing and Communication Systems",
  year = "2012",
  publisher = "Springer Berlin Heidelberg",
  address = "Berlin, Heidelberg",
  pages = "170--177",
  abstract = "This paper presents vowel recognition from speech using mel frequency cepstral coefficients (MFCCs). In this work, microphone recorded speech and telephonic speech are used for conducting vowel recognition studies.
The vowels considered for recognition are from Hindi alphabet namely अ(a), इ(i), उ(u), ए(e), ऐ(ai), ओ(o) and औ(au). Gaussian mixture models are used for developing vowel recognition models. Vowel recognition performance for microphone recorded speech and telephonic speech are 91.4{\%} and 84.2{\%} respectively.",
  isbn = "978-3-642-32112-2",
  url = "https://link.springer.com/chapter/10.1007/978-3-642-32112-2_21"
}

% In the abstract below, the duplicated index-term residue
% "support vector machines support vector machines (SVMs SVMs )" left by the
% publisher export is collapsed to a single mention.
@inbook{Koolagudi2012,
  author = "Koolagudi, Shashidhar G. and Sreenivasa Rao, K. and Reddy, Ramu and Kumar, Vuppala Anil and Chakrabarti, Saswat",
  editor = "Neustein, Amy and Patil, Hemant A.",
  chapter = "Robust Speaker Recognition in Noisy Environments: Using Dynamics of Speaker-Specific Prosody",
  title = "Forensic Speaker Recognition: Law Enforcement and Counter-Terrorism",
  year = "2012",
  publisher = "Springer New York",
  address = "New York, NY",
  pages = "183--204",
  abstract = "In this chapter, we propose speaker-specific prosodic features for improving the performance of speaker recognition in noisy environments. This approach can be especially useful in the forensic analysis of speech. Degradation in speaker recognition is a common phenomenon observed due to transmission and channel impairments, microphone variability and background noise. In this work spectral features are used to perform speaker recognition in the first stage and dynamic aspects of speaker-specific prosody are used to improve the performance in the second stage. For this task, speech corpus is collected at Indian Institute of Technology, Kharagpur, using 50 speakers recorded over the mobile phone. Background noise is simulated using additive white random noise from Noisex database. Speech enhancement techniques are used to improve the speaker recognition performance in the case of noisy speech. Gaussian mixture models (GMMs) and support vector machines (SVMs) are used for developing speaker models.
Performance of the speaker recognition system is observed to be 55 and 66{\%} using prosodic and spectral features respectively, for TIMIT speech at 15 dB SNR. The speaker recognition performance of around 73{\%} is achieved using the combination of spectral and prosodic features for noisy speech after speech enhancement.",
  isbn = "978-1-4614-0263-3",
  doi = "10.1007/978-1-4614-0263-3_8",
  url = "https://doi.org/10.1007/978-1-4614-0263-3_8"
}
% The abstract closed just above had a stray index-term fragment
% ("TIMIT database .") between two sentences; it has been removed.

@inbook{Rao2008,
  author = "Rao, K. Sreenivasa",
  editor = "Prasad, Bhanu and Prasanna, S. R. Mahadeva",
  chapter = "Modeling Supra-Segmental Features of Syllables Using Neural Networks",
  title = "Speech, Audio, Image and Biomedical Signal Processing using Neural Networks",
  year = "2008",
  publisher = "Springer Berlin Heidelberg",
  address = "Berlin, Heidelberg",
  pages = "71--95",
  abstract = "In this chapter we discuss modeling of supra-segmental features (intonation and duration) of syllables, and suggest some applications of these models. These supra-segmental features are also termed as prosodic features, and hence the corresponding models are known as prosody models. Neural networks are used to capture the implicit duration and intonation knowledge in the sequence of syllables of an utterance. A four layer feedforward neural network trained with backpropagation algorithm is used for modeling the duration and intonation knowledge of syllables separately. Labeled broadcast news data in the languages Hindi, Telugu and Tamil is used to develop neural network models in order to predict the duration and F0 of syllables in these languages. The input to the neural network consists of a feature vector representing the positional, contextual and phonological constraints. For improving the accuracy of prediction, further processing is done on the predicted values. We also propose a two-stage duration model for improving the accuracy of prediction.
The performance of the prosody models is evaluated using objective measures such as average prediction error, standard deviation and correlation coefficient. The prosody models are examined for applications such as speaker recognition, language identification and text-to-speech synthesis.",
  isbn = "978-3-540-75398-8",
  doi = "10.1007/978-3-540-75398-8_4",
  url = "https://doi.org/10.1007/978-3-540-75398-8_4"
}

@inbook{DBLP:conf/ic3/KoolagudiMVCR09,
  author = {Shashidhar G. Koolagudi and Sudhamay Maity and Anil Kumar Vuppala and Saswat Chakrabarti and K. Sreenivasa Rao},
  chapter = {IITKGP-SESC: Speech Database for Emotion Analysis},
  title = {Communications in Computer and Information Science (CCIS): Contemporary Computing},
  pages = {485--492},
  year = {2009},
  url = {https://doi.org/10.1007/978-3-642-03547-0_46},
  doi = {10.1007/978-3-642-03547-0_46},
  publisher = {Springer}
}

@inproceedings{DBLP:conf/ihci/ReddyR12,
  author = {V. Ramu Reddy and K. Sreenivasa Rao},
  title = {Better human computer interaction by enhancing the quality of text-to-speech synthesis},
  booktitle = {4th International Conference on Intelligent Human Computer Interaction, {IHCI} 2012, Kharagpur, India, December 27-29, 2012},
  pages = {1--6},
  year = {2012},
  crossref = {DBLP:conf/ihci/2012},
  url = {https://doi.org/10.1109/IHCI.2012.6481857},
  doi = {10.1109/IHCI.2012.6481857},
  biburl = {https://dblp.org/rec/bib/conf/ihci/ReddyR12},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/eswa/RaoSMK11,
  author = {K. Sreenivasa Rao and V. K. Saroj and Sudhamay Maity and Shashidhar G. Koolagudi},
  title = {Recognition of emotions from video using neural network models},
  journal = {Expert Syst.
Appl.},
  volume = {38},
  number = {10},
  pages = {13181--13185},
  year = {2011},
  url = {https://doi.org/10.1016/j.eswa.2011.04.129},
  doi = {10.1016/j.eswa.2011.04.129},
  biburl = {https://dblp.org/rec/bib/journals/eswa/RaoSMK11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/Rao11,
  author = {K. Sreenivasa Rao},
  title = {Application of prosody models for developing speech systems in Indian languages},
  journal = {I. J. Speech Technology},
  volume = {14},
  number = {1},
  pages = {19--33},
  year = {2011},
  url = {https://doi.org/10.1007/s10772-010-9086-9},
  doi = {10.1007/s10772-010-9086-9},
  biburl = {https://dblp.org/rec/bib/journals/ijst/Rao11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/KoolagudiK11,
  author = {Shashidhar G. Koolagudi and Rao Sreenivasa Krothapalli},
  title = {Two stage emotion recognition based on speaking rate},
  journal = {I. J. Speech Technology},
  volume = {14},
  number = {1},
  pages = {35--48},
  year = {2011},
  url = {https://doi.org/10.1007/s10772-010-9085-x},
  doi = {10.1007/s10772-010-9085-x},
  biburl = {https://dblp.org/rec/bib/journals/ijst/KoolagudiK11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/NarendraRGRM11,
  author = {N. P. Narendra and K. Sreenivasa Rao and Krishnendu Ghosh and Vempada Ramu Reddy and Sudhamay Maity},
  title = {Development of syllable-based text to speech synthesis system in Bengali},
  journal = {I. J. Speech Technology},
  volume = {14},
  number = {3},
  pages = {167},
  year = {2011},
  url = {https://doi.org/10.1007/s10772-011-9094-4},
  doi = {10.1007/s10772-011-9094-4},
  biburl = {https://dblp.org/rec/bib/journals/ijst/NarendraRGRM11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/ijst/VuppalaRCKP11,
  author = {Anil Kumar Vuppala and K. Sreenivasa Rao and Saswat Chakrabarti and P. Krishnamoorthy and S. R. M.
Prasanna},
  title = {Recognition of consonant-vowel {(CV)} units under background noise using combined temporal and spectral preprocessing},
  journal = {I. J. Speech Technology},
  volume = {14},
  number = {3},
  pages = {259},
  year = {2011},
  url = {https://doi.org/10.1007/s10772-011-9101-9},
  doi = {10.1007/s10772-011-9101-9},
  biburl = {https://dblp.org/rec/bib/journals/ijst/VuppalaRCKP11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/csl/Rao10,
  author = {K. Sreenivasa Rao},
  title = {Voice conversion by mapping the speaker-specific features using pitch synchronous approach},
  journal = {Computer Speech {\&} Language},
  volume = {24},
  number = {3},
  pages = {474--494},
  year = {2010},
  url = {https://doi.org/10.1016/j.csl.2009.03.003},
  doi = {10.1016/j.csl.2009.03.003},
  biburl = {https://dblp.org/rec/bib/journals/csl/Rao10},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jsea/RaoK10,
  author = {Krothapalli S. Rao and Shashidhar G. Koolagudi},
  title = {Selection of Suitable Features for Modeling the Durations of Syllables},
  journal = {{JSEA}},
  volume = {3},
  number = {12},
  pages = {1107--1117},
  year = {2010},
  url = {https://doi.org/10.4236/jsea.2010.312129},
  doi = {10.4236/jsea.2010.312129},
  biburl = {https://dblp.org/rec/bib/journals/jsea/RaoK10},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/jsip/Rao10,
  author = {Krothapalli Sreenivasa Rao},
  title = {Real Time Prosody Modification},
  journal = {J. Signal and Information Processing},
  volume = {1},
  number = {1},
  pages = {50--62},
  year = {2010},
  url = {https://doi.org/10.4236/jsip.2010.11006},
  doi = {10.4236/jsip.2010.11006},
  biburl = {https://dblp.org/rec/bib/journals/jsip/Rao10},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

@article{DBLP:journals/csl/RaoY09,
  author = {K. Sreenivasa Rao and B.
Yegnanarayana},
  title = {Intonation modeling for {Indian} languages},
  journal = {Computer Speech {\&} Language},
  volume = {23},
  number = {2},
  pages = {240--256},
  year = {2009},
  url = {https://doi.org/10.1016/j.csl.2008.06.005},
  doi = {10.1016/j.csl.2008.06.005},
  biburl = {https://dblp.org/rec/bib/journals/csl/RaoY09},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/speech/RaoY09,
  author = {K. Sreenivasa Rao and B. Yegnanarayana},
  title = {Duration modification using glottal closure instants and vowel onset points},
  journal = {Speech Communication},
  volume = {51},
  number = {12},
  pages = {1263--1269},
  year = {2009},
  url = {https://doi.org/10.1016/j.specom.2009.06.004},
  doi = {10.1016/j.specom.2009.06.004},
  biburl = {https://dblp.org/rec/bib/journals/speech/RaoY09},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/icapr/RaoPS09,
  author = {K. Sreenivasa Rao and S. R. Mahadeva Prasanna and T. V. Sagar},
  title = {Significance of Word and Syllable Level Information for Expressive Speech Processing},
  booktitle = {Seventh International Conference on Advances in Pattern Recognition, {ICAPR} 2009, Kolkata, India, 4-6 February 2009, Proceedings},
  pages = {159--162},
  year = {2009},
  crossref = {DBLP:conf/icapr/2009},
  url = {https://doi.org/10.1109/ICAPR.2009.47},
  doi = {10.1109/ICAPR.2009.47},
  biburl = {https://dblp.org/rec/bib/conf/icapr/RaoPS09},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/csl/RaoY07,
  author = {K. Sreenivasa Rao and B.
Yegnanarayana},
  title = {Modeling durations of syllables using neural networks},
  journal = {Computer Speech {\&} Language},
  volume = {21},
  number = {2},
  pages = {282--295},
  year = {2007},
  url = {https://doi.org/10.1016/j.csl.2006.06.003},
  doi = {10.1016/j.csl.2006.06.003},
  biburl = {https://dblp.org/rec/bib/journals/csl/RaoY07},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@article{DBLP:journals/spl/RaoPY07,
  author = {K. Sreenivasa Rao and S. R. Mahadeva Prasanna and Bayya Yegnanarayana},
  title = {Determination of Instants of Significant Excitation in Speech Using {Hilbert} Envelope and Group Delay Function},
  journal = {{IEEE} Signal Processing Letters},
  volume = {14},
  number = {10},
  pages = {762--765},
  year = {2007},
  url = {https://doi.org/10.1109/LSP.2007.896454},
  doi = {10.1109/LSP.2007.896454},
  biburl = {https://dblp.org/rec/bib/journals/spl/RaoPY07},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% A paper in an LNCS proceedings volume: typed as inproceedings, with the paper
% title in title, the volume title in booktitle and the series in series.
% NOTE(review): the LNCS volume number is not recorded here; add it once confirmed.
@inproceedings{DBLP:conf/premi/RaoLK07,
  author = {K. Sreenivasa Rao and Rabul Hussain Laskar and Shashidhar G. Koolagudi},
  title = {Voice Transformation by Mapping the Features at Syllable Level},
  booktitle = {Pattern Recognition and Machine Intelligence},
  series = {Lecture Notes in Computer Science},
  pages = {479--486},
  year = {2007},
  url = {https://doi.org/10.1007/978-3-540-77046-6_59},
  doi = {10.1007/978-3-540-77046-6_59},
  publisher = {Springer},
}

@article{DBLP:journals/taslp/RaoY06,
  author = {K. Sreenivasa Rao and B. Yegnanarayana},
  title = {Prosody modification using instants of significant excitation},
  journal = {{IEEE} Transactions on Audio, Speech, and Language Processing},
  volume = {14},
  number = {3},
  pages = {972--980},
  year = {2006},
  url = {https://doi.org/10.1109/TSA.2005.858051},
  doi = {10.1109/TSA.2005.858051},
  biburl = {https://dblp.org/rec/bib/journals/taslp/RaoY06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/cit/RaoY06,
  author = {K. Sreenivasa Rao and B.
Yegnanarayana},
  title = {Voice Conversion by Prosody and Vocal Tract Modification},
  booktitle = {9th International Conference in Information Technology, {ICIT} 2006, Bhubaneswar, Orissa, India, 18-21 December 2006},
  pages = {111--116},
  year = {2006},
  crossref = {DBLP:conf/cit/2006},
  url = {https://doi.org/10.1109/ICIT.2006.92},
  doi = {10.1109/ICIT.2006.92},
  biburl = {https://dblp.org/rec/bib/conf/cit/RaoY06},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/icassp/RaoY04,
  author = {K. Sreenivasa Rao and B. Yegnanarayana},
  title = {Modeling syllable duration in {Indian} languages using neural networks},
  booktitle = {2004 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2004, Montreal, Quebec, Canada, May 17-21, 2004},
  pages = {313--316},
  year = {2004},
  crossref = {DBLP:conf/icassp/2004},
  url = {https://doi.org/10.1109/ICASSP.2004.1327110},
  doi = {10.1109/ICASSP.2004.1327110},
  biburl = {https://dblp.org/rec/bib/conf/icassp/RaoY04},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% The page range is unknown, so the field is omitted instead of left empty
% (an empty field triggers a BibTeX warning).
@inproceedings{DBLP:conf/interspeech/RaoY04,
  author = {Krothapalli Sreenivasa Rao and Bayya Yegnanarayana},
  title = {Intonation modeling for {Indian} languages},
  booktitle = {{INTERSPEECH} 2004 - ICSLP, 8th International Conference on Spoken Language Processing, Jeju Island, Korea, October 4-8, 2004},
  year = {2004},
  crossref = {DBLP:conf/interspeech/2004},
  url = {http://www.isca-speech.org/archive/interspeech_2004/i04_0733.html},
  biburl = {https://dblp.org/rec/bib/conf/interspeech/RaoY04},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/icassp/RaoY03,
  author = {K. Sreenivasa Rao and B.
Yegnanarayana},
  title = {Prosodic manipulation using instants of significant excitation},
  booktitle = {2003 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} '03, Hong Kong, April 6-10, 2003},
  pages = {528--531},
  year = {2003},
  crossref = {DBLP:conf/icassp/2003},
  url = {https://doi.org/10.1109/ICASSP.2003.1198834},
  doi = {10.1109/ICASSP.2003.1198834},
  biburl = {https://dblp.org/rec/bib/conf/icassp/RaoY03},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% ICME 2003 paper; it carries the same title as the ICASSP 2003 entry but has
% its own venue, pages and DOI.
@inproceedings{DBLP:conf/icmcs/RaoY03,
  author = {K. Sreenivasa Rao and B. Yegnanarayana},
  title = {Prosodic manipulation using instants of significant excitation},
  booktitle = {Proceedings of the 2003 {IEEE} International Conference on Multimedia and Expo, {ICME} 2003, 6-9 July 2003, Baltimore, MD, {USA}},
  pages = {389--392},
  year = {2003},
  crossref = {DBLP:conf/icmcs/2003},
  url = {https://doi.org/10.1109/ICME.2003.1220936},
  doi = {10.1109/ICME.2003.1220936},
  biburl = {https://dblp.org/rec/bib/conf/icmcs/RaoY03},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

@inproceedings{DBLP:conf/icassp/YegnanarayanaPR02,
  author = {B. Yegnanarayana and S. R. Mahadeva Prasanna and K.
Sreenivasa Rao},
  title = {Speech enhancement using excitation source information},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2002, May 13-17 2002, Orlando, Florida, {USA}},
  pages = {541--544},
  year = {2002},
  crossref = {DBLP:conf/icassp/2002},
  url = {https://doi.org/10.1109/ICASSP.2002.5743774},
  doi = {10.1109/ICASSP.2002.5743774},
  biburl = {https://dblp.org/rec/bib/conf/icassp/YegnanarayanaPR02},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}

% Booktitle restored from the inverted "..., 2015 IEEE International Conference on"
% export form to natural word order.
@inproceedings{sarkar2015data,
  title = {Data-driven pause prediction for synthesis of storytelling style speech based on discourse modes},
  author = {Sarkar, Parakrant and Rao, K. Sreenivasa},
  booktitle = {2015 {IEEE} International Conference on Electronics, Computing and Communication Technologies ({CONECCT})},
  pages = {1--5},
  year = {2015},
  organization = {IEEE},
  url = {https://ieeexplore.ieee.org/abstract/document/7383906},
}

@inproceedings{7375711,
  author = {A. Haque and K. S. Rao},
  booktitle = {2015 International Conference on Computer, Communication and Control (IC4)},
  title = {Modification and incorporation of excitation source features for emotion conversion},
  year = {2015},
  pages = {1--5},
  keywords = {Gaussian processes;emotion recognition;feature extraction;regression analysis;speech processing;Gaussian normalization;PSOLA method;anger;emotion conversion;emotional speech;epoch sharpness;epoch strength;excitation source features;neutral speech;pitch contour;polynomial regression;sad;speech style;time domain pitch synchronous overlap and add method;Computers;Conferences;Distortion;Feature extraction;Filtering;Speech;Standards;Epoch;Gaussian normalization;epoch sharpness;epoch strength;zero frequency filter},
  doi = {10.1109/IC4.2015.7375711},
  month = sep,
  url = {https://ieeexplore.ieee.org/document/7375711},
}

@inproceedings{7375718,
  author = {A. K. Dutta and K. S.
Rao},
  booktitle = {2015 International Conference on Computer, Communication and Control (IC4)},
  title = {Analysis of linear prediction residual signal, its magnitude and phase for language identification on {NIST} {LRE} (2003) database},
  year = {2015},
  pages = {1--4},
  keywords = {Gaussian processes;audio databases;feature extraction;mixture models;natural language processing;speech recognition;GMM;Gaussian mixture model;LID;LID systems;LPR;NIST LRE speech database;glottal cycles;language identification;linear prediction residual signal analysis;magnitude analysis;phase analysis;source feature excitation;source features;subsegmental level processing;supra-segmental level processing;vocal tract features;Correlation;Correlation coefficient;Databases;Feature extraction;Mel frequency cepstral coefficient;NIST;Speech;Excitation source;Gaussian mixture model;MFCC;NIST LRE (2003);Segmental},
  doi = {10.1109/IC4.2015.7375718},
  month = sep,
  url = {https://ieeexplore.ieee.org/document/7375718},
}

@inproceedings{7375669,
  author = {M. G. Reddy and P. Sen and K. E. Manjunath and A. Dutta and A. Haque and P. Sarkar and K. S.
Rao},
  booktitle = {2015 International Conference on Computer, Communication and Control (IC4)},
  title = {Automatic pitch accent contour transcription for {Indian} languages},
  year = {2015},
  pages = {1--6},
  keywords = {natural language processing;smoothing methods;speech processing;Indian language;ZFF speech signal;automatic method;automatic pitch accent contour transcription;automatic pitch contour transcription method;automatic pitch contour transcription system;intonation like contour;intonation phrase;nonlinear smoothing technique;speaker-wise subjective evaluation;spurious pitch value;temporal variation;timing information;tonal variation;zero frequency filtered speech signal;Hidden Markov models;Market research;Maximum likelihood detection;Nonlinear filters;Resonator filters;Smoothing methods;Speech;Automatic pitch contour transcription;Conversation speech;Extempore speech;Intonation Contour;Intonation phrase;Pitch contour;Read speech;Saddle points;Zero frequency filter},
  doi = {10.1109/IC4.2015.7375669},
  month = sep,
  url = {https://ieeexplore.ieee.org/document/7375669/},
}

@inproceedings{7375666,
  author = {D. M. Harikrishna and K. S.
Rao},
  booktitle = {2015 International Conference on Computer, Communication and Control (IC4)},
  title = {Classification of children stories in {Hindi} using keywords and {POS} density},
  year = {2015},
  pages = {1--5},
  keywords = {Bayes methods;computational linguistics;document handling;natural language processing;pattern classification;support vector machines;Hindi children story classification;POS tags;fable genre;folk-tale genre;k-nearest neighbour classifier;keyword based features;legend genre;linguistic based features;naive Bayes classifier;part-of-speech based features;support vector machine;term frequency inverse document frequency;Computers;Conferences;Niobium;Pragmatics;Support vector machines;Tagging;Text categorization;Document Classification;Hindi Story Classification;KNN;Naive Bayes;Part-of-Speech;SVM;Text-to-Speech;Vector Space Model},
  doi = {10.1109/IC4.2015.7375666},
  month = sep,
  url = {https://ieeexplore.ieee.org/document/7375666/},
}

@inproceedings{7375714,
  author = {T. Mandal and K. S. Rao},
  booktitle = {2015 International Conference on Computer, Communication and Control (IC4)},
  title = {Analysis of perturbation in pitch period and contact quotient for classifying age groups},
  year = {2015},
  pages = {1--5},
  keywords = {medical signal processing;signal classification;DEGG signal;contact quotient;electrical impedance;first-order differential electroglottograph signal;fundamental frequency;Acoustic measurements;Acoustics;Computers;Contacts;Impedance;Jitter;Standards;Contact quotient;Electroglottographic signal;Glottal Closure Instant;Glottal Opening Instant;Pitch period},
  doi = {10.1109/IC4.2015.7375714},
  month = sep,
  url = {https://ieeexplore.ieee.org/document/7375714/},
}

@inproceedings{7561102,
  author = {N. P. Narendra and M. K. Reddy and K. S.
Rao},
  booktitle = {2016 Twenty Second National Conference on Communication (NCC)},
  title = {Excitation modeling for {HMM}-based speech synthesis based on principal component analysis},
  year = {2016},
  pages = {1--6},
  keywords = {hidden Markov models;principal component analysis;speech synthesis;HMM-based speech synthesis;PCA coefficients;component analysis is performed on the pitch-synchronous;excitation modeling method;excitation modeling methods;excitation signal;noise component;pitch-synchronous residual frames;principal component analysis;Databases;Dispersion;Hidden Markov models;High-temperature superconductors;Principal component analysis;Speech;Speech synthesis},
  doi = {10.1109/NCC.2016.7561102},
  month = mar,
  url = {https://ieeexplore.ieee.org/document/7561102/},
}

% First author written as "Harikrishna, D. M." so that BibTeX does not take the
% trailing initial "M" as the surname; matches "D. M. Harikrishna" used elsewhere.
@inproceedings{7561205,
  author = {Harikrishna, D. M. and K. S. Rao},
  booktitle = {2016 Twenty Second National Conference on Communication (NCC)},
  title = {Emotion-specific features for classifying emotions in story text},
  year = {2016},
  pages = {1--4},
  keywords = {emotion recognition;speech synthesis;support vector machines;text analysis;KNN;Naive Bayes;SVM models;emotion classification;emotion-specific text features;five dimensional feature vector;k-nearest neighbour;story genre information;story speech synthesizing;story text;support vector machine;Databases;Information technology;Niobium;Semantics;Speech;Speech synthesis;Support vector machines},
  doi = {10.1109/NCC.2016.7561205},
  month = mar,
  url = {https://ieeexplore.ieee.org/document/7561205/},
}

@inproceedings{7058226,
  author = {K. E. Manjunath and K. S. Rao and M. G.
Reddy},
  booktitle = {2015 International Conference on Signal Processing and Communication Engineering Systems},
  title = {Two-stage phone recognition system using articulatory and spectral features},
  year = {2015},
  pages = {107--111},
  keywords = {feedforward neural nets;hidden Markov models;speech recognition;Indian language Bengali;Mel-frequency cepstral coefficients;TIMIT speech databases;articulatory features;feedforward neural networks;hidden Markov models;phone recognition models;spectral features;two-stage models;two-stage phone recognition system;Accuracy;Acoustics;Educational institutions;Hidden Markov models;Speech;Speech recognition;Training;Articulatory features;Bengali;FeedForward Neural Network;Hidden Markov Model;International Phonetic Alphabet;Manner of Articulation;Two-stage phone recognition},
  doi = {10.1109/SPACES.2015.7058226},
  month = jan,
  url = {https://ieeexplore.ieee.org/document/7058226/},
}

@inproceedings{7058205,
  author = {K. E. Manjunath and K. S. Rao and M. G.
Reddy},
  booktitle = {2015 International Conference on Signal Processing and Communication Engineering Systems},
  title = {Improvement of phone recognition accuracy using source and system features},
  year = {2015},
  pages = {501--505},
  keywords = {hidden Markov models;speech recognition;Bengali language;English language;Indian language;Mel-frequency cepstral coefficient feature;TEVIIT;excitation source information;hidden Markov model;linear prediction residual;phone recognition accuracy;phone recognition system;phonetic prosodically rich transcribed speech corpora;speech production system;time varying excitation;time varying vocal tract system;vocal tract information;Accuracy;Hidden Markov models;Mel frequency cepstral coefficient;Speech;Speech processing;Speech recognition;Training;RMFCCs;excitation source features;phone posteriors;phone recognition system;tandem systems},
  doi = {10.1109/SPACES.2015.7058205},
  month = jan,
  url = {https://ieeexplore.ieee.org/document/7058205/},
}

@inproceedings{7058278,
  author = {M. G. Reddy and D. M. Harikrishna and K. S. Rao and K. E.
Manjunath},
  booktitle = {2015 International Conference on Signal Processing and Communication Engineering Systems},
  title = {{Telugu} emotional story speech synthesis using {SABLE} markup language},
  year = {2015},
  pages = {331--335},
  keywords = {XML;emotion recognition;filtering theory;speech synthesis;SABLE markup format;SABLE markup language;Telugu emotional speech;Telugu emotional story speech synthesis;Telugu neutral festival TTS system;XML based markup langauge;ZFF signal;concatinative speech synthesizer;hand annotated story text;markup story text;phrase level;pitch intensity;pitch range;prosody modification factor;prosody modification parameter;prosody tag;story telling application;synthesized speech;zero frequency filtered signal;Databases;Educational institutions;Pragmatics;Speech;Speech synthesis;Synthesizers;Emotional speech;Emotions;Prosody tags;SABLE markup;Synthetic speech;ZFF},
  doi = {10.1109/SPACES.2015.7058278},
  month = jan,
  url = {https://ieeexplore.ieee.org/document/7058278/},
}

@inproceedings{6983974,
  author = {D. Nandi and D. Pati and K. S.
Rao},
  booktitle = {2014 International Conference on Signal Processing and Communications (SPCOM)},
  title = {Sub-segmental, segmental and supra-segmental analysis of linear prediction residual signal for language identification},
  year = {2014},
  pages = {1--6},
  keywords = {Gaussian processes;mixture models;natural language processing;prediction theory;speech processing;GMM;Gaussian mixture modelling;IITKGP-MLILSC speech database;LID task;LP residual signal;excitation signal;excitation source features;excitation source information;language identification;language models;linear prediction residual signal;subsegmental analysis;subsegmental level information;supra-segmental analysis;supra-segmental level information;vocal tract;Accuracy;Correlation;Feature extraction;Mel frequency cepstral coefficient;Production;Speech;IITKGP-MLILSC;LP residual;MFCC;segmental;sub-segmental;suprasegmental},
  doi = {10.1109/SPCOM.2014.6983974},
  issn = {2165-0608},
  month = jul,
  url = {https://ieeexplore.ieee.org/document/6983974/},
}

@inproceedings{7045735,
  author = {J. Yadav and A. Kumari and K.
Sreenivasa Rao},
  booktitle = {2015 International Conference on Communication, Information Computing Technology (ICCICT)},
  title = {Emotion recognition using {LP} residual at sub-segmental, segmental and supra-segmental levels},
  year = {2015},
  pages = {1--6},
  keywords = {Gaussian processes;emotion recognition;mixture models;speech recognition;Berlin emotional database;GMM;Gaussian mixture models;IIT Kharagpur simulated emotion speech corpus;LP residual;inverse filtering;linear prediction residual;segmental level;speech signal based emotion recognition;sub-segmental level;supra-segmental level;Databases;Emotion recognition;Feature extraction;Gaussian mixture model;Speech;Speech recognition;Feature vector;Gaussian Mixture Model;Linear prediction analysis;Linear prediction coefficient;Probability density;Residual signal;Segmental level;Simulated database;Speech;Speech sample;Sub-segmental;Suprasegmental level},
  doi = {10.1109/ICCICT.2015.7045735},
  month = jan,
  url = {https://ieeexplore.ieee.org/document/7045735/},
}

@inproceedings{7100694,
  author = {J. Yadav and K. S.
Rao},
  booktitle = {2015 International Conference on Cognitive Computing and Information Processing (CCIP)},
  title = {Generation of emotional speech by prosody imposition on sentence, word and syllable level fragments of neutral speech},
  year = {2015},
  pages = {1--5},
  keywords = {emotion recognition;feature extraction;natural language processing;speech synthesis;Hindi emotional-speech generation;Hindi neutral-speech;Praat script;Praat tool;acoustic aspects;intensity contour;pitch contour;prosodic features;prosody imposition;sentence level fragments;syllable level fragments;word level fragments;Conferences;Databases;Hidden Markov models;Speech;Speech synthesis;Neutral text-to-speech system;PSOLA Algorithm;Praat;Praat Script;duration pattern;intonation pattern;prosody},
  doi = {10.1109/CCIP.2015.7100694},
  month = mar,
  url = {https://ieeexplore.ieee.org/document/7100694/},
}

@inproceedings{6726106,
  author = {A. K. Singh and J. Mukhopadhyay and S. B. S. Kumar and K. S. Rao},
  booktitle = {2013 Annual IEEE India Conference (INDICON)},
  title = {Infant cry recognition using excitation source features},
  year = {2013},
  pages = {1--5},
  keywords = {Gaussian processes;feature extraction;speech recognition;EIC;ESC;GMM;Gaussian mixture models;epoch interval contour;epoch sharpness;epoch strength contour;infant cries;infant cry recognition;source feature excitation;Accuracy;Feature extraction;Pain;Pediatrics;Resonant frequency;Speech;Vectors;Epoch Interval Contour (EIC);Epoch Strength Contour (ESC);Infant Cry Recognition System (ICRS);Zero Frequency Filtering},
  doi = {10.1109/INDCON.2013.6726106},
  issn = {2325-940X},
  month = dec,
  url = {https://ieeexplore.ieee.org/document/6726106/},
}

@inproceedings{6726131,
  author = {S. Sarkar and K. S. Rao and D. Nandi and S. B. S.
Kumar},
  booktitle = {2013 Annual IEEE India Conference (INDICON)},
  title = {Multilingual speaker recognition on {Indian} languages},
  year = {2013},
  pages = {1--5},
  keywords = {Gaussian processes;speaker recognition;IITKGP-MLILSC speech corpus;Indian languages;average language-independent speaker identification;closed-set speaker identification;language mismatch;multilingual speaker recognition;speaker verification experiments;standard GMM-based speaker recognition framework;Equations;Mathematical model;Mel frequency cepstral coefficient;Speaker recognition;Speech;Vectors;Gaussian Mixture Models;Indian Languages;Multilingual Speaker Recognition;Speaker Verification},
  doi = {10.1109/INDCON.2013.6726131},
  issn = {2325-940X},
  month = dec,
  url = {https://ieeexplore.ieee.org/document/6726131/},
}

@inproceedings{6726109,
  author = {K. E. Manjunath and S. B. S. Kumar and D. Pati and B. Satapathy and K. S. Rao},
  booktitle = {2013 Annual IEEE India Conference (INDICON)},
  title = {Development of Consonant-Vowel Recognition Systems for {Indian} languages: {Bengali} and {Odia}},
  year = {2013},
  pages = {1--6},
  keywords = {cepstral analysis;feature extraction;natural language processing;speech recognition;support vector machines;Bengali language;CV classes;CV models;CV unit sequence;CVRS development;CVRS performance evaluation;Indian languages;MFCC;Mel frequency cepstral coefficients;Odia language;SVMs;VOP;anchor points;consonant-vowel recognition system development;consonant-vowel unit sequence;feature extraction;read speech corpus;speaker dependent modes;speaker independent modes;speech utterance;support vector machines;syllable boundary marking;vowel onset points;Accuracy;Feature extraction;Hidden Markov models;Speech;Support vector machines;Training;Vectors;Consonant-Vowel recognition;International Phonetic Alphabet;Support Vector Machine;Syllable recognition;Vowel Onset Point},
  doi = {10.1109/INDCON.2013.6726109},
  issn = {2325-940X},
  month = dec,
url = {https://ieeexplore.ieee.org/document/6726109/},
}

@inproceedings{6529409,
  author = {A. K. Singh and J. Mukhopadhyay and K. S. Rao},
  booktitle = {2013 Indian Conference on Medical Informatics and Telemedicine (ICMIT)},
  title = {Classification of infant cries using source, system and supra-segmental features},
  year = {2013},
  pages = {58--63},
  keywords = {Gaussian processes;acoustic signal processing;cepstral analysis;feature extraction;medical signal processing;modulation spectra;signal classification;Gaussian Mixture Models;IIT-KGP;Infant cry database;RMFCC;acoustic signal;eNPCS;hunger;implicit LP residual features;infant cry classification;infant cry recognition;infant cry specific information;mel-frequency cepstral coefficients;modulation spectrum;pain;recognition performance;residual MFCC;score level fusion;source feature;suprasegmental feature;system feature;telemedicine project;time domain envelope features;wet-diaper;Covariance matrices;Feature extraction;Mel frequency cepstral coefficient;Modulation;Pain;Time-domain analysis;Vectors;Gaussian Mixture Model;Infant cry recognition;Modulation Spectrum;Spectral features;Time domain envelope},
  doi = {10.1109/IndianCMIT.2013.6529409},
  month = mar,
  url = {https://ieeexplore.ieee.org/document/6529409/},
}

@inproceedings{6487995,
  author = {S. Sarkar and K. S. Rao},
  booktitle = {2013 National Conference on Communications (NCC)},
  title = {Speaker verification in noisy environment using {GMM} supervectors},
  year = {2013},
  pages = {1--5},
  keywords = {Adaptation models;Kernel;Noise;Noise measurement;Speech;Support vector machines;Training;Gaussian Mixture Models;Kernel;Speaker Verification;Supervectors;Support Vector Machines;Universal Background Model},
  doi = {10.1109/NCC.2013.6487995},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6487995/},
}

@inproceedings{6487998,
  author = {M. Bhaykar and J. Yadav and K. S.
Rao},
  booktitle = {2013 National Conference on Communications (NCC)},
  title = {Speaker dependent, speaker independent and cross language emotion recognition from speech using {GMM} and {HMM}},
  year = {2013},
  pages = {1--5},
  keywords = {Databases;Emotion recognition;Hidden Markov models;Speech;Speech recognition;Testing;Training;Cross language emotion recognition;Emotion Recognition;GMM;HMM;IITKGP-SEHSC;IITKGP-SESC;MFCC;Speaker dependent emotion recognition;Speaker independent emotion recognition},
  doi = {10.1109/NCC.2013.6487998},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6487998/},
}

@inproceedings{6487999,
  author = {A. K. Singh and J. Mukhopadhyay and K. S. Rao},
  booktitle = {2013 National Conference on Communications (NCC)},
  title = {Classification of infant cries using epoch and spectral features},
  year = {2013},
  pages = {1--5},
  keywords = {Accuracy;Feature extraction;Mel frequency cepstral coefficient;Pain;Resonant frequency;Speech;Vectors;Epoch Interval Contour;Gaussian Mixture Model;Infant cry recognition;Spectral features},
  doi = {10.1109/NCC.2013.6487999},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6487999/},
}

@inproceedings{6176848,
  author = {K. S. Rao and K. Pachpande and R. R. Vempada and S.
Maity},
  booktitle = {2012 National Conference on Communications (NCC)},
  title = {Segmentation of {TV} broadcast news using speaker specific information},
  year = {2012},
  pages = {1--5},
  keywords = {Gaussian distribution;speaker recognition;television broadcasting;GMM;Gaussian mixture models;MFCC;TV broadcast news bulletins;background music;gross level segmentation;initial headlines;mel frequency cepstral coefficients;speaker specific information;two-stage segmentation;Accuracy;Feature extraction;Indexing;Mel frequency cepstral coefficient;Speech;TV;TV broadcasting;Gaussian mixture model;TV broadcast news;speaker specific information;two-stage segmentation},
  doi = {10.1109/NCC.2012.6176848},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6176848/},
}

@inproceedings{6176824,
  author = {R. R. Vempada and K. S. Rao},
  booktitle = {2012 National Conference on Communications (NCC)},
  title = {Modeling the intensity of syllables using classification and Regression Trees},
  year = {2012},
  pages = {1--5},
  keywords = {pattern classification;prediction theory;regression analysis;speech intelligibility;speech synthesis;CART;classification and regression tree;correlation coefficient;phonological features;predicted syllable intensities;prediction error;speech segments intensities;standard deviation;synthesized speech generation;synthesized speech quality;text-to-speech synthesis systems;Computational modeling;Context modeling;Pragmatics;Predictive models;Regression tree analysis;Speech;Speech synthesis;CART;Contextual;Intelligibility;MOS;Naturalness;Objective;Phonological;Positional;Subjective;TTS},
  doi = {10.1109/NCC.2012.6176824},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6176824/},
}

@inproceedings{6176831,
  author = {S. Maity and A. Kumar Vuppala and K. S. Rao and D.
Nandi},
  booktitle = {2012 National Conference on Communications (NCC)},
  title = {{IITKGP-MLILSC} speech database for language identification},
  year = {2012},
  pages = {1--5},
  keywords = {Gaussian processes;cepstral analysis;feature extraction;natural language processing;pattern classification;speaker recognition;speech processing;Gaussian mixture models;IITKGP-MLILSC speech database;Indian languages;automatic language identification system;language specific information analysis;linear predictive cepstral coefficients;mel-frequency cepstral coefficients;multilingual speech corpus;speaker dependent environments;speaker recognition performance;spectral feature exploration;speech feature classification models;systematic speech feature analysis;Computational modeling;Databases;Mel frequency cepstral coefficient;Predictive models;Speech;Speech recognition;Gaussian mixture models (GMMs);Indian Language Database;Language Identification;Linear prediction cepstral coefficients (LPCCs);Mel-frequency cepstral coefficients (MFCCs)},
  doi = {10.1109/NCC.2012.6176831},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6176831/},
}

@inproceedings{6176832,
  author = {K. Ghosh and K. S.
Rao},
  booktitle = {2012 National Conference on Communications (NCC)},
  title = {Subword based approach for grapheme-to-phoneme conversion in {Bengali} text-to-speech synthesis system},
  year = {2012},
  pages = {1--5},
  keywords = {natural language processing;speech synthesis;text analysis;vocabulary;Bengali text-to-speech synthesis system;G2P conversion;TTS synthesis system;compound word;grapheme-to-phoneme conversion;inflected word;manual based approach;morphological information;pronunciation concatenation;pronunciation dictionary;root word;rule-based approach;subword based approach;vocabulary coverage;word segmentation;Accuracy;Compounds;Databases;Dictionaries;Information technology;Manuals;Speech;Subword method;grapheme-to-phoneme conversion;stemming;text-to-speech synthesis},
  doi = {10.1109/NCC.2012.6176832},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6176832/},
}

@inproceedings{6176851,
  author = {R. R. Vempada and B. Siva Ayyappa Kumar and K. S. Rao},
  booktitle = {2012 National Conference on Communications (NCC)},
  title = {Characterization of infant cries using spectral and prosodic features},
  year = {2012},
  pages = {1--5},
  keywords = {speech recognition;support vector machines;IIT-KGP;MFCC;STE;SVM models;Telemedicine project;hunger cry;infant cry characterization;infant cry database;infant cry recognition;mel-frequency cepstral coefficients;pain cry;pause duration;prosodic features;prosodic information;short-time frame energies;spectral features;spectral information;support vector machines;wet-diaper cry;Mel frequency cepstral coefficient;Pain;Pediatrics;Speech;Speech recognition;Support vector machines;Vectors;Infant cry recognition;Prosodic features;Spectral features;Support Vector Machine},
  doi = {10.1109/NCC.2012.6176851},
  month = feb,
  url = {https://ieeexplore.ieee.org/document/6176851/},
}

@inproceedings{REDDY20122772,
  title = "Intonation Modeling Using Linguistic, Production and Prosodic Constraints for Syllable
based {TTS} Systems",
  volume = {38},
  pages = {2772--2783},
  year = {2012},
  booktitle = {International Conference on Modelling Optimization and Computing ({ICMOC})},
  issn = {1877-7058},
  doi = {10.1016/j.proeng.2012.06.325},
  url = {http://www.sciencedirect.com/science/article/pii/S1877705812022382},
  author = {V. Ramu Reddy and K. Sreenivasa Rao},
  keywords = {Intonation, TTS, FFNN, CART, Contextual, Phonological, Positional, Articulatory, Prosody, Festival},
}

% The doi field holds the bare DOI; the resolver URL is left to the style.
@inproceedings{KOOLAGUDI20123892,
  title = {Real Life Emotion Classification using Spectral Features and {Gaussian} Mixture Models},
  volume = {38},
  pages = {3892--3899},
  year = {2012},
  booktitle = {International Conference on Modelling Optimization and Computing ({ICMOC})},
  issn = {1877-7058},
  doi = {10.1016/j.proeng.2012.06.447},
  url = {http://www.sciencedirect.com/science/article/pii/S1877705812023600},
  author = {Shashidhar G. Koolagudi and Anurag Barthwal and Swati Devliyal and K. Sreenivasa Rao},
  keywords = {emotion classification, spectral features, GMM, MFCC, LPCC, text dependent emotion recognition, text independent emotion recognition},
}

@inproceedings{KOOLAGUDI20123409,
  title = {Recognition of Emotions from Speech using Excitation Source Features},
  volume = {38},
  pages = {3409--3417},
  year = {2012},
  booktitle = {International Conference on Modelling Optimization and Computing ({ICMOC})},
  issn = {1877-7058},
  doi = {10.1016/j.proeng.2012.06.394},
  url = {http://www.sciencedirect.com/science/article/pii/S1877705812023077},
  author = "Shashidhar G. Koolagudi and Swati Devliyal and Bhavna Chawla and Anurag Barthwal and K.
Sreenivasa Rao",
  keywords  = {emotion recognition, excitation source features, LP residual, GMM (Gaussian mixture models), GEU Semi Natural Emotion Speech Corpus},
}

% The doi field holds the bare DOI; the style or hyperref adds the resolver prefix.
@inproceedings{KOOLAGUDI20123391,
  author    = {Shashidhar G. Koolagudi and Deepika Rastogi and K. Sreenivasa Rao},
  title     = {Identification of Language using Mel-Frequency Cepstral Coefficients (MFCC)},
  booktitle = {International Conference on Modelling Optimization and Computing (ICMOC)},
  year      = {2012},
  volume    = {38},
  pages     = {3391--3398},
  issn      = {1877-7058},
  doi       = {10.1016/j.proeng.2012.06.392},
  url       = {http://www.sciencedirect.com/science/article/pii/S1877705812023053},
  keywords  = {Gaussian Mixture Model, Language identification, Mel-frequency Cepstral Coefficient, Spectral features},
}

@inproceedings{5738535,
  author    = {A. K. Vuppala and J. Yadav and S. Chakrabarti and K. S. Rao},
  title     = {Effect of Low Bit Rate Speech Coding on Epoch Extraction},
  booktitle = {2011 International Conference on Devices and Communications (ICDeCom)},
  year      = {2011},
  month     = feb,
  pages     = {1--4},
  keywords  = {dynamic programming;filtering theory;speech coding;vocoders;CELP;CMU-Arctic data;ETSI 06.10;FS-1016;GSM full rate;dynamic programming projected phase slope;electroglottograph;epoch extraction methods;excitation source;low bit rate speech coding;mobile environment;speech coders;speech signal;speech systems;time-varying characteristics;vocal-tract system;zero frequency filter;Bit rate;Databases;GSM;Speech;Speech coding;Speech processing},
  doi       = {10.1109/ICDECOM.2011.5738535},
  url       = {https://ieeexplore.ieee.org/document/5738535/},
}

@inproceedings{5738540,
  author    = {S. G. Koolagudi and R. Reddy and J. Yadav and K. S.
Rao},
  title     = {IITKGP-SEHSC : Hindi Speech Corpus for Emotion Analysis},
  booktitle = {2011 International Conference on Devices and Communications (ICDeCom)},
  year      = {2011},
  month     = feb,
  pages     = {1--5},
  keywords  = {cepstral analysis;emotion recognition;natural language processing;speech processing;Hindi speech corpus;Mel frequency cepstral coefficients;emotion classification;emotion recognition;neutral text prompts;prosodic features;spectral features;speech signals;Computational modeling;Databases;Emotion recognition;Feature extraction;Speech;Speech recognition;Support vector machines},
  doi       = {10.1109/ICDECOM.2011.5738540},
  url       = {https://ieeexplore.ieee.org/document/5738540/},
}

@inproceedings{5738536,
  author    = {S. G. Koolagudi and N. Kumar and K. S. Rao},
  title     = {Speech Emotion Recognition Using Segmental Level Prosodic Analysis},
  booktitle = {2011 International Conference on Devices and Communications (ICDeCom)},
  year      = {2011},
  month     = feb,
  pages     = {1--5},
  keywords  = {emotion recognition;speech recognition;support vector machines;Gaussian mixture model;IITKGP-SESC;SVM;segmental level prosodic analysis;speech emotion recognition;speech segmentation;support vector machine;vowel onset point;Databases;Emotion recognition;Feature extraction;Speech;Speech processing;Speech recognition;Support vector machines},
  doi       = {10.1109/ICDECOM.2011.5738536},
  url       = {https://ieeexplore.ieee.org/abstract/document/5738536/},
}

@inproceedings{6075122,
  author    = {N. P. Narendra and K. S.
Rao},
  title     = {Syllable specific target cost formulation for syllable based text-to-speech synthesis in Bengali},
  booktitle = {2011 2nd International Conference on Computer and Communication Technology (ICCCT-2011)},
  year      = {2011},
  month     = sep,
  pages     = {180--184},
  keywords  = {natural languages;regression analysis;speech synthesis;Bengali language;CART;classification and regression trees;contextual features;phonological features;subjective listening tests;syllable based text-to-speech synthesis;syllable classification;syllable specific positional;syllable specific target cost formulation;synthesized speech;word syllable;Context;Cost function;Databases;Feature extraction;Pragmatics;Speech;Speech synthesis;Bengali TTS;target cost formulation;text-to-speech synthesis;unit selection},
  doi       = {10.1109/ICCCT.2011.6075122},
  url       = {https://ieeexplore.ieee.org/document/6075122/},
}

@inproceedings{6075155,
  author    = {V. R. Reddy and K. S. Rao},
  title     = {Intonation modeling using FFNN for syllable based Bengali text to speech synthesis},
  booktitle = {2011 2nd International Conference on Computer and Communication Technology (ICCCT-2011)},
  year      = {2011},
  month     = sep,
  pages     = {334--339},
  keywords  = {feedforward neural nets;speech synthesis;Bengali text-to-speech synthesis;Indian language;average prediction error;classification and regression tree model;contextual feature;correlation coefficient;feedforward neural network;intonation modeling;phonological feature;positional feature;root mean squared error;subjective listening test;Buildings;Computational modeling;Correlation;Pragmatics;Predictive models;Speech;Training;CART;Contextual;FFNN;Festival;Intonation;Phonological;Positional;Prosody;TTS},
  doi       = {10.1109/ICCCT.2011.6075155},
  url       = {https://ieeexplore.ieee.org/document/6075155/},
}

@inproceedings{6139334,
  author    = {K. Ghosh and K. S.
Rao},
  title     = {Memory-based data-driven approach for grapheme-to-phoneme conversion in Bengali text-to-speech synthesis system},
  booktitle = {2011 Annual IEEE India Conference},
  year      = {2011},
  month     = dec,
  pages     = {1--4},
  keywords  = {graphs;speech processing;speech synthesis;word processing;bengali text-to-speech synthesis system;grapheme-to-phoneme conversion;linguistic feature;memory-based data-driven approach;morphological analyzer;morphological knowledge;phonetic feature;rule-based Bengali G2P conversion technique;text corpus;Accuracy;Context;Dictionaries;Hidden Markov models;Manuals;Testing;Training;Alignment problem;Bengali;Data-driven method;Grapheme-to-phoneme conversion;Text-to-speech synthesis},
  doi       = {10.1109/INDCON.2011.6139334},
  issn      = {2325-940X},
  url       = {https://ieeexplore.ieee.org/document/6139334/},
}

@inproceedings{6139376,
  author    = {N. P. Narendra and K. S. Rao and K. Ghosh and V. R. Reddy and S. Maity},
  title     = {Development of Bengali screen reader using Festival speech synthesizer},
  booktitle = {2011 Annual IEEE India Conference},
  year      = {2011},
  month     = dec,
  pages     = {1--4},
  keywords  = {Internet;handicapped aids;human computer interaction;information retrieval;natural language processing;screens (display);speech synthesis;Bengali screen reader development;Bengali text-to-speech synthesis system;Festival speech synthesizer;Web browsing;system usability;visually challenged people;Buildings;Computers;Databases;Speech;Synthesizers;Time factors;Usability;Bengali screen reader;database pruning;unrestricted Bengali TTS},
  doi       = {10.1109/INDCON.2011.6139376},
  issn      = {2325-940X},
  url       = {https://ieeexplore.ieee.org/document/6139376/},
}

% Capitals lost in the export are restored and brace-protected; the second
% author is written as elsewhere in this file ("V. Ramu Reddy").
@inproceedings{ghosh2010grapheme,
  author    = {Ghosh, Krishnendu and Reddy, V. Ramu},
  title     = {Grapheme to phoneme conversion in {Bengali} for {Festival} based {TTS} framework},
  booktitle = {8th International Conference on Natural Language Processing (ICON)},
  pages     = {1--6},
  year      = {2010},
url = {https://www.researchgate.net/profile/Krishnendu_Ghosh11/publication/259562055_Grapheme_to_phoneme_conversion_in_Bengali_for_festival_based_tts_framework/links/57e2150b08ae1f0b4d93fe2f/Grapheme-to-phoneme-conversion-in-Bengali-for-festival-based-tts-framework.pdf},
}

@inproceedings{5712728,
  author    = {S. G. Koolagudi and K. S. Rao},
  title     = {Real life emotion classification using VOP and pitch based spectral features},
  booktitle = {2010 Annual IEEE India Conference (INDICON)},
  year      = {2010},
  month     = dec,
  pages     = {1--4},
  keywords  = {Gaussian processes;emotion recognition;speech recognition;Gaussian mixture models;IITKGP-SESC;VOP;emotion recognition models;emotion speech corpus;linear prediction;pitch based spectral features;pitch synchronous analysis;real life emotion classification;speech emotion classification;vowel onset points;Databases;Emotion recognition;Feature extraction;Motion pictures;Speech;Speech processing;Speech recognition;CV-transition region;Consonant region;Emo-DB;Emotion classification;IITKGP-MESC;IITKGP-SESC;Spectral features;Vowel onset point;Vowel region},
  doi       = {10.1109/INDCON.2010.5712728},
  issn      = {2325-940X},
  url       = {https://ieeexplore.ieee.org/document/5712728/},
}

@inproceedings{5712604,
  author    = {A. K. Vuppala and K. S. Rao and S. Chakrabarti},
  title     = {Effect of speech coding on speaker identification},
  booktitle = {2010 Annual IEEE India Conference (INDICON)},
  year      = {2010},
  month     = dec,
  pages     = {1--4},
  keywords  = {GSM;Mel frequency cepstral coefficient;Silicon;Speaker recognition;Speech;Speech coding;CELP (FS-1016);GSM full rate (ETSI 06.10);MELP (TI 2.4kbps);Speaker identification;Wireless environment;speech coders},
  doi       = {10.1109/INDCON.2010.5712604},
  issn      = {2325-940X},
  url       = {https://ieeexplore.ieee.org/document/5712604/},
}

@inproceedings{Vuppala2010TwostageIC,
  title     = {Two-stage Isolated Consonant-Vowel (CV) Unit Recognition in Indian Languages},
  author    = {Anil Kumar Vuppala and Saswat Chakrabarti and K. S.
Rao},
  booktitle = {International Conference on Computer and Communication Devices (ICCCD)},
  pages     = {1--5},
  year      = {2010},
  url       = {https://www.semanticscholar.org/paper/Two-stage-Isolated-Consonant-Vowel-(CV)-Unit-in-Vuppala-Chakrabarti/66cf62ae89075827ef4f0414b6af3322c05e3a04},
}

@inproceedings{5560542,
  author    = {K. S. Rao and A. K. Vuppala and S. Chakrabarti and L. Dutta},
  title     = {Robust speaker recognition on mobile devices},
  booktitle = {2010 International Conference on Signal Processing and Communications (SPCOM)},
  year      = {2010},
  month     = jul,
  pages     = {1--5},
  keywords  = {database management systems;mobile computing;mobile handsets;neural nets;speaker recognition;speech enhancement;Gaussian noise;NOISEX data;TIMIT;autoassociative neural network models;car;cellular databases;factory;high frequency;microphone speech databases;mobile devices;multi-SNR multi-environment speaker models;pink noise;robust speaker recognition;speech enhancement preprocessing methods;text independent speaker identification system;Mobile handsets;Noise;Noise measurement;Speaker recognition;Speech;Speech enhancement;Strontium;Auto-Associative neural network (AANN);Multi-SNR multi-environment speaker models;speaker recognition for mobile devices},
  doi       = {10.1109/SPCOM.2010.5560542},
  issn      = {2165-0608},
  url       = {https://ieeexplore.ieee.org/document/5560542/},
}

@inproceedings{5560541,
  author    = {S. G. Koolagudi and R. Reddy and K. S.
Rao},
  title     = {Emotion recognition from speech signal using epoch parameters},
  booktitle = {2010 International Conference on Signal Processing and Communications (SPCOM)},
  year      = {2010},
  month     = jul,
  pages     = {1--5},
  keywords  = {Gaussian processes;emotion recognition;speech processing;support vector machines;Berlin emotional database;Gaussian mixture models;IITKGP-Simulated Emotion Speech Corpus;Telugu database;emotion recognition;epoch parameters;linear prediction residual;speech signal;support vector machines;zero frequency filtered speech signal;Databases;Emotion recognition;Feature extraction;Resonant frequency;Speech;Speech recognition;Support vector machines;Epoch;GMM;Instantaneous frequency;SVM;Sharpness of epoch;Source features;Strength of epoch},
  doi       = {10.1109/SPCOM.2010.5560541},
  issn      = {2165-0608},
  url       = {https://ieeexplore.ieee.org/abstract/document/5560541/},
}

@inproceedings{rao2010characterization,
  author    = {Rao, K. Sreenivasa and Reddy, Ramu and Maity, Sudhamay and Koolagudi, Shashidhar G.},
  title     = {Characterization of emotions using the dynamics of prosodic features},
  booktitle = {Speech Prosody 2010-Fifth International Conference},
  year      = {2010},
  pages     = {1--4},
  url       = {https://www.isca-speech.org/archive/sp2010/sp10_941.html},
}

@inproceedings{prasanna2010fast,
  author    = {Prasanna, S. R. M. and Govind, D. and Rao, K. Sreenivasa and Yegnanarayana, Bayya},
  title     = {Fast prosody modification using instants of significant excitation},
  booktitle = {Speech Prosody 2010-Fifth International Conference},
  year      = {2010},
  pages     = {1--4},
  url       = {https://www.isca-speech.org/archive/sp2010/sp10_941.html},
  internal-note = {NOTE(review): url is identical to the one on rao2010characterization; one of the two is probably wrong -- verify against the ISCA archive},
}

@inproceedings{raoneural,
  title     = {Neural Network Models for Emotion Recognition using Glottal Pulse Characteristics},
  author    = {Rao, K. Sreenivasa and Vuppala, Anil Kumar and Koolagudi, Shashidhar G.},
  booktitle = {14th Int. Conf.
Cognitive and Neural systems (ICCNS 2010)},
  year      = {2010},
  pages     = {1--1},
  url       = {http://cdn.iiit.ac.in/cdn/speech.iiit.ac.in/svlpubs/conference/ksrao-anil.pdf},
}

% The original record listed year twice; BibTeX rejects a repeated field.
% The analytics query string (?_ga=...) was dropped from the url.
@inproceedings{Rao2010TwoSN,
  author    = {K. Sreenivasa Rao and Jainath Yadav and Anil Kumar Vuppala and Shashidhar G. Koolagudi},
  title     = {Two Stage Neural Network model for Recognition of Indian Languages from Speech},
  booktitle = {14th Int. Conf. Cognitive and Neural systems (ICCNS 2010)},
  year      = {2010},
  pages     = {1--1},
  url       = {https://pdfs.semanticscholar.org/c515/b4a719d7f6e91f7a30670e0a17b9be5a0825.pdf},
}

@inproceedings{5469151,
  author    = {Sabin Kafley and A. Kumar Vuppala and Arun Chauhan and K. S. Rao},
  title     = {Continuous digit recognition in mobile environment},
  booktitle = {2010 IEEE Students Technology Symposium (TechSym)},
  year      = {2010},
  month     = apr,
  pages     = {217--222},
  keywords  = {mean square error methods;speech coding;speech recognition;background noise;channel impairment;coding impairment;continuous digit recognition;minimum mean square error method;mobile environment;noisy coded condition;noisy speech recognition;spectral processing;spectral subtraction method;speech coding;Acoustic noise;Background noise;Degradation;Frequency;Paper technology;Speech analysis;Speech enhancement;Speech processing;Speech recognition;Working environment noise},
  doi       = {10.1109/TECHSYM.2010.5469151},
  url       = {https://ieeexplore.ieee.org/document/5469151/},
}

@inproceedings{5469162,
  author    = {Arun Chauhan and S. G. Koolagudi and Sabin Kafley and K. S.
Rao},
  title     = {Emotion recognition using LP residual},
  booktitle = {2010 IEEE Students Technology Symposium (TechSym)},
  year      = {2010},
  month     = apr,
  pages     = {255--261},
  keywords  = {Gaussian processes;emotion recognition;neural nets;speech processing;Gaussian mixture models;IITKGP simulated emotion speech corpus;autoassociative neural network;emotion recognition;emotion specific information;inverse filtering;linear prediction residual;speech signal;Emotion recognition;Emotion recognition;Emotion-specific information;Excitation source;IITKGP-SESC;LP Residual},
  doi       = {10.1109/TECHSYM.2010.5469162},
  url       = {https://ieeexplore.ieee.org/abstract/document/5469162/},
}

@inproceedings{4782764,
  author    = {K. S. Rao and S. R. M. Prasanna and T. V. Sagar},
  title     = {Significance of Word and Syllable Level Information for Expressive Speech Processing},
  booktitle = {2009 Seventh International Conference on Advances in Pattern Recognition},
  year      = {2009},
  month     = feb,
  pages     = {159--162},
  keywords  = {emotion recognition;speech recognition;speech synthesis;emotion recognition;facial movement;gesture analysis;prosodic feature;speech expression;speech processing;speech synthesis;suprasegmental feature;syllable level information;utterance level;word level information;Emotion recognition;Feature extraction;Humans;Information technology;Pattern recognition;Performance analysis;Speech analysis;Speech processing;Speech recognition;Speech synthesis;Prosodic features;duration;emotions;energy;expression;pitch;syllable level;utterance level;word level},
  doi       = {10.1109/ICAPR.2009.47},
  url       = {https://ieeexplore.ieee.org/document/4782764/},
}

@inproceedings{4809202,
  author    = {S. G. Koolagudi and S. Nandy and K. S.
Rao},
  title     = {Spectral Features for Emotion Classification},
  booktitle = {2009 IEEE International Advance Computing Conference},
  year      = {2009},
  month     = mar,
  pages     = {1292--1296},
  keywords  = {emotion recognition;speech processing;emotion classification;emotion recognition;linear predictive cepstral coefficient;log frequency power coefficient;mel frequency cepstral coefficient;spectral feature;speech feature vector quantizer;Cepstral analysis;Emotion recognition;Humans;Information technology;Loudspeakers;Mel frequency cepstral coefficient;Shape;Speech processing;Speech recognition;Speech synthesis;Emotion recognition;IITKGP-SESC;Log frequency power coefficients (LFPC);Linear predictive cepstral coefficients (LPCC);Mel frequency cepstral coefficients (MFCC);Vector quantization},
  doi       = {10.1109/IADCC.2009.4809202},
  url       = {https://ieeexplore.ieee.org/document/4809202/},
}

@inproceedings{4426043,
  author    = {K. S. Rao and S. G. Koolagudi},
  title     = {Transformation of Speaker Characteristics in Speech Using Support Vector Machines},
  booktitle = {15th International Conference on Advanced Computing and Communications (ADCOM 2007)},
  year      = {2007},
  month     = dec,
  pages     = {660--665},
  keywords  = {speaker recognition;support vector machines;time-varying filters;linear prediction residual;mapping functions;prosodic parameters;speaker characteristics;speaker transformation;support vector machines;time varying filter;vocal tract system;Artificial neural networks;Filters;Frequency estimation;Humans;Loudspeakers;Natural languages;Signal synthesis;Speech synthesis;Support vector machines;Testing},
  doi       = {10.1109/ADCOM.2007.47},
  url       = {https://ieeexplore.ieee.org/document/4426043/},
}

@inproceedings{4086299,
  author    = {T. V. Sagar and K. S. Rao and S. R. M. Prasanna and S.
Dandapat},
  title     = {Characterization and Incorporation of Emotions in Speech},
  booktitle = {2006 Annual IEEE India Conference},
  year      = {2006},
  month     = sep,
  pages     = {1--5},
  keywords  = {emotion recognition;speech processing;speech recognition;SUSE database;emotional speech;listening test;prosodic feature;speech transformation;speech under simulated emotion;suprasegmental level;Cepstral analysis;Emotion recognition;Frequency;Hidden Markov models;Humans;Man machine systems;Residual stresses;Speech analysis;Speech recognition;Speech synthesis;Anger;Compassion;Emotion;Happy;Incorporation of emotion;LP residual;Neutral;Prosodic features},
  doi       = {10.1109/INDCON.2006.302828},
  issn      = {2325-940X},
  url       = {https://ieeexplore.ieee.org/abstract/document/4086299/},
}

@inproceedings{1529458,
  author    = {K. Sreenivasa Rao and B. Yegnanarayana},
  title     = {Modeling syllable duration in Indian languages using support vector machines},
  booktitle = {Proceedings of 2005 International Conference on Intelligent Sensing and Information Processing, 2005.},
  year      = {2005},
  month     = jan,
  pages     = {258--263},
  keywords  = {natural languages;pattern classification;regression analysis;support vector machines;Indian languages;SVM classification;SVM regression model;correlation coefficient;mean absolute error;standard deviation;support vector machines;syllable duration modeling;Accuracy;Broadcasting;Data mining;Feature extraction;Measurement standards;Natural languages;Performance analysis;Predictive models;Support vector machine classification;Support vector machines},
  doi       = {10.1109/ICISIP.2005.1529458},
  url       = {https://ieeexplore.ieee.org/document/1529458/},
}

@inproceedings{1529486,
  author    = {M. Leena and K. Srinivasa Rao and B.
Yegnanarayana},
  title     = {Neural network classifiers for language identification using phonotactic and prosodic features},
  booktitle = {Proceedings of 2005 International Conference on Intelligent Sensing and Information Processing, 2005.},
  year      = {2005},
  month     = jan,
  pages     = {404--408},
  keywords  = {feedforward neural nets;natural languages;pattern classification;speech processing;speech recognition;FFNN classifier training;Indian languages;feedforward neural network classifier;language identification;phonetics;phonotactic features;prosodic features;Automatic speech recognition;Computer science;Feedforward neural networks;Frequency;Laboratories;Natural languages;Neural networks;Signal processing;Speech recognition;Vocabulary},
  doi       = {10.1109/ICISIP.2005.1529486},
  url       = {https://ieeexplore.ieee.org/document/1529486/},
}

% The volume is carried by the volume field, so "vol.1" was dropped from pages.
@inproceedings{1223447,
  author    = {S. V. Gangashetty and K. S. Rao and A. N. Khan and C. C. Sekhar and B. Yegnanarayana},
  title     = {Combining evidence from multiple modular networks for recognition of consonant-vowel units of speech},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks, 2003.},
  year      = {2003},
  month     = jul,
  volume    = {1},
  pages     = {686--691},
  keywords  = {feature extraction;neural nets;pattern classification;speech processing;speech recognition;consonant-vowel units;evidence combination;isolated utterance recognition;multiple classifier;multiple modular network;nonlinear compression;phonetic description;reduced dimensional pattern;speech recognition;sum rule;training data set;uncompressed feature vector;Broadcasting;Computer science;Data engineering;Databases;Laboratories;Neural networks;Speech recognition;Training data},
  doi       = {10.1109/IJCNN.2003.1223447},
  issn      = {1098-7576},
  url       = {https://ieeexplore.ieee.org/document/1223447/},
}

@inproceedings{10.1007/978-3-319-07353-8_3,
  author    = "Singh, Piyush Kumar P. and Manjunath, K. E. and Ravi Kiran, R.
and Yadav, Jainath and Sreenivasa Rao, K.",
  editor    = {Kumar Kundu, Malay and Mohapatra, Durga Prasad and Konar, Amit and Chakraborty, Aruna},
  title     = {Indexing and Retrieval of Speech Documents},
  booktitle = {Advanced Computing, Networking and Informatics- Volume 1},
  year      = {2014},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {17--24},
  abstract  = {In this paper, a speech document indexing system and similarity-based document retrieval method has been proposed. K-d tree is used as the index structure and codebooks derived from speech documents present in the database, are used during retrieval of desired document. Each document is represented as a sequence of codebook indices. The longest common subsequence based approach is proposed for retrieving the documents. Proposed retrieval method is evaluated using a speech database of 3 hours recorded by a male speaker and speech queries from 5 male and 5 female speakers. The accuracy of retrieval is found to be about 88{\%} for the queries given by male speakers.},
  isbn      = {978-3-319-07353-8},
}

% Authors are separated by " and " only; a comma inside a name means "Last, First".
@inproceedings{ksrao-2002,
  author    = {K. Kiran Kumar and K. Sreenivasa Rao and B. Yegnanarayana},
  title     = {Duration Knowledge for Text-to-Speech system for Telugu},
  booktitle = {Int. Conf. Knowledge based computer systems},
  year      = {2002},
}

% This entry keeps the key "ksrao" so that an existing \cite{ksrao} still
% resolves. Every later entry that reused the key gets a unique
% ksrao-YEAR-word key, because BibTeX rejects repeated keys.
% NOTE(review): confirm no entry earlier in the file also uses "ksrao".
@inproceedings{ksrao,
  author    = {K. Sreenivasa Rao and S. V. Gangashetty and A. Nayeemullakhan},
  title     = {Distribution capturing ability of autoassociative neural network models for recognition of consonant-vowel utterances},
  booktitle = {Conf. on Cognitive and Neural systems},
  year      = {2003},
}

@inproceedings{ksrao-2003-syllable,
  author    = {S. Rajendran and K. Sreenivasa Rao and B. Yegnanarayana and K. N. Reddy},
  title     = {Syllable duration in broadcast news in Telugu: A preliminary study},
  booktitle = {National Conf. on Language Technology Tools: Implementation of Telugu/Urdu},
  year      = {2003},
}

@inproceedings{ksrao-2003-duration,
  author    = {K. Sreenivasa Rao and S. V. Gangashetty and B.
Yegnanarayana},
  title     = {Duration analysis for Telugu language},
  booktitle = {Int. Conf. on Natural Language Processing},
  year      = {2003},
}

% Each entry below originally reused the key "ksrao"; BibTeX rejects repeated
% keys, so each one now has a unique ksrao-YEAR-word key.
@inproceedings{ksrao-2004-duration-intonation,
  author    = {L. Mary and K. Sreenivasa Rao and S. V. Gangashetty and B. Yegnanarayana},
  title     = {Neural network models for capturing duration and intonation knowledge for language and speaker identification},
  booktitle = {8th Int. Conf. on Cognitive and Neural systems, Boston},
  year      = {2004},
}

@inproceedings{ksrao-2004-tts,
  author    = {K. Sreenivasa Rao and B. Yegnanarayana},
  title     = {Neural network models for text-to-speech synthesis},
  booktitle = {5th International Conference on Knowledge Based Computer Systems},
  year      = {2004},
  pages     = {520--530},
}

@inproceedings{ksrao-2004-constraints,
  author    = {K. Sreenivasa Rao and B. Yegnanarayana},
  title     = {Impact of constraints on prosody modeling for Indian languages},
  booktitle = {International Conference on Natural Language Processing (ICON-2004)},
  year      = {2004},
  pages     = {225--236},
}

@inproceedings{ksrao-2007-multilevel,
  author    = {K. Sreenivasa Rao and S. R. M. Prasanna and T. Vidya Sagar},
  title     = {Emotion Recognition using Multilevel Prosodic Information},
  booktitle = {Workshop on Image and Signal Processing (WISP-2007)},
  year      = {2007},
}

@inproceedings{ksrao-2008-prosodic,
  author    = {Shashidhar G. Koolagudi and K. Sreenivasa Rao},
  title     = {Neural Network Models for Capturing Prosodic Knowledge for Emotion Recognition},
  booktitle = {12th Int. Conf. on Cognitive and Neural systems, Boston},
  year      = {2008},
}

@inproceedings{ksrao-2009-optimal-text,
  author    = {Suparnakanti Das and Sudhamay Maity and K. Sreenivasa Rao and Pabitra Mitra},
  title     = {Strategies for selecting optimal text for Bengali ASR system},
  booktitle = {13th International Conference on Speech and Computer (SPECOM'2009)},
  year      = {2009},
}

@inproceedings{ksrao-2009-mobile,
  author    = {Anil Kumar Vuppala and K. Sreenivasa Rao},
  title     = {Neural Network Models for Speech Recognition in Mobile Environments},
  year      = {2009},
  booktitle = {13th Int. Conf.
on Cognitive and Neural systems},
}

% Each entry below originally reused the key "ksrao"; BibTeX rejects repeated
% keys, so each one now has a unique ksrao-YEAR-word key. Authors are
% separated by " and " only.
@inproceedings{ksrao-2010-feature-mapping,
  author    = {K. Sreenivasa Rao and Anil Kumar Vuppala and Sudhin Ray and Shashidhar G. Koolagudi},
  title     = {Feature mapping using neural network models for coded speech recognition},
  booktitle = {14th Int. Conf. Cognitive and Neural systems (ICCNS 2010)},
  year      = {2010},
}

@inproceedings{ksrao-2010-video,
  author    = {K. Sreenivasa Rao and V. K. Saroj and Sudhamay Maity and Shashidhar G. Koolagudi},
  title     = {Recognition of Emotions from Video},
  booktitle = {14th World Multiconference on Systemics, Cybernetics and Informatics (WMSCI 2010)},
  year      = {2010},
}

@inproceedings{ksrao-2010-dialects,
  author    = {K. Sreenivasa Rao and Saurav Nandy and Shashidhar G. Koolagudi},
  title     = {Identification of Hindi Dialects using Speech},
  booktitle = {14th World Multiconference on Systemics, Cybernetics and Informatics (WMSCI 2010)},
  year      = {2010},
}

@inproceedings{ksrao-2011-phrase-break,
  author    = {Krishnendu Ghosh and Ramu Reddy Vempada and K. Sreenivasa Rao},
  title     = {Phrase Break Prediction for Bengali Text to Speech Synthesis System},
  booktitle = {International Conference on Natural Language Processing (ICON)},
  year      = {2011},
}

@inproceedings{ksrao-2013-infant-cries,
  author    = {Singh, Avinash Kumar and Mukhopadhyay, Jayanta and Rao, K. Sreenivasa and Viswanath, K.},
  title     = {Classification of Infant Cries Using Dynamics of Epoch Features},
  booktitle = {International Conference on Communications, VLSI and Signal Processing},
  year      = {2013},
  internal-note = {NOTE(review): the original author field was split as "Avinash, Kumar and Singh and Jayanta, Mukhopadhyay and Sreenivasa, Rao K. and Viswanath K"; regrouped into four names -- confirm against the paper},
}

@inproceedings{ksrao-2013-excitation,
  author    = {Dipanjan Nandi and Debadatta Pati and K. Sreenivasa Rao},
  title     = {Robustness of Excitation Source Information for Language Independent Speaker Recognition},
  booktitle = {16th International Oriental COCOSDA Conference},
  year      = {2013},
}

@inproceedings{ksrao-2013-gender,
  author    = {Bellamkonda Bhaskar and Dipanjan Nandi and K.
Sreenivasa Rao},
  title     = {Analysis of Language Identification Performance based on Gender and Hierarchial Grouping Approaches},
  booktitle = {International Conference on Natural Language Processing (ICON-2013)},
  year      = {2013},
}

% Each entry below originally reused the key "ksrao"; BibTeX rejects repeated
% keys, so each one now has a unique ksrao-YEAR-word key.
@inproceedings{ksrao-2014-prosody-imposition,
  author    = {Jainath Yadav and K. Sreenivasa Rao},
  title     = {Emotional-speech synthesis from neutral-speech using prosody imposition},
  booktitle = {International Conference on Recent Trends in Computer Science and Engineering (ICRTCSE-2014)},
  year      = {2014},
}

@inproceedings{ksrao-2014-speaker-independent,
  author    = {M. Bhaykar and K. Sreenivasa Rao and K. E. Manjunath},
  title     = {Speaker Independent Recognition from Speech using Combination of Different Classification Models},
  booktitle = {18th World Multi-Conference on Systemics, Cybernetics and Informatics (WMSCI-2014)},
  year      = {2014},
}

@inproceedings{ksrao-2015-pauses,
  author    = {Parakrant Sarkar and K. Sreenivasa Rao},
  title     = {Modeling pauses for synthesis of storytelling style speech using unsupervised word features},
  booktitle = {International Conference on Advances in Computing, Communications and Informatics (ICACCI-2015)},
  year      = {2015},
}

@inproceedings{ksrao-2016-trajectory,
  author    = {Jainath Yadav and Md. Shah Fahad and Ranjeet Kumar and K. Sreenivasa Rao},
  title     = {Speaker Identification in Emotional Environment using Trajectory-based Stochastic Feature Mapping},
  booktitle = {International Conference on Recent Advances and Innovations in Engineering (ICRAIE-2016)},
  year      = {2016},
}

% "Pradeep R" would be parsed with surname "R"; written as in the other
% entries by the same author in this file.
@inproceedings{ksrao-2017-split-acoustic,
  author    = {R. Pradeep and K. Sreenivasa Rao},
  title     = {Split Acoustic Modeling in Decoder for Phoneme Recognition},
  booktitle = {14th IEEE India Council International Conference (INDICON)},
  year      = {2017},
}

@inproceedings{ksrao-2017-inverse-filtering,
  author    = {M. Kiran Reddy and K.
Sreenivasa Rao},
  title     = {Excitation modeling method based on inverse filtering for HMM-based speech synthesis},
  booktitle = {Machine Intelligence and Signal Processing (AISC series of Springer)},
  year      = {2017},
}

% Each entry below originally reused the key "ksrao"; BibTeX rejects repeated
% keys, so each one now has a unique ksrao-YEAR-word key.
@inproceedings{ksrao-2017-joyous,
  author    = {Gurunath Reddy, M. and K. Sreenivasa Rao},
  title     = {Neutral to Joyous Happy Emotion Conversion},
  booktitle = {14th IEEE India Council International Conference (INDICON)},
  year      = {2017},
}

@inproceedings{ksrao-2017-sargam,
  author    = {Gurunath Reddy, M. and K. Sreenivasa Rao},
  title     = {Automatic Evaluation of Hindustani Learner's SARGAM Practice},
  booktitle = {EUSIPCO, Kos, Greece},
  year      = {2017},
}

@inproceedings{ksrao-2017-melody,
  author    = {Gurunath Reddy, M. and K. Sreenivasa Rao},
  title     = {Predominant vocal melody extraction from enhanced partial harmonic content},
  booktitle = {EUSIPCO, Kos, Greece},
  year      = {2017},
}

@inproceedings{ksrao-2018-split-lattices,
  author    = {R. Pradeep and K. Sreenivasa Rao},
  title     = {Manner of Articulation Based Split Lattices for Phoneme Recognition},
  booktitle = {National Conference on Communications (NCC-2018)},
  year      = {2018},
}

@inproceedings{ksrao-2018-glottal-activity,
  author    = {Tanumay Mandal and K. Sreenivasa Rao},
  title     = {Robust Detection of Glottal Activity using Unwrapped Phase Electroglottographic Signal},
  booktitle = {International Conference on Acoustics, Speech, and Signal Processing (ICASSP-2018)},
  year      = {2018},
}

@inproceedings{ksrao-2018-street-attire,
  author    = {Debopriyo Banerjee and Niloy Ganguly and Shamik Sural and K. Sreenivasa Rao},
  title     = {One for the Road: Recommending Male Street Attire},
  booktitle = {22nd Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD-2018)},
  year      = {2018},
}

% "Suma S M" would be parsed with surname "M"; the comma form fixes the surname.
@inproceedings{ksrao-2018-note-transcription,
  author    = {Suma, S. M. and Shashidhar G. Koolagudi and Pravin B. Ramteke and K.
Sreenivasa Rao},
  title     = {Note Transcription from Carnatic Music},
  booktitle = {International Conference on Advanced Computing Networking and Informatics (ICACNI-2018)},
  year      = {2018},
}

% Each entry below that reused the key "ksrao" now has a unique
% ksrao-YEAR-word key, because BibTeX rejects repeated keys. These records
% describe works that also appear further down the file under their own keys;
% the internal-note field (ignored by styles) names the matching entry so the
% pair can be merged.
@inproceedings{ksrao-2018-harmonic-percussive,
  author    = {Gurunath Reddy, M. and K. Sreenivasa Rao and Partha Pratim Das},
  title     = {Harmonic-Percussive Source Separation of Polyphonic Music by Suppressing Impulsive Noise Events},
  booktitle = {INTERSPEECH (ISCA) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry reddy2018harmonic; merge or drop one},
}

@inproceedings{ksrao-2018-sparse-analysis,
  author    = {Kumud Tripathi and K. Sreenivasa Rao},
  title     = {Analysis of sparse representation based feature on speech mode classification},
  booktitle = {INTERSPEECH (ISCA) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry Tripathi2018AnalysisOS; merge or drop one},
}

@inproceedings{ksrao1,
  author    = {Tanumay Mandal and K. Sreenivasa Rao and Sanjay Kumar Gupta},
  title     = {Classification of disorders in vocal folds using Electroglottographic Signal},
  booktitle = {INTERSPEECH (ISCA) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry mandal2018classification; merge or drop one},
}

@inproceedings{ksrao-2018-multilingual-phone,
  author    = {Manjunath, K. E. and K. Sreenivasa Rao and Dinesh Babu Jayagopi and V. Ramasubramanian},
  title     = {Indian languages ASR: A multilingual phone recognition framework with IPA based common phone-set, predicted articulatory features and feature fusion},
  booktitle = {INTERSPEECH (ISCA) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry Manjunath2018IndianLA; merge or drop one},
}

@inproceedings{ksrao-2018-discriminative-sparse,
  author    = {Kumud Tripathi and K. Sreenivasa Rao},
  title     = {Discriminative sparse representation for speech mode classification},
  booktitle = {7th International Conference on Advances in Computing, Communications and Informatics (ICACCI) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry tripathi2018discriminative; merge or drop one},
}

@inproceedings{ksrao-2018-audio-mining,
  author    = {R. Kishore Kumar and Sandipan Sarkar and Pradeep Rengaswamy and K. Sreenivasa Rao},
  title     = {Audio Mining: Unsupervised Spoken Term Detection over an Audio Database},
  booktitle = {7th International Conference on Advances in Computing, Communications and Informatics (ICACCI) 2018},
  year      = {2018},
  internal-note = {NOTE(review): same work as entry kumar2018audio; merge or drop one},
}

@inproceedings{ksrao-2018-polyglot,
  author    = {M. Kiran Reddy and K.
Sreenivasa Rao}, title = {DNN-based Bilingual (Telugu-Hindi) Polyglot Speech Synthesis}, year = {2018}, booktitle = {International Conference on Advances in Computing, Communications and Informatics (ICACCI), Bangalore, India, 2018}, pages = {} } @inproceedings{ksrao, author ={R. Pradeep and K. Sreenivasa Rao}, title = {Modifying LSTM Posteriors with Manner of Articulation Knowledge to Improve Speech Recognition Performance}, year = {2018}, booktitle = {IEEE 17th International Conference on Machine Learning and Applications (ICMLA 2018), Orlando, Florida, USA, 2018}, pages = {} } @inproceedings{Manjunath2018IndianLA, title={Indian Languages ASR: A Multilingual Phone Recognition Framework with IPA Based Common Phone-set, Predicted Articulatory Features and Feature fusion}, author={K. E. Manjunath and K. Sreenivasa Rao and Dinesh Babu Jayagopi and V. Ramasubramanian}, booktitle={INTERSPEECH}, year={2018}, pages = {} } @inproceedings{Tripathi2018AnalysisOS, title={Analysis of sparse representation based feature on speech mode classification}, author={Kumud Tripathi and K. Sreenivasa Rao}, booktitle={INTERSPEECH}, pages={}, year={2018} } @inproceedings{reddy2018harmonic, title={Harmonic-Percussive Source Separation of Polyphonic Music by Suppressing Impulsive Noise Events.}, author={Reddy, Gurunath and Rao, K Sreenivasa and Das, Partha Pratim}, booktitle={Interspeech}, pages={831--835}, year={2018} } @inproceedings{mandal2018classification, title={Classification of Disorders in Vocal Folds Using Electroglottographic Signal.}, author={Mandal, Tanumay and Rao, K Sreenivasa and Gupta, Sanjay Kumar}, booktitle={Interspeech}, pages={3002--3006}, year={2018} } @INPROCEEDINGS{8554693, author={M. K. {Reddy} and K. S. 
{Rao}}, booktitle={2018 International Conference on Advances in Computing, Communications and Informatics (ICACCI)}, title={DNN-based Bilingual (Telugu-Hindi) Polyglot Speech Synthesis}, year={2018}, volume={}, number={}, pages={1808-1811}, keywords={hidden Markov models;natural language processing;neural nets;speech synthesis;text analysis;DNN-based synthesizer;HMM-based synthesizer;Telugu-Hindi;text-to-speech systems;bilingual text;deep neural network-based bilingual speech synthesis system;polyglot speech corpus;bilingual female speaker;mother tongue;TTS systems;mean opinion score test;MOS test;Hidden Markov models;Speech synthesis;Synthesizers;Neural networks;Training;High-temperature superconductors;Feature extraction;Speech synthesis;bilingual polyglot synthesis;hidden Markov models;deep neural networks}, doi={10.1109/ICACCI.2018.8554693}, ISSN={}, month={Sep.},} @inproceedings{tripathi2018discriminative, title={Discriminative sparse representation for speech mode classification}, author={Tripathi, Kumud and Rao, K Sreenivasa}, booktitle={2018 International Conference on Advances in Computing, Communications and Informatics (ICACCI)}, pages={655--659}, year={2018}, organization={IEEE} } @inproceedings{kumar2018audio, title={Audio Mining: Unsupervised Spoken Term Detection over an Audio Database}, author={R Kishore {Kumar} and Sandipan {Sarkar} and Pradeep {Rengaswamy} and and K Sreenivasa {Rao}}, booktitle={2018 International Conference on Advances in Computing, Communications and Informatics (ICACCI)}, pages={514--518}, year={2018}, organization={IEEE} } @inproceedings{DBLP:conference/corr/abs-1811-09956, author = {Gurunath Reddy {M} and Tanumay {Mandal} and and K Sreenivasa {Rao}}, title = {Glottal Closure Instants Detection From Pathological Acoustic Speech Signal Using Deep Learning}, booktitle ={NIPS, ML4H, Montreal, Canada}, pages={}, year = {2018}, } @inproceedings{kiranjul2019, title={Mel-scaled Wavelet-based Features for Spoofing Speech Detection}, 
author={Kiran Reddy {M} and and K. Sreenivasa {Rao}}, booktitle={International Conference on Electrical, Control and Computer Engineering, University of Malaysia, Pahang, Malaysia}, month={July}, pages={}, year={2019}, } @inproceedings{guru2019sept, title={Glottal Closure Instants Detection from Speech Signal by Deep Features Extracted from Raw Speech and Linear Prediction Residual}, author={Gurunath Reddy {M} and and K. Sreenivasa {Rao}}, booktitle={INTERSPEECH}, month={September}, pages={}, year={2019}, } @inproceedings{manjunath2019, title={Comparison of Common Multilingual Phone-set Based and LID-switched Monolingual Approaches for Multilingual Phone Recognition using Indian Languages}, author={Manjunath K {E} and Srinivasa Raghavan K. {M} and K. Sreenivasa {Rao} and Dinesh Babu {Jayagopi} and and V {Ramasubramanian}}, booktitle={IEEE CONNECT}, month={}, pages={}, year={2019}, } @inproceedings{kumud2020ncc, title={Multi-Lingual Speech Mode Classification Model for Indian Languages}, author={Kumud {Tripathi} and and K. Sreenivasa {Rao}}, booktitle={National Conference on Communications (NCC-2020) (IEEE Explore)}, month={Feb}, pages={}, year={2020}, }