import codecs
import nltk
textpath = "text"

# Read the whole input text and split it into word tokens.
# Each file is opened in a `with` block so the handle is closed right after
# it has been read.
with codecs.open(textpath, 'r', 'utf-8') as f1:
    s = f1.read()
tokens = nltk.word_tokenize(s)
store = []
temp = u'\u0020'

# Character inventories. Each file is read whole and the resulting string is
# used for `character in inventory` membership tests by the rule functions.
matrapath = "matra"
with codecs.open(matrapath, 'r', 'utf-8') as h:
    matra = h.read()
# The halant is taken to be the last character of the matra file.
# NOTE(review): if that file ends with a newline, the newline becomes the
# halant - confirm the file has no trailing line break.
halant = matra[len(matra) - 1]

vowelpath = "vowel"
with codecs.open(vowelpath, 'r', 'utf-8') as f:
    vowel = f.read()

consonantpath = "consonant"
with codecs.open(consonantpath, 'r', 'utf-8') as k:
    consonant = k.read()



def case1(shabda, vowel, matra, consonant):
    """Build the initial per-position code list for a word.

    Every position starts as "U". The first position becomes "F" when it
    holds a vowel. For the positions after the first and before the last:

    * a vowel is coded "F";
    * a matra directly after a consonant is coded "F";
    * a consonant that equals the module-level ``halant``, or that directly
      follows it, is coded "H".

    The last position is never visited by the loop and keeps "U" (unless the
    word has a single character, in which case the first-position rule
    applies to it).

    Args:
        shabda: the word, indexable by position.
        vowel: string of vowel characters.
        matra: string of matra characters.
        consonant: string of consonant characters.

    Returns:
        A new list of "F"/"H"/"U" codes, one per position of ``shabda``;
        an empty list for an empty word.
    """
    # An empty word has no first character to inspect.
    if not shabda:
        return []

    coding = ["U"] * len(shabda)
    if shabda[0] in vowel:
        coding[0] = "F"

    for i in range(1, len(shabda) - 1):
        current = shabda[i]
        previous = shabda[i - 1]
        if current in vowel:
            coding[i] = "F"
        elif current in matra:
            if previous in consonant:
                coding[i] = "F"
        elif current in consonant:
            # `halant` is read from module scope, not from the arguments.
            if current == halant or previous == halant:
                coding[i] = "H"
    return coding

def case2(shabda, coding, matra):
    """Promote a "U"-coded "y" to "F" when it follows selected matras.

    The selected matras are the characters ``matra[5:9]``. Only positions
    after the first and before the last are examined.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.
        matra: string of matra characters.

    Returns:
        The same ``coding`` list.
    """
    trigger_matras = matra[5:9]
    for pos in range(1, len(shabda) - 1):
        if coding[pos] != "U" or shabda[pos] != "y":
            continue
        if shabda[pos - 1] in trigger_matras:
            coding[pos] = "F"
    return coding

def case3(shabda, coding, consonant):
    """Promote selected "U"-coded consonants to "F" after an "H" consonant.

    The selected consonants are the characters ``consonant[26:30]``. One of
    them at a position coded "U" becomes "F" when the preceding character is
    a consonant coded "H". Only positions after the first and before the
    last are examined.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.
        consonant: string of consonant characters.

    Returns:
        The same ``coding`` list.
    """
    special = consonant[26:30]
    for pos in range(1, len(shabda) - 1):
        if coding[pos] != "U":
            continue
        if shabda[pos] not in special:
            continue
        before = pos - 1
        if shabda[before] in consonant and coding[before] == "H":
            coding[pos] = "F"
    return coding

def case4(shabda, coding):
    """Code a "U" consonant as "F" when a vowel directly follows it.

    Uses the module-level ``vowel`` and ``consonant`` strings. Vowels in the
    first and last positions are not examined.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.

    Returns:
        The same ``coding`` list.
    """
    for pos in range(1, len(shabda) - 1):
        if shabda[pos] not in vowel:
            continue
        before = pos - 1
        if shabda[before] in consonant and coding[before] == "U":
            coding[before] = "F"
    return coding

def case5(shabda, coding):
    """Code a "U" consonant as "F" when an "F"-coded vowel/consonant follows.

    For every position after the first and before the last that is coded "F"
    and holds a vowel or a consonant, the preceding position is changed from
    "U" to "F" if it holds a consonant. Uses the module-level ``vowel`` and
    ``consonant`` strings.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.

    Returns:
        The same ``coding`` list.
    """
    for i in range(1, len(shabda) - 1):
        if coding[i] != "F":
            continue
        if shabda[i] in vowel or shabda[i] in consonant:
            if coding[i - 1] == "U" and shabda[i - 1] in consonant:
                # Must be an assignment; a bare `==` comparison here would
                # leave the coding untouched.
                coding[i - 1] = "F"
    return coding

def case6(shabda, coding):
    """Code a word-final consonant that is still "U" as "H".

    Uses the module-level ``consonant`` string.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.

    Returns:
        The same ``coding`` list (unchanged for an empty word).
    """
    # An empty word has no last character to inspect.
    if not shabda:
        return coding
    last = len(shabda) - 1
    if shabda[last] in consonant and coding[last] == "U":
        # Must be an assignment; a bare `==` comparison has no effect.
        coding[last] = "H"
    return coding

def case7(shabda, coding):
    """Code a "U" consonant as "F" when an "H"-coded consonant follows it.

    Only positions after the first and before the last are examined as the
    "H" consonant. Uses the module-level ``consonant`` string.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.

    Returns:
        The same ``coding`` list.
    """
    for i in range(1, len(shabda) - 1):
        if shabda[i] in consonant and coding[i] == "H":
            if shabda[i - 1] in consonant and coding[i - 1] == "U":
                # Must be an assignment; a bare `==` comparison has no effect.
                coding[i - 1] = "F"
    return coding

def case8(shabda, coding):
    """Resolve remaining "U" consonants to "H" or "F".

    Steps:

    1. If the character at index 1 is a consonant coded "U", code it "F".
    2. For each position after the first and before the last: a consonant
       coded "U" whose predecessor is "F" and whose successor is not "H"
       becomes "H".
    3. Until step 2 has fired for the first time, every visited position is
       set to "F".

    Uses the module-level ``consonant`` string.

    Args:
        shabda: the word, indexable by position.
        coding: list of codes for ``shabda``; updated in place.

    Returns:
        The same ``coding`` list (unchanged for words shorter than two
        characters).
    """
    # One-character tokens (e.g. punctuation) have no index 1.
    if len(shabda) < 2:
        return coding

    if shabda[1] in consonant and coding[1] == "U":
        coding[1] = "F"

    # NOTE(review): flag1 is never reset, so the "set to F" fallback stops
    # for good after the first "H" is assigned and, before that, overwrites
    # every visited position regardless of its character - confirm that this
    # is the intended rule.
    flag1 = 0
    for i in range(1, len(shabda) - 1):
        if shabda[i] in consonant and coding[i] == "U":
            if coding[i - 1] == "F" and coding[i + 1] != "H":
                coding[i] = "H"
                flag1 = 1
        if not flag1:
            coding[i] = "F"
    return coding

def case9(shabda, coding):
    """Append the halant to every "H"-coded consonant except the last position.

    Uses the module-level ``consonant`` and ``halant`` values. Positions stay
    aligned with ``coding``: a changed position holds the consonant followed
    by the halant as one string.

    Args:
        shabda: the word; either a list of per-position strings or an
            immutable sequence such as a string.
        coding: list of codes for ``shabda``.

    Returns:
        ``shabda`` itself when it is a list (edited in place) or when nothing
        had to change; otherwise a new list of per-position strings.
    """
    units = shabda
    for i in range(len(shabda) - 1):
        if shabda[i] in consonant and coding[i] == "H":
            if units is shabda and not isinstance(shabda, list):
                # Strings do not support item assignment, so switch to a
                # list copy the first time a change is needed.
                units = list(shabda)
            units[i] = shabda[i] + halant
    return units
#----------------------------------------------------------------------------------------------------------------------------------

def schwa_delete(shabda, matra, consonant, vowel):
    """Run rules case1 to case9 over a word, in that order.

    case1 creates the code list, case2 to case8 refine it, and case9 produces
    the final word from it.

    Args:
        shabda: the word to process.
        matra: string of matra characters.
        consonant: string of consonant characters.
        vowel: string of vowel characters.

    Returns:
        A two-element list ``[word, coding]``, where ``word`` is the value
        returned by case9 and ``coding`` is the final code list.
    """
    coding = case1(shabda, vowel, matra, consonant)
    coding = case2(shabda, coding, matra)
    coding = case3(shabda, coding, consonant)
    # The remaining refinement rules all share the (shabda, coding) signature.
    for rule in (case4, case5, case6, case7, case8):
        coding = rule(shabda, coding)
    return [case9(shabda, coding), coding]
 #-------------------------------------------------------------------------------------------------------------------------------------
# Module-level list; the per-token loop further down rebinds this name.
syll_list = []


def procedure2(shabda, coding):
    """Split a word before each "F" non-vowel that follows another "F".

    Positions after the first and before the last are scanned. Whenever a
    position is coded "F", does not hold a vowel (module-level ``vowel``) and
    its predecessor is also coded "F", the stretch since the previous split
    is emitted as one piece and ``position + 1`` is recorded as a marker.
    If no split is found, the whole word is the only piece.

    NOTE(review): after the last split the remaining tail of the word is not
    added to the pieces - confirm whether that is intended.

    Args:
        shabda: the word, indexable and sliceable by position.
        coding: list of codes for ``shabda``.

    Returns:
        A two-element list ``[pieces, markers]``.
    """
    pieces = []
    markers = []
    start = 0
    for idx in range(1, len(shabda) - 1):
        full_non_vowel = coding[idx] == "F" and shabda[idx] not in vowel
        if full_non_vowel and coding[idx - 1] == "F":
            pieces.append(shabda[start:idx])
            start = idx
            markers.append(idx + 1)
    # No split happened (this includes words too short to scan).
    if not pieces:
        pieces.append(shabda)
    return [pieces, markers]

def procedure3(shabda, coding, syll_list, syll_marker):
    """Add pieces that end where an "H" position is followed by an "F" one.

    Positions after the first and before the last are scanned. At a position
    coded "F" whose predecessor is coded "H", the stretch from the current
    start offset up to (not including) that position is appended to
    ``syll_list`` - unless the position is index 2 or the predecessor's index
    is listed in ``syll_marker``.

    Args:
        shabda: the word, indexable and sliceable by position.
        coding: list of codes for ``shabda``.
        syll_list: list of pieces found so far; extended in place.
        syll_marker: collection of indices that must not be split after.

    Returns:
        The same ``syll_list`` object.
    """
    # Start offset of the next piece.
    # NOTE(review): assumed to begin at 0 and to move to each split point,
    # mirroring procedure2 - confirm against the intended algorithm.
    prev = 0
    for i in range(1, len(shabda) - 1):
        if coding[i] == "F" and coding[i - 1] == "H":
            if i != 2 and (i - 1) not in syll_marker:
                syll_list.append(shabda[prev:i])
                prev = i
    return syll_list



def syll_breaker(shabda, coding):
    """Break a coded word into pieces using procedure2 then procedure3.

    Args:
        shabda: the word to split.
        coding: list of codes for ``shabda``.

    Returns:
        The list of pieces returned by procedure3.
    """
    pieces, markers = procedure2(shabda, coding)
    return procedure3(shabda, coding, pieces, markers)
#len(tokens)
#print len(tokens)
# Syllabify every token and gather all pieces in one flat list.
syllable_list = []
for shabda in tokens:
    klist = schwa_delete(shabda, matra, consonant, vowel)
    shabda = klist[0]
    coding = klist[1]
    syll_list = syll_breaker(shabda, coding)
    # extend() keeps the list flat; wrapping the running list and the new
    # pieces in a fresh two-element list would nest one level per token.
    syllable_list.extend(syll_list)

print(syllable_list)

# Count how often each piece occurs.
sdict = {}
for item in syllable_list:
    # A piece may be a string or a list of strings; joining gives a hashable
    # string key in both cases.
    item1 = ''.join(item)
    sdict[item1] = sdict.get(item1, 0) + 1

for key, value in sdict.items():
    print("%s %s" % (key, value))
	
#print s
#print tokens
#print str(sdict)
#print store[2]
