#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
separators = [u"।", u",", u"."]
dat=open(r"C:\Users\User\Desktop\text4.txt",'r').read()
text=dat.decode("utf-8")
wros=text.split()
out=""
import string
space=" "
counter=0;
for word in wros:
out=u" ".join(word)
writ=open("C:\\Users\\User\\Desktop\\text5.txt",'w')
writ.write(out.encode('utf-8'))
writ.close()
text4.txt包含भारत का इतिहास काफी समृद्ध एवं विस्तृत है।
text5.txt输出为ह ै ।
所需的输出为भारत का इतिहास काफी समृद्ध एवं विस्तृत है।
答案 0 :(得分:1)
我不知道你与word
有什么关系,但我会这样做:
text = open('text4.txt').read()
text = text.decode("utf-8")
# split one string into list of (old) words
words = text.split()
# list for new words
out = []
# modify words
for word in words:
# here - do something with `word`
out.append(word)
# concatenate all new words to one string
result = u' '.join(out)
result = result.encode('utf-8')
writ = open('text5.txt', 'w')
writ.write(result)
writ.close()
答案 1 :(得分:0)
这是一个模板:
# no need for coding:utf8 unless *source code* has non-ASCII.
import io # Contains modern version of open compatible with Python 3.
# Use with and file is automatically closed when block is exited.
with io.open(r'c:\users\user\desktop\text4.txt',encoding='utf8') as dat:
text = dat.read()
words = text.split()
#edit words
text = u' '.join(words)
with io.open(r'c:\users\user\desktop\text5.txt','w',encoding='utf8') as writ:
writ.write(text)