如何使用Python在一个文件中搜索和替换文本?

时间:2011-12-14 05:09:41

标签: python text-files replace

我有一个文件 file1.txt

I Show more flower you can see by link
All is Beautyfull.You Can View Here !
Link View :

http://lincoln.com/view/12432134/flower1.jpg    
http://lincoln.com/view/34645645/flower3456.jpg    
http://lincoln.com/view/75634534/flower56.jpg    
http://lincoln.com/view/86764454/flower2.jpg

和另一个 file2.txt

http://lincoln.com/view/43645645/flower1.jpg    
http://lincoln.com/view/84344454/flower3456.jpg    
http://lincoln.com/view/43343433/flower56.jpg    
http://lincoln.com/view/13424324/flower2.jpg    
http://kashi.com/view/343434344/flower1.jpg    
http://kashi.com/view/766454544/flower3456.jpg    
http://kashi.com/view/32634545/flower56.jpg    
http://kashi.com/view/84353453/flower2.jpg

我想要的是以下内容:

I Show more flower you can see by link
All is Beautyfull.You Can View Here !
Link View :

http://lincoln.com/view/43645645/flower1.jpg    
http://lincoln.com/view/84344454/flower3456.jpg    
http://lincoln.com/view/43343433/flower56.jpg    
http://lincoln.com/view/13424324/flower2.jpg

More Link VIew:

http://kashi.com/view/343434344/flower1.jpg    
http://kashi.com/view/766454544/flower3456.jpg    
http://kashi.com/view/32634545/flower56.jpg    
http://kashi.com/view/84353453/flower2.jpg    
++++++++++++++++++++++++++++++++++++++++

I Show more candy you can see by link
All is Beautyfull.You Can View Here !
http://photobucket.com

伪代码是:

if filename exists in file1 but not in file2:
    remove filename
else if filename exists in file1 and in file2:
    the version in file2 replaces the line in file1
else if filename exists in file2 but not in file1:
    do nothing

add the links with the domain name "http://kashi.com" from file2.txt
in a section "More link view"
add "++++++++++++++++++++++++++"

我试过这个算法:

def file_merge(file1name,file2name):
    """Replace links in file1 with their same-named counterparts from file2.

    A link line of file1 is replaced when file2 contains an http(s) link
    with the same host and the same file name (last path component).
    Surrounding whitespace is ignored when comparing. When file2 holds
    several candidates for one (host, file name) pair, the last one wins.
    Every other line of file1 is kept unchanged.

    The result is written back to ``file1name``; every output line is
    terminated with a newline.

    Args:
        file1name: path of the file to update in place.
        file2name: path of the file providing the replacement links.
    """
    def link_key(text):
        """Return (host, file name) for an http(s) link, else None."""
        parts = text.strip().split('/')
        # 'http://host/a/b.jpg'.split('/') -> ['http:', '', 'host', 'a', 'b.jpg']
        if len(parts) < 4 or parts[0] not in ('http:', 'https:') or parts[1]:
            return None
        return (parts[2], parts[-1])

    # No 'U' mode: it was removed in Python 3.11, and line endings are
    # stripped explicitly so '\r\n' files behave the same everywhere.
    with open(file1name) as file1:
        file1contents = [line.rstrip('\r\n') for line in file1]
    with open(file2name) as file2:
        file2contents = [line.rstrip('\r\n') for line in file2]

    # Keying on the host as well keeps a kashi.com link from overwriting a
    # lincoln.com line that merely shares the same file name.
    replacements = {}
    for line in file2contents:
        key = link_key(line)
        if key is not None:
            replacements[key] = line

    # Build a new list instead of mutating file1contents while iterating it.
    # link_key() returns None for non-link lines, which is never a dict key.
    file3contents = [replacements.get(link_key(line), line)
                     for line in file1contents]

    with open(file1name, 'w') as file3:
        for line in file3contents:
            file3.write(line + '\n')

# Run the merge; file_merge() writes its result back to its first argument,
# so file1.txt is overwritten.
file_merge('file1.txt','file2.txt')

谢谢!


简单来说:用 file2.txt 中“filename”相同的链接替换 file1.txt 中对应的链接

并将 http://kashi.com 下带有相同 'filename' 的链接附加到关键字“+++++++++++++++++++++++++++++++++++++++++++++++++++”处

为:

[img]http://photobucket.com/98a267d32b056fb0a5c8c07dd4c35cc5.jpg[/img]

Fomart:IMG 
Quality:High



http://lincoln.com/view/4148476844/flower1.jpg
http://lincoln.com/view/4148476994/flowe2.jpg
http://lincoln.com/view/4148501374/flower3.jpg
http://lincoln.com/view/4148476324/flower4.jpg
http://lincoln.com/view/4148494685/flower5.jpg
http://lincoln.com/view/4148626615/flowew6.jpg

You Can VIEW More Link !



More Link:
http://kashi.com/view/414865/flower1.jpg
http://kashi.com/view/6344353/flower2.jpg
http://kashi.com/view/234234/flower3.jpg
http://kashi.com/view/6543534/flower4.jpg
http://kashi.com/view/2342342/flower5.jpg
http://kashi.com/view/234234/flower6.jpg


++++++++++++++++++++++++++++++++++++
[img]http://phtobucket.com/004_8N9FEWFZD7ECJC6.jpg[/img]
Format:Img
Quality:High


http://lincoln.com/view/4148633524/candy1.jpg
http://lincoln.com/view/4148538064/candy2.jpg
http://lincoln.com/view/4148537824/candy3.jpg
http://lincoln.com/view/4148562565/candy4.jpg
http://lincoln.com/view/4148562035/candy5.jpg




You Can VIEW More Link !




More Link:
http://kashi.com/view/23423423/candy1.jpg
http://kashi.com/view/345345/candy2.jpg
http://kashi.com/view/234234/candy3.jpg
http://kashi.com/view/3434342/candy4.jpg
http://kashi.com/view/234234234/candy5.jpg
+++++++++++++++++++++++++++++++++++++++++++++
...
...
And more content as above

3 个答案:

答案 0 :(得分:2)

试试这个

import urlparse
import os

def file_merge(file1name,file2name):
    """Overwrite file1 with the links of file2, re-hosting the unknown ones.

    Each line of file2 is written to ``file1name`` in order. A link whose
    file name (last path component) also occurs in file1 after the header
    (the first four lines are skipped) is written unchanged. Any other link
    has its host replaced by ``kashi.com``. Blank lines and links without a
    file name pass through untouched.

    NOTE(review): file1's own text, including the four header lines, is not
    written back; only lines derived from file2 end up in the output.

    Args:
        file1name: path of the file that is inspected and then overwritten.
        file2name: path of the file providing the links.
    """
    try:
        from urllib.parse import urlsplit, urlunsplit  # Python 3
    except ImportError:
        from urlparse import urlsplit, urlunsplit  # Python 2

    def file_name_of(text):
        """Return the last path component of *text*, ignoring whitespace."""
        return os.path.split(urlsplit(text.strip()).path)[1]

    # No 'U' mode: it was removed in Python 3.11.
    with open(file1name) as file1:
        file1contents = file1.readlines()
    with open(file2name) as file2:
        file2contents = file2.readlines()

    # Exact file names seen in file1. Comparing stripped names avoids
    # mismatches caused by trailing spaces or a missing final newline, and
    # avoids suffix false positives such as 'sunflower1.jpg' vs 'flower1.jpg'.
    names_in_file1 = set(
        file_name_of(l_link) for l_link in file1contents[4:] if l_link.strip()
    )

    file3contents = []
    for link in file2contents:
        stripped = link.strip()
        if not stripped:
            file3contents.append(link)
            continue

        temp = urlsplit(stripped)
        filename = os.path.split(temp.path)[1]
        if not filename or filename in names_in_file1:
            file3contents.append(link)
        else:
            urllist = list(temp)
            urllist[1] = 'kashi.com'
            # The link was parsed without its line ending, so restore one.
            file3contents.append(urlunsplit(urllist) + '\n')

    with open(file1name, 'w') as file3:
        file3.writelines(file3contents)

# Run the merge; file_merge() opens its first argument for writing, so
# /tmp/file1.txt is overwritten with the result.
file_merge('/tmp/file1.txt','/tmp/file2.txt')

答案 1 :(得分:2)

试试这个,请:

# Merge file1.txt and file2.txt into file3.txt.
#
# Step 1: index file2.txt. Each line is keyed by the pair
# (first '/'-separated field after dropping 'http://', last field);
# for an http:// link that is (host, file name).
with open('file2.txt','r') as f2:
    dic2 = {}   # key -> file2 line, always terminated by '\n'
    li2 = []    # keys in file2 order; consumed as matches are found below
    for line in f2:
        spli = line.rstrip().replace('http://','').split('/')
        # A repeated key overwrites the dic2 entry but is appended to li2 again.
        dic2[(spli[0],spli[-1])] = line if line[-1]=='\n' else line+'\n'
        li2.append((spli[0],spli[-1]))

# Step 2: write file3.txt from file1.txt plus the index built above.
with open('file1.txt','r') as f1,open('file3.txt','w') as f3:

    # One shared iterator, so the second loop resumes where the first stops.
    itr1 = iter(f1)

    # Copy file1 verbatim up to and including its first blank line.
    for line in itr1:
        f3.write(line)
        if line.strip()=='':
            break

    # For the rest of file1: a non-blank line whose key is still in li2 is
    # replaced by file2's version and the key is consumed. Lines without a
    # match, and blank lines, are not written at all.
    for line in itr1:
        if line.strip():
            spli = line.rstrip().replace('http://','').split('/')
            x = (spli[0],spli[-1])
            if x in li2:
                f3.write(dic2[x])
                li2.remove((spli[0],spli[-1]))
    # `line` is whatever the loops above bound last, i.e. the last line read
    # from file1, which is not necessarily the last line written to f3.
    # NOTE(review): if file1.txt is empty, `line` is left over from the
    # file2 loop (or undefined when file2.txt is empty too) -- confirm
    # whether that case matters.
    klu = '\n' if line.rstrip()==line else ''
    # Prepend an extra newline when that last line had no trailing
    # whitespace/newline, so the section heading starts on its own line.
    f3.write(klu + '\nMore Link VIew:\n\n')

    # Every file2 line whose key was not consumed above, in file2 order.
    for remain in li2:
        f3.write(dic2[remain])

    # Closing separator; written with a trailing space and no newline.
    f3.write('++++++++++++++++++++++++++++++++++++++++ ')

答案 2 :(得分:1)

这可以工作;不过在我看来,这确实是一个非常奇怪的问题……抱歉……

from urlparse import urlparse
import os.path

def read_links2(f):
    """Yield ``((host, file name), parsed_url)`` for each http(s) line of *f*.

    *f* is any iterable of text lines. Surrounding whitespace is stripped
    before parsing; lines whose scheme is not http or https are skipped.
    """
    for raw in f:
        parsed = urlparse(raw.strip())
        if parsed.scheme not in ('http', 'https'):
            continue
        # os.path.basename(p) is the second half of os.path.split(p).
        file_name = os.path.basename(parsed.path)
        yield ((parsed.netloc, file_name), parsed)

# Index f2.txt by (host, file name). The (key, url) pairs are also kept as a
# list so the kashi.com section below is printed in file order rather than
# in whatever order the dict happens to iterate.
with open('f2.txt') as f2:
    ordered_links2 = list(read_links2(f2))
links2 = dict(ordered_links2)

# Echo f1.txt: an http(s) line is replaced by the f2.txt link with the same
# (host, file name), or dropped when f2.txt has none; any other line is
# printed as is, minus trailing whitespace.
with open('f1.txt') as f1:
    for line in f1:
        line = line.rstrip()
        url = urlparse(line)
        if url.scheme in ('http', 'https'):
            key = (url.netloc, os.path.split(url.path)[1])
            if key in links2:
                print(links2[key].geturl())
        else:
            print(line)

print('More Link VIew:')

# Every kashi.com link of f2.txt; for a repeated key only the entry that
# survived in links2 (the last one) is printed.
for key, url in ordered_links2:
    if url.netloc == 'kashi.com' and links2[key] is url:
        print(url.geturl())


print('+++++++++++++++++++')