我有一个文件 file1.txt :
I Show more flower you can see by link
All is Beautyfull.You Can View Here !
Link View :
http://lincoln.com/view/12432134/flower1.jpg
http://lincoln.com/view/34645645/flower3456.jpg
http://lincoln.com/view/75634534/flower56.jpg
http://lincoln.com/view/86764454/flower2.jpg
和另一个 file2.txt :
http://lincoln.com/view/43645645/flower1.jpg
http://lincoln.com/view/84344454/flower3456.jpg
http://lincoln.com/view/43343433/flower56.jpg
http://lincoln.com/view/13424324/flower2.jpg
http://kashi.com/view/343434344/flower1.jpg
http://kashi.com/view/766454544/flower3456.jpg
http://kashi.com/view/32634545/flower56.jpg
http://kashi.com/view/84353453/flower2.jpg
我想要的是以下内容:
I Show more flower you can see by link
All is Beautyfull.You Can View Here !
Link View :
http://lincoln.com/view/43645645/flower1.jpg
http://lincoln.com/view/84344454/flower3456.jpg
http://lincoln.com/view/43343433/flower56.jpg
http://lincoln.com/view/13424324/flower2.jpg
More Link VIew:
http://kashi.com/view/343434344/flower1.jpg
http://kashi.com/view/766454544/flower3456.jpg
http://kashi.com/view/32634545/flower56.jpg
http://kashi.com/view/84353453/flower2.jpg
++++++++++++++++++++++++++++++++++++++++
I Show more candy you can see by link
All is Beautyfull.You Can View Here !
http://photobucket.com
伪代码是:
if filename exists in file1 but not in file2:
    remove filename
else if filename exists in file1 and in file2:
    the version in file2 replaces the line in file1
else if filename exists in file2 but not in file1:
    do nothing
add the links with the domain name "http://kashi.com" from file2.txt
in a section "More link view"
add "++++++++++++++++++++++++++"
我试过这个算法:
def file_merge(file1name, file2name):
    """Replace lines of *file1name* with same-named links from *file2name*.

    Every line of both files is split on ``'/'`` and its last component is
    treated as the file name.  A line of *file1name* whose file name also
    occurs in *file2name* is replaced by the line from *file2name*; when
    several lines of *file2name* share a file name, the last one wins.
    All other lines are kept, and *file1name* is rewritten in place.

    Only the file name is compared -- the domain is ignored -- and lines
    whose last component is empty (blank lines, URLs ending in ``'/'``)
    are never used as replacements.

    Args:
        file1name: Path of the file to update; it is overwritten.
        file2name: Path of the file providing the replacement links.
    """
    def split_lines(path):
        # ``with`` closes the file even if reading fails; stripping both
        # line-ending characters replaces the 'U' open mode, which was
        # removed in Python 3.11.
        with open(path) as handle:
            return [line.rstrip('\r\n').split('/') for line in handle]

    file1contents = split_lines(file1name)

    # Later lines overwrite earlier ones, so the last match wins.
    replacements = {}
    for parts in split_lines(file2name):
        if parts[-1]:
            replacements[parts[-1]] = parts

    # One dictionary lookup per line instead of a nested scan plus index().
    file3contents = [
        replacements.get(parts[-1], parts) for parts in file1contents
    ]

    with open(file1name, 'w') as file3:
        file3.writelines('/'.join(parts) + '\n' for parts in file3contents)
# Run the merge on the two sample files; file1.txt is rewritten in place.
file_merge('file1.txt','file2.txt')
谢谢!
并将链接 http://kashi.com 和 'filename' 附加到关键字“+++++++++++++++++++++++++++++++++++++++++++++++++++”处,
结果为:
[img]http://photobucket.com/98a267d32b056fb0a5c8c07dd4c35cc5.jpg[/img]
Fomart:IMG
Quality:High
http://lincoln.com/view/4148476844/flower1.jpg
http://lincoln.com/view/4148476994/flowe2.jpg
http://lincoln.com/view/4148501374/flower3.jpg
http://lincoln.com/view/4148476324/flower4.jpg
http://lincoln.com/view/4148494685/flower5.jpg
http://lincoln.com/view/4148626615/flowew6.jpg
You Can VIEW More Link !
More Link:
http://kashi.com/view/414865/flower1.jpg
http://kashi.com/view/6344353/flower2.jpg
http://kashi.com/view/234234/flower3.jpg
http://kashi.com/view/6543534/flower4.jpg
http://kashi.com/view/2342342/flower5.jpg
http://kashi.com/view/234234/flower6.jpg
++++++++++++++++++++++++++++++++++++
[img]http://phtobucket.com/004_8N9FEWFZD7ECJC6.jpg[/img]
Format:Img
Quality:High
http://lincoln.com/view/4148633524/candy1.jpg
http://lincoln.com/view/4148538064/candy2.jpg
http://lincoln.com/view/4148537824/candy3.jpg
http://lincoln.com/view/4148562565/candy4.jpg
http://lincoln.com/view/4148562035/candy5.jpg
You Can VIEW More Link !
More Link:
http://kashi.com/view/23423423/candy1.jpg
http://kashi.com/view/345345/candy2.jpg
http://kashi.com/view/234234/candy3.jpg
http://kashi.com/view/3434342/candy4.jpg
http://kashi.com/view/234234234/candy5.jpg
+++++++++++++++++++++++++++++++++++++++++++++
...
...
And more content as above
答案 0 :(得分:2)
试试这个
import urlparse
import os
def file_merge(file1name, file2name, header_lines=4):
    """Overwrite *file1name* with the links of *file2name*.

    Each line of *file2name* is written to *file1name* in order.  A link
    whose file name (last path component) does not occur in *file1name*
    after its first *header_lines* lines has its domain replaced by
    ``kashi.com``; every other line is copied unchanged.  The previous
    content of *file1name*, header included, is not carried over.

    Args:
        file1name: Path of the file to compare against and then overwrite.
        file2name: Path of the file whose links are written out.
        header_lines: Number of leading lines of *file1name* that are not
            links and are skipped when collecting file names.
    """
    # Imported here so the function works with either module layout.
    try:
        from urllib.parse import urlsplit, urlunsplit  # Python 3
    except ImportError:
        from urlparse import urlsplit, urlunsplit  # Python 2

    def read_lines(path):
        # Line endings are stripped explicitly: recent urlsplit() versions
        # drop newlines from URLs, so they cannot be relied on to survive
        # parsing. This also replaces the removed 'U' open mode.
        with open(path) as handle:
            return [line.rstrip('\r\n') for line in handle]

    def basename(link):
        return os.path.basename(urlsplit(link.strip()).path)

    file1contents = read_lines(file1name)
    file2contents = read_lines(file2name)

    # A set gives exact, constant-time file name matching instead of an
    # endswith() scan over file1 for every link of file2.
    known_names = set(
        basename(line) for line in file1contents[header_lines:]
    )

    file3contents = []
    for link in file2contents:
        parts = urlsplit(link.strip())
        filename = os.path.basename(parts.path)
        # Only real links (with a domain and a file name) are rewritten, so
        # blank or free-text lines are never turned into bogus URLs.
        if parts.netloc and filename and filename not in known_names:
            link = urlunsplit(parts._replace(netloc='kashi.com'))
        file3contents.append(link + '\n')

    with open(file1name, 'w') as file3:
        file3.writelines(file3contents)
# Run the merge on the sample files under /tmp; /tmp/file1.txt is overwritten.
file_merge('/tmp/file1.txt','/tmp/file2.txt')
答案 1 :(得分:2)
请试试这个:
# Index file2.txt by (domain, file name). dic2 maps each key to its
# newline-terminated line; li2 keeps the keys in file order so the links
# left over after the merge can be listed at the end.
with open('file2.txt','r') as f2:
    dic2 = {}
    li2 = []
    for line in f2:
        if not line.strip():
            continue  # a blank line carries no link
        spli = line.rstrip().replace('http://','').split('/')
        key = (spli[0],spli[-1])
        if key not in dic2:
            # Record each key once so a duplicated link is not listed twice.
            li2.append(key)
        dic2[key] = line if line[-1]=='\n' else line+'\n'

with open('file1.txt','r') as f1,open('file3.txt','w') as f3:
    itr1 = iter(f1)
    last_written = ''
    # Copy the header verbatim, up to and including the first blank line.
    for line in itr1:
        f3.write(line)
        last_written = line
        if line.strip()=='':
            break
    # The remaining lines are links: a link also present in file2.txt is
    # replaced by file2's version, any other link is dropped.
    for line in itr1:
        if line.strip():
            spli = line.rstrip().replace('http://','').split('/')
            x = (spli[0],spli[-1])
            if x in li2:
                f3.write(dic2[x])
                last_written = dic2[x]
                li2.remove(x)
    # Terminate the previous line only if the text actually written so far
    # does not already end with a newline.
    klu = '\n' if last_written and not last_written.endswith('\n') else ''
    f3.write(klu + '\nMore Link VIew:\n\n')
    # Links of file2.txt that matched nothing in file1.txt.
    for remain in li2:
        f3.write(dic2[remain])
    f3.write('++++++++++++++++++++++++++++++++++++++++ ')
答案 2 :(得分:1)
这个可以用;不过在我看来,这确实是一个非常奇怪的问题……抱歉……
from urlparse import urlparse
import os.path
def read_links2(f):
    """Yield ``(key, url)`` for every http(s) link found in *f*.

    Args:
        f: Iterable of text lines, e.g. an open file.

    Yields:
        Tuples ``((netloc, filename), url)`` where *url* is the parsed
        result of ``urlparse`` for the stripped line and *filename* is
        the last component of its path.  Lines whose scheme is neither
        ``http`` nor ``https`` are skipped.
    """
    # Imported here so the generator works on Python 2 and Python 3.
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse  # Python 2

    for line in f:
        url = urlparse(line.strip())
        if url.scheme in ('http', 'https'):
            key = (url.netloc, os.path.split(url.path)[1])
            yield (key, url)
from collections import OrderedDict

# Index f2.txt by (domain, file name). An ordered mapping keeps the links in
# file order, so the trailing section is printed in a predictable order.
with open('f2.txt') as f2:
    links2 = OrderedDict(read_links2(f2))

# Walk f1.txt: a link also present in f2.txt is printed in f2's version, a
# link missing from f2.txt is dropped, and any non-link line is echoed.
# Lines are stripped before use, so no special newline mode is needed.
with open('f1.txt') as f1:
    for line in f1:
        line = line.rstrip()
        url = urlparse(line)
        if url.scheme in ('http', 'https'):
            key = (url.netloc, os.path.split(url.path)[1])
            if key in links2:
                print(links2[key].geturl())
        else:
            print(line)

# Append every kashi.com link of f2.txt in its own section.
print('More Link VIew:')
for url in links2.values():
    if url.netloc == 'kashi.com':
        print(url.geturl())
print('+++++++++++++++++++')