我的任务是从目录中读取每个html文件。条件是查找每个文件是否包含标签
(1) <strong>OO</strong>
(2) <strong>QQ</strong>
然后
答案 0 :(得分:0)
您的write
函数嵌套在for
循环中,这就是您为index.txt
写多行的原因,只需将write
移出for
循环,并将所有parti文本放入变量parti_names即可:
participants = soup.find(find_participant)
# Collect the text of every <p> sibling that follows `participants`, stopping
# at the first one containing a <strong> whose text matches r"(Operator)".
parti_names = ""
for parti in participants.find_next_siblings("p"):
    if parti.find("strong", text=re.compile(r"(Operator)")):
        break
    parti_text = parti.get_text(strip=True)
    parti_names += parti_text + ","
    print(parti_text)

# Write once, after the loop, so this file contributes a single line to
# index.txt. The `with` block closes the handle even if write() raises.
with open('index.txt', 'a+') as indexFile:
    indexFile.write(filename + ', ' + title.get_text(strip=True) + ticker.get_text(strip=True) + ', ' + d_date.get_text(strip=True) + ', ' + parti_names + '\n')
,如下所示:
basename
<strong>更新</strong>
您可以使用from os.path import basename
# you can call it directly with basename
print(basename("C:/Users/.../output/100107-.html"))
获取文件名:
100107-.html
输出:
defmodule Learn.Team do
  @moduledoc """
  A minimal process-based team roster.

  The process holds a map keyed by player name and is driven by plain messages:

    * `{:add, name}` - stores the result of `Learn.Player.find(name)` under `name`
    * `{:remove, name}` - deletes `name` from the map
    * `{:team, pid}` - sends `{:ok, roster}` to `pid`

  Any other message is discarded.
  """

  @doc """
  Spawns the roster process, linked to the caller, starting with an empty map.

  Returns the bare pid of the new process (not an `{:ok, pid}` tuple).
  """
  @spec start_link() :: pid()
  def start_link do
    spawn_link(__MODULE__, :loop, [%{}])
  end

  @doc """
  Receive loop that carries the roster map in `state`.

  It is public because `spawn_link/3` can only start an exported function;
  it is not meant to be called directly. It never returns.
  """
  @spec loop(map()) :: no_return()
  def loop(state) do
    receive do
      {:add, name} ->
        # NOTE(review): whatever find/1 returns is stored as-is; confirm how a
        # missing player is represented before relying on the stored value.
        player = Learn.Player.find(name)
        loop(Map.put(state, name, player))

      {:remove, name} ->
        loop(Map.delete(state, name))

      {:team, pid} ->
        send(pid, {:ok, state})
        loop(state)

      _unexpected ->
        # This loop is the only receiver in the process, so an unmatched
        # message would otherwise sit in the mailbox forever and be rescanned
        # on every receive. Drop it to keep the mailbox bounded.
        loop(state)
    end
  end
end