我为此编写了一个 Ruby 脚本。您可以检查 “all_data” 是否包含所有必需的内容。
def urlparse(urlColumnElem):
    """Open ``urlColumnElem`` and report where the request ended up.

    Args:
        urlColumnElem: The URL to open (anything ``urllib.request.urlopen``
            accepts).

    Returns:
        * the final URL reported by the response (``geturl()``) when the
          request succeeds -- this equals ``urlColumnElem`` when no redirect
          happened;
        * the integer HTTP status code when ``urllib.error.HTTPError`` is
          raised;
        * the string ``'URL_Error'`` when ``urllib.error.URLError`` is raised.
    """
    try:
        conn = urllib.request.urlopen(urlColumnElem)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be handled first.
        return e.code
    except urllib.error.URLError:
        return 'URL_Error'

    # Close the response once the final URL has been read so the underlying
    # connection is not leaked. The redirected and non-redirected cases return
    # the same value, so no comparison with the input URL is needed.
    with conn:
        return conn.geturl()
下载上面的文件,并将脚本(script)和 docx(源文件)放在同一个文件夹中。当您从终端运行脚本($ ./script.rb)时,您将看到名为 01、02……等的文件夹,每个文件夹中都有一个扩展名为 .md 的文件。
#!/usr/bin/env ruby
require 'docx'
# Script-level state. Everything is declared up front so that the blocks
# further down the script (which are closures) can read and reassign it.

# Collections: collected row texts and the structures derived from them.
file_data    = []
array_desc   = []
heading_hash = {}
all_data     = {}

# String scratch values.
name_file      = "test"
t              = ""
temp           = ""
output         = ""
folder_name    = ""
directory_name = ""
md_file_name   = ''

# Loop bookkeeping.
flag  = true
count = 0
# True when +text+, ignoring surrounding whitespace, begins with a digit 1-9.
# Such rows are treated as numbered headings ("1.", "3.2", ...). Blank rows and
# rows starting with "0" or with a non-digit are not headings.
def numbered_heading?(text)
  text.strip[0].to_i != 0
end

# Returns the text of the third cell (index 2) of every row of every table in
# +doc+, with all '=' characters removed. Rows with fewer than three cells
# contribute nothing.
def third_column_texts(doc)
  doc.tables.flat_map do |table|
    table.rows.map { |row| row.cells[2] }.compact.map { |cell| cell.text.delete('=') }
  end
end

# Groups +rows+ (an Array of Strings) into chapters.
#
# A numbered heading row starts a new section. The chapter number is the part
# of the heading before its first '.', and the row that directly follows the
# heading is taken as the section title. Every later row that differs from the
# title is a content line of that section.
#
# Returns { chapter_number => { title => [content lines] } }. Several headings
# with the same chapter number ("3.1", "3.2") end up in the same inner hash.
# Sections without content lines and rows that precede the first heading are
# not stored.
def build_chapters(rows)
  chapters = {}
  number = nil # chapter number of the section currently being filled
  title = ''
  lines = []

  # Files the section collected so far under the chapter it belongs to. Each
  # chapter gets its own inner hash, and nothing is stored (and nothing
  # crashes) when no heading has been seen yet.
  store = lambda do
    (chapters[number] ||= {})[title] = lines unless number.nil? || lines.empty?
  end

  rows.each_with_index do |row, index|
    if numbered_heading?(row)
      # Store the finished section *before* switching to the new chapter
      # number, so the lines are keyed by the chapter they were read under.
      store.call
      number = row.split('.').first.strip
      title = rows[index + 1]
      lines = []
    elsif row != title
      lines << row
    end
  end
  store.call

  chapters
end

# Creates one directory per chapter in the current working directory, named
# with a leading "0" ("1" -> "01"), and creates/truncates an empty "01.md"
# inside it. Chapters whose only title is the empty string are skipped.
#
# NOTE(review): the statements that wrote the heading and its lines into the
# file were commented out in the original script, so the file is still only
# created here -- confirm whether the content should be written.
def create_chapter_files(chapters)
  chapters.each do |number, headings|
    next unless headings.keys.any? { |title| title != '' }

    dir = number.start_with?('0') ? number : "0#{number}"
    # File.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
    Dir.mkdir(dir) unless Dir.exist?(dir)
    # File.write opens, truncates and closes the file, so no handle is leaked.
    File.write(File.join(dir, '01.md'), '')
  end
end

# Process every .docx below the current directory. The collected state is
# rebuilt for each document so that rows of one file never leak into the next.
Dir.glob("**/*.docx") do |file_name|
  doc = Docx::Document.open(file_name)
  file_data = third_column_texts(doc)
  all_data = build_chapters(file_data)
  create_chapter_files(all_data)
end
答案 0 :(得分:0)
请使用我的代码并检查它是否有效。
# True when +text+, ignoring surrounding whitespace, begins with a digit 1-9,
# i.e. looks like a numbered heading such as "2." or "3.1".
def heading_row?(text)
  text.strip[0].to_i != 0
end

# Returns the text of the third cell (index 2) of every row of every table in
# +doc+, with all '=' characters removed. Rows with fewer than three cells
# contribute nothing.
def third_cell_texts(doc)
  doc.tables.flat_map do |table|
    table.rows.map { |row| row.cells[2] }.compact.map { |cell| cell.text.delete('=') }
  end
end

# Groups +rows+ (an Array of Strings) into sections.
#
# The row directly after a numbered heading is taken as the section title;
# every following row that differs from the title is a line of that section.
# Rows that appear before the first heading are ignored.
#
# Returns { title => [lines] } in document order. Every heading gets its own
# entry, including the very first one and headings that follow a section
# without content.
def group_sections(rows)
  sections = {}
  title = nil
  current = nil # lines of the section being filled; nil before the first heading

  rows.each_with_index do |row, index|
    if heading_row?(row)
      title = rows[index + 1]
      current = sections[title] = []
    elsif current && row != title
      current << row
    end
  end

  sections
end

# Appends each section to "<n>/01.md" in the current working directory, where
# n counts the sections from 1. The directory is created when missing. The
# file receives "#<title>", a blank line, and then one "<line> " row per
# content line.
#
# NOTE(review): directories are named "1", "2", ... without zero padding, and
# the file is opened in append mode, so running the script twice duplicates
# the content -- confirm both are intended.
def append_sections(sections)
  sections.each_with_index do |(title, lines), index|
    dir = (index + 1).to_s
    # File.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
    Dir.mkdir(dir) unless Dir.exist?(dir)
    # The block form closes (and flushes) the file even if a write fails.
    File.open(File.join(dir, '01.md'), 'a') do |out|
      out << "#" << title.to_s << "\n\n"
      lines.each { |line| out << line.to_s << " \n" }
    end
  end
end

# Process every .docx below the current directory. The rows are collected anew
# for each document so that earlier documents are not appended a second time.
Dir.glob("**/*.docx") do |file_name|
  doc = Docx::Document.open(file_name)
  file_data = third_cell_texts(doc)
  all_data = group_sections(file_data)
  append_sections(all_data)
end
它将提供您在上面分享的确切输出。如果您需要更多帮助,请告诉我。