require "openssl"
require "nokogiri"
require 'csv'
require "open-uri"
# WARNING: reassigns the VERIFY_PEER constant to VERIFY_NONE, so any code that
# requests peer verification through this constant gets no certificate
# verification at all. This is insecure; make sure it is really what you want.
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
# Link texts that belong to the site's navigation, footer, alphabet index,
# label filters and table headers rather than to the track listing itself.
IGNORED_LINK_TEXTS = ([
  'RA on YouTube', 'Login', 'Register', 'Resident Advisor', 'Submit',
  'Listings', 'Clubs', 'News', 'Reviews', 'Features', 'Films',
  'Submit event', 'Artists', 'Photos', 'DJ Charts', 'Labels', 'Podcasts',
  'Search', 'Top 1000', 'Top 100', 'Local', 'Favourites',
  'Create an artist profile', '0-9', 'RA', 'About', 'Advertise', 'Jobs',
  'RA In Residence', 'Ticketing FAQ', 'Sell tickets on RA', 'Privacy',
  'Terms', 'RA is also available in Japanese. 日本版',
  'Download the RA Guide', 'RA on Twitter', 'RA on Facebook',
  'RA on Google+', 'RA on Instagram', 'RA on Soundcloud', 'Biography',
  'Events', 'Tracks', 'RA News', 'RA Editorial', 'Remixes',
  'Solo productions', 'Collaborations', 'Laboratory Instinct',
  'Highgrade Records', 'Bosconi', '!K7', 'Perlon', 'Beatstreet', 'Title',
  'Label', 'Release Date', '51 chartings'
] + ('A'..'Z').to_a).freeze

#~ Open_Page
page = 'http://www.residentadvisor.net/dj/aguycalledgerald/tracks?sort=mostcharted'
# URI.open (from open-uri) is used because Kernel#open no longer opens URLs
# on Ruby 3.0+.
html = Nokogiri::HTML(URI.open(page))

#~ Array
# Collect the cleaned text of every link, then drop the known non-track
# entries in a single pass instead of re-running every delete per link.
names = html.css('a')
            .map { |link| link.text.strip.gsub(/\t/, '') }
            .reject { |text| IGNORED_LINK_TEXTS.include?(text) }
# Print the final list once (use names.first(5) to keep only the first five).
puts names

#~ To_CSV
# Open the file once and append one row per name. Iterating the array itself
# avoids the extra empty row an inclusive 0..names.count range would produce.
CSV.open('Most_Charted.csv', 'a+') do |csv|
  names.each { |name| csv << [name] }
end
这会创建一个CSV文件,内容如下:
PositiveNoise (Carl Craig remix) System 7 & Guy Called Gerald A-Wave 22 chartings
Voodoo Ray (Shield Re-Edit) A Guy Called Gerald 18 chartings
Falling (D. Digglers Cleptomania remix) Tom Clark & Benno Blome feat.
A Guy Called Gerald 18 chartings
How Long Is Now A Guy Called Gerald 14 chartings
Groove Of The Ghetto A Guy Called Gerald 12 chartings
Voodoo Ray A Guy Called Gerald 10 chartings
Falling (D Diggler's Rescue remix) Tom Clark & Benno Blome feat. A
Guy Called Gerald 9 chartings
等等。
如何只将前5首歌曲名称传递给CSV文件?
答案 0 :(得分:1)
确保在禁用SSL检查时知道自己在做什么。
您可以为曲目列表找到更好的选择器,这样就不需要所有这些“delete”调用了。这些曲目都位于 ul.tracks 元素中。
然后我建议你把整个逻辑写成一个类(class),这样就可以封装行为。另外不要使用 $
全局变量:它们没有必要,而且通常是不良代码的标志。
以下是一个可运行的示例:
require "openssl"
require "nokogiri"
require 'csv'
require "open-uri"
# WARNING: reassigns the VERIFY_PEER constant to VERIFY_NONE, so any code that
# requests peer verification through this constant gets no certificate
# verification at all. This is insecure; make sure it is really what you want.
OpenSSL::SSL::VERIFY_PEER = OpenSSL::SSL::VERIFY_NONE
# Fetches a track-listing page and extracts the track titles from it.
class Tracklist
  # CSS selector for one list entry per track.
  TRACK_SELECTOR = 'ul.tracks li'.freeze
  # CSS selector, relative to a track entry, for the link holding its title.
  TITLE_SELECTOR = 'div.title a:nth-child(1)'.freeze

  # @param url [String] address of the page to fetch
  def initialize(url)
    @url = url
  end

  # Downloads the page and returns the track titles in page order.
  #
  # @param top [Integer, nil] maximum number of titles to return; nil returns
  #   every title found, and zero or a negative number returns an empty list
  # @return [Array<String>] the title texts; entries without a title link
  #   are skipped
  def parse(top = nil)
    # Nothing was asked for, so skip the download entirely.
    return [] if top && top <= 0

    # URI.open (from open-uri) is required on Ruby 3.0+, where Kernel#open
    # no longer opens URLs.
    html = Nokogiri::HTML(URI.open(url))
    # Lazy evaluation stops walking the entries once +top+ titles are found.
    titles = html.css(TRACK_SELECTOR).lazy
                 .map { |node| node.css(TITLE_SELECTOR).first }
                 .reject(&:nil?)
                 .map(&:text)
    top ? titles.first(top) : titles.to_a
  end

  private

  attr_reader :url
end
# Build a Tracklist for the artist's most-charted page and print the first
# five titles it returns.
most_charted = Tracklist.new(
  "https://www.residentadvisor.net/dj/aguycalledgerald/tracks?sort=mostcharted"
)
top_five = most_charted.parse(5)
p(top_five)
如果您需要有关曲目的更多信息,那么您可以在parse
方法中的循环中提取更多详细信息。
此代码在达到top
后停止解析。之后,您可以根据需要构建CSV。