|
|
|
@ -11,6 +11,7 @@ class Downloader < SimpleDelegator
|
|
|
|
|
|
|
|
|
|
# Builds a Hydra from the given arguments, hands it to the superclass
# initializer (the class inherits from SimpleDelegator), and starts the
# instance counter at zero.
#
# @param args [Array] forwarded unchanged to Hydra.new
def initialize(*args)
  hydra = Hydra.new(*args)
  super(hydra)
  @counter = 0
end
|
|
|
|
|
|
|
|
|
|
def processor(&block)
|
|
|
|
@ -53,7 +54,7 @@ class Downloader < SimpleDelegator
|
|
|
|
|
|
|
|
|
|
# Forwards a request to the delegated object's +queue+, draining first when
# the backlog is too large.
#
# While +queue_size+ exceeds MAX_QUEUE_SIZE, +run+ is called repeatedly;
# only then are the arguments handed on.
#
# @param args [Array] forwarded unchanged to the delegate's +queue+
# @param block [Proc, nil] forwarded unchanged to the delegate's +queue+
# @return [Object] whatever the delegate's +queue+ returns
def queue(*args, &block)
  run while queue_size > MAX_QUEUE_SIZE

  # __getobj__ accepts no positional arguments, so the arguments must go to
  # the delegate's own #queue rather than to the accessor itself.
  __getobj__.queue(*args, &block)
end
|
|
|
|
|
|
|
|
|
|
def page(src, target)
|
|
|
|
@ -82,6 +83,20 @@ class Downloader < SimpleDelegator
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
base_dir = File.dirname(path)
|
|
|
|
|
doc.css('a[href]').each do |a|
|
|
|
|
|
href = a['href']
|
|
|
|
|
next if href =~ %r{^(?:[^:]+:|[#?]|$)}
|
|
|
|
|
href = CGI.unescape(href)
|
|
|
|
|
|
|
|
|
|
np = File.join(base_dir, href)
|
|
|
|
|
if File.exists?("#{np}.html")
|
|
|
|
|
href << '.html'
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
a['href'] = href
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
doc.css('style').each do |style|
|
|
|
|
|
style.content = process_stylesheet(src, style.content, rdir)
|
|
|
|
|
end
|
|
|
|
@ -145,8 +160,6 @@ class Downloader < SimpleDelegator
|
|
|
|
|
prefix = 1
|
|
|
|
|
tfile = rfile
|
|
|
|
|
|
|
|
|
|
puts rfile
|
|
|
|
|
|
|
|
|
|
loop do
|
|
|
|
|
path = File.join(dir, tfile)
|
|
|
|
|
break path unless File.exists?(path)
|
|
|
|
|