Downloader: initialize @counter, forward #queue to the delegate's #queue,
rewrite relative links to existing .html files, and drop a debug `puts`.

Note: the line structure and context indentation of this patch were
reconstructed from a whitespace-collapsed copy. If a hunk is rejected, apply
with `git apply --ignore-whitespace` (or `patch -l`). The post-image blob id on
the `index` line was not recomputed after the review edits to the added lines.

diff --git a/lib/downloader.rb b/lib/downloader.rb
index ee11c294..696ddd70 100644
--- a/lib/downloader.rb
+++ b/lib/downloader.rb
@@ -11,6 +11,7 @@ class Downloader < SimpleDelegator
 
   def initialize(*args)
     super(Hydra.new(*args))
+    @counter = 0
   end
 
   def processor(&block)
@@ -53,7 +54,7 @@ class Downloader < SimpleDelegator
 
   def queue(*args, &block)
     run while queue_size > MAX_QUEUE_SIZE
-    __getobj__(*args, &block)
+    __getobj__.queue(*args, &block)
   end
 
   def page(src, target)
@@ -82,6 +83,22 @@ class Downloader < SimpleDelegator
         end
       end
 
+      # Point relative links at "<href>.html" when that file exists under File.dirname(path).
+      base_dir = File.dirname(path)
+      doc.css('a[href]').each do |a|
+        href = a['href']
+        # Skip hrefs with a scheme-style "xxx:" prefix, fragment/query-only links, and empty hrefs.
+        next if href =~ %r{\A(?:[^:]+:|[#?]|\z)}
+        href = CGI.unescape(href)
+
+        np = File.join(base_dir, href)
+        if File.exist?("#{np}.html")
+          href << '.html'
+        end
+
+        a['href'] = href
+      end
+
       doc.css('style').each do |style|
         style.content = process_stylesheet(src, style.content, rdir)
       end
@@ -145,8 +162,6 @@ class Downloader < SimpleDelegator
     prefix = 1
     tfile = rfile
 
-    puts rfile
-
     loop do
       path = File.join(dir, tfile)
       break path unless File.exists?(path)