From 005db388cec113f8956c56ef5787d8deb50c02b4 Mon Sep 17 00:00:00 2001 From: Cimbali Date: Wed, 2 Jun 2021 00:20:51 +0200 Subject: [PATCH] Rewrite links by generating scraper :replace_paths from entries filter --- lib/docs/filters/r/entries.rb | 9 ++++++--- lib/docs/scrapers/r.rb | 14 ++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/docs/filters/r/entries.rb b/lib/docs/filters/r/entries.rb index a9793e07..ed09345d 100644 --- a/lib/docs/filters/r/entries.rb +++ b/lib/docs/filters/r/entries.rb @@ -4,14 +4,17 @@ module Docs PKG_INDEX_ENTRIES = Hash.new [] - def initialize(*) - super - + def call if slug_parts[-1] == '00Index' + dir = File.dirname(result[:subpath]) css('tr a').each do |link| PKG_INDEX_ENTRIES[link['href']] += [link.text] + next if link['href'] == link.text + context[:replace_paths][File.join(dir, "#{link.text}.html")] = File.join(dir, "#{link['href']}.html") end end + + super end def slug_parts diff --git a/lib/docs/scrapers/r.rb b/lib/docs/scrapers/r.rb index e0e43355..308d1a6b 100644 --- a/lib/docs/scrapers/r.rb +++ b/lib/docs/scrapers/r.rb @@ -29,14 +29,12 @@ module Docs /\.pdf$/ ] - ## We want to fix links like so − but only if the targets don’t exist, - ## as these target packages or keywords that do not have their own file, - ## but exist on another page, and we properly record it. - # - #options[:fix_urls] = ->(url) do - # url.sub!(%r'/library/([^/]+)/doc/index.html$') { |m| "/r-#{$1.parameterize.downcase}/" } - # url.sub!(%r'/library/([^/]+)/html/([^/]+).html$') { |m| "/library/#{$1.parameterize.downcase}/html/#{$2.parameterize.downcase}" } - #end + options[:replace_paths] = { + ## We want to fix links like so − but only if the targets don’t exist: + # 'library/MASS/html/cov.mve.html' => 'library/MASS/html/cov.rob.html' + ## Paths for target packages or keywords that do not have their own file + ## are generated in the entries filter from 00Index.html files + } options[:skip] = %w( doc/html/packages-head-utf8.html