Reindex R documentation, include 2 manuals

Now each page is indexed by its title (by default), and by each index
term declared for it in the package index.

Two manuals are included: the data import/export manual as a single
category (as it is rather short), and the R introduction manual with each
top-level section as its own category (as it is quite a bit longer).

Add some manual cleanup.

Some pages still seem to be missing:
- either they belong to non-default packages, in which case it is expected that they are missing
- or they correspond to index words without their own package (!)
pull/1547/head
Cimbali 4 years ago
parent c3b93377c3
commit 94b404450c

@ -3,7 +3,13 @@ module Docs
class CleanHtmlFilter < Filter
def call
slug_parts = slug.split('/')
if slug_parts[0] == 'library'
if root_page?
css('a[href$="/00index"]').each do |pkg|
pkg['href'] = "/r-#{pkg['href'].split('/')[1]}/"
end
elsif slug_parts[0] == 'library'
title = at_css('h2')
title.inner_html = "<code>#{slug_parts[3]}</code> #{title.content}"
@ -11,12 +17,31 @@ module Docs
summary.remove if summary
elsif slug_parts[-2] == 'manual'
css('table.menu, div.header, hr, h2.contents-heading, div.contents, table.index-cp, table.index-vr, table[summary]').remove
css('h2').each do |node|
node.remove if node.content.end_with? ' index'
end
css('span[id] + h1, span[id] + h2, span[id] + h3, span[id] + h4, span[id] + h5, span[id] + h6').each do |node|
id = node.previous['id']
node.previous.remove
node['id'] = id.sub(/-1$/, '') if id
# We need the first of the series of spans with ids
span = node.previous_element
while span.previous
prev = span.previous_element
break unless prev.name == 'span' and prev['id']
span.remove
span = prev
end
node['id'] = span['id']
span.remove
css('div.example').each do |node|
node.replace(node.children)
end
end
css('table.menu, div.header, hr').remove
css('h1 + h1').remove
css('.footnote h5').each do |node|
anchor = node.at_css('a[id]')

@ -2,11 +2,16 @@ module Docs
class R
class EntriesFilter < Docs::EntriesFilter
@@include_manual = false
@@include_misc = false
PKG_INDEX_ENTRIES = Hash.new []
# Runs the inherited initialisation, then, when the current page is a package
# index (last slug part is '00Index'), records the text of every link found
# inside a table row under that link's href in PKG_INDEX_ENTRIES.
def initialize(*)
  super
  return unless slug_parts[-1] == '00Index'

  css('tr a').each do |anchor|
    target = anchor['href']
    # Assign a freshly built array instead of appending in place, so the
    # hash's default value is never mutated.
    PKG_INDEX_ENTRIES[target] = PKG_INDEX_ENTRIES[target] + [anchor.text]
  end
end
def slug_parts
@ -18,11 +23,11 @@ module Docs
end
def is_manual?
slug_parts[-2] == 'manual'
slug_parts[1] == 'manual'
end
def get_name
return slug_parts[3] + ' ' + at_css('h2').content if is_package?
return at_css('h2').content if is_package?
title = at_css('h1.settitle')
title ? title.content : at_css('h1, h2').content
end
@ -30,24 +35,41 @@ module Docs
# Returns the type (category) for the current page:
# - package pages: the second slug part;
# - manual pages: the text of the page's h1.settitle element;
# - everything else: 'Miscellaneous'.
def get_type
  if is_package?
    slug_parts[1]
  elsif is_manual?
    at_css('h1.settitle').content
  else
    'Miscellaneous'
  end
end
def include_default_entry?
if is_manual? or slug_parts[-1] == '00Index' or slug_parts[-1] == 'index'
return false
end
is_package? or self.include_misc
is_package? and not slug_parts[-1] == '00Index'
end
# Returns the title held in +node+'s content with its leading section label
# removed, or nil when the title should not produce an entry.
#
# The stripped label is either a number or a single capital letter
# (optionally preceded by "Appendix "), followed by any ".N" sub-numbers and
# one space -- e.g. "1.2 ", "A.3 " or "Appendix B ".
#
# nil is returned for "References", "Preface", "Acknowledgements" and for any
# title ending in " index".
def manual_section(node)
  title = node.content.sub(/^((Appendix )?[A-Z]|[0-9]+)(\.[0-9]+)* /, '')
  return nil if title.end_with?(' index')
  return nil if ['References', 'Preface', 'Acknowledgements'].include?(title)

  title
end
def additional_entries
return [] unless is_manual? and self.include_manual
if is_package? and slug_parts[-1] != '00Index'
page = slug_parts[-1]
return [page] + PKG_INDEX_ENTRIES.fetch(page, [])
end
return [] unless is_manual?
entries = []
css('div.contents > ul > li').each do |node|
node.css('a').each do |link|
link_name = link.content.sub /^[0-9A-Z]+(\.[0-9]+)* /, ''
entries << [link_name, link['href'].split('#')[1], name]
unless slug_parts[-1].downcase == 'r-intro'
# Single top-level category
css('div.contents > ul a').each do |link|
link_name = manual_section(link)
entries << [link_name, link['href'].split('#')[1], name] unless link_name.nil?
end
else
# Split 1st level of manual into different categories
css('div.contents > ul > li').each do |node|
type = manual_section(node.at_css('a'))
next if type.nil?
node.css('> ul a').each do |link|
link_name = link.content.sub /^[0-9A-Z]+(\.[0-9]+)* /, ''
entries << [link_name, link['href'].split('#')[1], type]
end
end
end
return entries

@ -21,10 +21,33 @@ module Docs
HTML
# Never want those: paths ending in /DESCRIPTION, in /NEWS (optionally
# followed by a dot and an extension), in /demo, or in .pdf.
options[:skip_patterns] = [
/\/DESCRIPTION$/,
/\/NEWS(\.[^\/]*)?$/,
/\/demo$/,
/\.pdf$/
]
## We want to fix links like so, but only if the targets don't exist,
## as these links target packages or keywords that do not have their own file
## but exist on another page, and we properly record it.
#
#options[:fix_urls] = ->(url) do
# url.sub!(%r'/library/([^/]+)/doc/index.html$') { |m| "/r-#{$1.parameterize.downcase}/" }
# url.sub!(%r'/library/([^/]+)/html/([^/]+).html$') { |m| "/library/#{$1.parameterize.downcase}/html/#{$2.parameterize.downcase}" }
#end
# Exact paths assigned to options[:skip]: package-list, search and overview
# helper pages, the FAQ, and five of the manuals (R-FAQ, R-admin, R-exts,
# R-ints, R-lang).
options[:skip] = %w(
doc/html/packages-head-utf8.html
doc/html/SearchOn.html
doc/html/Search.html
doc/html/UserManuals.html
doc/html/faq.html
doc/manual/R-FAQ.html
doc/manual/R-admin.html
doc/manual/R-exts.html
doc/manual/R-ints.html
doc/manual/R-lang.html
)
end

Loading…
Cancel
Save