From 7b7aa34b7069a89cfc17b7e45956e5e84b3c1349 Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Sun, 7 Oct 2018 12:04:24 -0400 Subject: [PATCH] Improve Rust scraper --- assets/stylesheets/pages/_rust.scss | 11 ++++++++- lib/docs/filters/rust/clean_html.rb | 36 +++++++++++++++++++++++++---- lib/docs/scrapers/rust.rb | 2 +- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/assets/stylesheets/pages/_rust.scss b/assets/stylesheets/pages/_rust.scss index f7f3c90c..fdf80bb1 100644 --- a/assets/stylesheets/pages/_rust.scss +++ b/assets/stylesheets/pages/_rust.scss @@ -3,9 +3,18 @@ h4 { @extend %block-label; } .docblock { margin-left: 1em; } + div.information, div.important-traits { + @extend %note; + + > pre { margin: .5rem 0; } + } div.stability { margin-bottom: 1em; } em.stab, span.stab { @extend %label; } em.stab.unstable, span.stab.unstable { @extend %label-orange; } - .since, .out-of-band { float: right; } + .out-of-band { float: right; } + .since, .srclink { + float: right; + margin-left: .5rem; + } } diff --git a/lib/docs/filters/rust/clean_html.rb b/lib/docs/filters/rust/clean_html.rb index 416e6e44..2c062eda 100644 --- a/lib/docs/filters/rust/clean_html.rb +++ b/lib/docs/filters/rust/clean_html.rb @@ -2,8 +2,6 @@ module Docs class Rust class CleanHtmlFilter < Filter def call - puts subpath if at_css('#versioninfo') - if slug.start_with?('book') || slug.start_with?('reference') @doc = at_css('#content main') elsif slug == 'error-index' @@ -29,12 +27,16 @@ module Docs css('.rusttest', '.test-arrow', 'hr').remove + css('.docblock.attributes').each do |node| + node.remove if node.content.include?('#[must_use]') + end + css('a.header').each do |node| node.first_element_child['id'] = node['name'] || node['id'] node.before(node.children).remove end - css('.docblock > h1').each { |node| node.name = 'h4' } + css('.docblock > h1:not(.section-header)').each { |node| node.name = 'h4' } css('h2.section-header').each { |node| node.name = 'h3' } 
css('h1.section-header').each { |node| node.name = 'h2' } @@ -44,7 +46,7 @@ module Docs end end - css('> .impl-items', '> .docblock', 'pre > pre').each do |node| + css('> .impl-items', '> .docblock', 'pre > pre', '.tooltiptext', '.tooltip').each do |node| node.before(node.children).remove end @@ -65,6 +67,32 @@ module Docs doc.first_element_child.name = 'h1' if doc.first_element_child.name = 'h2' at_css('h1').content = 'Rust Documentation' if root_page? + css('.table-display').each do |node| + node.css('td').each do |td| + node.before(td.children) + end + node.remove + end + + css('h2 .important-traits', 'h3 .important-traits', 'h4 .important-traits').each do |node| + content = node.at_css('.content.hidden .content') + node.at_css('.content.hidden').replace(content) if content + node.parent.after(node) + end + + css('code.content').each do |node| + node.name = 'pre' + node.css('.fmt-newline').each do |line| + line.inner_html = line.inner_html + "\n" + end + node.inner_html = node.inner_html.gsub('<br>', "\n") + node.content = node.content + end + + css('.since + .srclink').each do |node| + node.previous_element.before(node) + end + doc end end diff --git a/lib/docs/scrapers/rust.rb b/lib/docs/scrapers/rust.rb index ba2f41ed..d960c42b 100644 --- a/lib/docs/scrapers/rust.rb +++ b/lib/docs/scrapers/rust.rb @@ -1,7 +1,7 @@ module Docs class Rust < UrlScraper self.type = 'rust' - self.release = '1.28.0' + self.release = '1.29.1' self.base_url = 'https://doc.rust-lang.org/' self.root_path = 'book/second-edition/index.html' self.initial_paths = %w(