From 3a6a158f14501a41e9cee613f31ef65be0cde44c Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Sat, 17 Aug 2019 15:16:57 +0200 Subject: [PATCH] trio: finish scraper and filters --- .gitignore | 1 - .../javascripts/templates/pages/about_tmpl.coffee | 5 +++++ lib/docs/filters/trio/clean_html.rb | 12 ++++++++---- lib/docs/filters/trio/entries.rb | 4 +++- lib/docs/scrapers/trio.rb | 12 ++++++++---- public/icons/docs/trio/{16@2.png => 16@2x.png} | Bin 6 files changed, 24 insertions(+), 10 deletions(-) rename public/icons/docs/trio/{16@2.png => 16@2x.png} (100%) diff --git a/.gitignore b/.gitignore index 53bbb745..f89ecb61 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,3 @@ public/fonts public/docs/**/* docs/**/* !docs/*.md -vendor diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index c3c0fdec..5fc27d3c 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -721,6 +721,11 @@ credits = [ '2018 HashiCorp', 'MPL', 'https://raw.githubusercontent.com/hashicorp/terraform-website/master/LICENSE.md' + ], [ + 'Trio', + '2017 Nathaniel J. Smith', + 'MIT', + 'https://raw.githubusercontent.com/python-trio/trio/master/LICENSE.MIT' ], [ 'Twig', '2009-2018 The Twig Team', diff --git a/lib/docs/filters/trio/clean_html.rb b/lib/docs/filters/trio/clean_html.rb index 17542601..00919535 100644 --- a/lib/docs/filters/trio/clean_html.rb +++ b/lib/docs/filters/trio/clean_html.rb @@ -3,6 +3,7 @@ module Docs class CleanHtmlFilter < Filter def call @doc = at_css('div[role="main"]') + css('.section, [itemprop=articleBody]').each do |node| node.replace node.children end @@ -11,9 +12,11 @@ module Docs css('dt').each do |node| node.name = 'h3' + if node.parent.classes.include? 'field-list' node.name = 'h4' node['style'] = 'margin: 0' + if node.text == 'Parameters' or node.text == 'Raises' node.next_element.css('strong').each do |n| n.name = 'code' @@ -21,30 +24,31 @@ module Docs end else code = doc.document.create_element 'code' + if em = node.at_css('.property') code.inner_html = "#{em.text.strip} " em.remove end + code.inner_html += node.inner_text.strip node.inner_html = code end end css('pre').each do |node| + node.content = node.content.strip + classes = node.parent.parent.classes if classes.include? 'highlight-python3' - node['class'] = 'language-python' node['data-language'] = 'python' end + node.parent.parent.replace(node) end css('.admonition').each do |node| node.name = 'blockquote' node.at_css('.admonition-title').name = 'h4' - # new_node = node.document.create_element 'blockquote' - # new_node.inner_html = node.inner_html - # node.replace new_node end doc diff --git a/lib/docs/filters/trio/entries.rb b/lib/docs/filters/trio/entries.rb index 4ee6aab1..7c6aa76b 100644 --- a/lib/docs/filters/trio/entries.rb +++ b/lib/docs/filters/trio/entries.rb @@ -11,7 +11,6 @@ module Docs def additional_entries css('.descname').each_with_object [] do |node, entries| - name = node.text if node.previous.classes.include? 'descclassname' name = node.previous.text + name @@ -25,9 +24,11 @@ module Docs or dl.classes.include?('data') parent = dl.parent.previous_element cls = '' + if n = parent.at_css('.descclassname') cls += n.text end + if n = parent.at_css('.descname') if n.text == "The nursery interface" cls += "Nursery." @@ -35,6 +36,7 @@ module Docs cls += n.text + '.' end end + name = cls + name end diff --git a/lib/docs/scrapers/trio.rb b/lib/docs/scrapers/trio.rb index 8eb7cd26..b8719bea 100644 --- a/lib/docs/scrapers/trio.rb +++ b/lib/docs/scrapers/trio.rb @@ -1,8 +1,8 @@ module Docs class Trio < UrlScraper self.type = 'simple' - self.release = '0.11' - self.base_url = 'https://trio.readthedocs.io/en/latest/' + self.release = '0.12.1' + self.base_url = 'https://trio.readthedocs.io/en/v0.12.1/' self.root_path = 'index.html' self.links = { home: 'https://trio.readthedocs.io/', @@ -19,9 +19,13 @@ module Docs ] options[:attribution] = <<-HTML - © 2017-2019 Nathaniel J. Smith
- Licensed under MIT and Apache2. + © 2017 Nathaniel J. Smith
+ Licensed under the MIT License. HTML + def get_latest_version(opts) + doc = fetch_doc('https://trio.readthedocs.io/en/stable/', opts) + doc.at_css('.rst-other-versions a[href^="/en/v"]').content[1..-1] + end end end diff --git a/public/icons/docs/trio/16@2.png b/public/icons/docs/trio/16@2x.png similarity index 100% rename from public/icons/docs/trio/16@2.png rename to public/icons/docs/trio/16@2x.png