From 4dc9557032d0052f0e6248375f262af2dae5ca93 Mon Sep 17 00:00:00 2001 From: Simon Legner Date: Fri, 29 Jan 2021 23:49:35 +0100 Subject: [PATCH] mdn: update scraper to mdn/yari --- assets/stylesheets/pages/_mdn.scss | 1 + lib/docs/filters/mdn/clean_html.rb | 15 +++++++++++++++ lib/docs/filters/svg/clean_html.rb | 2 +- lib/docs/scrapers/mdn/mdn.rb | 12 +----------- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/assets/stylesheets/pages/_mdn.scss b/assets/stylesheets/pages/_mdn.scss index fb2cce38..10e144f3 100644 --- a/assets/stylesheets/pages/_mdn.scss +++ b/assets/stylesheets/pages/_mdn.scss @@ -27,6 +27,7 @@ p > code, li > code { @extend %label; } > .note, + .notecard, // MDN 2021 .notice, .warning, .overheadIndicator, diff --git a/lib/docs/filters/mdn/clean_html.rb b/lib/docs/filters/mdn/clean_html.rb index 540be3e1..b78f1def 100644 --- a/lib/docs/filters/mdn/clean_html.rb +++ b/lib/docs/filters/mdn/clean_html.rb @@ -41,6 +41,18 @@ module Docs node.parent['id'] = node['name'] node.before(node.content).remove end + css('h2 > a, h3 > a').each do |node| + node.parent.content = node.content + end + + css('.notecard > h4').each do |node| + node.name = 'strong' + end + + css('svg.deprecated').each do |node| + node.name = 'span' + node.content = node.content + end css('dt > a[id]').each do |node| next if node['href'] @@ -64,6 +76,9 @@ module Docs end # New compatibility tables + # FIXME(2021): + # - fetched from external JSON: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/alignment-baseline/bcd.json + # - https://github.com/mdn/yari/blob/master/build/bcd-urls.js css('.bc-data #Legend + dl', '.bc-data #Legend', '.bc-data #Legend_2 + dl', '.bc-data #Legend_2', '.bc-browser-name').remove diff --git a/lib/docs/filters/svg/clean_html.rb b/lib/docs/filters/svg/clean_html.rb index 4494e875..3468cfb7 100644 --- a/lib/docs/filters/svg/clean_html.rb +++ b/lib/docs/filters/svg/clean_html.rb @@ -11,7 +11,7 @@ module Docs end def other - 
css('.prevnext').remove + css('.prev-next').remove if at_css('p').content.include?("\u{00AB}") at_css('p').remove diff --git a/lib/docs/scrapers/mdn/mdn.rb b/lib/docs/scrapers/mdn/mdn.rb index 1ed97605..04e39e39 100644 --- a/lib/docs/scrapers/mdn/mdn.rb +++ b/lib/docs/scrapers/mdn/mdn.rb @@ -3,12 +3,9 @@ module Docs self.abstract = true self.type = 'mdn' - params[:raw] = 1 - params[:macros] = 1 - html_filters.push 'mdn/clean_html' - options[:rate_limit] = 200 + options[:container] = '#content' options[:trailing_slash] = false options[:skip_link] = ->(link) { @@ -23,12 +20,5 @@ module Docs def get_latest_version(opts) get_latest_github_commit_date('mdn', 'content', opts) end - - private - - def process_response?(response) - response.effective_url.host = 'developer.mozilla.org' if response.effective_url.host == 'wiki.developer.mozilla.org' - super && response.effective_url.query == 'raw=1&macros=1' - end end end