From 21443dc9143ae64d51152f8f65ee66abd7d176a3 Mon Sep 17 00:00:00 2001 From: Cimbali Date: Sat, 30 Nov 2019 23:54:35 +0100 Subject: [PATCH 1/2] Add WebExtensions --- lib/docs/filters/web_extensions/clean_html.rb | 14 ++++++++++ lib/docs/filters/web_extensions/entries.rb | 28 +++++++++++++++++++ lib/docs/scrapers/web_extensions.rb | 26 +++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 lib/docs/filters/web_extensions/clean_html.rb create mode 100644 lib/docs/filters/web_extensions/entries.rb create mode 100644 lib/docs/scrapers/web_extensions.rb diff --git a/lib/docs/filters/web_extensions/clean_html.rb b/lib/docs/filters/web_extensions/clean_html.rb new file mode 100644 index 00000000..c5737371 --- /dev/null +++ b/lib/docs/filters/web_extensions/clean_html.rb @@ -0,0 +1,14 @@ +module Docs + class WebExtensions + class CleanHtmlFilter < Filter + def call + + # Remove all the cruft. + content = at_css('main#content') + content.at_css('aside.metadata').remove + + content + end + end + end +end diff --git a/lib/docs/filters/web_extensions/entries.rb b/lib/docs/filters/web_extensions/entries.rb new file mode 100644 index 00000000..84c095e7 --- /dev/null +++ b/lib/docs/filters/web_extensions/entries.rb @@ -0,0 +1,28 @@ +module Docs + class WebExtensions + class EntriesFilter < Docs::EntriesFilter + def get_name + at_css('main#content h1').text + end + + def get_type + slug_parts = slug.split('/') + if slug_parts[0] == 'API' and slug_parts.length() > 1 + if slug_parts[1] == 'WebRequest' + return 'webRequest' + else + return slug_parts[1] + end + elsif slug_parts[0] == 'manifest.json' + return slug_parts[0] + elsif slug_parts[0] == 'user_interface' + return 'User Interface' + elsif slug_parts.length() > 1 + return slug_parts[0] + else + return 'Miscellaneous' + end + end + end + end +end diff --git a/lib/docs/scrapers/web_extensions.rb b/lib/docs/scrapers/web_extensions.rb new file mode 100644 index 00000000..15d2c30a --- /dev/null +++ 
b/lib/docs/scrapers/web_extensions.rb @@ -0,0 +1,26 @@ +module Docs + class WebExtensions < UrlScraper + self.name = 'Web Extensions' + self.slug = 'web_extensions' + self.type = 'simple' + self.links = { + home: 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions' + } + + self.base_url = 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions' + + html_filters.push 'web_extensions/entries', 'web_extensions/clean_html' + + options[:skip_patterns] = [ + /\/contributors\.txt$/ + ] + + options[:attribution] = -> (filter) { + <<-HTML + #{filter.result()[:entries][0].name} © 2005-2021 Mozilla and individual contributors.
+ Licensed under the Creative Commons Attribution-ShareAlike license + HTML + } + + end +end From 6abc208535b8c2ddeef833c0199ba59642d30ed8 Mon Sep 17 00:00:00 2001 From: Cimbali Date: Wed, 26 May 2021 22:27:26 +0200 Subject: [PATCH 2/2] Use MDN scraper for web extensions --- lib/docs/filters/web_extensions/clean_html.rb | 7 +----- lib/docs/filters/web_extensions/entries.rb | 22 ++++++++----------- lib/docs/scrapers/{ => mdn}/web_extensions.rb | 10 +-------- 3 files changed, 11 insertions(+), 28 deletions(-) rename lib/docs/scrapers/{ => mdn}/web_extensions.rb (52%) diff --git a/lib/docs/filters/web_extensions/clean_html.rb b/lib/docs/filters/web_extensions/clean_html.rb index c5737371..88ec0373 100644 --- a/lib/docs/filters/web_extensions/clean_html.rb +++ b/lib/docs/filters/web_extensions/clean_html.rb @@ -2,12 +2,7 @@ module Docs class WebExtensions class CleanHtmlFilter < Filter def call - - # Remove all the cruft. - content = at_css('main#content') - content.at_css('aside.metadata').remove - - content + doc end end end diff --git a/lib/docs/filters/web_extensions/entries.rb b/lib/docs/filters/web_extensions/entries.rb index 84c095e7..f04efb37 100644 --- a/lib/docs/filters/web_extensions/entries.rb +++ b/lib/docs/filters/web_extensions/entries.rb @@ -1,26 +1,22 @@ module Docs class WebExtensions class EntriesFilter < Docs::EntriesFilter + TYPE_BY_PATH = { + 'manifest.json' => 'manifest.json', + 'user_interface' => 'User Interface', + 'WebRequest' => 'webRequest', + } + def get_name - at_css('main#content h1').text + at_css('h1').text end def get_type slug_parts = slug.split('/') if slug_parts[0] == 'API' and slug_parts.length() > 1 - if slug_parts[1] == 'WebRequest' - return 'webRequest' - else - return slug_parts[1] - end - elsif slug_parts[0] == 'manifest.json' - return slug_parts[0] - elsif slug_parts[0] == 'user_interface' - return 'User Interface' - elsif slug_parts.length() > 1 - return slug_parts[0] + return TYPE_BY_PATH.fetch(slug_parts[1], 
slug_parts[1]) else - return 'Miscellaneous' + return TYPE_BY_PATH.fetch(slug_parts[0], slug_parts.length() > 1 ? slug_parts[0] : 'Miscellaneous') end end end diff --git a/lib/docs/scrapers/web_extensions.rb b/lib/docs/scrapers/mdn/web_extensions.rb similarity index 52% rename from lib/docs/scrapers/web_extensions.rb rename to lib/docs/scrapers/mdn/web_extensions.rb index 15d2c30a..1a5f528a 100644 --- a/lib/docs/scrapers/web_extensions.rb +++ b/lib/docs/scrapers/mdn/web_extensions.rb @@ -1,8 +1,7 @@ module Docs - class WebExtensions < UrlScraper + class WebExtensions < Mdn self.name = 'Web Extensions' self.slug = 'web_extensions' - self.type = 'simple' self.links = { home: 'https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions' } @@ -15,12 +14,5 @@ module Docs /\/contributors\.txt$/ ] - options[:attribution] = -> (filter) { - <<-HTML - #{filter.result()[:entries][0].name} © 2005-2021 Mozilla and individual contributors.
- Licensed under the Creative Commons Attribution-ShareAlike license - HTML - } - end end