Merge pull request #1473 from freeCodeCamp/mdn/yari

mdn: update scraper to mdn/yari
pull/1524/head
Simon Legner 4 years ago committed by GitHub
commit 22e7767145
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -203,7 +203,7 @@ credits = [
'https://raw.githubusercontent.com/apache/cordova-docs/master/LICENSE' 'https://raw.githubusercontent.com/apache/cordova-docs/master/LICENSE'
], [ ], [
'CSS<br>DOM<br>HTTP<br>HTML<br>JavaScript<br>SVG<br>XPath', 'CSS<br>DOM<br>HTTP<br>HTML<br>JavaScript<br>SVG<br>XPath',
'2005-2020 Mozilla and individual contributors', '2005-2021 MDN contributors',
'CC BY-SA', 'CC BY-SA',
'https://creativecommons.org/licenses/by-sa/2.5/' 'https://creativecommons.org/licenses/by-sa/2.5/'
], [ ], [

@ -27,6 +27,7 @@
p > code, li > code { @extend %label; } p > code, li > code { @extend %label; }
> .note, > .note,
.notecard, // MDN 2021
.notice, .notice,
.warning, .warning,
.overheadIndicator, .overheadIndicator,

@ -41,6 +41,18 @@ module Docs
node.parent['id'] = node['name'] node.parent['id'] = node['name']
node.before(node.content).remove node.before(node.content).remove
end end
css('h2 > a, h3 > a').each do |node|
node.parent.content = node.content
end
css('.notecard > h4').each do |node|
node.name = 'strong'
end
css('svg.deprecated').each do |node|
node.name = 'span'
node.content = node.content
end
css('dt > a[id]').each do |node| css('dt > a[id]').each do |node|
next if node['href'] next if node['href']
@ -64,6 +76,9 @@ module Docs
end end
# New compatibility tables # New compatibility tables
# FIXME(2021):
# - fetched from external JSON: https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/alignment-baseline/bcd.json
# - https://github.com/mdn/yari/blob/master/build/bcd-urls.js
css('.bc-data #Legend + dl', '.bc-data #Legend', '.bc-data #Legend_2 + dl', '.bc-data #Legend_2', '.bc-browser-name').remove css('.bc-data #Legend + dl', '.bc-data #Legend', '.bc-data #Legend_2 + dl', '.bc-data #Legend_2', '.bc-browser-name').remove

@ -1,19 +0,0 @@
module Docs
  class Mdn
    # Filter that appends an "Edit this page on MDN" attribution block to the
    # document when the page being processed lives on developer.mozilla.org.
    class ContributeLinkFilter < Filter
      # Returns +html+ untouched for any other host. Otherwise appends the
      # attribution markup, whose link points at "<current_url>$edit", and
      # returns +html+.
      def call
        unless current_url.host == 'developer.mozilla.org'
          return html
        end

        # The heredoc body is kept flush-left so the emitted markup is
        # exactly the text shown below.
        edit_link = <<-HTML.strip_heredoc
<div class="_attribution">
<p class="_attribution-p">
<a href="#{current_url}$edit" class="_attribution-link">Edit this page on MDN</a>
</p>
</div>
        HTML

        html << edit_link
        html
      end
    end
  end
end

@ -11,7 +11,7 @@ module Docs
end end
def other def other
css('.prevnext').remove css('.prev-next').remove
if at_css('p').content.include?("\u{00AB}") if at_css('p').content.include?("\u{00AB}")
at_css('p').remove at_css('p').remove

@ -7,8 +7,6 @@ module Docs
html_filters.push 'http/clean_html', 'http/entries', 'title' html_filters.push 'http/clean_html', 'http/entries', 'title'
options[:mdn_tag] = 'HTTP'
options[:root_title] = 'HTTP' options[:root_title] = 'HTTP'
options[:title] = ->(filter) { filter.current_url.host == 'tools.ietf.org' ? false : filter.default_title } options[:title] = ->(filter) { filter.current_url.host == 'tools.ietf.org' ? false : filter.default_title }
options[:container] = ->(filter) { filter.current_url.host == 'tools.ietf.org' ? '.content' : nil } options[:container] = ->(filter) { filter.current_url.host == 'tools.ietf.org' ? '.content' : nil }

@ -6,8 +6,6 @@ module Docs
html_filters.push 'css/clean_html', 'css/entries', 'title' html_filters.push 'css/clean_html', 'css/entries', 'title'
options[:mdn_tag] = 'CSS'
options[:root_title] = 'CSS' options[:root_title] = 'CSS'
options[:skip] = %w(/CSS3 /Media/Visual /paged_media /Media/TV /Media/Tactile) options[:skip] = %w(/CSS3 /Media/Visual /paged_media /Media/TV /Media/Tactile)

@ -1,92 +1,12 @@
module Docs module Docs
class Dom < Mdn class Dom < Mdn
prepend FixInternalUrlsBehavior
prepend FixRedirectionsBehavior
self.name = 'DOM' self.name = 'DOM'
self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API' self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API'
html_filters.push 'dom/clean_html', 'dom/entries', 'title' html_filters.push 'dom/clean_html', 'dom/entries', 'title'
options[:mdn_tag] = 'XSLT_Reference'
options[:root_title] = 'DOM' options[:root_title] = 'DOM'
options[:skip] = %w(
/Reference
/Index
/Document_Object_Model
/document/createProcessingInstruction
/document/documentURIObject
/document/loadOverlay
/document/tooltipNode
/Document/cookie/Simple_document.cookie_framework
/DOMErrorHandler
/DOMLocator
/DOMObject
/DOMStringList
/Event/Comparison_of_Event_Targets
/Format
/IDBDatabaseException
/IndexedDB_API/Using_JavaScript_Generators_in_Firefox
/Notation
/ProcessingInstruction
/TypeInfo
/window/getAttention
/window/messageManager
/window/updateCommands
/window/pkcs11
/OES_texture_float)
options[:skip_patterns] = [
/NS/,
/XPC/,
/moz/i,
/gecko/i,
/webkit/i,
/gamepad/i,
/UserData/,
/Bluetooth/,
/FMRadio/i,
/XDomainRequest/i,
/\A\/Camera/,
/\A\/Data_Store_API/,
/\A\/DataStore/,
/\A\/DeviceStorage/,
/\A\/DocumentTouch/,
/\A\/document\/xml/,
/\A\/XMLDocument/,
/\A\/DOMCursor/,
/\A\/DOMRequest/,
/\A\/InstallTrigger/,
/\A\/Entity/,
/\A\/Settings/,
/telephony/i,
/\A\/NFC_API/,
/\A\/Window\/\w+bar/i,
/\A\/Apps/,
/\A\/Contact/,
/\A\/L10n/,
/\A\/Permission/]
options[:fix_urls] = ->(url) do
return if url.include?('_') || url.include?('?')
url.sub! 'https://developer.mozilla.org/en-US/docs/DOM/', "#{Dom.base_url}/"
url.sub! 'https://developer.mozilla.org/en/DOM/', "#{Dom.base_url}/"
url.sub! 'https://developer.mozilla.org/Web/API/', "#{Dom.base_url}/"
url.sub! "#{Dom.base_url}/Console", "#{Dom.base_url}/console"
url.sub! "#{Dom.base_url}/Document\/", "#{Dom.base_url}/document\/"
url.sub! "#{Dom.base_url}/Element", "#{Dom.base_url}/element"
url.sub! "#{Dom.base_url}/History", "#{Dom.base_url}/history"
url.sub! "#{Dom.base_url}/Location", "#{Dom.base_url}/location"
url.sub! "#{Dom.base_url}/Navigator", "#{Dom.base_url}/navigator"
url.sub! "#{Dom.base_url}/Screen", "#{Dom.base_url}/screen"
url.sub! "#{Dom.base_url}/Window\/", "#{Dom.base_url}/window\/"
url.sub! "#{Dom.base_url}/notification", "#{Dom.base_url}/Notification"
url.sub! "#{Dom.base_url}/range", "#{Dom.base_url}/Range"
url.sub! "#{Dom.base_url}/event", "#{Dom.base_url}/Event"
url.sub! '/en/DOM/Manipulating_the_browser_history', "/en-US/docs/Web/API/History_API"
url
end
end end
end end

@ -1,26 +0,0 @@
module Docs
  # Scraper configuration for the "DOM Events" documentation, rooted at the
  # MDN Web/Events pages.
  class DomEvents < Mdn
    prepend FixInternalUrlsBehavior

    self.name = 'DOM Events'
    self.slug = 'dom_events'
    self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/Events'

    html_filters.insert_after 'clean_html', 'dom_events/clean_html'
    html_filters.push 'dom_events/entries', 'title'

    options[:mdn_tag] = 'events'
    options[:root_title] = 'DOM Events'

    # Paths excluded from the scrape: one exact path plus anything whose
    # path starts with "/moz" (case-insensitive).
    options[:skip] = ['/MozOrientation']
    options[:skip_patterns] = [/\A\/moz/i]

    # Rewrites older event-reference URL prefixes to the current base URL.
    # The prefixes are tried in order; +url+ is mutated in place and returned.
    options[:fix_urls] = lambda do |url|
      [
        'https://developer.mozilla.org/en-US/Mozilla_event_reference',
        'https://developer.mozilla.org/en-US/docs/Mozilla_event_reference',
        'https://developer.mozilla.org/en-US/docs/Web/Reference/Events'
      ].each do |legacy_prefix|
        url.sub! legacy_prefix, DomEvents.base_url
      end
      url
    end
  end
end

@ -7,8 +7,6 @@ module Docs
html_filters.push 'html/clean_html', 'html/entries', 'title' html_filters.push 'html/clean_html', 'html/entries', 'title'
options[:mdn_tag] = 'HTML'
options[:root_title] = 'HTML' options[:root_title] = 'HTML'
options[:title] = ->(filter) do options[:title] = ->(filter) do

@ -8,8 +8,6 @@ module Docs
html_filters.push 'javascript/clean_html', 'javascript/entries', 'title' html_filters.push 'javascript/clean_html', 'javascript/entries', 'title'
options[:mdn_tag] = 'JavaScript'
options[:root_title] = 'JavaScript' options[:root_title] = 'JavaScript'
# Don't want # Don't want

@ -3,13 +3,9 @@ module Docs
self.abstract = true self.abstract = true
self.type = 'mdn' self.type = 'mdn'
params[:raw] = 1
params[:macros] = 1
html_filters.push 'mdn/clean_html' html_filters.push 'mdn/clean_html'
text_filters.insert_before 'attribution', 'mdn/contribute_link'
options[:rate_limit] = 200 options[:container] = '#content > .main-page-content'
options[:trailing_slash] = false options[:trailing_slash] = false
options[:skip_link] = ->(link) { options[:skip_link] = ->(link) {
@ -17,19 +13,12 @@ module Docs
} }
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2005&ndash;2020 Mozilla and individual contributors.<br> &copy; 2005&ndash;2021 MDN contributors.<br>
Licensed under the Creative Commons Attribution-ShareAlike License v2.5 or later. Licensed under the Creative Commons Attribution-ShareAlike License v2.5 or later.
HTML HTML
def get_latest_version(opts) def get_latest_version(opts)
get_latest_github_commit_date('mdn', 'content', opts) get_latest_github_commit_date('mdn', 'content', opts)
end end
private
def process_response?(response)
response.effective_url.host = 'developer.mozilla.org' if response.effective_url.host == 'wiki.developer.mozilla.org'
super && response.effective_url.query == 'raw=1&macros=1'
end
end end
end end

@ -8,8 +8,6 @@ module Docs
html_filters.push 'svg/clean_html', 'svg/entries', 'title' html_filters.push 'svg/clean_html', 'svg/entries', 'title'
options[:mdn_tag] = 'XSLT_Reference'
options[:root_title] = 'SVG' options[:root_title] = 'SVG'
options[:title] = ->(filter) do options[:title] = ->(filter) do

@ -8,8 +8,6 @@ module Docs
html_filters.push 'xslt_xpath/clean_html', 'xslt_xpath/entries', 'title' html_filters.push 'xslt_xpath/clean_html', 'xslt_xpath/entries', 'title'
options[:mdn_tag] = 'XSLT_Reference'
options[:root_title] = 'XSLT' options[:root_title] = 'XSLT'
options[:only_patterns] = [/\A\/XSLT/, /\A\/XPath/] options[:only_patterns] = [/\A\/XSLT/, /\A\/XPath/]

Loading…
Cancel
Save