wordpress: finish scraper and filters

pull/773/head
Jasper van Merle 6 years ago
parent a7a3864f9a
commit 78168366cf

@ -728,6 +728,12 @@ credits = [
'CC BY', 'CC BY',
'https://creativecommons.org/licenses/by/4.0/' 'https://creativecommons.org/licenses/by/4.0/'
], [ ], [
'Wordpress',
'2003-2019 WordPress Foundation',
'GPLv2+',
'https://wordpress.org/about/license/'
],
[
'Yarn', 'Yarn',
'2016-present Yarn Contributors', '2016-present Yarn Contributors',
'BSD', 'BSD',

@ -7,12 +7,19 @@ module Docs
return doc return doc
end end
article = at_css('article[id^="post-"]')
@doc = at_css('article[id^="post-"]') unless article.nil?
css('hr', '.screen-reader-text', '.table-of-contents', css('hr', '.screen-reader-text', '.table-of-contents',
'.anchor', '.toc-jump', '.source-code-links', '.user-notes', '.anchor', '.toc-jump', '.source-code-links', '.user-notes',
'.show-more', '.hide-more').remove '.show-more', '.hide-more').remove
br = /<br\s?\/?>/i br = /<br\s?\/?>/i
header = at_css('h1')
header.content = header.content.strip
doc.prepend_child header
# Add PHP code highlighting # Add PHP code highlighting
css('pre').each do |node| css('pre').each do |node|
node['data-language'] = 'php' node['data-language'] = 'php'
@ -29,4 +36,4 @@ module Docs
end end
end end
end end
end end

@ -1,12 +1,6 @@
module Docs module Docs
class Wordpress class Wordpress
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def breadcrumbs
@breadcrumbs ||= css('.breadcrumbs .trail-inner a')
.map(&:content)
.map(&:strip)
end
def get_name def get_name
at_css('.breadcrumbs .trail-end').content at_css('.breadcrumbs .trail-end').content
end end
@ -18,12 +12,8 @@ module Docs
'Hooks' 'Hooks'
elsif subpath.starts_with?('functions') elsif subpath.starts_with?('functions')
'Functions' 'Functions'
elsif breadcrumbs.size > 1
breadcrumbs.drop(1).join(': ')
else
at_css('.breadcrumbs .trail-end').content
end end
end end
end end
end end
end end

@ -2,7 +2,7 @@ module Docs
class Wordpress < UrlScraper class Wordpress < UrlScraper
self.name = 'WordPress' self.name = 'WordPress'
self.type = 'wordpress' self.type = 'wordpress'
self.release = '4.9.4' self.release = '5.2.2'
self.base_url = 'https://developer.wordpress.org/reference/' self.base_url = 'https://developer.wordpress.org/reference/'
self.initial_paths = %w( self.initial_paths = %w(
functions/ functions/
@ -15,10 +15,10 @@ module Docs
code: 'https://github.com/WordPress/WordPress' code: 'https://github.com/WordPress/WordPress'
} }
html_filters.push 'wordpress/clean_html', 'wordpress/entries' html_filters.push 'wordpress/entries', 'wordpress/clean_html'
options[:container] = '#content-area' options[:container] = '#content-area'
options[:trailing_slash] = true options[:trailing_slash] = false
options[:only_patterns] = [ options[:only_patterns] = [
/\Afunctions\//, /\Afunctions\//,
/\Ahooks\//, /\Ahooks\//,
@ -32,8 +32,8 @@ module Docs
] ]
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2003&ndash;2018 WordPress Foundation<br> &copy; 2003&ndash;2019 WordPress Foundation<br>
Licensed under the GNU GPLv2+ License. Licensed under the GNU GPLv2+ License.
HTML HTML
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 958 B

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.2 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Loading…
Cancel
Save