diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee
index 913e00dd..4f1ea1aa 100644
--- a/assets/javascripts/templates/pages/about_tmpl.coffee
+++ b/assets/javascripts/templates/pages/about_tmpl.coffee
@@ -578,7 +578,7 @@ credits = [
'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE'
], [
'Perl',
- '1993-2016 Larry Wall and others',
+ '1993-2020 Larry Wall and others',
'GPLv1',
'https://perldoc.perl.org/index-licence.html'
], [
diff --git a/assets/stylesheets/pages/_perl.scss b/assets/stylesheets/pages/_perl.scss
index 57be0c84..777eac8e 100644
--- a/assets/stylesheets/pages/_perl.scss
+++ b/assets/stylesheets/pages/_perl.scss
@@ -1,5 +1,9 @@
._perl {
@extend %simple;
- > h4 { @extend %block-label; }
+ dt + dt { margin-top: 1em; }
+
+ > dl > dt { @extend %block-label; }
+ > dl > dt.function { @extend %label-blue; }
+ > dl > dt.variable { @extend %label-green; }
}
diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md
index 4ad4fff0..33a58145 100644
--- a/docs/file-scrapers.md
+++ b/docs/file-scrapers.md
@@ -128,8 +128,6 @@ bsdtar --extract --to-stdout --file openjdk-8-doc_8u272-b10-1_all.deb data.tar.x
bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/ ./usr/share/doc/openjdk-8-jre-headless/api/
```
-## Perl
-
## PHP
## Python
diff --git a/lib/docs/filters/perl/clean_html.rb b/lib/docs/filters/perl/clean_html.rb
index 11ae9b15..4230f661 100644
--- a/lib/docs/filters/perl/clean_html.rb
+++ b/lib/docs/filters/perl/clean_html.rb
@@ -2,49 +2,21 @@ module Docs
class Perl
class CleanHtmlFilter < Filter
def call
- root_page? ? root : other
- doc
- end
-
- def root
- doc.inner_html = '
Perl 5 Documentation
'
- end
-
- def other
- @doc = at_css('#content_body')
-
- css('noscript', '#recent_pages', '#from_search', '#page_index', '.mod_az_list').remove
-
css('h1, h2, h3, h4').each do |node|
node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
end
- at_css('h2').name = 'h1'
-
- css('a[name] + h2', 'a[name] + h3', 'a[name] + h4', 'a[name] + h5').each do |node|
- node['id'] = node.previous_element['name']
- end
-
- css('li > a[name]').each do |node|
- node.parent['id'] = node['name']
- end
-
- css('pre').each do |node|
- node.css('li').each do |li|
- li.content = li.content + "\n"
- end
+ css('pre > code').each do |node|
+ node.parent['data-language'] = 'perl'
node.content = node.content
- node.inner_html = node.inner_html.strip_heredoc
- node['data-language'] = 'perl'
end
- if slug =~ /functions/ || slug == 'perlvar'
- css('ul > li[id]').each do |node|
- heading = node.at_css('b')
- heading.name = 'h2'
- heading['id'] = node['id']
- node.parent.before(node.children)
- node.remove
+ css('dl > dt').each do |node|
+ case slug
+ when 'perlfunc'
+ node['class'] = 'function'
+ when 'perlvar'
+ node['class'] = 'variable'
end
end
diff --git a/lib/docs/filters/perl/entries.rb b/lib/docs/filters/perl/entries.rb
index 505e9b15..1fbf6637 100644
--- a/lib/docs/filters/perl/entries.rb
+++ b/lib/docs/filters/perl/entries.rb
@@ -2,54 +2,101 @@ module Docs
class Perl
class EntriesFilter < Docs::EntriesFilter
REPLACE_TYPES = {
- 'Platform specific' => 'Platform Specific',
- 'Internals and C language interface' => 'Internals',
+ 'Platform-Specific' => 'Platform Specific',
+ 'Internals and C Language Interface' => 'Internals',
+ 'Tutorials' => 'Manual: Tutorials',
+ 'Overview' => 'Manual: Overview'
+ }
+
+ # Individual pages in the Perl documentation carry no context from which
+ # an entry 'type' could be derived, so the type is looked up in this map
+ # instead. It is extended at runtime while the index-like pages ('perl',
+ # 'modules' and 'perlutil') are filtered, and is consulted on later pages.
+ # It is prepopulated with edge cases.
+ TYPES = {
+ 'pod2man' => 'Utilities',
+ 'pod2text' => 'Utilities',
+ 'encguess' => 'Utilities',
+ 'streamzip' => 'Utilities',
+ 'pl2pm' => 'Utilities',
+ 'perl' => 'Manual: Overview',
+ 'perldoc' => 'Manual: Overview',
+ 'perlintro' => 'Manual: Overview',
'perlop' => 'Operators',
'perlvar' => 'Variables',
- 'Functions' => 'Functions'
+ 'perlref' => 'Reference Manual',
+ 'modules' => 'Standard Modules',
+ 'perlutil' => 'Utilities',
+
+ 'warnings' => 'Pragmas',
+ 'strict' => 'Pragmas',
+
+ 'Pod::Text::Overstrike' => 'Standard Modules',
+ 'Test2::EventFacet::Hub' => 'Standard Modules'
}
- MANUAL_TYPES = %w(Overview Tutorials FAQs)
+ def call
+ case slug
+ when 'perl'
+ css('h2').each do |heading|
+ heading.next_element.css('a').each do |node|
+ TYPES[node.content] = heading.content
+ end
+ end
- def breadcrumbs
- @breadcrumbs ||= at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
- end
+ when 'modules'
+ node = at_css('#Pragmatic-Modules')
+ node = node.next_element while node.name != 'ul'
+ node.css('li').each do |n|
+ TYPES[n.at_css('a').content] = 'Pragmas'
+ end
- def include_default_entry?
- slug !~ /\Aindex/
+ node = at_css('#Standard-Modules')
+ node = node.next_element while node.name != 'ul'
+ node.css('li').each do |n|
+ TYPES[n.at_css('a').content] = 'Standard Modules'
+ end
+
+ when 'perlutil'
+ css('dl > dt').each do |node|
+ TYPES[node['id']] = "Utilities"
+ end
+ end
+
+ super
end
def get_name
- at_css('h1').content.strip
+ slug
end
def get_type
- case breadcrumbs[1]
- when 'Language reference'
- REPLACE_TYPES[breadcrumbs[2]] || 'Language'
- when /\ACore modules/
- 'Core Modules'
+ case slug
+ when /perl.*faq/
+ 'Manual: FAQs'
else
- type = REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
- type.prepend 'Manual: ' if MANUAL_TYPES.include?(type)
- type
+ if TYPES.key? name
+ REPLACE_TYPES[TYPES[name]] || TYPES[name]
+ else
+ 'Other'
+ end
end
end
def additional_entries
case slug
+ when 'perlfunc'
+ css(':not(p) + dl > dt').each_with_object [] do |node, entries|
+ entries << [node.content, node['id'], 'Functions']
+ end
when 'perlop'
- css('h2').map do |node|
- name = node.content
- id = node.previous_element['name']
- [name, id]
+ css('h2').each_with_object [] do |node, entries|
+ entries << [node.content, node['id'], 'Operators']
end
when 'perlvar'
- css('#content_body > ul > li > b').map do |node|
- name = node.content
- id = node.previous_element['name']
- [name, id]
+ css('> dl > dt').each_with_object [] do |node, entries|
+ entries << [node.content, node['id'], 'Variables']
end
else
[]
diff --git a/lib/docs/filters/perl/pre_clean_html.rb b/lib/docs/filters/perl/pre_clean_html.rb
new file mode 100755
index 00000000..957bd431
--- /dev/null
+++ b/lib/docs/filters/perl/pre_clean_html.rb
@@ -0,0 +1,17 @@
+module Docs
+ class Perl
+ class PreCleanHtmlFilter < Filter
+ def call
+ css('#links', '.leading-notice', '.permalink').remove
+
+ # A bug somewhere prevents the two ids `$"` and `$#` from loading, so rename them
+ if slug == 'perlvar'
+ at_css('#\$\"')['id'] = '$ls'
+ at_css('#\$\#')['id'] = '$hash'
+ end
+
+ doc
+ end
+ end
+ end
+end
diff --git a/lib/docs/scrapers/perl.rb b/lib/docs/scrapers/perl.rb
index 8c0462e5..90844c47 100644
--- a/lib/docs/scrapers/perl.rb
+++ b/lib/docs/scrapers/perl.rb
@@ -1,46 +1,62 @@
module Docs
- class Perl < FileScraper
+ class Perl < UrlScraper
self.name = 'Perl'
self.type = 'perl'
- self.root_path = 'index.html'
+# self.root_path = 'index.html'
+ self.initial_paths = ['modules.html', 'perlutil.html', 'perl.html']
self.links = {
home: 'https://www.perl.org/'
}
- html_filters.push 'perl/entries', 'perl/clean_html'
+ html_filters.push 'perl/pre_clean_html', 'perl/entries', 'perl/clean_html', 'title'
+
+ options[:container] = '#perldocdiv'
options[:skip] = %w(
- preferences.html
- perlartistic.html
- perlgpl.html
- perlhist.html
- perltodo.html )
+ perlbook perlcommunity perlexperiment perlartistic perlgpl perlhist
+ perlcn perljp perlko perltw
+ perlboot perlbot perlrepository perltodo perltooc perltoot )
- options[:skip_patterns] = [/\.pdf/, /delta\.html/]
+ options[:skip_patterns] = [/\Afunctions/, /\Avariables/, /\.pdf/, /delta/]
options[:attribution] = <<-HTML
- © 1993–2016 Larry Wall and others
+ © 1993–2020 Larry Wall and others
Licensed under the GNU General Public License version 1 or later, or the Artistic License.
The Perl logo is a trademark of the Perl Foundation.
HTML
+ version '5.32' do
+ self.release = '5.32.0'
+ self.base_url = "https://perldoc.perl.org/#{self.release}/"
+ end
+
+ version '5.30' do
+ self.release = '5.30.3'
+ self.base_url = "https://perldoc.perl.org/#{self.release}/"
+ end
+
+ version '5.28' do
+ self.release = '5.28.3'
+ self.base_url = "https://perldoc.perl.org/#{self.release}/"
+ end
+
version '5.26' do
- self.release = '5.26.0'
+ self.release = '5.26.3'
self.base_url = "https://perldoc.perl.org/#{self.release}/"
end
version '5.24' do
- self.release = '5.24.0'
+ self.release = '5.24.4'
self.base_url = "https://perldoc.perl.org/#{self.release}/"
end
version '5.22' do
- self.release = '5.22.0'
+ self.release = '5.22.4'
self.base_url = "https://perldoc.perl.org/#{self.release}/"
end
version '5.20' do
- self.release = '5.20.2'
+ self.release = '5.20.3'
self.base_url = "https://perldoc.perl.org/#{self.release}/"
end