From 3af0bbe37baad3ee9a6e70f1d7e4330c9d7e5302 Mon Sep 17 00:00:00 2001 From: Phil Scherer Date: Wed, 25 Nov 2020 18:36:16 +0000 Subject: [PATCH] Update Perl documentation (5.32) --- .../templates/pages/about_tmpl.coffee | 2 +- assets/stylesheets/pages/_perl.scss | 6 +- docs/file-scrapers.md | 2 - lib/docs/filters/perl/clean_html.rb | 44 ++------- lib/docs/filters/perl/entries.rb | 99 ++++++++++++++----- lib/docs/filters/perl/pre_clean_html.rb | 17 ++++ lib/docs/scrapers/perl.rb | 44 ++++++--- 7 files changed, 134 insertions(+), 80 deletions(-) create mode 100755 lib/docs/filters/perl/pre_clean_html.rb diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index 913e00dd..4f1ea1aa 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -578,7 +578,7 @@ credits = [ 'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE' ], [ 'Perl', - '1993-2016 Larry Wall and others', + '1993-2020 Larry Wall and others', 'GPLv1', 'https://perldoc.perl.org/index-licence.html' ], [ diff --git a/assets/stylesheets/pages/_perl.scss b/assets/stylesheets/pages/_perl.scss index 57be0c84..777eac8e 100644 --- a/assets/stylesheets/pages/_perl.scss +++ b/assets/stylesheets/pages/_perl.scss @@ -1,5 +1,9 @@ ._perl { @extend %simple; - > h4 { @extend %block-label; } + dt + dt { margin-top: 1em; } + + > dl > dt { @extend %block-label; } + > dl > dt.function { @extend %label-blue; } + > dl > dt.variable { @extend %label-green; } } diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 4ad4fff0..33a58145 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -128,8 +128,6 @@ bsdtar --extract --to-stdout --file openjdk-8-doc_8u272-b10-1_all.deb data.tar.x bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/ ./usr/share/doc/openjdk-8-jre-headless/api/ ``` -## Perl - ## PHP ## Python diff --git a/lib/docs/filters/perl/clean_html.rb b/lib/docs/filters/perl/clean_html.rb index 11ae9b15..4230f661 100644 --- a/lib/docs/filters/perl/clean_html.rb +++ b/lib/docs/filters/perl/clean_html.rb @@ -2,49 +2,21 @@ module Docs class Perl class CleanHtmlFilter < Filter def call - root_page? ? root : other - doc - end - - def root - doc.inner_html = '

Perl 5 Documentation

' - end - - def other - @doc = at_css('#content_body') - - css('noscript', '#recent_pages', '#from_search', '#page_index', '.mod_az_list').remove - css('h1, h2, h3, h4').each do |node| node.name = node.name.sub(/\d/) { |i| i.to_i + 1 } end - at_css('h2').name = 'h1' - - css('a[name] + h2', 'a[name] + h3', 'a[name] + h4', 'a[name] + h5').each do |node| - node['id'] = node.previous_element['name'] - end - - css('li > a[name]').each do |node| - node.parent['id'] = node['name'] - end - - css('pre').each do |node| - node.css('li').each do |li| - li.content = li.content + "\n" - end + css('pre > code').each do |node| + node.parent['data-language'] = 'perl' node.content = node.content - node.inner_html = node.inner_html.strip_heredoc - node['data-language'] = 'perl' end - if slug =~ /functions/ || slug == 'perlvar' - css('ul > li[id]').each do |node| - heading = node.at_css('b') - heading.name = 'h2' - heading['id'] = node['id'] - node.parent.before(node.children) - node.remove + css('dl > dt').each do |node| + case slug + when 'perlfunc' + node['class'] = 'function' + when 'perlvar' + node['class'] = 'variable' end end diff --git a/lib/docs/filters/perl/entries.rb b/lib/docs/filters/perl/entries.rb index 505e9b15..1fbf6637 100644 --- a/lib/docs/filters/perl/entries.rb +++ b/lib/docs/filters/perl/entries.rb @@ -2,54 +2,101 @@ module Docs class Perl class EntriesFilter < Docs::EntriesFilter REPLACE_TYPES = { - 'Platform specific' => 'Platform Specific', - 'Internals and C language interface' => 'Internals', + 'Platform-Specific' => 'Platform Specific', + 'Internals and C Language Interface' => 'Internals', + 'Tutorials' => 'Manual: Tutorials', + 'Overview' => 'Manual: Overview' + } + + # Individual pages within the Perl documentation are missing all context + # for anything even resembling a 'type'. So we're going to grab it + # elsewhere with a neat trick: dynamically generate a map from a few + # ~index~ pages at runtime which is then referenced on future pages. + # Prepopulate w/ edge cases + TYPES = { + 'pod2man' => 'Utilities', + 'pod2text' => 'Utilities', + 'encguess' => 'Utilities', + 'streamzip' => 'Utilities', + 'pl2pm' => 'Utilities', + 'perl' => 'Manual: Overview', + 'perldoc' => 'Manual: Overview', + 'perlintro' => 'Manual: Overview', 'perlop' => 'Operators', 'perlvar' => 'Variables', - 'Functions' => 'Functions' + 'perlref' => 'Reference Manual', + 'modules' => 'Standard Modules', + 'perlutil' => 'Utilities', + + 'warnings' => 'Pragmas', + 'strict' => 'Pragmas', + + 'Pod::Text::Overstrike' => 'Standard Modules', + 'Test2::EventFacet::Hub' => 'Standard Modules' } - MANUAL_TYPES = %w(Overview Tutorials FAQs) + def call + case slug + when 'perl' + css('h2').each do |heading| + heading.next_element.css('a').each do |node| + TYPES[node.content] = heading.content + end + end - def breadcrumbs - @breadcrumbs ||= at_css('#breadcrumbs').content.split('>').each { |s| s.strip! } - end + when 'modules' + node = at_css('#Pragmatic-Modules') + node = node.next_element while node.name != 'ul' + node.css('li').each do |n| + TYPES[n.at_css('a').content] = 'Pragmas' + end - def include_default_entry? - slug !~ /\Aindex/ + node = at_css('#Standard-Modules') + node = node.next_element while node.name != 'ul' + node.css('li').each do |n| + TYPES[n.at_css('a').content] = 'Standard Modules' + end + + when 'perlutil' + css('dl > dt').each do |node| + TYPES[node['id']] = "Utilities" + end + end + + super end def get_name - at_css('h1').content.strip + slug end def get_type - case breadcrumbs[1] - when 'Language reference' - REPLACE_TYPES[breadcrumbs[2]] || 'Language' - when /\ACore modules/ - 'Core Modules' + case slug + when /perl.*faq/ + 'Manual: FAQs' else - type = REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1] - type.prepend 'Manual: ' if MANUAL_TYPES.include?(type) - type + if TYPES.key? name + REPLACE_TYPES[TYPES[name]] || TYPES[name] + else + 'Other' + end end end def additional_entries case slug + when 'perlfunc' + css(':not(p) + dl > dt').each_with_object [] do |node, entries| + entries << [node.content, node['id'], 'Functions'] + end when 'perlop' - css('h2').map do |node| - name = node.content - id = node.previous_element['name'] - [name, id] + css('h2').each_with_object [] do |node, entries| + entries << [node.content, node['id'], 'Operators'] end when 'perlvar' - css('#content_body > ul > li > b').map do |node| - name = node.content - id = node.previous_element['name'] - [name, id] + css('> dl > dt').each_with_object [] do |node, entries| + entries << [node.content, node['id'], 'Variables'] end else [] diff --git a/lib/docs/filters/perl/pre_clean_html.rb b/lib/docs/filters/perl/pre_clean_html.rb new file mode 100755 index 00000000..957bd431 --- /dev/null +++ b/lib/docs/filters/perl/pre_clean_html.rb @@ -0,0 +1,17 @@ +module Docs + class Perl + class PreCleanHtmlFilter < Filter + def call + css('#links', '.leading-notice', '.permalink').remove + + # Bug somewhere prevents these two ids from loading + if slug == 'perlvar' + at_css('#\$\"')['id'] = '$ls' + at_css('#\$\#')['id'] = '$hash' + end + + doc + end + end + end +end diff --git a/lib/docs/scrapers/perl.rb b/lib/docs/scrapers/perl.rb index 8c0462e5..90844c47 100644 --- a/lib/docs/scrapers/perl.rb +++ b/lib/docs/scrapers/perl.rb @@ -1,46 +1,62 @@ module Docs - class Perl < FileScraper + class Perl < UrlScraper self.name = 'Perl' self.type = 'perl' - self.root_path = 'index.html' +# self.root_path = 'index.html' + self.initial_paths = ['modules.html', 'perlutil.html', 'perl.html'] self.links = { home: 'https://www.perl.org/' } - html_filters.push 'perl/entries', 'perl/clean_html' + html_filters.push 'perl/pre_clean_html', 'perl/entries', 'perl/clean_html', 'title' + + options[:container] = '#perldocdiv' options[:skip] = %w( - preferences.html - perlartistic.html - perlgpl.html - perlhist.html - perltodo.html ) + perlbook perlcommunity perlexperiment perlartistic perlgpl perlhist + perlcn perljp perlko perltw + perlboot perlbot perlrepository perltodo perltooc perltoot ) - options[:skip_patterns] = [/\.pdf/, /delta\.html/] + options[:skip_patterns] = [/\Afunctions/, /\Avariables/, /\.pdf/, /delta/] options[:attribution] = <<-HTML - © 1993–2016 Larry Wall and others
+ © 1993–2020 Larry Wall and others
Licensed under the GNU General Public License version 1 or later, or the Artistic License.
The Perl logo is a trademark of the Perl Foundation. HTML + version '5.32' do + self.release = '5.32.0' + self.base_url = "https://perldoc.perl.org/#{self.release}/" + end + + version '5.30' do + self.release = '5.30.3' + self.base_url = "https://perldoc.perl.org/#{self.release}/" + end + + version '5.28' do + self.release = '5.28.3' + self.base_url = "https://perldoc.perl.org/#{self.release}/" + end + version '5.26' do - self.release = '5.26.0' + self.release = '5.26.3' self.base_url = "https://perldoc.perl.org/#{self.release}/" end version '5.24' do - self.release = '5.24.0' + self.release = '5.24.4' self.base_url = "https://perldoc.perl.org/#{self.release}/" end version '5.22' do - self.release = '5.22.0' + self.release = '5.22.4' self.base_url = "https://perldoc.perl.org/#{self.release}/" end version '5.20' do - self.release = '5.20.2' + self.release = '5.20.3' self.base_url = "https://perldoc.perl.org/#{self.release}/" end