Finish Perl scraper

pull/393/merge
Thibaut Courouble 9 years ago
parent c6da1d0c23
commit f78b3658b2

Binary file not shown.

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 111 KiB

After

Width:  |  Height:  |  Size: 113 KiB

@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
(result[@_groupFor(type)] ||= []).push(type)
result.filter (e) -> e.length > 0
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started)($|[\s\):])/i
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started|manual)($|[\s\):])/i
_groupFor: (type) ->
if GUIDES_RGX.test(type.name)

@ -1,5 +1,8 @@
[
[
"2016-04-17",
"New documentation: <a href=\"/perl/\">Perl</a>"
], [
"2016-04-10",
"New documentations: <a href=\"/browser_support_tables/\">Support tables (caniuse.com)</a>, <a href=\"/gcc/\">GCC</a> and <a href=\"/gnu_fortran/\">GNU Fortran</a>"
], [

@ -340,6 +340,11 @@ credits = [
'2010-2016 The OpenTSDB Authors',
'LGPLv2.1',
'https://raw.githubusercontent.com/OpenTSDB/opentsdb.net/gh-pages/COPYING.LESSER'
], [
'Perl',
'1993-2016 Larry Wall and others',
'GPLv1',
'http://perldoc.perl.org/index-licence.html'
], [
'Phalcon',
'2011-2015 Phalcon Framework Team',

@ -1,6 +0,0 @@
#= require views/pages/base
class app.views.PerlPage extends app.views.BasePage
prepare: ->
@highlightCode @findAllByTag('pre'), 'perl'
return

@ -9,6 +9,7 @@ class app.views.SimplePage extends app.views.BasePage
app.views.EmberPage =
app.views.GoPage =
app.views.MeteorPage =
app.views.PerlPage =
app.views.RamdaPage =
app.views.ReactPage =
app.views.RethinkdbPage =

@ -136,3 +136,4 @@
._icon-browser_support_tables:before { background-position: 0rem -11rem; }
._icon-gnu_fortran:before { background-position: -1rem -11rem; }
._icon-gcc:before { background-position: -2rem -11rem; }
._icon-perl:before { background-position: -3rem -11rem; }

@ -1,12 +1,5 @@
._perl {
@extend %simple;
h2 { @extend %block-heading; }
h3 { @extend %block-label; }
h4 { @extend %block-label, %label-blue; }
.perlvar,
.perlfunction {
@extend %block-label, %label-blue;
}
> h4 { @extend %block-label; }
}

@ -1,16 +1,9 @@
module Docs
class Perl
class CleanHtmlFilter < Filter
REMOVE_LIST = %w(
noscript
#recent_pages
#from_search
#page_index
.mod_az_list
)
def call
root_page? ? root : other
doc
end
def root
@ -20,12 +13,13 @@ module Docs
def other
@doc = at_css('#content_body')
css(*REMOVE_LIST).remove
css('noscript', '#recent_pages', '#from_search', '#page_index', '.mod_az_list').remove
css('h1, h2, h3, h4').each do |node|
node.name = node.name.sub(/\d/) { |i| i.to_i + 1 }
end
css('h4').each { |node| node.name = 'h5' }
css('h3').each { |node| node.name = 'h4' }
css('h2').each { |node| node.name = 'h3' }
css('h1').drop(1).each { |node| node.name = 'h2' }
at_css('h2').name = 'h1'
css('a[name] + h2', 'a[name] + h3', 'a[name] + h4', 'a[name] + h5').each do |node|
node['id'] = node.previous_element['name']
@ -39,7 +33,19 @@ module Docs
node.css('li').each do |li|
li.content = li.content + "\n"
end
node.content = node.content
node.content = node.content
node.inner_html = node.inner_html.strip_heredoc
node['data-language'] = 'perl'
end
if slug =~ /functions/ || slug == 'perlvar'
css('ul > li[id]').each do |node|
heading = node.at_css('b')
heading.name = 'h2'
heading['id'] = node['id']
node.parent.before(node.children)
node.remove
end
end
doc

@ -5,19 +5,19 @@ module Docs
'Platform specific' => 'Platform Specific',
'Internals and C language interface' => 'Internals',
'perlop' => 'Perl Operators',
'perlvar' => 'Perl Variables',
'perlop' => 'Operators',
'perlvar' => 'Variables',
'Functions' => 'Functions'
}
MANUAL_TYPES = %w(Overview Tutorials FAQs)
def breadcrumbs
at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
@breadcrumbs ||= at_css('#breadcrumbs').content.split('>').each { |s| s.strip! }
end
def include_default_entry?
not slug =~ /\Aindex/ and
not slug =~ /perlop\z/ and
not slug =~ /perlvar/
slug !~ /\Aindex/
end
def get_name
@ -26,41 +26,34 @@ module Docs
def get_type
case breadcrumbs[1]
when 'Language reference'
REPLACE_TYPES[breadcrumbs[2]] || 'Language Reference'
when /\ACore modules/
'Core Modules'
else
REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
when 'Language reference'
REPLACE_TYPES[breadcrumbs[2]] || 'Language'
when /\ACore modules/
'Core Modules'
else
type = REPLACE_TYPES[breadcrumbs[1]] || breadcrumbs[1]
type.prepend 'Manual: ' if MANUAL_TYPES.include?(type)
type
end
end
def additional_entries
entries = []
case slug
when /perlop\z/
css('h2').each do |node|
name = node.content
id = node.previous_element['name']
entries << [name, id, get_type]
end
when /perlvar/
css('#content_body > ul > li > b').each do |node|
node['class'] = 'perlvar'
name = node.content
id = node.previous_element['name']
entries << [name, id, get_type]
end
when /functions/
css('#content_body > ul > li > b').each do |node|
node['class'] = 'perlfunction'
end
when 'perlop'
css('h2').map do |node|
name = node.content
id = node.previous_element['name']
[name, id]
end
when 'perlvar'
css('#content_body > ul > li > b').map do |node|
name = node.content
id = node.previous_element['name']
[name, id]
end
else
[]
end
entries
end
end
end

@ -2,9 +2,7 @@ module Docs
class Perl < FileScraper
self.name = 'Perl'
self.type = 'perl'
self.release = '5.22.0'
self.dir = ''
self.base_url = 'http://perldoc.perl.org/'
self.dir = '/Users/Thibaut/DevDocs/Docs/Perl'
self.root_path = 'index.html'
self.links = {
home: 'https://www.perl.org/'
@ -17,19 +15,24 @@ module Docs
perlartistic.html
perlgpl.html
perlhist.html
perltodo.html
perlunifaq.html
)
perltodo.html )
options[:skip_patterns] = [
/\.pdf/,
/delta\.html/,
/\Aperlfaq/
]
options[:skip_patterns] = [/\.pdf/, /delta\.html/]
options[:attribution] = <<-HTML
&copy; 2010&ndash;2015 <br>
Dual Licensed under the GNU General Public License version 1+ or the Artistic License.
&copy; 1993&ndash;2016 Larry Wall and others<br>
Licensed under the GNU General Public License version 1 or later, or the Artistic License.<br>
The Perl logo is a trademark of the Perl Foundation.
HTML
version '5.22' do
self.release = '5.22.0'
self.base_url = "http://perldoc.perl.org/#{self.release}/"
end
version '5.20' do
self.release = '5.20.2'
self.base_url = "http://perldoc.perl.org/#{self.release}/"
end
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 816 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 636 B

After

Width:  |  Height:  |  Size: 2.1 KiB

Loading…
Cancel
Save