Finish pandas scraper

pull/496/head
Thibaut Courouble 8 years ago
parent 1941687bf0
commit 659cf94fe8

Binary file not shown.

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 130 KiB

After

Width:  |  Height:  |  Size: 130 KiB

@ -1,7 +1,7 @@
[
[
"2016-09-18",
"New documentation: <a href=\"/twig/\">Twig</a>"
"New documentations: <a href=\"/pandas/\">pandas</a> and <a href=\"/twig/\">Twig</a>"
], [
"2016-09-05",
"New documentations: <a href=\"/fish/\">Fish</a>, <a href=\"/bottle/\">Bottle</a> and <a href=\"/scikit_image/\">scikit-image</a>"

@ -399,6 +399,11 @@ credits = [
'2010-2016 Padrino',
'MIT',
'https://raw.githubusercontent.com/padrino/padrino-framework/master/padrino/LICENSE.txt'
], [
'pandas',
'2011-2012 Lambda Foundry, Inc. and PyData Development Team<br>&copy; 2008-2011 AQR Capital Management, LLC<br>&copy; 2008-2014 the pandas development team',
'BSD',
'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE'
], [
'Perl',
'1993-2016 Larry Wall and others',

@ -119,6 +119,7 @@
._icon-fish:before { background-position: -5rem -6rem; @extend %darkIconFix !optional; }
._icon-scikit_image:before { background-position: -6rem -6rem; }
._icon-twig:before { background-position: -7rem -6rem; }
._icon-pandas:before { background-position: -8rem -6rem; }
._icon-bottle:before { background-position: 0 -7rem; }
._icon-docker:before { background-position: -1rem -7rem; }
._icon-cakephp:before { background-position: -2rem -7rem; }

@ -1,5 +1,6 @@
%sphinx {
h2, h3 { @extend %block-heading; }
h2 { @extend %block-heading; }
h3 { @extend %block-label; }
h4 { font-size: 1em; }
> dl:not(.docutils) > dt { @extend %block-label, %label-blue; }
dd > dl:not(.docutils) > dt { @extend %block-label; }

@ -4,6 +4,17 @@ module Docs
def call
@doc = at_css('.body')
if root_page?
css('a[href$=".zip"]', 'a[href$=".pdf"]', '.toctree-wrapper').remove
at_css('h1').content = 'pandas'
end
css('h2 > a.reference', 'h3 > a.reference').each do |node|
node.before(node.children).remove
end
css('.anchor-link').remove
doc
end
end

@ -2,20 +2,25 @@ module Docs
class Pandas
class EntriesFilter < Docs::EntriesFilter
def get_name
if dt = at_css('dt')
name = dt.content.strip
if subpath.start_with?('generated')
name = at_css('dt').content.strip
name.sub! %r{\(.*}, '()'
name.remove! %r{\s=.*}
name.remove! %r{\A(class(method)?) }
name.remove! %r{\A(class(method)?) (pandas\.)?}
else
name = at_css('h1').content.strip
name.prepend "#{css('.toctree-l1 > a:not([href^="http"])').to_a.index(at_css('.toctree-l1.current > a')) + 1}. "
end
name.remove! "\u{00B6}"
name
end
def get_type
css(".toctree-l2.current > a").last.content
if subpath.start_with?('generated')
css('.toctree-l2.current > a').last.content
else
'Manual'
end
end
end
end

@ -49,7 +49,7 @@ module Docs
end
css('dt').each do |node|
next unless node['id'] || node.at_css('code')
next unless node['id'] || node.at_css('code, .classifier')
links = []
links << node.children.last.remove while node.children.last.try(:name) == 'a'
node.inner_html = "<code>#{node.content.strip}</code> "

@ -2,7 +2,7 @@ module Docs
class Pandas < UrlScraper
self.name = 'pandas'
self.type = 'sphinx'
self.root_path = 'api.html'
self.root_path = 'index.html'
self.links = {
home: 'http://pandas.pydata.org/',
code: 'https://github.com/pydata/pandas'
@ -13,12 +13,13 @@ module Docs
# Cannot take only the body, as the sidebar gives info about the type.
options[:container] = '.document'
# Using the above container, leads to tons of anchors. Only keep the generated/ pages.
options[:only_patterns] = [/\Agenerated\//]
options[:skip] = %w(internals.html release.html contributing.html whatsnew.html)
options[:attribution] = <<-HTML
&copy; 2008&ndash;2014, the pandas development team.<br>
Licensed under the BSD license.
&copy; 2011&ndash;2012 Lambda Foundry, Inc. and PyData Development Team<br>
&copy; 2008&ndash;2011 AQR Capital Management, LLC<br>
&copy; 2008&ndash;2014 the pandas development team<br>
Licensed under the 3-clause BSD License.
HTML
version '0.18' do

Binary file not shown.

Before

Width:  |  Height:  |  Size: 307 B

After

Width:  |  Height:  |  Size: 324 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 414 B

After

Width:  |  Height:  |  Size: 441 B

Loading…
Cancel
Save