From 2b5fd63cf4782395ce660b0ddf70c234ab2c5971 Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Mon, 5 Aug 2019 23:36:44 +0200 Subject: [PATCH] Update versions, use https and update filter for 0.24 and 0.25 --- lib/docs/filters/pandas/entries.rb | 8 +++++--- lib/docs/scrapers/pandas.rb | 22 ++++++++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/docs/filters/pandas/entries.rb b/lib/docs/filters/pandas/entries.rb index 2ef5a42b..badf5e22 100644 --- a/lib/docs/filters/pandas/entries.rb +++ b/lib/docs/filters/pandas/entries.rb @@ -2,8 +2,10 @@ module Docs class Pandas class EntriesFilter < Docs::EntriesFilter def get_name - if subpath.start_with?('generated') - name = at_css('dt').content.strip + if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) + name_node = at_css('dt') + name_node = at_css('h1') if name_node.nil? + name = name_node.content.strip name.sub! %r{\(.*}, '()' name.remove! %r{\s=.*} name.remove! %r{\A(class(method)?) (pandas\.)?} @@ -16,7 +18,7 @@ module Docs end def get_type - if subpath.start_with?('generated') + if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) css('.toctree-l2.current > a').last.content.remove(/\s\(.+?\)/) else 'Manual' diff --git a/lib/docs/scrapers/pandas.rb b/lib/docs/scrapers/pandas.rb index f5a2f831..e333da7f 100644 --- a/lib/docs/scrapers/pandas.rb +++ b/lib/docs/scrapers/pandas.rb @@ -14,45 +14,51 @@ module Docs options[:container] = '.document' options[:skip] = %w(internals.html release.html contributing.html whatsnew.html) + options[:skip_patterns] = [/whatsnew\//] options[:attribution] = <<-HTML © 2008–2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
Licensed under the 3-clause BSD License. HTML + version '0.25' do + self.release = '0.25.0' + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + end + version '0.24' do self.release = '0.24.2' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.23' do self.release = '0.23.4' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.22' do self.release = '0.22.0' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.21' do - self.release = '0.21.0' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.release = '0.21.1' + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.20' do self.release = '0.20.3' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.19' do self.release = '0.19.2' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end version '0.18' do self.release = '0.18.1' - self.base_url = "http://pandas.pydata.org/pandas-docs/version/#{self.release}/" + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" end def get_latest_version(opts)