From 2d1e8aa00cce957358cb741f180c3ff8dabfbefa Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Fri, 8 Mar 2019 15:13:11 +0100 Subject: [PATCH] Add better logging and get_latest_version implementations for 10 scrapers --- lib/docs/core/scraper.rb | 34 +++++++++++++++++++- lib/docs/scrapers/angular.rb | 4 +++ lib/docs/scrapers/angularjs.rb | 4 +++ lib/docs/scrapers/ansible.rb | 6 ++++ lib/docs/scrapers/apache.rb | 6 ++++ lib/docs/scrapers/apache_pig.rb | 6 ++++ lib/docs/scrapers/async.rb | 7 ++++ lib/docs/scrapers/babel.rb | 6 ++++ lib/docs/scrapers/backbone.rb | 7 ++++ lib/docs/scrapers/bash.rb | 7 ++++ lib/docs/scrapers/bluebird.rb | 4 +++ lib/docs/scrapers/pygame.rb | 1 + lib/tasks/updates.thor | 57 +++++++++++++++++++++------------ 13 files changed, 128 insertions(+), 21 deletions(-) diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb index 89191e48..a7e388a8 100644 --- a/lib/docs/core/scraper.rb +++ b/lib/docs/core/scraper.rb @@ -132,7 +132,7 @@ module Docs end end - def get_latest_version + def get_latest_version(&block) raise NotImplementedError end @@ -231,6 +231,38 @@ module Docs {} end + # Utility methods for get_latest_version + + def fetch(url, &block) + Request.run(url) do |response| + if response.success? + block.call response.body + else + block.call nil + end + end + end + + def fetch_doc(url, &block) + fetch(url) do |body| + parser = Parser.new(body) + block.call parser.html + end + end + + def fetch_json(url, &block) + fetch(url) do |body| + json = JSON.parse(body) + block.call json + end + end + + def get_npm_version(package, &block) + fetch_json("https://registry.npmjs.com/#{package}") do |json| + block.call json['dist-tags']['latest'] + end + end + module FixInternalUrlsBehavior def self.included(base) base.extend ClassMethods diff --git a/lib/docs/scrapers/angular.rb b/lib/docs/scrapers/angular.rb index c318ce25..fa03eb36 100644 --- a/lib/docs/scrapers/angular.rb +++ b/lib/docs/scrapers/angular.rb @@ -155,6 +155,10 @@ module Docs end end + def get_latest_version(&block) + get_npm_version('@angular/core', &block) + end + private def parse(response) diff --git a/lib/docs/scrapers/angularjs.rb b/lib/docs/scrapers/angularjs.rb index b8ff08b9..aa74ca1c 100644 --- a/lib/docs/scrapers/angularjs.rb +++ b/lib/docs/scrapers/angularjs.rb @@ -69,5 +69,9 @@ module Docs self.release = '1.2.32' self.base_url = "https://code.angularjs.org/#{release}/docs/partials/" end + + def get_latest_version(&block) + get_npm_version('angular', &block) + end end end diff --git a/lib/docs/scrapers/ansible.rb b/lib/docs/scrapers/ansible.rb index 2d62909a..60fb1953 100644 --- a/lib/docs/scrapers/ansible.rb +++ b/lib/docs/scrapers/ansible.rb @@ -87,5 +87,11 @@ module Docs quickstart.html list_of_all_modules.html) end + + def get_latest_version(&block) + fetch_doc('https://docs.ansible.com/ansible/latest/index.html') do |doc| + block.call doc.at_css('.DocSiteProduct-CurrentVersion').content.strip + end + end end end diff --git a/lib/docs/scrapers/apache.rb b/lib/docs/scrapers/apache.rb index 9ee82f12..5eca041e 100644 --- a/lib/docs/scrapers/apache.rb +++ b/lib/docs/scrapers/apache.rb @@ -33,5 +33,11 @@ module Docs © 2018 The Apache Software Foundation
Licensed under the Apache License, Version 2.0. HTML + + def get_latest_version(&block) + fetch_doc('http://httpd.apache.org/docs/') do |doc| + block.call doc.at_css('#apcontents > ul a')['href'][0...-1] + end + end end end diff --git a/lib/docs/scrapers/apache_pig.rb b/lib/docs/scrapers/apache_pig.rb index 65897a78..15c477bf 100644 --- a/lib/docs/scrapers/apache_pig.rb +++ b/lib/docs/scrapers/apache_pig.rb @@ -43,5 +43,11 @@ module Docs self.base_url = "https://pig.apache.org/docs/r#{release}/" end + def get_latest_version(&block) + fetch_doc('https://pig.apache.org/') do |doc| + item = doc.at_css('div[id="menu_1.2"] > .menuitem:last-child') + block.call item.content.strip.sub(/Release /, '') + end + end end end diff --git a/lib/docs/scrapers/async.rb b/lib/docs/scrapers/async.rb index 40022f19..930820b4 100644 --- a/lib/docs/scrapers/async.rb +++ b/lib/docs/scrapers/async.rb @@ -17,5 +17,12 @@ module Docs © 2010–2018 Caolan McMahon
Licensed under the MIT License. HTML + + def get_latest_version(&block) + fetch_doc('https://caolan.github.io/async/') do |doc| + version = doc.at_css('#version-dropdown > a').content.strip[1..-1] + block.call version + end + end end end diff --git a/lib/docs/scrapers/babel.rb b/lib/docs/scrapers/babel.rb index c9e40212..cc8bec6d 100644 --- a/lib/docs/scrapers/babel.rb +++ b/lib/docs/scrapers/babel.rb @@ -22,5 +22,11 @@ module Docs stub '' do '
' end + + def get_latest_version(&block) + fetch_doc('https://babeljs.io/docs/en/') do |doc| + block.call doc.at_css('a[href="/versions"] > h3').content + end + end end end diff --git a/lib/docs/scrapers/backbone.rb b/lib/docs/scrapers/backbone.rb index b72b1084..2fb7662f 100644 --- a/lib/docs/scrapers/backbone.rb +++ b/lib/docs/scrapers/backbone.rb @@ -20,5 +20,12 @@ module Docs © 2010–2016 Jeremy Ashkenas, DocumentCloud
Licensed under the MIT License. HTML + + def get_latest_version(&block) + fetch_doc('https://backbonejs.org/') do |doc| + version = doc.at_css('.version').content + block.call version[1...-1] + end + end end end diff --git a/lib/docs/scrapers/bash.rb b/lib/docs/scrapers/bash.rb index feb0ddce..b62868a6 100644 --- a/lib/docs/scrapers/bash.rb +++ b/lib/docs/scrapers/bash.rb @@ -17,5 +17,12 @@ module Docs Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
Licensed under the GNU Free Documentation License. HTML + + def get_latest_version(&block) + fetch('https://www.gnu.org/software/bash/manual/html_node/index.html') do |body| + version = body.scan(/, Version ([0-9.]+)/)[0][0] + block.call version[0...-1] + end + end end end diff --git a/lib/docs/scrapers/bluebird.rb b/lib/docs/scrapers/bluebird.rb index e5cd6b59..73888004 100644 --- a/lib/docs/scrapers/bluebird.rb +++ b/lib/docs/scrapers/bluebird.rb @@ -18,5 +18,9 @@ module Docs © 2013–2017 Petka Antonov
Licensed under the MIT License. HTML + + def get_latest_version(&block) + get_npm_version('bluebird', &block) + end end end diff --git a/lib/docs/scrapers/pygame.rb b/lib/docs/scrapers/pygame.rb index 9da3148d..892619e4 100644 --- a/lib/docs/scrapers/pygame.rb +++ b/lib/docs/scrapers/pygame.rb @@ -2,6 +2,7 @@ module Docs class Pygame < UrlScraper self.type = 'simple' self.release = '1.9.4' + self.base_url = 'https://www.pygame.org/docs/' self.root_path = 'py-modindex.html' self.links = { home: 'https://www.pygame.org/', diff --git a/lib/tasks/updates.thor b/lib/tasks/updates.thor index 35d52e28..eb3467f2 100644 --- a/lib/tasks/updates.thor +++ b/lib/tasks/updates.thor @@ -9,7 +9,8 @@ class UpdatesCLI < Thor super end - desc 'check [doc]...', 'Check for outdated documentations' + desc 'check [--verbose] [doc]...', 'Check for outdated documentations' + option :verbose, :type => :boolean def check(*names) # Convert names to a list of Scraper instances # Versions are omitted, if v10 is outdated than v8 is aswell @@ -27,13 +28,14 @@ class UpdatesCLI < Thor result end - outdated = results.select {|result| result.is_a?(Hash) && result[:is_outdated]} - return if outdated.empty? + valid_results = results.select {|result| result.is_a?(Hash)} - logger.info("Outdated documentations (#{outdated.length}):") - outdated.each do |result| - logger.info("#{result[:name]}: #{result[:current_version]} -> #{result[:latest_version]}") - end + up_to_date_results = valid_results.select {|result| !result[:is_outdated]} + outdated_results = valid_results.select {|result| result[:is_outdated]} + + log_results('Up-to-date', up_to_date_results) if options[:verbose] and !up_to_date_results.empty? + logger.info("") if options[:verbose] and !up_to_date_results.empty? and !outdated_results.empty? + log_results('Outdated', outdated_results) unless outdated_results.empty? rescue Docs::DocNotFound => error logger.error(error) logger.info('Run "thor docs:list" to see the list of docs.') @@ -42,33 +44,48 @@ class UpdatesCLI < Thor private def check_doc(doc) - # Scraper versions are always sorted from new to old + # Newer scraper versions always come before older scraper versions # Therefore, the first item's release value is the latest current scraper version # # For example, a scraper could scrape 3 versions: 10, 11 and 12 - # doc.versions.first would be the scraper for version 12 if the scraper is written like all the other scrapers are + # doc.versions.first would be the scraper for version 12 instance = doc.versions.first.new + return nil unless instance.class.method_defined?(:options) + current_version = instance.options[:release] return nil if current_version.nil? - latest_version = instance.get_latest_version - return nil if latest_version.nil? + logger.debug("Checking #{doc.name}") + + instance.get_latest_version do |latest_version| + return { + name: doc.name, + current_version: current_version, + latest_version: latest_version, + is_outdated: instance.is_outdated(current_version, latest_version) + } + end - { - name: doc.name, - current_version: current_version, - latest_version: latest_version, - is_outdated: instance.is_outdated(current_version, latest_version) - } + return nil rescue NotImplementedError - logger.warn("Can't check #{doc.name}, get_latest_version is not implemented") - rescue => error - logger.error("Error while checking #{doc.name}: #{error}") + logger.warn("Couldn't check #{doc.name}, get_latest_version is not implemented") + rescue + logger.error("Error while checking #{doc.name}") + raise + end + + def log_results(label, results) + logger.info("#{label} documentations (#{results.length}):") + + results.each do |result| + logger.info("#{result[:name]}: #{result[:current_version]} -> #{result[:latest_version]}") + end end def logger @logger ||= Logger.new($stdout).tap do |logger| + logger.level = options[:verbose] ? Logger::DEBUG : Logger::INFO logger.formatter = proc do |severity, datetime, progname, msg| prefix = severity != "INFO" ? "[#{severity}] " : "" "#{prefix}#{msg}\n"