From 0d4c6b51bbb597efe3ed3cfe2b9c1876f2db29ac Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Sun, 13 Nov 2016 11:18:02 -0500 Subject: [PATCH] Update TensorFlow documentation (0.11) --- lib/docs/core/scrapers/url_scraper.rb | 7 +++++- lib/docs/filters/tensorflow/clean_html.rb | 6 +++-- lib/docs/filters/tensorflow/entries.rb | 16 +++++++++--- lib/docs/scrapers/npm.rb | 7 +----- lib/docs/scrapers/tensorflow.rb | 30 +++++++++++++++-------- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb index 3887cc14..12b4a68c 100644 --- a/lib/docs/core/scrapers/url_scraper.rb +++ b/lib/docs/core/scrapers/url_scraper.rb @@ -3,16 +3,19 @@ module Docs class << self attr_accessor :params attr_accessor :headers + attr_accessor :force_gzip def inherited(subclass) super subclass.params = params.deep_dup subclass.headers = headers.deep_dup + subclass.force_gzip = force_gzip end end self.params = {} self.headers = { 'User-Agent' => 'DevDocs' } + self.force_gzip = false private @@ -25,7 +28,9 @@ module Docs end def request_options - { params: self.class.params, headers: self.class.headers } + options = { params: self.class.params, headers: self.class.headers } + options[:accept_encoding] = 'gzip' if self.class.force_gzip + options end def process_response?(response) diff --git a/lib/docs/filters/tensorflow/clean_html.rb b/lib/docs/filters/tensorflow/clean_html.rb index 976843cb..6f20b8e7 100644 --- a/lib/docs/filters/tensorflow/clean_html.rb +++ b/lib/docs/filters/tensorflow/clean_html.rb @@ -12,9 +12,11 @@ module Docs css('pre').each do |node| node.inner_html = node.inner_html.strip_heredoc - if node['class'].include?('lang-c++') + next unless node['class'] + + if node['class'] =~ /lang-c++/i node['data-language'] = 'cpp' - elsif node['class'].include?('lang-python') + elsif node['class'] =~ /lang-python/i node['data-language'] = 'python' end end diff --git a/lib/docs/filters/tensorflow/entries.rb b/lib/docs/filters/tensorflow/entries.rb index 80b4e3db..8f71a1b2 100644 --- a/lib/docs/filters/tensorflow/entries.rb +++ b/lib/docs/filters/tensorflow/entries.rb @@ -9,13 +9,21 @@ module Docs end def get_type - type = name.dup - type.remove! %r{\ \(.*\)} - type.remove! 'tensorflow::' - type + if subpath.start_with?('tutorials') + 'Tutorials' + elsif subpath.start_with?('how_tos') + 'How-Tos' + else + type = name.dup + type.remove! %r{\ \(.*\)} + type.remove! 'tensorflow::' + type + end end def additional_entries + return [] if subpath.start_with?('tutorials') || subpath.start_with?('how_tos') + css('h2 code', 'h3 code', 'h4 code', 'h5 code').map do |node| name = node.content name.sub! %r{\(.*}, '()' diff --git a/lib/docs/scrapers/npm.rb b/lib/docs/scrapers/npm.rb index cd23a4d4..1a900a4e 100644 --- a/lib/docs/scrapers/npm.rb +++ b/lib/docs/scrapers/npm.rb @@ -4,6 +4,7 @@ module Docs self.type = 'npm' self.release = '3.10.2' self.base_url = 'https://docs.npmjs.com/' + self.force_gzip = true self.links = { home: 'https://www.npmjs.com/', code: 'https://github.com/npm/npm' @@ -27,11 +28,5 @@ module Docs Licensed under the npm License.
npm is a trademark of npm, Inc. HTML - - private - - def request_options - super.merge accept_encoding: 'gzip' - end end end diff --git a/lib/docs/scrapers/tensorflow.rb b/lib/docs/scrapers/tensorflow.rb index 23c2c668..e19a83d0 100644 --- a/lib/docs/scrapers/tensorflow.rb +++ b/lib/docs/scrapers/tensorflow.rb @@ -2,6 +2,8 @@ module Docs class Tensorflow < UrlScraper self.name = 'TensorFlow' self.type = 'tensorflow' + self.root_path = 'index.html' + self.force_gzip = true self.links = { home: 'https://www.tensorflow.org/', code: 'https://github.com/tensorflow/tensorflow' @@ -11,25 +13,33 @@ module Docs options[:container] = '#content' + options[:fix_urls] = ->(url) do + url.sub! %r{\Ahttps://www.tensorflow.org/versions(.+)/([^\.\#]+)(#.*)?\z}, 'https://www.tensorflow.org/versions\1/\2.html\3' + url + end + options[:attribution] = <<-HTML © 2015 The TensorFlow Authors. All rights reserved.
Licensed under the Apache 2.0 License. HTML version 'Python' do - self.base_url = 'https://www.tensorflow.org/versions/r0.10/api_docs/python/' - self.release = '0.10' + self.base_url = 'https://www.tensorflow.org/versions/r0.11/api_docs/python/' + self.release = '0.11' end version 'C++' do - self.base_url = 'https://www.tensorflow.org/versions/r0.10/api_docs/cc/' - self.release = '0.10' - - options[:fix_urls] = ->(url) { - url.sub! '/api_docs/cc/class', '/api_docs/cc/Class' - url.sub! '/api_docs/cc/struct', '/api_docs/cc/Struct' - url - } + self.base_url = 'https://www.tensorflow.org/versions/r0.11/api_docs/cc/' + self.release = '0.11' + end + + version 'Guide' do + self.base_url = 'https://www.tensorflow.org/versions/r0.11/' + self.release = '0.11' + self.root_path = 'tutorials/index.html' + self.initial_paths = %w(how_tos/index.html) + + options[:only_patterns] = [/\Atutorials/, /\Ahow_tos/] end end end