diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb
index 3887cc14..12b4a68c 100644
--- a/lib/docs/core/scrapers/url_scraper.rb
+++ b/lib/docs/core/scrapers/url_scraper.rb
@@ -3,16 +3,19 @@ module Docs
class << self
attr_accessor :params
attr_accessor :headers
+ attr_accessor :force_gzip
def inherited(subclass)
super
subclass.params = params.deep_dup
subclass.headers = headers.deep_dup
+ subclass.force_gzip = force_gzip
end
end
self.params = {}
self.headers = { 'User-Agent' => 'DevDocs' }
+ self.force_gzip = false
private
@@ -25,7 +28,9 @@ module Docs
end
def request_options
- { params: self.class.params, headers: self.class.headers }
+ options = { params: self.class.params, headers: self.class.headers }
+ options[:accept_encoding] = 'gzip' if self.class.force_gzip
+ options
end
def process_response?(response)
diff --git a/lib/docs/filters/tensorflow/clean_html.rb b/lib/docs/filters/tensorflow/clean_html.rb
index 976843cb..6f20b8e7 100644
--- a/lib/docs/filters/tensorflow/clean_html.rb
+++ b/lib/docs/filters/tensorflow/clean_html.rb
@@ -12,9 +12,11 @@ module Docs
css('pre').each do |node|
node.inner_html = node.inner_html.strip_heredoc
- if node['class'].include?('lang-c++')
+ next unless node['class']
+
+ if node['class'] =~ /lang-c\+\+/i # plus signs escaped: a bare `c++` is a possessive quantifier and would match any "lang-c..." class
node['data-language'] = 'cpp'
- elsif node['class'].include?('lang-python')
+ elsif node['class'] =~ /lang-python/i
node['data-language'] = 'python'
end
end
diff --git a/lib/docs/filters/tensorflow/entries.rb b/lib/docs/filters/tensorflow/entries.rb
index 80b4e3db..8f71a1b2 100644
--- a/lib/docs/filters/tensorflow/entries.rb
+++ b/lib/docs/filters/tensorflow/entries.rb
@@ -9,13 +9,21 @@ module Docs
end
def get_type
- type = name.dup
- type.remove! %r{\ \(.*\)}
- type.remove! 'tensorflow::'
- type
+ if subpath.start_with?('tutorials')
+ 'Tutorials'
+ elsif subpath.start_with?('how_tos')
+ 'How-Tos'
+ else
+ type = name.dup
+ type.remove! %r{\ \(.*\)}
+ type.remove! 'tensorflow::'
+ type
+ end
end
def additional_entries
+ return [] if subpath.start_with?('tutorials') || subpath.start_with?('how_tos')
+
css('h2 code', 'h3 code', 'h4 code', 'h5 code').map do |node|
name = node.content
name.sub! %r{\(.*}, '()'
diff --git a/lib/docs/scrapers/npm.rb b/lib/docs/scrapers/npm.rb
index cd23a4d4..1a900a4e 100644
--- a/lib/docs/scrapers/npm.rb
+++ b/lib/docs/scrapers/npm.rb
@@ -4,6 +4,7 @@ module Docs
self.type = 'npm'
self.release = '3.10.2'
self.base_url = 'https://docs.npmjs.com/'
+ self.force_gzip = true
self.links = {
home: 'https://www.npmjs.com/',
code: 'https://github.com/npm/npm'
@@ -27,11 +28,5 @@ module Docs
Licensed under the npm License.
npm is a trademark of npm, Inc.
HTML
-
- private
-
- def request_options
- super.merge accept_encoding: 'gzip'
- end
end
end
diff --git a/lib/docs/scrapers/tensorflow.rb b/lib/docs/scrapers/tensorflow.rb
index 23c2c668..e19a83d0 100644
--- a/lib/docs/scrapers/tensorflow.rb
+++ b/lib/docs/scrapers/tensorflow.rb
@@ -2,6 +2,8 @@ module Docs
class Tensorflow < UrlScraper
self.name = 'TensorFlow'
self.type = 'tensorflow'
+ self.root_path = 'index.html'
+ self.force_gzip = true
self.links = {
home: 'https://www.tensorflow.org/',
code: 'https://github.com/tensorflow/tensorflow'
@@ -11,25 +13,33 @@ module Docs
options[:container] = '#content'
+ options[:fix_urls] = ->(url) do
+ url.sub! %r{\Ahttps://www\.tensorflow\.org/versions(.+)/([^\.\#]+)(#.*)?\z}, 'https://www.tensorflow.org/versions\1/\2.html\3' # host dots escaped so only the literal domain is rewritten
+ url
+ end
+
options[:attribution] = <<-HTML
© 2015 The TensorFlow Authors. All rights reserved.
Licensed under the Apache 2.0 License.
HTML
version 'Python' do
- self.base_url = 'https://www.tensorflow.org/versions/r0.10/api_docs/python/'
- self.release = '0.10'
+ self.base_url = 'https://www.tensorflow.org/versions/r0.11/api_docs/python/'
+ self.release = '0.11'
end
version 'C++' do
- self.base_url = 'https://www.tensorflow.org/versions/r0.10/api_docs/cc/'
- self.release = '0.10'
-
- options[:fix_urls] = ->(url) {
- url.sub! '/api_docs/cc/class', '/api_docs/cc/Class'
- url.sub! '/api_docs/cc/struct', '/api_docs/cc/Struct'
- url
- }
+ self.base_url = 'https://www.tensorflow.org/versions/r0.11/api_docs/cc/'
+ self.release = '0.11'
+ end
+
+ version 'Guide' do
+ self.base_url = 'https://www.tensorflow.org/versions/r0.11/'
+ self.release = '0.11'
+ self.root_path = 'tutorials/index.html'
+ self.initial_paths = %w(how_tos/index.html)
+
+ options[:only_patterns] = [/\Atutorials/, /\Ahow_tos/]
end
end
end