Finish TensorFlow scraper

pull/382/head
Thibaut Courouble 9 years ago
parent cd55b861ca
commit 2bec61a1a7

Binary file not shown.

Before

Width:  |  Height:  |  Size: 39 KiB

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 103 KiB

After

Width:  |  Height:  |  Size: 103 KiB

@ -1,5 +1,8 @@
[ [
[ [
"2016-03-06",
"New documentation: <a href=\"/tensorflow/\">TensorFlow</a>"
], [
"2016-02-28", "2016-02-28",
"New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>" "New documentations: <a href=\"/codeigniter/\">CodeIgniter</a>, <a href=\"/nginx_lua_module/\">nginx Lua Module</a> and <a href=\"/influxdata/\">InfluxData</a>"
], [ ], [

@ -414,6 +414,11 @@ credits = [
'The Regents of the University of California, Sun Microsystems, Inc., Scriptics Corporation, and other parties', 'The Regents of the University of California, Sun Microsystems, Inc., Scriptics Corporation, and other parties',
'Tcl/Tk', 'Tcl/Tk',
'http://tcl.tk/software/tcltk/license.html' 'http://tcl.tk/software/tcltk/license.html'
], [
'TensorFlow',
'2015 The TensorFlow Authors',
'Apache',
'https://raw.githubusercontent.com/tensorflow/tensorflow/master/LICENSE'
], [ ], [
'Underscore.js', 'Underscore.js',
'2009-2015 Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors', '2009-2015 Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors',

@ -0,0 +1,7 @@
#= require views/pages/base
# Page view for TensorFlow documentation pages.
class app.views.TensorflowPage extends app.views.BasePage
  # Highlights the page's code samples: first the <pre> elements whose class
  # attribute contains "lang-c++" (as C++), then the `pre.lang-python`
  # elements (as Python).
  prepare: ->
    cppBlocks = @findAll('pre[class*="lang-c++"]')
    @highlightCode cppBlocks, 'cpp'

    pythonBlocks = @findAll('pre.lang-python')
    @highlightCode pythonBlocks, 'python'

    # Explicit bare return so the result of the last call is not returned.
    return

@ -129,3 +129,4 @@
._icon-ramda:before { background-position: -3rem -10rem; @extend %darkIconFix !optional; } ._icon-ramda:before { background-position: -3rem -10rem; @extend %darkIconFix !optional; }
._icon-codeigniter:before { background-position: -4rem -10rem; @extend %darkIconFix !optional; } ._icon-codeigniter:before { background-position: -4rem -10rem; @extend %darkIconFix !optional; }
._icon-influxdata:before { background-position: -5rem -10rem; @extend %darkIconFix !optional; } ._icon-influxdata:before { background-position: -5rem -10rem; @extend %darkIconFix !optional; }
._icon-tensorflow:before { background-position: -6rem -10rem; }

@ -1,12 +1,6 @@
._tensorflow { ._tensorflow {
h2, h3, h4 { @extend %block-heading, %label-blue; } @extend %simple;
p > code, li > code { @extend %label; }
// These are used for lists of arguments and return values in the docs. h4 { @extend %block-label; }
b > code { > .toc ul ul { margin: .25rem 0; }
@extend %label;
background-color: initial;
font-weight: 500;
font-size: 1.1em;
}
} }

@ -48,7 +48,7 @@ module Docs
def slug def slug
slug = @slug || name.try(:downcase) slug = @slug || name.try(:downcase)
version? ? "#{slug}~#{version.downcase.gsub(/[^a-z0-9\_\.]/, '_')}" : slug version? ? "#{slug}~#{version.downcase.gsub('+', 'p').gsub(/[^a-z0-9\_\.]/, '_')}" : slug
end end
def path def path

@ -3,6 +3,16 @@ module Docs
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
css('hr').remove css('hr').remove
css('pre > code').each do |node|
node.parent['class'] = node['class']
node.parent.content = node.content
end
css('b').each do |node|
node.before(node.children).remove
end
doc doc
end end
end end

@ -2,31 +2,26 @@ module Docs
class Tensorflow class Tensorflow
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_name def get_name
at_css('h1').content name = at_css('h1').content.strip
name.remove! 'class '
name.remove! 'struct '
name
end end
def get_type def get_type
at_css('h1').content type = name.dup
end type.remove! %r{\ \(.*\)}
type.remove! 'tensorflow::'
def include_default_entry? type
false
end end
def additional_entries def additional_entries
entries = [] css('h2 code', 'h3 code', 'h4 code', 'h5 code').map do |node|
name = node.content
# Just get everything that is a code tag inside a header tag. I haven't name.sub! %r{\(.*}, '()'
# checked if all of these are necessary. name = name.split(' ').last
ents = css('h5 code') + css('h4 code') + css('h3 code') + css('h2 code') [name, node.parent['id']]
ents.each do |node|
name = node.content.sub(/\(.*\)/, '()')
id = node.parent['id']
entries << [name, id, get_name]
end end
entries
end end
end end
end end

@ -1,18 +1,31 @@
module Docs module Docs
class Tensorflow < UrlScraper class Tensorflow < UrlScraper
self.name = 'TensorFlow' self.name = 'TensorFlow'
self.slug = 'tensorflow'
self.type = 'tensorflow' self.type = 'tensorflow'
self.release = '0.6.0-py'
self.base_url = 'https://www.tensorflow.org/versions/0.6.0/api_docs/python/'
options[:container] = '#content' html_filters.push 'tensorflow/entries', 'tensorflow/clean_html'
html_filters.push 'tensorflow/entries', 'tensorflow/clean_html', 'clean_html' options[:container] = '#content'
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; The TensorFlow Authors. All rights reserved.<br> &copy; 2015 The TensorFlow Authors. All rights reserved.<br>
Licensed under the Apache 2.0 License. Licensed under the Apache 2.0 License.
HTML HTML
# 'Python' version: sets its own base URL (the r0.7 Python API docs tree)
# and release number.
version 'Python' do
self.base_url = 'https://www.tensorflow.org/versions/r0.7/api_docs/python/'
self.release = '0.7'
end
# 'C++' version: sets its own base URL (the r0.7 C++ API docs tree),
# release number, and a URL-fixing callback.
version 'C++' do
self.base_url = 'https://www.tensorflow.org/versions/r0.7/api_docs/cc/'
self.release = '0.7'
# Rewrites URLs whose path starts with a lowercase `class` / `struct` prefix
# under /api_docs/cc/ to the capitalized `Class` / `Struct` form.
# NOTE(review): presumably the site's page names are capitalized — confirm.
options[:fix_urls] = ->(url) {
url.sub! '/api_docs/cc/class', '/api_docs/cc/Class'
url.sub! '/api_docs/cc/struct', '/api_docs/cc/Struct'
# `sub!` mutates in place and returns nil when nothing matched, so the
# (possibly modified) string is returned explicitly.
url
}
end
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 647 B

After

Width:  |  Height:  |  Size: 701 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 898 B

Loading…
Cancel
Save