Finish Dojo scraper

pull/304/head
Thibaut 9 years ago
parent ebfe3a1208
commit 6939865137

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 91 KiB

After

Width:  |  Height:  |  Size: 92 KiB

@ -1,5 +1,8 @@
[
[
"2015-11-22",
"New documentation: <a href=\"/dojo/\">Dojo</a>"
], [
"2015-11-08",
"New documentations: <a href=\"/elixir/\">Elixir</a> and <a href=\"/vagrant/\">Vagrant</a>"
], [

@ -134,6 +134,11 @@ credits = [
'Django Software Foundation and individual contributors',
'BSD',
'https://raw.githubusercontent.com/django/django/master/LICENSE'
], [
'Dojo',
'2005-2015 The Dojo Foundation',
'BSD + AFL',
'http://dojotoolkit.org/license.html'
], [
'Drupal',
'2001-2015 by the original authors<br>Drupal is a registered trademark of Dries Buytaert.',

@ -33,6 +33,7 @@ app.views.UnderscorePage =
app.views.WebpackPage =
app.views.JavascriptPage
app.views.DojoPage =
app.views.RequirejsPage =
app.views.SocketioPage =
app.views.VuePage =

@ -37,6 +37,7 @@
'pages/clojure',
'pages/coffeescript',
'pages/d3',
'pages/dojo',
'pages/drupal',
'pages/elixir',
'pages/ember',

@ -37,6 +37,7 @@
'pages/clojure',
'pages/coffeescript',
'pages/d3',
'pages/dojo',
'pages/drupal',
'pages/elixir',
'pages/ember',

@ -118,3 +118,4 @@
%icon-clipboard-white { background-position: -3rem -9rem; }
._icon-elixir:before { background-position: -4rem -9rem; @extend %darkIconFix !optional; }
._icon-vagrant:before { background-position: -5rem -9rem; }
._icon-dojo:before { background-position: -6rem -9rem; }

@ -0,0 +1,6 @@
// Styles applied to pages rendered with the `_dojo` class.
._dojo {
  @extend %simple;

  // `.functionIcon` and `.parameters` elements pick up the %code placeholder styles.
  .functionIcon,
  .parameters {
    @extend %code;
  }

  // Inheritance notes are rendered in the light text color.
  .jsdoc-inheritance {
    color: $textColorLight;
  }
}

@ -2,13 +2,41 @@ module Docs
class Dojo
class CleanHtmlFilter < Filter
def call
css('script').remove
if root_page?
doc.inner_html = ' '
return doc
end
css('h1[class]').each do |node|
node.remove_attribute('class')
end
css('.version', '.jsdoc-permalink', '.feedback', '.jsdoc-summary-heading', '.jsdoc-summary-list', '.jsdoc-field.private').remove
css('.version').remove
css('.jsdoc-wrapper, .jsdoc-children, .jsdoc-fields, .jsdoc-field, .jsdoc-property-list, .jsdoc-full-summary, .jsdoc-return-description').each do |node|
node.before(node.children).remove
end
css('a[name]').each do |node|
next unless node.content.blank?
node.parent['id'] = node['name']
node.remove
end
css('div.returnsInfo', 'div.jsdoc-inheritance').each do |node|
node.name = 'p'
end
css('div.jsdoc-title').each do |node|
node.name = 'h3'
end
css('.returns').each do |node|
node.inner_html = node.inner_html + ' '
end
#Remove links which are broken on the methods
doc.css(".functionIcon a").each do |a|
a.replace a.content
css('.functionIcon a').each do |node|
node.replace(node.content)
end
doc

@ -0,0 +1,10 @@
module Docs
  class Dojo
    # Text filter that strips the `?xhr=true` query string from the page markup.
    class CleanUrlsFilter < Filter
      # Removes every occurrence of '?xhr=true' from `html` in place.
      #
      # @return [String] the same (mutated) `html` string
      def call
        html.tap { |markup| markup.remove!('?xhr=true') }
      end
    end
  end
end

@ -2,14 +2,27 @@ module Docs
class Dojo
class EntriesFilter < Docs::EntriesFilter
# Entry name taken from the page's <h1>: any parenthesised part (e.g. a call
# signature) and the 'dojo/' prefix are removed, then surrounding whitespace
# is stripped.
#
# @return [String]
def get_name
  # The heading is queried once; a previous bare `at_css('h1').content`
  # statement was evaluated and thrown away.
  at_css('h1').content.remove(/\(.*\)/).remove('dojo/').strip
end
# Entry type derived from the entry name. The name is split on '/', '.' and
# '-'; the first segment is the type, except for names starting with '_base',
# where the first two segments joined with '/' are used.
#
# @return [String, nil] nil when the name yields no segments
def get_type
  # The unused `list_of_names` split/pop/join computation was dropped: its
  # result was never returned or read.
  path = name.split(/[\/\.\-]/)
  path[0] == '_base' ? path[0..1].join('/') : path[0]
end
# Extra index entries for the members listed in the page's summary list.
# Private and inherited members are excluded by the selectors.
#
# @return [Array<Array(String, String)>] [name, fragment id] pairs; function
#   entries (suffixed with "()") come first, followed by object entries
def additional_entries
  functions = css('.jsdoc-summary-list li.functionIcon:not(.private):not(.inherited) > a').map do |link|
    ["#{self.name}##{link.content}()", link['href'].remove('#')]
  end

  objects = css('.jsdoc-summary-list li.objectIcon:not(.private):not(.inherited) > a').map do |link|
    ["#{self.name}##{link.content}", link['href'].remove('#')]
  end

  functions + objects
end
end
end
end
end

@ -1,12 +1,12 @@
require 'yajl/json_gem'
module Docs
class Dojo < UrlScraper
include StubRootPage
self.name = 'Dojo'
self.slug = 'dojo'
self.type = 'dojo'
self.version = '1.10'
self.base_url = 'http://dojotoolkit.org/api/1.10/'
self.base_url = "http://dojotoolkit.org/api/#{version}/"
# Dojo expects every request to be an XHR; otherwise it redirects to the docs home page,
# which uses JavaScript to load the appropriate documentation from the backend based on the URL.
@ -16,34 +16,34 @@ module Docs
code: 'https://github.com/dojo/dojo'
}
html_filters.push 'dojo/clean_html', 'dojo/entries'
html_filters.push 'dojo/entries', 'dojo/clean_html', 'title'
text_filters.push 'dojo/clean_urls'
# Don't use default selector on xhrs as no body or html document exists
options[:container] = false
options[:title] = false
options[:root_title] = 'Dojo Toolkit'
# Builds the HTML body of the stub root page: one link per page found under
# the "dojo" entry of the API tree, joined with "<br>".
#
# Fetches tree.json with XHR-style headers, then walks the tree recursively,
# collecting page URLs into @url_set.
#
# @return [String] anchors separated by "<br>"
def root_page_body
  require 'json'
  require 'set'

  response = Typhoeus::Request.new("dojotoolkit.org/api/1.10/tree.json",
    headers: { 'User-Agent' => 'devdocs.io', 'X-Requested-With' => 'XMLHttpRequest' }).run
  tree = JSON.parse(response.response_body)

  # Array#find rather than Array#bsearch: bsearch requires a sorted array and
  # a monotone block, so an equality predicate only works by accident of the
  # element order and may return nil or the wrong element.
  dojo_tree = tree["children"].find { |framework| framework["name"] == "dojo" }

  @url_set = Set.new

  # NOTE: this nested `def` (re)defines an instance method on the enclosing
  # class each time root_page_body runs; kept so that behavior is unchanged.
  def get_url_list(node)
    @url_set.add(self.class.base_url + node["fullname"] + ".html?xhr=true")
    if node["children"]
      node["children"].each do |child|
        get_url_list(child)
      end
    end
  end

  get_url_list(dojo_tree)
  @url_set.map { |url| "<a href='#{url}'>#{url}</a>" }.join("<br>")
end
options[:only_patterns] = [/\Adojo\//]
options[:skip_patterns] = [/dijit/, /dojox/]
options[:attribution] = <<-HTML
The Dojo Toolkit is Copyright &copy; 2005&ndash;2013 <br>
Dual licensed under BSD 3-Clause and AFL.
&copy; 2005&ndash;2015 The Dojo Foundation<br>
Licensed under the AFL 2.1 and BSD 3-Clause licenses.
HTML
private
# Builds the HTML body of the stub root page: fetches tree.json from the base
# URL, collects the page URLs from the parsed tree, and renders one anchor
# per URL with no separator between them.
#
# @return [String] concatenated <a> tags
def root_page_body
  tree = JSON.parse(request_one("#{self.base_url}tree.json").body)

  get_url_list(tree)
    .map { |link| "<a href='#{link}'>#{link}</a>" }
    .join
end
# Recursively collects the XHR page URL of `json` and of all its descendants.
#
# @param json [Hash] tree node with a 'fullname' key and an optional
#   'children' list of nodes of the same shape
# @param set [Set] accumulator shared across the recursion
# @return [Set] `set`, with URLs added in pre-order (parent before children)
def get_url_list(json, set = Set.new)
  set.add("#{self.class.base_url}#{json['fullname']}.html?xhr=true")

  children = json['children']
  return set unless children

  children.each do |child|
    get_url_list(child, set)
  end
  set
end
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 672 B

After

Width:  |  Height:  |  Size: 493 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

@ -75,7 +75,6 @@ class DocsUrlScraperTest < MiniTest::Spec
result
end
it "runs a Requester with the given block" do
stub(Docs::Requester).run { |*args| @block = args.last }
result

Loading…
Cancel
Save