From 346593354300588b5eaeb57155ec7b3c8370896d Mon Sep 17 00:00:00 2001 From: ShaneQful Date: Sat, 14 Nov 2015 19:15:48 +0000 Subject: [PATCH] Added dojo to devdocs & ability to define headers in scraper requests --- lib/docs/core/scrapers/url_scraper.rb | 5 +- lib/docs/filters/dojo/clean_html.rb | 9 +- lib/docs/filters/dojo/entries.rb | 1 - lib/docs/scrapers/dojo.rb | 150 ++---------------- public/icons/docs/dojo/16.png | Bin 0 -> 672 bytes public/icons/docs/dojo/16@2x.png | Bin 0 -> 1698 bytes .../docs/core/scrapers/url_scraper_test.rb | 12 ++ 7 files changed, 40 insertions(+), 137 deletions(-) create mode 100644 public/icons/docs/dojo/16.png create mode 100644 public/icons/docs/dojo/16@2x.png diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb index 97bcb6f4..46eba810 100644 --- a/lib/docs/core/scrapers/url_scraper.rb +++ b/lib/docs/core/scrapers/url_scraper.rb @@ -2,14 +2,17 @@ module Docs class UrlScraper < Scraper class << self attr_accessor :params + attr_accessor :headers def inherited(subclass) super subclass.params = params.deep_dup + subclass.headers = headers.deep_dup end end self.params = {} + self.headers = { 'User-Agent' => 'devdocs.io' } private @@ -22,7 +25,7 @@ module Docs end def request_options - { params: self.class.params } + { params: self.class.params, headers: self.class.headers } end def process_response?(response) diff --git a/lib/docs/filters/dojo/clean_html.rb b/lib/docs/filters/dojo/clean_html.rb index 2e6e0111..0cf2c443 100644 --- a/lib/docs/filters/dojo/clean_html.rb +++ b/lib/docs/filters/dojo/clean_html.rb @@ -2,8 +2,15 @@ module Docs class Dojo class CleanHtmlFilter < Filter def call - # TODO: Probably needs a little more cleanup but should do for the moment css('script').remove + + css('.version').remove + + #Remove links which are broken on the methods + doc.css(".functionIcon a").each do |a| + a.replace a.content + end + doc end end diff --git a/lib/docs/filters/dojo/entries.rb 
b/lib/docs/filters/dojo/entries.rb index 1c50799d..b02ea0ef 100644 --- a/lib/docs/filters/dojo/entries.rb +++ b/lib/docs/filters/dojo/entries.rb @@ -8,7 +8,6 @@ module Docs def get_type name end - # TODO:Figure out how to solve the internal links issue later end end end \ No newline at end of file diff --git a/lib/docs/scrapers/dojo.rb b/lib/docs/scrapers/dojo.rb index b2af9962..207a0ff7 100644 --- a/lib/docs/scrapers/dojo.rb +++ b/lib/docs/scrapers/dojo.rb @@ -4,7 +4,13 @@ module Docs self.slug = 'dojo' self.type = 'dojo' self.version = '1.10' - self.base_url = 'http://dojotoolkit.org/api/1.10/' #tree.json + self.base_url = 'http://dojotoolkit.org/api/1.10/' + + # This is a cut down list of the actual paths taken from the tree.json api on the dojo site + # Dojo uses javascript and xhr requests to allow users to browse its documentation so it can't + # be scraped by just following links from the base page. This list was generated with a little + # bash and then cut down in order to remove a lot of the less-used documentation e.g.
kernel, + # main, dnd and some others self.initial_paths = %w( dojo/AdapterRegistry dojo/aspect @@ -17,65 +23,18 @@ module Docs dojo/_base/config.modulePaths dojo/_base/connect dojo/_base/declare - dojo/_base/declare.__DeclareCreatedObject dojo/_base/Deferred dojo/_base/event dojo/_base/fx dojo/_base/html dojo/_base/json dojo/_base/kernel - dojo/_base/kernel.back - dojo/_base/kernel.cldr - dojo/_base/kernel.colors - dojo/_base/kernel.config - dojo/_base/kernel.contentHandlers - dojo/_base/kernel._contentHandlers - dojo/_base/kernel.currency - dojo/_base/kernel.data - dojo/_base/kernel.date - dojo/_base/kernel.dijit - dojo/_base/kernel.dnd - dojo/_base/kernel.doc - dojo/_base/kernel.dojox - dojo/_base/kernel.fx - dojo/_base/kernel.gears - dojo/_base/kernel.global - dojo/_base/kernel._hasResource - dojo/_base/kernel.html - dojo/_base/kernel.i18n - dojo/_base/kernel.io - dojo/_base/kernel.__IoArgs - dojo/_base/kernel.__IoCallbackArgs - dojo/_base/kernel.__IoPublish - dojo/_base/kernel.keys - dojo/_base/kernel.mouseButtons - dojo/_base/kernel._nodeDataCache - dojo/_base/kernel.number - dojo/_base/kernel.regexp - dojo/_base/kernel.rpc - dojo/_base/kernel.scopeMap - dojo/_base/kernel.Stateful - dojo/_base/kernel.store - dojo/_base/kernel.string - dojo/_base/kernel.tests - dojo/_base/kernel.touch - dojo/_base/kernel.version - dojo/_base/kernel.window - dojo/_base/kernel.__XhrArgs dojo/_base/lang dojo/_base/loader dojo/_base/NodeList dojo/_base/query dojo/_base/sniff dojo/_base/unload - dojo/_base/url - dojo/_base/url.authority - dojo/_base/url.fragment - dojo/_base/url.password - dojo/_base/url.port - dojo/_base/url.query - dojo/_base/url.scheme - dojo/_base/url.user dojo/_base/window dojo/_base/window.doc dojo/_base/window.global @@ -88,8 +47,6 @@ module Docs dojo/colors dojo/cookie dojo/currency - dojo/currency.__FormatOptions - dojo/currency.__ParseOptions dojo/data/api/Identity dojo/data/api/Item dojo/data/api/Notification @@ -104,33 +61,10 @@ module Docs 
dojo/data/util/sorter dojo/date dojo/date/locale - dojo/date/locale.__FormatOptions dojo/date/stamp dojo/debounce dojo/Deferred dojo/DeferredList - dojo/dnd/autoscroll - dojo/dnd/autoscroll._validNodes - dojo/dnd/autoscroll._validOverflow - dojo/dnd/AutoSource - dojo/dnd/Avatar - dojo/dnd/common - dojo/dnd/common._defaultCreatorNodes - dojo/dnd/common._empty - dojo/dnd/Container - dojo/dnd/Container.__ContainerArgs - dojo/dnd/Manager - dojo/dnd/move - dojo/dnd/Moveable - dojo/dnd/Moveable.__MoveableArgs - dojo/dnd/move.boxConstrainedMoveable - dojo/dnd/move.constrainedMoveable - dojo/dnd/move.parentConstrainedMoveable - dojo/dnd/Mover - dojo/dnd/Selector - dojo/dnd/Source - dojo/dnd/Target - dojo/dnd/TimedMoveable dojo/dom dojo/dom-attr dojo/dom-class @@ -146,7 +80,6 @@ module Docs dojo/errors/RequestError dojo/errors/RequestTimeoutError dojo/Evented - dojo/_firebug/firebug dojo/fx dojo/fx/easing dojo/fx.easing @@ -168,44 +101,6 @@ module Docs dojo/keys dojo/loadInit dojo/main - dojo/main.back - dojo/main.cldr - dojo/main.colors - dojo/main.config - dojo/main.contentHandlers - dojo/main._contentHandlers - dojo/main.currency - dojo/main.data - dojo/main.date - dojo/main.dijit - dojo/main.dnd - dojo/main.doc - dojo/main.dojox - dojo/main.fx - dojo/main.gears - dojo/main.global - dojo/main._hasResource - dojo/main.html - dojo/main.i18n - dojo/main.io - dojo/main.__IoArgs - dojo/main.__IoCallbackArgs - dojo/main.__IoPublish - dojo/main.keys - dojo/main.mouseButtons - dojo/main._nodeDataCache - dojo/main.number - dojo/main.regexp - dojo/main.rpc - dojo/main.scopeMap - dojo/main.Stateful - dojo/main.store - dojo/main.string - dojo/main.tests - dojo/main.touch - dojo/main.version - dojo/main.window - dojo/main.__XhrArgs dojo/mouse dojo/node dojo/NodeList @@ -217,12 +112,6 @@ module Docs dojo/NodeList._nodeDataCache dojo/NodeList-traverse dojo/number - dojo/number.__FormatAbsoluteOptions - dojo/number.__FormatOptions - dojo/number.__IntegerRegexpFlags - 
dojo/number.__ParseOptions - dojo/number.__RealNumberRegexpFlags - dojo/number.__RegexpOptions dojo/on dojo/on/asyncEventListener dojo/on/debounce @@ -237,32 +126,16 @@ module Docs dojo/ready dojo/regexp dojo/request - dojo/request.__BaseOptions dojo/request/default dojo/request/handlers dojo/request/iframe - dojo/request/iframe.__BaseOptions - dojo/request/iframe.__MethodOptions - dojo/request/iframe.__Options - dojo/request.__MethodOptions dojo/request/node - dojo/request/node.__BaseOptions - dojo/request/node.__MethodOptions - dojo/request/node.__Options dojo/request/notify - dojo/request.__Options - dojo/request.__Promise dojo/request/registry dojo/request/script - dojo/request/script.__BaseOptions - dojo/request/script.__MethodOptions - dojo/request/script.__Options dojo/request/util dojo/request/watch dojo/request/xhr - dojo/request/xhr.__BaseOptions - dojo/request/xhr.__MethodOptions - dojo/request/xhr.__Options dojo/require dojo/robot dojo/robot._runsemaphore @@ -299,6 +172,11 @@ module Docs dojo/uacss dojo/when dojo/window) + # Add the rest of the url to the path + self.initial_paths = self.initial_paths.map { |l| l + ".html?xhr=true" } + # Dojo expects all the requests to be xhrs or it redirects you back to the docs home page + # where it uses js to call the backend based on the URL so you get the appropriate documentation + self.headers = { 'User-Agent' => 'devdocs.io' , 'X-Requested-With' => 'XMLHttpRequest' } self.links = { home: 'http://dojotoolkit.org', code: 'https://github.com/dojo/dojo' @@ -306,7 +184,11 @@ module Docs html_filters.push 'dojo/clean_html', 'dojo/entries' + # Don't use default selector on xhrs as no body or html document exists + options[:container] = ->(filter) { filter.root_page? ? '#content' : false } + options[:follow_links] = false options[:skip_links] = true + options[:only] = self.initial_paths options[:attribution] = <<-HTML The Dojo Toolkit is Copyright © 2005–2013
diff --git a/public/icons/docs/dojo/16.png b/public/icons/docs/dojo/16.png new file mode 100644 index 0000000000000000000000000000000000000000..34695164903ff88c3e34ba77eb1d005fa4607848 GIT binary patch literal 672 zcmV;R0$=@!P)5lg&$%Q543XbKh5;ESrWx zM#HoQgQ;dLF~|`~5ZXl&>27Fg?yRs~ws2=L*{9CGU@Kj<(E`anl$2)dotn&rNI^lR z;@G_Rp3@>fCqX;A=ixlRbGi2(!WI>Xry~H@0-zO30QGXZwQ@AKmo|zceNGJ#U4XKN zro9s*XlXG3faZQHaPCw+*ffgBz~?K$@qeb4$5Txe@pN>n6K$-Dcsg1hPc>~a%le#} zig;?<@H!+FE$ee?)~ENTx^^1Q%~0thCqfGOb8W%r01$rsso2g#8Ed0$C3OP4Bf~$J z^9sIsScd>P3L`^b2qg@1Z5Gsi9ZvL|>~$^?5C*w3q?G}9$pC(4^Gk_6Z_Ur!X16CO zBE=B+=bj};{AWY$3IsaQ5I$~<_$JzdoxjC;oi)1381YTC20IywHOXc66kOGe(aE-B zHw~omP<6>(5J(vY+^k!YG5R9_xJ%hRF2ld7mooT})m-Yb_~%?J4FJnVW_NiGcnt&O zyPv%NBc1>7a3I?yf<*`_33v*HDiJIKvJb$dFlLEx5uoox$j*$u@7`Y@YKQNh6Y)N9 z8UO&z{nnlYRT`Y0J(#uQi6v8)pPh)|bhO1nX@h zjY{-jrlWdnHM0q@2I@}KDUvfx76E`FBW&oEz7cwn+xrJ8ZU)&bDmiih0000NGYG66=pmolui%YJsR-$g&FB!Ll+z6edN=C^c=`7^jsOFeZ>H z^%vEuPGW7fp=pB_<6H;lt7Kg$nu1GViDTdQoc*vvoevLEvh(lW=e*~6-gD1A?|BLH zHlfv_c(FE(RZ7?k08A=j*!q+HX8$NaGM{L4M_z5B%I}MBQlNSQe-A>R6Um@}t^t5R zd?Zu>NTHDWGeb@ded_-s+KKO00F}Rash(K&5vZRDN3RpVH}=e$8*@1?do$+oiP|WF zEdp6BiuMmZ^UYHL@@6HkfRgYhMGi{-4uDPC-aXJhC<*u1&V|p)KuOrDHDSD^Mg8RR_SI+a zRsaB$9`3I*h)>win@5@g=kp5qdRMI2bm9XRdFt}^z=Du}DZ;8VlscZCl<)rYxizCx z?^UiC(@94FbSyME09@W4I4dF@S&rGYR~oXOTy;2hdZCGD+NyBubZO|)lT*)`477<7 z&|Vf}_y1b}v)(Nvd(kH9mVlKj;+p{KA8Gc_`Ky&48FsHdYAw56@apb}_mP7m1^>%u zq`ArhD4yX2OaLHogJwCJZ_)C})MGHH1;7RXSliQg2Ee_yb|v-?&NWqyxJ}0^_LA|_ z08Typy0wlRs{2+D!QcMgRN0vE$58?|0C>j)ghcw*C2vwo?oOPBa~)XB$;tGGfQINV z>n3TZew>R(ch+3Z1g00#*OL z93rL-=UXZxK<4`|9iMni<=~SxZHQ*^nx4KgrE%u|*JBn701bG*c5ep@J8vc({(&9- zNdR~tWc`c`yp6Uq_oMy$HVA_*EoF*HyuwHT01RxkS%*H#1_yx4+scv}fSrR!bMVMd zD~Gk9LxZ2y5Tr#+4ca=P)z7EM0pR22KsU2YeCwWjz5@W7CBYjG5q67da7D@o*&8oVff%UT%imYXw zAGZY?wBUjkfJts>!Dkg+u}5-!?mPI`0-L4IW{zYx?PZ~asSO%dHdg>Za-4e{i`4n@ zL(yOTCvA7RZzi-+xO|Idd7erBlAe$rN5O1dz+2Ny-v-HVArJ-G2gvWsU%vSC(Dkab z6`tryGVq!f{$OxNb+*?~g*IqdU2FD_3gBJV8S{+1Sd&Pn?Dfoew7@j4s>0=CET|Xe 
zUo+IFBQMq@(rJ4=3m#qMFt5_m44zCG8;5pQjj)DC3ylx#tonm!>=mLTWre*rw8BT4 z>CzxvjR3H$Gv*P2_gb?*b13;CT52y%eb|dHLeMi#w)w z%%bqW7EQI?Df4w%k9%K@9RsqRP?%4G&;;P^TVCx|sl4QK;0oBqT7EgYCoq=#{z}uz z@tI3Sum#LLz|^tWCTgkAifYK_n>*$=dgX+rJDG?VABYAF2fcMY5vW2taV9nG#3Hm` zF%En+a^OzKab@CwfcDQdG1KB;@4KF0?_JM<`OFjSZS)-KeHS?cNp5k7P9?XoV2?X! z)h#r+J80FhV2?eS+=iSneOh%+v_3Q{peK~@33o8EU?SYXNSzWsA)qHFS|1wC#nepa z@fQN;0kv6)I_?Ta?)r1Mf{{ig>Nub_XJSvcycd&#&U3{uT}MD-Yr?M7^VPBW5Ichx z*C>sbh{%LJxozgoZ!2I5cOSIsLA(!yINNe8#pZixo%?Er&X~vPv^Ofy(_r`^_)T(G z)k%DntJySb6XjfTaT7r`Kx7>hCBPUG;2IP_fR6~}!r}$!eF2|LeNnw2m(w{+2nRnd sHpNky;onD_Pkzt*`WUE)kc8UO$Q07*qoM6N<$g8#lN2LJ#7 literal 0 HcmV?d00001 diff --git a/test/lib/docs/core/scrapers/url_scraper_test.rb b/test/lib/docs/core/scrapers/url_scraper_test.rb index 35867880..da59c22c 100644 --- a/test/lib/docs/core/scrapers/url_scraper_test.rb +++ b/test/lib/docs/core/scrapers/url_scraper_test.rb @@ -58,12 +58,24 @@ class DocsUrlScraperTest < MiniTest::Spec result end + it "runs a Requester with .headers as :request_options" do + stub(Scraper).headers { { testheader: true } } + mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:headers][:testheader] } + result + end + + it "runs a Requester with default .headers as :request_options" do + mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:headers]["User-Agent"] } + result + end + it "runs a Requester with .params as :request_options" do stub(Scraper).params { { test: true } } mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:params][:test] } result end + it "runs a Requester with the given block" do stub(Docs::Requester).run { |*args| @block = args.last } result