From b1ecc6c9645bcb7051c6cf2b941e636f6bee478a Mon Sep 17 00:00:00 2001 From: Yosi Attias Date: Mon, 17 Oct 2016 14:49:23 +0300 Subject: [PATCH] ImmutableJS Scrapper. Core scrapper changes: *) Add regexp support to stub *) Add support to fragment in internal_urls, normalized_paths => done externally. --- lib/docs/core/scraper.rb | 14 +++++-- lib/docs/filters/immutablejs/clean_html.rb | 38 ++++++++++++++++++ lib/docs/filters/immutablejs/entries.rb | 31 ++++++++++++++ lib/docs/filters/immutablejs/internal_urls.rb | 23 +++++++++++ .../filters/immutablejs/normalize_paths.rb | 28 +++++++++++++ lib/docs/scrapers/immutablejs.rb | 38 ++++++++++++++++++ public/icons/docs/immutablejs/16.png | Bin 0 -> 758 bytes public/icons/docs/immutablejs/16@2x.png | Bin 0 -> 1565 bytes 8 files changed, 169 insertions(+), 3 deletions(-) create mode 100644 lib/docs/filters/immutablejs/clean_html.rb create mode 100644 lib/docs/filters/immutablejs/entries.rb create mode 100644 lib/docs/filters/immutablejs/internal_urls.rb create mode 100644 lib/docs/filters/immutablejs/normalize_paths.rb create mode 100644 lib/docs/scrapers/immutablejs.rb create mode 100644 public/icons/docs/immutablejs/16.png create mode 100644 public/icons/docs/immutablejs/16@2x.png diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb index 3e96cc70..0b508afc 100644 --- a/lib/docs/core/scraper.rb +++ b/lib/docs/core/scraper.rb @@ -51,12 +51,20 @@ module Docs def initialize_stubs self.class.stubs.each do |path, block| - Typhoeus.stub(url_for(path)).and_return do + stub_path = nil + case path + when String + stub_path = url_for(path) + when Regexp + stub_path = path + end + + Typhoeus.stub(stub_path).and_return do |request| Typhoeus::Response.new \ - effective_url: url_for(path), + effective_url: request.url, code: 200, headers: { 'Content-Type' => 'text/html' }, - body: self.instance_exec(&block) + body: self.instance_exec(request.url, &block) end end end diff --git a/lib/docs/filters/immutablejs/clean_html.rb b/lib/docs/filters/immutablejs/clean_html.rb new file mode 100644 index 00000000..b6769a9a --- /dev/null +++ b/lib/docs/filters/immutablejs/clean_html.rb @@ -0,0 +1,38 @@ +module Docs + class Immutablejs + class CleanHtmlFilter < Filter + def call + # Skip the container "div" + @doc = at_css('div') + + # Remove data-reactid attributes for cleaner html + css('*[data-reactid]').each do |reactEl| + reactEl.delete 'data-reactid' + end + + # Add id to member label, so we can navigate among them + css('h3.memberLabel').each do |memberLabel| + memberLabel['id'] = memberLabel.content.strip.chomp('()') + end + + + css('a').each do |link| + # Remove "/" from the start + link['href'] = link['href'].gsub(/^(#)?\//, '') + + # We need to convert links - from Iterable/butLast to Iterable#butLast + link['href'] = link['href'].split('/').join('#') + end + + # Replace code blocks tag code with pre, and add stylings. + css('code.codeBlock').each do |codeBlock| + codeBlock.name = 'pre' + codeBlock['data-language'] = 'javascript' + codeBlock['class'] = 'language-javascript' + end + + doc + end + end + end +end diff --git a/lib/docs/filters/immutablejs/entries.rb b/lib/docs/filters/immutablejs/entries.rb new file mode 100644 index 00000000..e3755f43 --- /dev/null +++ b/lib/docs/filters/immutablejs/entries.rb @@ -0,0 +1,31 @@ +module Docs + class Immutablejs + class EntriesFilter < Docs::EntriesFilter + def name + typeHeader = at_css('h1.typeHeader') + return typeHeader.content if typeHeader + end + + def type + typeHeader = at_css('h1.typeHeader') + return typeHeader.content if typeHeader + + # TODO: Is this ok? This the index page.. I don't think it should have it's own type.. + nil + end + + + def additional_entries + if current_url.fragment.nil? + return [] + end + + css('h3.memberLabel').map do |memberLabel| + entry_name = "#{type}##{memberLabel.content}" + [entry_name, memberLabel.content.chomp('()')] + end + end + + end + end +end diff --git a/lib/docs/filters/immutablejs/internal_urls.rb b/lib/docs/filters/immutablejs/internal_urls.rb new file mode 100644 index 00000000..1ddc4ba9 --- /dev/null +++ b/lib/docs/filters/immutablejs/internal_urls.rb @@ -0,0 +1,23 @@ +module Docs + class Immutablejs + class InternalUrlsFilter < Docs::InternalUrlsFilter + def update_and_follow_links + urls = result[:internal_urls] = [] + update_links do |url| + urls << url.to_s + end + urls.uniq! + end + + def to_internal_url(str) + if str.start_with? "#/" + return nil if not str =~ /^#\/[^\/]+$/ + str = root_url.to_s + str + end + + super(str) + end + + end + end +end diff --git a/lib/docs/filters/immutablejs/normalize_paths.rb b/lib/docs/filters/immutablejs/normalize_paths.rb new file mode 100644 index 00000000..1681fa3c --- /dev/null +++ b/lib/docs/filters/immutablejs/normalize_paths.rb @@ -0,0 +1,28 @@ +module Docs + class Immutablejs + class NormalizePathsFilter < Docs::NormalizePathsFilter + # + # Checks if the given url starts with: + # "#" or ".#", with means it's a fragment url + # + FRAGMENT_REGEX = /^(\.)?#/ + + def path + # + # If we have fragment, we want to use as our path. + # + if current_url.fragment + # Remove "/" from the start + @path = current_url.fragment.sub(/^\//, '') + end + + super + end + + def normalize_href href + return href.gsub(FRAGMENT_REGEX, '') if href =~ FRAGMENT_REGEX + super + end + end + end +end diff --git a/lib/docs/scrapers/immutablejs.rb b/lib/docs/scrapers/immutablejs.rb new file mode 100644 index 00000000..17397c58 --- /dev/null +++ b/lib/docs/scrapers/immutablejs.rb @@ -0,0 +1,38 @@ +module Docs + class Immutablejs < UrlScraper + self.name = "ImmutableJS" + self.type = "immutablejs" + self.release = "3.8.1" + self.base_url = "https://facebook.github.io/immutable-js/docs/" + + + # + # Replacins core html filters with our own, so we can handle fragments in + # + html_filters.replace 'internal_urls', 'immutablejs/internal_urls' + html_filters.replace 'normalize_paths', 'immutablejs/normalize_paths' + + html_filters.push 'immutablejs/clean_html', 'immutablejs/entries' + + + options[:attribution] = <<-HTML + This documentation is generated from Immutable.d.ts. + Pull requests and Issues welcome. + HTML + + stub(/.*/) do |url| + # + # Reuse capybara sessions, since we scrape all pages.. + # by visiting 'about:blank' we reset the oldest session. + # + @capybara ||= load_capybara_selenium + @capybara.visit 'about:blank' + @capybara.visit url + + @capybara.execute_script 'return document.querySelector(".docContents").innerHTML' + end + + + + end +end diff --git a/public/icons/docs/immutablejs/16.png b/public/icons/docs/immutablejs/16.png new file mode 100644 index 0000000000000000000000000000000000000000..427326bb461ed015aa3a84ced3dd5ccc330c796c GIT binary patch literal 758 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GXl47zG1-LR^7dYdil`d%yqT#yYkE z+O`2&wgEr}5b4?m*xUI-7zQvwecJ#BJO3JcUmyuo2}D3GR<`~=cK-jp9e^ZI#L_k( z*3J)zfDA`le@8oiJ=*{qTYncje`{O+SbLDT1yGH>ACL>w4n(%L{>gT};dXvuc7ABi z0CIt50u{vA`JHz1JnrNTv@QZ{cp=ysnf8A6w*HeHeB$i=>}>skZqBgx%dz+Kvhz;| zTajn)3$#wtHXzW>KiS?d$=)x>&fm~BAj{q_+uqN}HXzO3FU8K!80^S@{U>99fnrk< zd-tW| z6FnLm^8Fec^LUwAmI-gU1k}!1;1O92q&>jc?PRtckg>5;c@T_jv&WL%fd zofDgq^7KjUoY+RDymtMI(Wr*H3HzrKI_bZMUh7th-(7@RmSsP|x< zh>VcbnkRfh1q|F8t6N%EYV5XVV4ZWQvCVD8s+HLpS*{GZ7yC*s2y*vbx_a6C!sTAB zYiR~1MrMYl#@jb+*|cqofw{5Dn@EN$dz2WQcCu*${h?ao8c~vxSdwa$T$Bo=7>o=I z&2$Y7bPX*+3=OOd%&ZKJbPY_c3=B5^{J922LvDUbW?Cg~4TqLEf)cDG$cEtjw370~ mqErUQl>DSr1<%~X^wgl##FWaylc_d9MGT&eM}Q~7_K;hIZ!f)IwtOR;y`12ccr!TY$yGcPKr%MTk*3WCq3alyq>n8PAPuW z!Zrhg3Ci3+AQFVR$&jECW}-56E*T0+gsG@YR8+*_1l_JExIfq~x%++OeV+Gu-}lMw zF(fYxoftWh$K!?S64gd-1^jPtAoq1a&! zYE)}q=i*Ls(801a3PCQHOW=wVP)r(xNTpH;7D7THpNrr-GfCFs=9A8uBMfTHX=8Aj z#VHc-Gg_?FMph1TN?(;g(4(}Z^L3lJhCyx%4Iu*9U(!gRUjP501Th-zWR2Jxy<-wP zQ!;4`GGb0@BV*$Rw{E6C6^*JG%)(Mk3Poj%bkX3TSjy?3Xh5YJ?ivuQx7cvfe>r;? zrPrf6(#cv#8>UmsL5@d&<91Z7g;kO`7#7DXg$SY%DWxhzp%h8tL}Jmrc$IPltEOxl z35;Y%u=Y1tqnIn-p9StK(6A4pHx&pi#>*!_*}Ok;{iB zC_2F-K(Np2ElTzIbeqml`K#V0US8j~C~)41;(?QyR9AP>4x=wpx-U{z5-19VpI-X0 zcD!cMji=@F%fhzh7>ZwHxcXP@SzHmZGGte0?E z4MloRYb$$p4R%zDC%s7OyW`AGteH0DL@ZodviiZfBJtXWy|Y5&tsQODp}&7A@Ri?c zc~lutO9fFKrIQo1=U+PC6@2*qkjD{pH>A|$NsH)S@%jFp_gdmaC!@^#9}dZ?J?=Ay zZUtXP(_Si>vos*Q(`U<09BSyNCCa*+j;XB1|7&vEqGk= zwf&3pZM}RvVr4gIU%cONc-e_B(VZbn=QJrhFAAUTpKDF1D||M^K4|u0-P;SF84uUQ sY;BHGExVaoFD(13y#`Mx4}CVXndg)AJ=aeQ+vfja>om#gTIE{LKl5Q;hyVZp literal 0 HcmV?d00001