From 17528d984575fe6d7a586987a7936a06d695c588 Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Fri, 16 Aug 2019 16:49:12 +0200 Subject: [PATCH] rxjs: finish scraper and filters --- .../templates/pages/about_tmpl.coffee | 20 ++-- lib/docs/filters/rxjs/clean_html.rb | 54 ++++++++-- lib/docs/filters/rxjs/entries.rb | 12 ++- lib/docs/scrapers/rxjs.rb | 102 +++++++++--------- public/icons/docs/rxjs/16.png | Bin 5356 -> 1521 bytes public/icons/docs/rxjs/SOURCE | 2 +- 6 files changed, 117 insertions(+), 73 deletions(-) diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index ac86b701..892c4284 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -211,8 +211,7 @@ credits = [ '2017 Cypress.io', 'MIT', 'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md' - ], - [ + ], [ 'D', '1999-2018 The D Language Foundation', 'Boost', @@ -572,8 +571,7 @@ credits = [ '2016-2018, The Pony Developers & 2014-2015, Causality Ltd.', 'BSD', 'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE' - ], - [ + ], [ 'PostgreSQL', '1996-2018 The PostgreSQL Global Development Group
© 1994 The Regents of the University of California', 'PostgreSQL', @@ -648,13 +646,17 @@ credits = [ '2010 The Rust Project Developers', 'MIT', 'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT' + ], [ + 'RxJS', + '2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors', + 'Apache', + 'https://raw.githubusercontent.com/ReactiveX/rxjs/master/LICENSE.txt' ], [ 'Salt Stack', '2019 SaltStack', 'Apache', 'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE' - ], - [ + ], [ 'Sass', '2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein', 'MIT', @@ -664,8 +666,7 @@ credits = [ '2002-2019 EPFL, with contributions from Lightbend', 'Apache', 'https://raw.githubusercontent.com/scala/scala-lang/master/license.md' - ], - [ + ], [ 'scikit-image', '2011 the scikit-image team', 'BSD', @@ -765,8 +766,7 @@ credits = [ '2003-2019 WordPress Foundation', 'GPLv2+', 'https://wordpress.org/about/license/' - ], - [ + ], [ 'Yarn', '2016-present Yarn Contributors', 'BSD', diff --git a/lib/docs/filters/rxjs/clean_html.rb b/lib/docs/filters/rxjs/clean_html.rb index 1056b1a6..864c201b 100644 --- a/lib/docs/filters/rxjs/clean_html.rb +++ b/lib/docs/filters/rxjs/clean_html.rb @@ -7,6 +7,11 @@ module Docs at_css('h1').content = 'RxJS Documentation' end + if at_css('h1').nil? + title = subpath.rpartition('/').last.titleize + doc.prepend_child("

#{title}

") + end + css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node| @@ -65,6 +70,16 @@ module Docs if node['class'] && node['class'].include?('api-heading') node.name = 'h3' + + unless node.ancestors('.instance-method').empty? + matches = node.inner_html.scan(/([^(& ]+)[(&]/) + + unless matches.empty? || matches[0][0] == 'constructor' + node['name'] = matches[0][0] + node['id'] = node['name'].downcase + '-' + end + end + node.inner_html = "#{node.inner_html}" end @@ -77,25 +92,48 @@ module Docs node.remove_attribute('class') end - css('h1[class]').remove_attr('class') - css('table[class]').remove_attr('class') - css('table[width]').remove_attr('width') - css('tr[style]').remove_attr('style') + css('td > .overloads').each do |node| + node.replace node.at_css('.detail-contents') + end + + css('td.short-description p').each do |node| + signature = node.parent.parent.next_element.at_css('h3[id]') + signature.after(node) unless signature.nil? + end - if at_css('.api-type-label.module') - at_css('h1').content = subpath.remove('api/') + css('.method-table').each do |node| + node.replace node.at_css('tbody') end - css('th h3').each do |node| - node.name = 'span' + css('.api-body > table > caption').each do |node| + node.name = 'center' + lift_out_of_table node end + css('.api-body > table > tbody > tr:not([class]) > td > *').each do |node| + lift_out_of_table node + end + + css('.api-body > table').each do |node| + node.remove if node.content.strip.blank? + end + + css('h1[class]').remove_attr('class') + css('table[class]').remove_attr('class') + css('table[width]').remove_attr('width') + css('tr[style]').remove_attr('style') + css('code code').each do |node| node.before(node.children).remove end doc end + + def lift_out_of_table(node) + table = node.ancestors('table').first + table.previous_element.after(node) + end end end end diff --git a/lib/docs/filters/rxjs/entries.rb b/lib/docs/filters/rxjs/entries.rb index 020ce1eb..c6e488fb 100644 --- a/lib/docs/filters/rxjs/entries.rb +++ b/lib/docs/filters/rxjs/entries.rb @@ -2,22 +2,28 @@ module Docs class Rxjs class EntriesFilter < Docs::EntriesFilter def get_name - name = at_css('h1').content + title = at_css('h1') + name = title.nil? ? subpath.rpartition('/').last.titleize : title.content name.prepend "#{$1}. " if subpath =~ /\-pt(\d+)/ + name += '()' unless at_css('.api-type-label.function').nil? name end def get_type if slug.start_with?('guide') 'Guide' - elsif at_css('.api-type-label.module') - name.split('/').first elsif slug.start_with?('api/') slug.split('/').second else 'Miscellaneous' end end + + def additional_entries + css('h3[id]').map do |node| + ["#{name}.#{node['name']}()", node['id']] + end + end end end end diff --git a/lib/docs/scrapers/rxjs.rb b/lib/docs/scrapers/rxjs.rb index 1825fc80..e5ea1051 100644 --- a/lib/docs/scrapers/rxjs.rb +++ b/lib/docs/scrapers/rxjs.rb @@ -4,11 +4,26 @@ module Docs class Rxjs < UrlScraper self.name = 'RxJS' self.type = 'rxjs' + self.release = '6.5.2' + self.base_url = 'https://rxjs.dev/' + self.root_path = 'guide/overview' self.links = { home: 'https://rxjs.dev/', code: 'https://github.com/ReactiveX/rxjs' } + html_filters.push 'rxjs/clean_html', 'rxjs/entries' + + options[:follow_links] = false + options[:only_patterns] = [/guide\//, /api\//] + options[:skip_patterns] = [/api\/([^\/]+)\.json/] + options[:fix_urls_before_parse] = ->(url) do + url.sub! %r{\Aguide/}, '/guide/' + url.sub! %r{\Aapi/}, '/api/' + url.sub! %r{\Agenerated/}, '/generated/' + url + end + options[:max_image_size] = 256_000 options[:attribution] = <<-HTML @@ -16,69 +31,54 @@ module Docs Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0. HTML - module Common - private + def get_latest_version(opts) + json = fetch_json('https://rxjs.dev/generated/navigation.json', opts) + json['__versionInfo']['raw'] + end - def initial_urls - initial_urls = [] + private - Request.run "#{self.class.base_url}generated/navigation.json" do |response| - data = JSON.parse(response.body) - dig = ->(entry) do - initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api' - entry['children'].each(&dig) if entry['children'] - end - data['SideNav'].each(&dig) - end + def initial_urls + initial_urls = [] - Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response| - data = JSON.parse(response.body) - dig = ->(entry) do - initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path'] - initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path'] - entry['items'].each(&dig) if entry['items'] - end - data.each(&dig) + Request.run "#{self.class.base_url}generated/navigation.json" do |response| + data = JSON.parse(response.body) + dig = ->(entry) do + initial_urls << url_for("generated/docs/#{entry['url']}.json") if entry['url'] && entry['url'] != 'api' + entry['children'].each(&dig) if entry['children'] end - - initial_urls + data['SideNav'].each(&dig) end - def handle_response(response) - if response.mime_type.include?('json') - begin - response.options[:response_body] = JSON.parse(response.body)['contents'] - rescue JSON::ParserError - response.options[:response_body] = '' - end - response.headers['Content-Type'] = 'text/html' - response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json') - response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json') + Request.run "#{self.class.base_url}generated/docs/api/api-list.json" do |response| + data = JSON.parse(response.body) + dig = ->(entry) do + initial_urls << url_for("generated/docs/#{entry['path']}.json") if entry['path'] + initial_urls << url_for("generated/docs/api/#{entry['name']}.json") if entry['name'] && !entry['path'] + entry['items'].each(&dig) if entry['items'] end - super + data.each(&dig) end - end - version do - self.release = '6.3.3' - self.base_url = 'https://rxjs.dev/' - self.root_path = 'guide/overview' - - html_filters.push 'rxjs/clean_html', 'rxjs/entries' - - options[:follow_links] = false - options[:only_patterns] = [/\Aguide/, /\Aapi/] - options[:fix_urls_before_parse] = ->(url) do - url.sub! %r{\Aguide/}, '/guide/' - url.sub! %r{\Aapi/}, '/api/' - url.sub! %r{\Agenerated/}, '/generated/' - url + initial_urls.select do |url| + options[:only_patterns].any? { |pattern| url =~ pattern } && + options[:skip_patterns].none? { |pattern| url =~ pattern } end - - include Docs::Rxjs::Common end - private + def handle_response(response) + if response.mime_type.include?('json') + begin + response.options[:response_body] = JSON.parse(response.body)['contents'] + rescue JSON::ParserError + response.options[:response_body] = '' + end + response.headers['Content-Type'] = 'text/html' + response.url.path = response.url.path.sub('/generated/docs/', '/').remove('.json') + response.effective_url.path = response.effective_url.path.sub('/generated/docs/', '/').remove('.json') + end + super + end def parse(response) response.body.gsub! 'Z~!tni*7$tmRtL~dU*gFMgdVVKo?tzst4BC z0#AGa`vjnWSn}CfQviAS%o*O!tbl^60Zl^?1vMZk0}&J{=~zWkKyf1tTr##&_yj;* zfTVJ`Q(!rg%c0qXm?*?PgCht3R?t>x42bT=eu~u;NgR?XDhvqQgUb|?DYRYCHbZj@ zVits4a2eSAW=Ebvd;#e$q>~V`A>=_UkVz#nav!7uiKAgbL7G7_ARGr@BX2NF%Fde3PAEfo)~?=19(M1SOKya!d+1K`&|odfO#_~-U1jV62uxFm3ot#s`}H4h<3;Hn{p zgUbSU4BQd$=fU3wUjnWcd@FeB20jze`nKp2mOW{~o7Psne`2WiTB! zr}%Y!N!E(Nz?`x+M)XSWESIa<2d(y0=2L3uFwk2Q%q|)zZPG0i?zD7QtsQ&4F~Ho^ z$~|y4M^qi`!V$709+#=3Ldj>R61v`&;!xuf&=v&~tTB&7$Cr636KlGP6Mu z`rQjGQw=olQ0$1egK`?Hb6qebt<%udQ5+v%*xlf#K;Q7<4kn~o?`Z zY>=CigZF?tvOcos1|x@j#D^nCeKh)G#U~~#H68P*&1a>b+uA9|jvGJ0j4^yMagzNX zlc(IfKlRJ6zBVz3!?dyoc4plyui5aP^WEHs zKJ(@;Sh#5MqwjtFu;g)lgMUDvMNsh4Wy?cWti-A(p-;nBzZD+Qn~7DeS&Mbg8h==C zxuFmGZrt?F=EyCG+S(Mo?Omqmy_olZ{3&)j;&wc5-nlEjpW389#|t9C07=uL)g>f; zusbPv&)$@M7?`^M=d=R{52YXe<;c-v8OL#AP-fQ2Q`x7_{F-z2-1!T+82sCiyo;Cg zms_t~y_SFd#!$fOruD7c1-};-72mmAGHiIFD!hJ(qu)Me#7LF1k1D`V6=?4t9>@Ze zwswtBGCp#jT+*8c}U`41q`E9N=_z|Gmy JDPx*%^xvy8X{i7J literal 5356 zcmV zaB^>EX>4U6ba`-PAZ2)IW&i+q+O1bvk}M|-{O1%t0w5OYIII!AfsgM5MAb~cd5*Bd zT{Be_%_3z=DcX#`{+#9yd_4JJLd={bC5umxAiki%_I%FM*|6{WVsGPHzj?m!=psDn zyr!Y!3;N?bAqVZR-}_B&x(@3?cV%6W*3KRc9ml#5`P1+8-H6UB;fp)^-^O0TI$iYh zJ!F8^4FltlgczeKCj4_Uq>hc5`-~a~$tVNI)tXb?UmZTNqdYCGBYmSH^!tJMT+y zF#n%z#(p-}U}EsYXFR=co^~Ho_Tt+nC9KO$pGl}2Fyg6! zHSj_HvBm9uW$)J>se^d6}rw@ez1aFegz=RSj??EMu8AF z71Ox!-Iw_iH}(s$1P0@Vnc-lu^(;}Ndt-|>&xCQ&24hcZxZHICK!m*`jBx}8Y(jXu zQ(!ZykP3JDX3^x!;BaArG z$b*NBG7)Q&W|(oNnI}(~EVD6t;bIRIcEbMXlWewdq-zD6VlejnoV?H=A zo)-fMXumkK>_YI1xy6|kkEDnmthu25Iw2E%YZPWN#4GWUgs{w#UR(rOz|BdT_|aXER#!@k|c(v_cc|$MzxAF2KC>Ub_?Z=KV3Q z4V2?zZrfc$t(?J)(L9|nlnz)BCRdL^$er_TYs1qpRk>|M6LtL!00pa8L_oZ=iFCtJ zX`pqyMBs@DsU9F2Lv00HZm&s3v+dZ z?a?>~g?h|I1#dxgT58Lz4M2d99IM22D20ZLH(1)L|H-PCb)C3gP-7VB`T%Q@8c{>I zH+YZ;db0v@0f{UL%C)T8bWjE$U!wa$&rcsq*6lKx9{fC3iyMMt#c4~?w17y$hZCno zL$h#$Q+r^+YP=R+=j?=X0sC)~EoL+)aonksmgcRmDo=*s9ymt?B{pd)G&bo1wDecK zSQUG9vNTSQy+B5XAj)9dS+8iO8blTnK=b2uU-(1>G+0{iPjsO^s1Zy_0RqCpMj;0A zC@dFk&4_os1@S|pKx?`>&yZ&umau)um&W2m!I=U1;-747*%>-Au!GO|knAFx>S9^L2y_TUM(t$ zcv(fpUVNH;ju608E%Nt};I8#Xp#xP<*uO#fjpZAZmT#t5J+0wqZZ**KORy7I^@!HT z(@j!g>(Hnc=mrd*B4w7tIG(73np>oG1Sf@XVSemDX&D-!*Y@JAIpH6xta_trfk@1# z(odq|bS>s@Qutrc8(yNI000UxX+uL$Nkc;*P*P7uNlZlm0C=38mUmQB*%pV-y*Is3 zk`RiN&}(Q?0!R(LNRcioF$oY#z>okUHbhi#L{X8Z2r?+(fTKf^u_B6v0a3B*1Q|rs zac~qHmPur-8Q;8l@6DUvANPK1pS{oBXYYO1x&V;;g9XA&SP6g(p;#2*=f#MPi)Ua5 z0Sxc}18e}`aI>>Q7WhU2nF4&+jBJ?`_!qsp4j}paD$_rV!2tiCl(|_VF#u4QjOX(B z*<2YH$v8b%oF%tU$(Xh@P0lb%&LUZYGFFpw@+@0?_L*f5IrB1vJQ>S#&f;b8cV}o=_hCs$|GJ-ARc>v%@ z$zSl&FIdda6Uz_9&dgda5+tXH875p)hK-XGi{a1DP3Mcn%rFi&jU(bQ*qIqw9N}^R zX3zXt6nSkKvLZX!I5{{lZ7prSDAa#l{F{>Zc9vd*f9@GXANa%eSALld0I;TIwb}ZI zZD|z%UF!i*yZwjFU@riQvc7c=eQ_STd|pz-;w)z?tK8gNO97v2DKF^n`kxMeLtlK) zQoh~qM8wF>;&Ay4=AVc79|!(*9u^V&B)*6*lto0#rc5AA zmbF{R6Nm+wLWV&2pPKj&!~Ue%xt59A_z}>SSOTRX8bE#?04OREAPIY9E70$K3&uwS z`OS;bnV6mX&w~DaSGY|6$QC4jj$=neGPn{^&g`1}S^_j607XCp>OdRl0~5dmw!jg% z01w~;0zoK<1aV+7;DQv80Yo4d6o9p$7?gsoU?->sb)XS6gEnv&bb({wG&lz?fy-b7 z+yPQB4xWH1@CwX85QK%u5EW8~bRa{>9I}O2kQ?L!1w#=~9FzzpLqbRb6+r8tQm7oN zhU%ea=v(M0bQ-z<4MVq}QD_qS6?z9FFbSr?TCfpp1+!pJI0%k}7s1K!GB_VDg15kx za07f0?u1Xnm*5dt3O|9T5r7a8I--j(5f;KmLXmhR2@xTykP@TC z$XgT!MMW`COq2`C9~Fh-qL!gnp*EwcQ3p_+s6NzH)F^5S^$|@*Yog83&gcMiEIJvT zi!Mf2pqtPg=(Fe%^f>wz27{qvj4_TFe@q-E6|(}f8M7PHjyZ)H#*AU6u~@7+)*S1K z4aIV>Vr((C3VRTH5_<(Zj(vk8;&gDfIA2^mPKYbSRp451CvaDA6Sx_?65bH+j1R^0 z@XPUK_(psWeh5E~pCKp{j0vuUNJ1)MEuoUoMmS5jOL##f67`5q#Bid3xQ19sJVZQC z93{RbQAlPaHYtH5A#EY;C!HeQBE2A!$wp)kay(f~-a>9BpCR8TzfqtnSSkc4@Dx@n z)F^Z+Tv2$Yh*vaJ^i*7|n6Fr&ctmkX@u?DC$w-N<#8FzMRHJlM>4ws@GF90|IaE1A zd9!kh@&)Bb6fDJv;zQw4iYWUiXDDM-gsM+vQ@PZ2)JE!A>NpKUGo}U5QfZ~MZ)k(G zDHV!}ol3Myo=T0%aTO^Yp&QWy=;`z_`eFKY`a4xERZmsE>L%4T)hnv6)#j*qsPWZG z)Y{cX)ZVEx)P2;`)VHa3so&E;X_#q*YvgL|(KxH|bPjEf%N*{Uk~xRx+}4CO%`_u4 zS7`3j9MGKB($@0R%F?RRI-~Veo38DlovOV<`-JwS4pqlZN1(Gq=cLYKh6=-zkLZ@rEqJ z6vJJH{f4iNjE!Q9HW+moJu+4^4lvF)ZZ*DZLN;+XS!U8;a?KQD$}&we-EDf=3^ubj zOEIf48#0H@9n1yhyUm9!&=yV>LW>5A8%z?@lbOS8WsX|XErTr!ExRnASs7TxTWz!I zxB6&pZ=G)4Xnn_qViRanXwzf!tF4(W*S5y?+FbHn-?^*jcF%ooXKu&0+hcdro@yUr zzrnuO{)2;~gUF%HVbamSG10Ns@dk^=3S(_%op(Yzc{#0iI_C7&*}+-teAxLH7p6;^ zON+~+dB*ej^BU)kx$3!cTZVb0Xx4mvscU^amdxQG}4}A}wN0Y~dr>SSE=RwbB zUe;bBuMV%*Y-jdL_9<_~+t0hid(emC6XjFwbKh6bH`%w{ z0a^jvfaZXyK*zw9fqg-wpantIK@Wn>fV8I2F~=-fTgudr?_nHF76Ya2X6;&lJCkd=T9WLCY2{WN_I`&o;;c2 zo>GzWRKONg3!bO?r`DyuP76)jpY|y|CcQlamywupR7eq~3Hvg&GxIWsv&^%Kv!u(M zm+f3OB?=NXWkcDEvb)7J+0WE~#6+@QGMeL-QhTd=lZbfxFY`c=@XrK@^Z>#r_aJ-)_o&4IOqwP|aAD6}ptFMPQ!W?fH_ zR?(WGvGsoITZV0)e^+=6ZO?$0o?WWq-yLr2>?D5#sR;N{0TK8_RVDHU(zxvJwqlSuo zn0-0>9yUfd_J7U#y17ZCskG_Ce&K%UfrtZr&5q5@Et)N5t#GTPb@E`s!OP!xf79K@ zY^!glx0fCQha`s{f1CL2^}|7jdylY=w0&pzU2O-oqofn+T;4g=mC_~cj_V#i8hEs~ z$EBy^d&}?lAJaWnb6n+k*$Kjlq7$D^=AWECm38Xr>EzR6y-RxUoQXYituMT9@NCf8 z^XGieo$2@NKY8Bu{ILtp7mi+JUF^E#aH(^^exTzA`yV<69R@px9EZ9uJ6-M>o;Q5r ziu;w*SG}*EyB2Wm(#ZUg;pqt>?FMZqM9Va~FNLGD$ zlbNT*KP&%S`^@CocfWZ2GB6c8HU3=m{L`|I+Sd?{wJo{Z|>UW?q-PQGavb zE$eOnyO?(qGr8}v?<+r;e(3oa^zrVej8C6_1NVgU`<=UGpa1{>24YJ`L;(K){{a7> zy{D4^000SaNLh0L01m?d01m?e$8V@)00007bV*G`2jUD55&|V`TP&pj00RU`L_t(I z%WacOh}C5rhM(W}egFT=xz3q!oSeKwBEqB~X4GN|ClJYGHqj7Wa1lWlLI_#~;xtgW za#LZ8785a8O6jVLK_)qE0w)ob(h(AAnsFSD$8+YK|K_$rn{VNC}B?x`rapI z;wb@eb{&;(*k zXu@+%d*j$v=_h6J+jh3{t)Tvq4BgU(N}K%W-(gdV+30>CJ^K{q@d}DVG{qi{?ZB~D z^S$BUrl-3Ob%ukPCc1bUMtM8EC>7uRN}W#AZNiR4codF za4A{zI>YX7jRJwOsq4irG-cMP20M_9yokCKOsRcGTjf3pdos7mk<^ zM(z8T&T-(lto`^UR?Mw1DTyhlaf<8|>>*ExMAs-ROpB&xj|+ZxhMEL(ra*K)hwF8+ ziEC&Y;ofawcP)@@ULrrAQ|h@+YxkWmlqMu=_m-TRF9hA?37TQrIocI4iw*Q}9i6SC z&(;aUZJgY~zfiFCFXiqVOFs(9C4=wm1@n$VUk5MCXh;xAu+l=LfzI9}yj-$Nr$_Sr z;hq4XZ$h{|B>4Ffs!xO2q1bg9u!O(bAbgXE`8s#CWNZ4O0{jh$WL{&SUMK4S0000< KMNUMnLSTX!`8aC; diff --git a/public/icons/docs/rxjs/SOURCE b/public/icons/docs/rxjs/SOURCE index 536eb88a..2a3b3084 100644 --- a/public/icons/docs/rxjs/SOURCE +++ b/public/icons/docs/rxjs/SOURCE @@ -1 +1 @@ -http://reactivex.io/ +https://github.com/ReactiveX/reactivex.github.io/blob/develop/favicon.ico