From 9d60cc80f5cf0cc67b5153fe2a28dd217c47c43e Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Tue, 25 Sep 2018 22:29:26 -0400 Subject: [PATCH 01/10] Add basic scraper and friends that doesn't blow up --- lib/docs/filters/salt_stack/clean_html.rb | 9 +++++++++ lib/docs/filters/salt_stack/entries.rb | 13 +++++++++++++ lib/docs/scrapers/salt_stack.rb | 14 ++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 lib/docs/filters/salt_stack/clean_html.rb create mode 100644 lib/docs/filters/salt_stack/entries.rb create mode 100644 lib/docs/scrapers/salt_stack.rb diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb new file mode 100644 index 00000000..ac53a94c --- /dev/null +++ b/lib/docs/filters/salt_stack/clean_html.rb @@ -0,0 +1,9 @@ +module Docs + class SaltStack + class CleanHtmlFilter < Filter + def call + doc + end + end + end +end diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb new file mode 100644 index 00000000..dda9871d --- /dev/null +++ b/lib/docs/filters/salt_stack/entries.rb @@ -0,0 +1,13 @@ +module Docs + class SaltStack + class EntriesFilter < Docs::EntriesFilter + def get_name + at_css('h1').content + end + + def get_type + 'TODO' + end + end + end +end diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb new file mode 100644 index 00000000..3196a18a --- /dev/null +++ b/lib/docs/scrapers/salt_stack.rb @@ -0,0 +1,14 @@ +module Docs + class SaltStack < UrlScraper + self.release = '2018.3.2' + self.base_url = 'https://docs.saltstack.com/en/latest/ref/' + + html_filters.push 'salt_stack/entries', 'salt_stack/clean_html' + + options[:container] = '.body-content' + + options[:attribution] = <<-HTML + © 2018 SaltStack. All Rights Reserved, SaltStack Inc. + HTML + end +end From b7075dd51ac8c3c21046fbf293b744e75a19aa60 Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Tue, 25 Sep 2018 23:46:15 -0400 Subject: [PATCH 02/10] Implement working crawling and section building for salt --- lib/docs/filters/salt_stack/clean_html.rb | 2 ++ lib/docs/filters/salt_stack/entries.rb | 19 +++++++++++++++++-- lib/docs/scrapers/salt_stack.rb | 7 ++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb index ac53a94c..0f084519 100644 --- a/lib/docs/filters/salt_stack/clean_html.rb +++ b/lib/docs/filters/salt_stack/clean_html.rb @@ -2,6 +2,8 @@ module Docs class SaltStack class CleanHtmlFilter < Filter def call + css('.headerlink').remove + doc end end diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb index dda9871d..51bf17d2 100644 --- a/lib/docs/filters/salt_stack/entries.rb +++ b/lib/docs/filters/salt_stack/entries.rb @@ -1,12 +1,27 @@ module Docs class SaltStack class EntriesFilter < Docs::EntriesFilter + SALT_REF_RGX = /salt\.([^\.]+)\.([^\s]+)/ + def get_name - at_css('h1').content + header = at_css('h1').content + + ref_match = SALT_REF_RGX.match(header) + if ref_match + ns, mod = ref_match.captures + "#{ns}.#{mod}" + else + header + end end def get_type - 'TODO' + type, _ = slug.split('/', 2) + type + end + + def include_default_entry? + !subpath.end_with?('index.html') end end end diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index 3196a18a..a4c974b2 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -1,9 +1,14 @@ module Docs class SaltStack < UrlScraper + self.type = 'salt_stack' self.release = '2018.3.2' self.base_url = 'https://docs.saltstack.com/en/latest/ref/' - html_filters.push 'salt_stack/entries', 'salt_stack/clean_html' + html_filters.push 'salt_stack/clean_html', 'salt_stack/entries' + + options[:only_patterns] = [ + %r{[^/]+/all/} + ] options[:container] = '.body-content' From e69e15e04a0491e6ce3619d5a1af57a38e516581 Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Wed, 26 Sep 2018 00:02:35 -0400 Subject: [PATCH 03/10] Add salt links --- lib/docs/scrapers/salt_stack.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index a4c974b2..90df1767 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -3,6 +3,10 @@ module Docs self.type = 'salt_stack' self.release = '2018.3.2' self.base_url = 'https://docs.saltstack.com/en/latest/ref/' + self.links = { + home: 'https://www.saltstack.com/', + code: 'https://github.com/saltstack/salt' + } html_filters.push 'salt_stack/clean_html', 'salt_stack/entries' From 55023390c1b771730080f1cab0e8ed592d87f7cf Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Wed, 26 Sep 2018 00:20:43 -0400 Subject: [PATCH 04/10] Include indexes in docs --- lib/docs/filters/salt_stack/entries.rb | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb index 51bf17d2..ff49a8f7 100644 --- a/lib/docs/filters/salt_stack/entries.rb +++ b/lib/docs/filters/salt_stack/entries.rb @@ -19,10 +19,6 @@ module Docs type, _ = slug.split('/', 2) type end - - def include_default_entry? - !subpath.end_with?('index.html') - end end end end From 63b3ef9a419a6a3cc2fe8004ae127df9e5062e75 Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Wed, 26 Sep 2018 20:12:50 -0400 Subject: [PATCH 05/10] Remove the index pages for refs --- lib/docs/filters/salt_stack/entries.rb | 7 +++++-- lib/docs/scrapers/salt_stack.rb | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb index ff49a8f7..23b8f46f 100644 --- a/lib/docs/filters/salt_stack/entries.rb +++ b/lib/docs/filters/salt_stack/entries.rb @@ -16,8 +16,11 @@ module Docs end def get_type - type, _ = slug.split('/', 2) - type + slug.split('/', 2).first + end + + def include_default_entry? + slug.split('/').last.start_with? 'salt' end end end diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index 90df1767..ee83a476 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -11,7 +11,7 @@ module Docs html_filters.push 'salt_stack/clean_html', 'salt_stack/entries' options[:only_patterns] = [ - %r{[^/]+/all/} + %r{^[^/]+/all/} ] options[:container] = '.body-content' From 925f458985bb2a659d59d09d22a00672b9acd73f Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Wed, 26 Sep 2018 20:14:56 -0400 Subject: [PATCH 06/10] Add apache2 license to attribution --- lib/docs/scrapers/salt_stack.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index ee83a476..ae4e47f3 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -17,7 +17,8 @@ module Docs options[:container] = '.body-content' options[:attribution] = <<-HTML - © 2018 SaltStack. All Rights Reserved, SaltStack Inc. + © 2018 SaltStack.
+ Licensed under the Apache License, Version 2.0. HTML end end From c5f382b0adff61454e9afbc576c8d5a11760f34b Mon Sep 17 00:00:00 2001 From: Boris Bera Date: Wed, 26 Sep 2018 20:27:51 -0400 Subject: [PATCH 07/10] Add salt logo --- public/icons/docs/salt_stack/16.png | Bin 0 -> 726 bytes public/icons/docs/salt_stack/16@2x.png | Bin 0 -> 1432 bytes public/icons/docs/salt_stack/SOURCE | 1 + 3 files changed, 1 insertion(+) create mode 100644 public/icons/docs/salt_stack/16.png create mode 100644 public/icons/docs/salt_stack/16@2x.png create mode 100644 public/icons/docs/salt_stack/SOURCE diff --git a/public/icons/docs/salt_stack/16.png b/public/icons/docs/salt_stack/16.png new file mode 100644 index 0000000000000000000000000000000000000000..ee2631f845b18128b013d30d8070ccdfcbb3b142 GIT binary patch literal 726 zcmV;{0xA88P)@~8FWQhbW?9;ba!ELWdL_~cP?peYja~^ zaAhuUa%Y?FJQ@H10#ivuK~y-6jgvoU6hRop-#2@=w|ma)%$%_>hBU?qHj+Y6EL0Fd zEky(c8$}36ENo2tgJmF!DE?{e1WOwYXm4Yoq!I)rCWaunc(>VIlkDB?eikO@c^dRJ z!#w8wzBdfx;4kXygD->{@m+UlHEm1*$dyt%%YJFpE4fEKzp${n+WZOt)RiS0$WtI1 zVsaj4S>_2rmjpNgc)GrnJO+>%qs}tZK(X0+&`wr@V6O?}or+TXI*CTBDXry+?v}>L zLq@C5jT&%-FWLd?glmRGH{8UW_!=a)0) zr#Zy7teIZxY^ap#0rG)DhnI#?bWcjTr9jzbt<@Ycc|ab3oH0i0z923$$TdMQ0^|Wu zxY!8*03=CL1#rU#!5IQ(Yf*KWucZA2q+)5s*!RyZ`_I07*qo IM6N<$f_+6d-2eap literal 0 HcmV?d00001 diff --git a/public/icons/docs/salt_stack/16@2x.png b/public/icons/docs/salt_stack/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..3fe90907409a82e389bf23d66322249968a32229 GIT binary patch literal 1432 zcmV;J1!ww+P)pF8FWQhbW?9;ba!ELWdL_~cP?peYja~^ zaAhuUa%Y?FJQ@H11rEBYZJL=%lyi$;{IRj&_K+S0T3qt&?Qtm&h! z)Vb%Lo?A6O_Znx6G1pmZ?zw@3K+1f6=G;l$INtjcRCu)rf1EL&HyJOPim`~yQsGsW zV0o)XYef9B%sjY1M^#llFG|Qh0O5ngXh}@4Ld367)ngATz|hc;Q}rW+_z8$cLtdUl zR2xoUWi7#GRX;K`G~`5hA!2Kk2(H<(Q$i>gHHzYuke73aunyMxOw}#Uvds5dbUhV! ziz;tRg`aoM9o|mx-p>}{O)9)sgx{R$-0);#gZ=$Ss_+Zy`DYQnvFiqS?=M%+U!~&S zPE&XCWIR<~uEHVn{)%3!QXM!=g}+nrpHutdOy`E1HChDVxWwq*Mp3-kRZMszfWrU; zinymymLGQGGxMAp;YkqY0+_S^!l0^OD#E{{-ro+O-a|1Y!kf}`V7`dEUWI>1#jg~R z+j4Q(;R0^oj7oS`8b?tS?5K(kP0h0Gi#LMdU2?yq18u0FdX^ z%8-|Hjqo8Mu2*%7(=?TF{RH+U5zW|lIE$wNi~&Ghg}(+sG15gvQGDA=)vA68EZ$0> zy9}1@xAtcU#bW2&ioO~>Y-ny$;R=S$EyE>cQGPQ)O@*V1+n~a0MdZvL0)Uxks=8-X z_*WGUX*w{!^Dgnd>iNaVbjJSzguFV(U`q+{wN(7dP7Py!lx6vQ2-Tgfhzkkyj>Tk^ zh`X+xmzeD_qCMTT#_yr!d9|v4fagWn23Tu-D*X81;2<9$$d*Q4TyKcC5$FLGUZXm2 zYPa7^761USZQHgzd0sw|)S71*;i%@=pQEaNK|8Mbk~b3Y3xMC6&A!9BIji#3Dm)5cb=SLI z6vd|iE=W_p$|7D=<*&_>yYoE%9soq-3_^SgW(=~MLa5fN@D>0Q8r(kP{?lQ#r(h-^ z+#EvnUN6lCjYjzffOkarE<*e`6<%hBAs}7~!h<1HF9C=EU_h%zacP#hd+hH{GC0_O zRCBEMC=6GEa9;@3vT;7Ca141_AAfKlIOh(pC-j<;{WN|0z{2g@x9|RUl{^Z=TOiy3 z!|wp512_VJGhlO`m;ZWMO=%tgfU2r?Ah9pVl6CFo*tZ3dlDLehK1{$z0A>T&4WfrS z=}kF64~PH=XkKWrD?nJ@ikMex##cL$dpZMCgSE_(6)@~J)UM4#{uuzw>@Z9GPXUr$ z6yxiSqIw=ZoU20~z;SWa)~)!XSNKF1v@wCd>Sk&xN#trop2;-7QQ z4IOIZ@lhT|PZHt&2>}{K_1v^+(@!A6j(UwYsrnJ@Yaagz9Y*&j4(eiu>m8w_wMfegGt@e!-1Tr z>XxYYceFnj8MsV^zfke-PBtY?1tj9GQ{f-mr=<1(%zR=heq}1Wxzl9S4c)Hc0jDJI zeU=J8o+NlL!ZtMWa( Date: Tue, 13 Aug 2019 16:32:40 +0200 Subject: [PATCH 08/10] salt_stack: finish scraper and filters --- .../templates/pages/about_tmpl.coffee | 6 ++++ lib/docs/filters/salt_stack/clean_html.rb | 11 +++++++ lib/docs/filters/salt_stack/entries.rb | 13 +++++++- lib/docs/scrapers/salt_stack.rb | 32 +++++++++++++------ 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index 5e9dc6cc..dea5c4da 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -638,6 +638,12 @@ credits = [ 'MIT', 'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT' ], [ + 'Salt Stack', + '2019 SaltStack', + 'Apache', + 'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE' + ], + [ 'Sass', '2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein', 'MIT', diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb index 0f084519..8c5cb6ca 100644 --- a/lib/docs/filters/salt_stack/clean_html.rb +++ b/lib/docs/filters/salt_stack/clean_html.rb @@ -4,6 +4,17 @@ module Docs def call css('.headerlink').remove + css('div[class^="highlight-"]').each do |node| + node.name = 'pre' + node['data-language'] = node['class'].scan(/highlight-([a-z]+)/i)[0][0] + node.content = node.content.strip + end + + css('.function > dt').each do |node| + node.name = 'h3' + node.content = node.content + end + doc end end diff --git a/lib/docs/filters/salt_stack/entries.rb b/lib/docs/filters/salt_stack/entries.rb index 23b8f46f..d346fa29 100644 --- a/lib/docs/filters/salt_stack/entries.rb +++ b/lib/docs/filters/salt_stack/entries.rb @@ -16,12 +16,23 @@ module Docs end def get_type - slug.split('/', 2).first + slug.split('/', 3)[1] end def include_default_entry? slug.split('/').last.start_with? 'salt' end + + def additional_entries + entries = [] + + css('.function > h3').each do |node| + name = node.content.remove('salt.').split('(')[0] + '()' + entries << [name, node['id']] + end + + entries + end end end end diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index ae4e47f3..390dd7bb 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -1,8 +1,19 @@ module Docs - class SaltStack < UrlScraper - self.type = 'salt_stack' - self.release = '2018.3.2' - self.base_url = 'https://docs.saltstack.com/en/latest/ref/' + # The official documentation website is heavily rate-limited + # + # The documentation can be generated like this (replace 2019.2 with the correct tag): + # $ git clone https://github.com/saltstack/salt.git --branch 2019.2 --depth 1 + # $ cd salt/doc + # $ pip install sphinx + # $ make html + # + # The generated html can be found in salt/doc/_build/html + class SaltStack < FileScraper + self.type = 'simple' + self.slug = 'salt_stack' + self.release = '2019.2.0' + self.base_url = 'https://docs.saltstack.com/en/latest/' + self.root_path = 'ref/index.html' self.links = { home: 'https://www.saltstack.com/', code: 'https://github.com/saltstack/salt' @@ -10,15 +21,16 @@ module Docs html_filters.push 'salt_stack/clean_html', 'salt_stack/entries' - options[:only_patterns] = [ - %r{^[^/]+/all/} - ] - - options[:container] = '.body-content' + options[:only_patterns] = [/all\//] + options[:container] = '.body-content > .section' options[:attribution] = <<-HTML - © 2018 SaltStack.
+ © 2019 SaltStack.
Licensed under the Apache License, Version 2.0. HTML + + def get_latest_version(opts) + get_latest_github_release('saltstack', 'salt', opts) + end end end From da1200dbb4da5943ba7b7254abbaffca7604bc33 Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 13 Aug 2019 16:42:35 +0200 Subject: [PATCH 09/10] salt_stack: simplify root page --- lib/docs/filters/salt_stack/clean_html.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/docs/filters/salt_stack/clean_html.rb b/lib/docs/filters/salt_stack/clean_html.rb index 8c5cb6ca..37e2a3b6 100644 --- a/lib/docs/filters/salt_stack/clean_html.rb +++ b/lib/docs/filters/salt_stack/clean_html.rb @@ -2,6 +2,11 @@ module Docs class SaltStack class CleanHtmlFilter < Filter def call + if root_page? + doc.inner_html = '

SaltStack

' + return doc + end + css('.headerlink').remove css('div[class^="highlight-"]').each do |node| From 5dec35b914fc104f16a08df7d98e7800f8e5c3ed Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 13 Aug 2019 17:13:28 +0200 Subject: [PATCH 10/10] saltstack: salt_stack -> saltstack --- lib/docs/scrapers/salt_stack.rb | 2 +- public/icons/docs/{salt_stack => saltstack}/16.png | Bin .../icons/docs/{salt_stack => saltstack}/16@2x.png | Bin public/icons/docs/{salt_stack => saltstack}/SOURCE | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename public/icons/docs/{salt_stack => saltstack}/16.png (100%) rename public/icons/docs/{salt_stack => saltstack}/16@2x.png (100%) rename public/icons/docs/{salt_stack => saltstack}/SOURCE (100%) diff --git a/lib/docs/scrapers/salt_stack.rb b/lib/docs/scrapers/salt_stack.rb index 390dd7bb..3af77753 100644 --- a/lib/docs/scrapers/salt_stack.rb +++ b/lib/docs/scrapers/salt_stack.rb @@ -10,7 +10,7 @@ module Docs # The generated html can be found in salt/doc/_build/html class SaltStack < FileScraper self.type = 'simple' - self.slug = 'salt_stack' + self.slug = 'saltstack' self.release = '2019.2.0' self.base_url = 'https://docs.saltstack.com/en/latest/' self.root_path = 'ref/index.html' diff --git a/public/icons/docs/salt_stack/16.png b/public/icons/docs/saltstack/16.png similarity index 100% rename from public/icons/docs/salt_stack/16.png rename to public/icons/docs/saltstack/16.png diff --git a/public/icons/docs/salt_stack/16@2x.png b/public/icons/docs/saltstack/16@2x.png similarity index 100% rename from public/icons/docs/salt_stack/16@2x.png rename to public/icons/docs/saltstack/16@2x.png diff --git a/public/icons/docs/salt_stack/SOURCE b/public/icons/docs/saltstack/SOURCE similarity index 100% rename from public/icons/docs/salt_stack/SOURCE rename to public/icons/docs/saltstack/SOURCE