From b45090f369bc8fa7681237eca0def4c730f5f5a2 Mon Sep 17 00:00:00 2001 From: Emil Maruszczak Date: Mon, 29 Apr 2019 00:57:50 +0200 Subject: [PATCH 1/7] Add Mkdocs abstract scraper --- assets/stylesheets/application.css.scss | 1 + assets/stylesheets/pages/_mkdocs.scss | 15 +++++++++++++++ lib/docs/filters/mkdocs/clean_html.rb | 18 ++++++++++++++++++ lib/docs/scrapers/mkdocs.rb | 5 +++++ 4 files changed, 39 insertions(+) create mode 100644 assets/stylesheets/pages/_mkdocs.scss create mode 100644 lib/docs/filters/mkdocs/clean_html.rb create mode 100644 lib/docs/scrapers/mkdocs.rb diff --git a/assets/stylesheets/application.css.scss b/assets/stylesheets/application.css.scss index 2a64e5c9..d2005cb2 100644 --- a/assets/stylesheets/application.css.scss +++ b/assets/stylesheets/application.css.scss @@ -71,6 +71,7 @@ 'pages/lua', 'pages/mdn', 'pages/meteor', + 'pages/mkdocs', 'pages/modernizr', 'pages/moment', 'pages/nginx', diff --git a/assets/stylesheets/pages/_mkdocs.scss b/assets/stylesheets/pages/_mkdocs.scss new file mode 100644 index 00000000..e70ff66c --- /dev/null +++ b/assets/stylesheets/pages/_mkdocs.scss @@ -0,0 +1,15 @@ +%mkdocs { + h2 { @extend %block-heading; } + h3 { @extend %block-label, %label-blue; } + h4 { @extend %block-label; } + + blockquote { @extend %note; } + + strong { font-weight: var(--bolderFontWeight); } + + p > code, li > code { @extend %label; } +} + +._mkdocs { + @extend %mkdocs; +} diff --git a/lib/docs/filters/mkdocs/clean_html.rb b/lib/docs/filters/mkdocs/clean_html.rb new file mode 100644 index 00000000..c346d23e --- /dev/null +++ b/lib/docs/filters/mkdocs/clean_html.rb @@ -0,0 +1,18 @@ +module Docs + class Mkdocs + class CleanHtmlFilter < Docs::Filter + def call + css('.toclink').each do |node| + node.parent.content = node.content + node.remove + end + + css('pre').each do |node| + node.content = node.at_css('code').content + end + + at_css('#main-content') + end + end + end +end diff --git a/lib/docs/scrapers/mkdocs.rb b/lib/docs/scrapers/mkdocs.rb new file mode 100644 index 00000000..c0f5d5e5 --- /dev/null +++ b/lib/docs/scrapers/mkdocs.rb @@ -0,0 +1,5 @@ +module Docs + class Mkdocs < Scraper + self.abstract = true + end +end From 14623be6285120a78c9d4f33cd831e48600efa6f Mon Sep 17 00:00:00 2001 From: Emil Maruszczak Date: Mon, 29 Apr 2019 01:02:25 +0200 Subject: [PATCH 2/7] Add Django Rest Framework scrapper --- lib/docs/filters/rest_framework/clean_html.rb | 36 +++++++++++ lib/docs/filters/rest_framework/entries.rb | 60 ++++++++++++++++++ lib/docs/scrapers/rest_framework.rb | 37 +++++++++++ public/icons/docs/rest_framework/16.png | Bin 0 -> 1166 bytes public/icons/docs/rest_framework/16@2x.png | Bin 0 -> 4254 bytes public/icons/docs/rest_framework/SOURCE | 1 + 6 files changed, 134 insertions(+) create mode 100644 lib/docs/filters/rest_framework/clean_html.rb create mode 100644 lib/docs/filters/rest_framework/entries.rb create mode 100644 lib/docs/scrapers/rest_framework.rb create mode 100644 public/icons/docs/rest_framework/16.png create mode 100644 public/icons/docs/rest_framework/16@2x.png create mode 100644 public/icons/docs/rest_framework/SOURCE diff --git a/lib/docs/filters/rest_framework/clean_html.rb b/lib/docs/filters/rest_framework/clean_html.rb new file mode 100644 index 00000000..007b33b2 --- /dev/null +++ b/lib/docs/filters/rest_framework/clean_html.rb @@ -0,0 +1,36 @@ +module Docs + class RestFramework + class CleanHtmlFilter < Docs::Filter + def call + css('hr').remove + + css('.badges').each do |node| + node.remove + end + + css('pre').each do |node| + node['data-language'] = 'python' + end + + css('h1').each do |node| + node['style'] = nil + end + + # Translate source files links to DevDocs links + links = Nokogiri::XML::Node.new('p', doc) + links['class'] = '_links' + + css('a.github').each do |node| + span = node.at_css('span') + node.content = span.content + span.remove + node['class'] = '_links-link' + links.add_child(node) + end + doc.add_child(links) + + doc + end + end + end +end diff --git a/lib/docs/filters/rest_framework/entries.rb b/lib/docs/filters/rest_framework/entries.rb new file mode 100644 index 00000000..53b2fce0 --- /dev/null +++ b/lib/docs/filters/rest_framework/entries.rb @@ -0,0 +1,60 @@ +module Docs + class RestFramework + class EntriesFilter < Docs::EntriesFilter + + def get_name + name = css('h1').first.content + name.slice! 'Tutorial ' + name = '0: ' + name if name.include? 'Quickstart' + name + end + + def get_type + case subpath + when /\Atutorial/ + 'Tutorial' + when /\Aapi-guide/ + 'API Guide' + end + end + + def additional_entries + return [] if type == nil || type == 'Tutorial' + + # Framework classes are provided in two different ways: + # - as H2's after H1 category titled: + accepted_headers = ['API Reference', 'API Guide'] + # - as headers (1 or 2) with these endings: + endings = ['Validator', 'Field', 'View', 'Mixin', 'Default', 'Serializer'] + + # To avoid writing down all the endings + # and to ensure all entries in API categories are matched + # two different ways of finding them are used + + entries = [] + + local_type = 'Ref: ' + name + in_category = false + + css('h1, h2').each do |node| + # Third party category contains entries that could be matched (and shouldn't be) + break if node.content === 'Third party packages' + + if in_category + if node.name === 'h1' + in_category = false + next + end + entries << [node.content, node['id'], local_type] + elsif accepted_headers.include? node.content + in_category = true + elsif endings.any? { |word| node.content.ends_with?(word) } + entries << [node.content, node['id'], local_type] + end + end + + entries + end + end + end +end diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/rest_framework.rb new file mode 100644 index 00000000..fa64b080 --- /dev/null +++ b/lib/docs/scrapers/rest_framework.rb @@ -0,0 +1,37 @@ +module Docs + class RestFramework < UrlScraper + self.name = 'Django REST Framework' + self.release = '3.9.2' + self.slug = 'rest_framework' + self.type = 'mkdocs' + self.base_url = 'https://www.django-rest-framework.org/' + self.root_path = 'index.html' + self.links = { + home: 'https://www.django-rest-framework.org/', + code: 'https://github.com/encode/django-rest-framework' + } + + html_filters.push 'mkdocs/clean_html', 'rest_framework/clean_html', 'rest_framework/entries' + + options[:skip_patterns] = [ + /\Atopics\//, + /\Acommunity\//, + ] + + options[:attribution] = <<-HTML + Copyright 2011–present Encode OSS Ltd
+ Licensed under the BSD License. + HTML + + private + + def handle_response(response) + # Some scrapped urls don't have ending slash + # which leads to page duplication + if !response.url.path.ends_with?('/') && !response.url.path.ends_with?('index.html') + response.url.path << '/' + end + super + end + end +end diff --git a/public/icons/docs/rest_framework/16.png b/public/icons/docs/rest_framework/16.png new file mode 100644 index 0000000000000000000000000000000000000000..e2e33539d613144de4e7e708cf760355cdc835e5 GIT binary patch literal 1166 zcmb7Du}T9$6nrN~Vj_W9s1OfC?8QpOCMXw@Q;Z^lhzQ0H5J3@ZJ6o;AfCM|i#zL@2 zYas~!fnsYRVj(2Xc(>>6?G`%W-R{o3H~Tir9xvCcg?uS5B88yluV5@PdNcTrymuQI zGTRGPU%LD??=DZly(#h$t>eKkh>FjVF^`}T!NMCvBW=<{H3jaRET&8&a?~`~ z4A{2XIhjmO9VzB*2A@znamMe295oF#gBCJgwXASKj+zFW0TXu+F9`0+;GCRcY}^8p z1UEtB5@LgE;HN9xfN?Zi2Vuz{zeWeq?n|gYL9h!#9qT&Xg{=b?vDiaUXKB literal 0 HcmV?d00001 diff --git a/public/icons/docs/rest_framework/16@2x.png b/public/icons/docs/rest_framework/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..e4ea76baed6b8a7a5b5b2d4157d5294e4bf72b93 GIT binary patch literal 4254 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI z1_nK45N51cYF`EvWH0gbb!C6dA|o!MdcMQDhk-#r+tbA{B!ZJsLE!#-po)P7jO19q zm;vZ*EFhMFfn@VY(tyiibhnV=5TYGGtmQDr5bFpgn1+F(i3$NyeGl^tF`+QvAu#HI z0S^J95+Nv2k(v;RNq``8hfWBP5(>on9%LrbQ2M6Q1&(!1 Date: Thu, 2 May 2019 17:23:56 +0200 Subject: [PATCH 3/7] Use Mkdocs as base for RestFramework --- lib/docs/scrapers/mkdocs.rb | 16 +++++++++++++++- lib/docs/scrapers/rest_framework.rb | 15 ++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/docs/scrapers/mkdocs.rb b/lib/docs/scrapers/mkdocs.rb index c0f5d5e5..20559863 100644 --- a/lib/docs/scrapers/mkdocs.rb +++ b/lib/docs/scrapers/mkdocs.rb @@ -1,5 +1,19 @@ module Docs - class Mkdocs < Scraper + class Mkdocs < UrlScraper self.abstract = true + self.type = 'mkdocs' + + html_filters.push 'mkdocs/clean_html' + + private + + def handle_response(response) + # Some scrapped urls don't have ending slash + # which leads to page duplication + if !response.url.path.ends_with?('/') && !response.url.path.ends_with?('index.html') + response.url.path << '/' + end + super + end end end diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/rest_framework.rb index fa64b080..16e85449 100644 --- a/lib/docs/scrapers/rest_framework.rb +++ b/lib/docs/scrapers/rest_framework.rb @@ -1,5 +1,5 @@ module Docs - class RestFramework < UrlScraper + class RestFramework < Mkdocs self.name = 'Django REST Framework' self.release = '3.9.2' self.slug = 'rest_framework' @@ -11,7 +11,7 @@ module Docs code: 'https://github.com/encode/django-rest-framework' } - html_filters.push 'mkdocs/clean_html', 'rest_framework/clean_html', 'rest_framework/entries' + html_filters.push 'rest_framework/clean_html', 'rest_framework/entries' options[:skip_patterns] = [ /\Atopics\//, @@ -22,16 +22,5 @@ module Docs Copyright 2011–present Encode OSS Ltd
Licensed under the BSD License. HTML - - private - - def handle_response(response) - # Some scrapped urls don't have ending slash - # which leads to page duplication - if !response.url.path.ends_with?('/') && !response.url.path.ends_with?('index.html') - response.url.path << '/' - end - super - end end end From 6c12d53a465e47da1ab7e6325862eef4f7c666af Mon Sep 17 00:00:00 2001 From: Emil Maruszczak Date: Thu, 2 May 2019 17:25:34 +0200 Subject: [PATCH 4/7] Cleanup redundant bits --- assets/stylesheets/pages/_mkdocs.scss | 6 +----- lib/docs/filters/mkdocs/clean_html.rb | 1 - lib/docs/filters/rest_framework/clean_html.rb | 13 +++---------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/assets/stylesheets/pages/_mkdocs.scss b/assets/stylesheets/pages/_mkdocs.scss index e70ff66c..e374474c 100644 --- a/assets/stylesheets/pages/_mkdocs.scss +++ b/assets/stylesheets/pages/_mkdocs.scss @@ -1,4 +1,4 @@ -%mkdocs { +._mkdocs { h2 { @extend %block-heading; } h3 { @extend %block-label, %label-blue; } h4 { @extend %block-label; } @@ -9,7 +9,3 @@ p > code, li > code { @extend %label; } } - -._mkdocs { - @extend %mkdocs; -} diff --git a/lib/docs/filters/mkdocs/clean_html.rb b/lib/docs/filters/mkdocs/clean_html.rb index c346d23e..2eef9cdc 100644 --- a/lib/docs/filters/mkdocs/clean_html.rb +++ b/lib/docs/filters/mkdocs/clean_html.rb @@ -4,7 +4,6 @@ module Docs def call css('.toclink').each do |node| node.parent.content = node.content - node.remove end css('pre').each do |node| diff --git a/lib/docs/filters/rest_framework/clean_html.rb b/lib/docs/filters/rest_framework/clean_html.rb index 007b33b2..87d048b9 100644 --- a/lib/docs/filters/rest_framework/clean_html.rb +++ b/lib/docs/filters/rest_framework/clean_html.rb @@ -4,17 +4,11 @@ module Docs def call css('hr').remove - css('.badges').each do |node| - node.remove - end + css('.badges').remove - css('pre').each do |node| - node['data-language'] = 'python' - end + css('pre').attr('data-language', 'python') - css('h1').each do |node| - node['style'] = nil - end + css('h1').attr('style', nil) # Translate source files links to DevDocs links links = Nokogiri::XML::Node.new('p', doc) @@ -23,7 +17,6 @@ module Docs css('a.github').each do |node| span = node.at_css('span') node.content = span.content - span.remove node['class'] = '_links-link' links.add_child(node) end From cc87443c3d2738b8ec2d57784e4ec899a7473cc6 Mon Sep 17 00:00:00 2001 From: Emil Maruszczak Date: Tue, 14 May 2019 23:47:07 +0200 Subject: [PATCH 5/7] Remove duplicated type --- lib/docs/scrapers/rest_framework.rb | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/rest_framework.rb index 16e85449..d81c22ab 100644 --- a/lib/docs/scrapers/rest_framework.rb +++ b/lib/docs/scrapers/rest_framework.rb @@ -3,7 +3,6 @@ module Docs self.name = 'Django REST Framework' self.release = '3.9.2' self.slug = 'rest_framework' - self.type = 'mkdocs' self.base_url = 'https://www.django-rest-framework.org/' self.root_path = 'index.html' self.links = { From cbe38c8f362999cd67f4cb1dc060556abb79fdec Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 20 Aug 2019 12:01:41 +0200 Subject: [PATCH 6/7] django_rest_framework: finish scraper and filters --- .../javascripts/templates/pages/about_tmpl.coffee | 5 +++++ .../clean_html.rb | 7 ++++--- .../entries.rb | 5 ++--- .../django_rest_framework.rb} | 14 +++++++++----- lib/docs/scrapers/{ => mkdocs}/mkdocs.rb | 0 .../16.png | Bin .../16@2x.png | Bin .../SOURCE | 0 8 files changed, 20 insertions(+), 11 deletions(-) rename lib/docs/filters/{rest_framework => django_rest_framework}/clean_html.rb (82%) rename lib/docs/filters/{rest_framework => django_rest_framework}/entries.rb (95%) rename lib/docs/scrapers/{rest_framework.rb => mkdocs/django_rest_framework.rb} (56%) rename lib/docs/scrapers/{ => mkdocs}/mkdocs.rb (100%) rename public/icons/docs/{rest_framework => django_rest_framework}/16.png (100%) rename public/icons/docs/{rest_framework => django_rest_framework}/16@2x.png (100%) rename public/icons/docs/{rest_framework => django_rest_framework}/SOURCE (100%) diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index 5fc27d3c..b2df2c21 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -231,6 +231,11 @@ credits = [ 'Django Software Foundation and individual contributors', 'BSD', 'https://raw.githubusercontent.com/django/django/master/LICENSE' + ], [ + 'Django REST Framework', + '2011-present Encode OSS Ltd.', + 'BSD', + 'https://raw.githubusercontent.com/encode/django-rest-framework/master/LICENSE.md' ], [ 'Docker', '2019 Docker, Inc.
Docker and the Docker logo are trademarks of Docker, Inc.', diff --git a/lib/docs/filters/rest_framework/clean_html.rb b/lib/docs/filters/django_rest_framework/clean_html.rb similarity index 82% rename from lib/docs/filters/rest_framework/clean_html.rb rename to lib/docs/filters/django_rest_framework/clean_html.rb index 87d048b9..67c131bb 100644 --- a/lib/docs/filters/rest_framework/clean_html.rb +++ b/lib/docs/filters/django_rest_framework/clean_html.rb @@ -1,14 +1,14 @@ module Docs - class RestFramework + class DjangoRestFramework class CleanHtmlFilter < Docs::Filter def call css('hr').remove - css('.badges').remove css('pre').attr('data-language', 'python') - css('h1').attr('style', nil) + css('h1').remove_attribute('style') + css('.promo a').remove_attribute('style') # Translate source files links to DevDocs links links = Nokogiri::XML::Node.new('p', doc) @@ -20,6 +20,7 @@ module Docs node['class'] = '_links-link' links.add_child(node) end + doc.add_child(links) doc diff --git a/lib/docs/filters/rest_framework/entries.rb b/lib/docs/filters/django_rest_framework/entries.rb similarity index 95% rename from lib/docs/filters/rest_framework/entries.rb rename to lib/docs/filters/django_rest_framework/entries.rb index 53b2fce0..d583af9e 100644 --- a/lib/docs/filters/rest_framework/entries.rb +++ b/lib/docs/filters/django_rest_framework/entries.rb @@ -1,7 +1,6 @@ module Docs - class RestFramework + class DjangoRestFramework class EntriesFilter < Docs::EntriesFilter - def get_name name = css('h1').first.content name.slice! 'Tutorial ' @@ -46,7 +45,7 @@ module Docs next end entries << [node.content, node['id'], local_type] - elsif accepted_headers.include? node.content + elsif accepted_headers.include? node.content in_category = true elsif endings.any? { |word| node.content.ends_with?(word) } entries << [node.content, node['id'], local_type] diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/mkdocs/django_rest_framework.rb similarity index 56% rename from lib/docs/scrapers/rest_framework.rb rename to lib/docs/scrapers/mkdocs/django_rest_framework.rb index d81c22ab..ad88b2c7 100644 --- a/lib/docs/scrapers/rest_framework.rb +++ b/lib/docs/scrapers/mkdocs/django_rest_framework.rb @@ -1,8 +1,8 @@ module Docs - class RestFramework < Mkdocs + class DjangoRestFramework < Mkdocs self.name = 'Django REST Framework' - self.release = '3.9.2' - self.slug = 'rest_framework' + self.release = '3.9.3' + self.slug = 'django_rest_framework' self.base_url = 'https://www.django-rest-framework.org/' self.root_path = 'index.html' self.links = { @@ -10,7 +10,7 @@ module Docs code: 'https://github.com/encode/django-rest-framework' } - html_filters.push 'rest_framework/clean_html', 'rest_framework/entries' + html_filters.push 'django_rest_framework/clean_html', 'django_rest_framework/entries' options[:skip_patterns] = [ /\Atopics\//, @@ -18,8 +18,12 @@ module Docs ] options[:attribution] = <<-HTML - Copyright 2011–present Encode OSS Ltd
+ Copyright 2011–present Encode OSS Ltd.
Licensed under the BSD License. HTML + + def get_latest_version(opts) + get_latest_github_release('encode', 'django-rest-framework', opts) + end end end diff --git a/lib/docs/scrapers/mkdocs.rb b/lib/docs/scrapers/mkdocs/mkdocs.rb similarity index 100% rename from lib/docs/scrapers/mkdocs.rb rename to lib/docs/scrapers/mkdocs/mkdocs.rb diff --git a/public/icons/docs/rest_framework/16.png b/public/icons/docs/django_rest_framework/16.png similarity index 100% rename from public/icons/docs/rest_framework/16.png rename to public/icons/docs/django_rest_framework/16.png diff --git a/public/icons/docs/rest_framework/16@2x.png b/public/icons/docs/django_rest_framework/16@2x.png similarity index 100% rename from public/icons/docs/rest_framework/16@2x.png rename to public/icons/docs/django_rest_framework/16@2x.png diff --git a/public/icons/docs/rest_framework/SOURCE b/public/icons/docs/django_rest_framework/SOURCE similarity index 100% rename from public/icons/docs/rest_framework/SOURCE rename to public/icons/docs/django_rest_framework/SOURCE From a14ef388fa993ceebacfbae03eafce10aeb0f515 Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 20 Aug 2019 12:06:26 +0200 Subject: [PATCH 7/7] django_rest_framework: update attribution --- lib/docs/scrapers/mkdocs/django_rest_framework.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/docs/scrapers/mkdocs/django_rest_framework.rb b/lib/docs/scrapers/mkdocs/django_rest_framework.rb index ad88b2c7..db58eb8c 100644 --- a/lib/docs/scrapers/mkdocs/django_rest_framework.rb +++ b/lib/docs/scrapers/mkdocs/django_rest_framework.rb @@ -18,7 +18,7 @@ module Docs ] options[:attribution] = <<-HTML - Copyright 2011–present Encode OSS Ltd.
+ Copyright © 2011–present Encode OSS Ltd.
Licensed under the BSD License. HTML