From 14623be6285120a78c9d4f33cd831e48600efa6f Mon Sep 17 00:00:00 2001 From: Emil Maruszczak Date: Mon, 29 Apr 2019 01:02:25 +0200 Subject: [PATCH] Add Django Rest Framework scrapper --- lib/docs/filters/rest_framework/clean_html.rb | 36 +++++++++++ lib/docs/filters/rest_framework/entries.rb | 60 ++++++++++++++++++ lib/docs/scrapers/rest_framework.rb | 37 +++++++++++ public/icons/docs/rest_framework/16.png | Bin 0 -> 1166 bytes public/icons/docs/rest_framework/16@2x.png | Bin 0 -> 4254 bytes public/icons/docs/rest_framework/SOURCE | 1 + 6 files changed, 134 insertions(+) create mode 100644 lib/docs/filters/rest_framework/clean_html.rb create mode 100644 lib/docs/filters/rest_framework/entries.rb create mode 100644 lib/docs/scrapers/rest_framework.rb create mode 100644 public/icons/docs/rest_framework/16.png create mode 100644 public/icons/docs/rest_framework/16@2x.png create mode 100644 public/icons/docs/rest_framework/SOURCE diff --git a/lib/docs/filters/rest_framework/clean_html.rb b/lib/docs/filters/rest_framework/clean_html.rb new file mode 100644 index 00000000..007b33b2 --- /dev/null +++ b/lib/docs/filters/rest_framework/clean_html.rb @@ -0,0 +1,36 @@ +module Docs + class RestFramework + class CleanHtmlFilter < Docs::Filter + def call + css('hr').remove + + css('.badges').each do |node| + node.remove + end + + css('pre').each do |node| + node['data-language'] = 'python' + end + + css('h1').each do |node| + node['style'] = nil + end + + # Translate source files links to DevDocs links + links = Nokogiri::XML::Node.new('p', doc) + links['class'] = '_links' + + css('a.github').each do |node| + span = node.at_css('span') + node.content = span.content + span.remove + node['class'] = '_links-link' + links.add_child(node) + end + doc.add_child(links) + + doc + end + end + end +end diff --git a/lib/docs/filters/rest_framework/entries.rb b/lib/docs/filters/rest_framework/entries.rb new file mode 100644 index 00000000..53b2fce0 --- /dev/null +++ b/lib/docs/filters/rest_framework/entries.rb @@ -0,0 +1,60 @@ +module Docs + class RestFramework + class EntriesFilter < Docs::EntriesFilter + + def get_name + name = css('h1').first.content + name.slice! 'Tutorial ' + name = '0: ' + name if name.include? 'Quickstart' + name + end + + def get_type + case subpath + when /\Atutorial/ + 'Tutorial' + when /\Aapi-guide/ + 'API Guide' + end + end + + def additional_entries + return [] if type == nil || type == 'Tutorial' + + # Framework classes are provided in two different ways: + # - as H2's after H1 category titled: + accepted_headers = ['API Reference', 'API Guide'] + # - as headers (1 or 2) with these endings: + endings = ['Validator', 'Field', 'View', 'Mixin', 'Default', 'Serializer'] + + # To avoid writing down all the endings + # and to ensure all entries in API categories are matched + # two different ways of finding them are used + + entries = [] + + local_type = 'Ref: ' + name + in_category = false + + css('h1, h2').each do |node| + # Third party category contains entries that could be matched (and shouldn't be) + break if node.content === 'Third party packages' + + if in_category + if node.name === 'h1' + in_category = false + next + end + entries << [node.content, node['id'], local_type] + elsif accepted_headers.include? node.content + in_category = true + elsif endings.any? { |word| node.content.ends_with?(word) } + entries << [node.content, node['id'], local_type] + end + end + + entries + end + end + end +end diff --git a/lib/docs/scrapers/rest_framework.rb b/lib/docs/scrapers/rest_framework.rb new file mode 100644 index 00000000..fa64b080 --- /dev/null +++ b/lib/docs/scrapers/rest_framework.rb @@ -0,0 +1,37 @@ +module Docs + class RestFramework < UrlScraper + self.name = 'Django REST Framework' + self.release = '3.9.2' + self.slug = 'rest_framework' + self.type = 'mkdocs' + self.base_url = 'https://www.django-rest-framework.org/' + self.root_path = 'index.html' + self.links = { + home: 'https://www.django-rest-framework.org/', + code: 'https://github.com/encode/django-rest-framework' + } + + html_filters.push 'mkdocs/clean_html', 'rest_framework/clean_html', 'rest_framework/entries' + + options[:skip_patterns] = [ + /\Atopics\//, + /\Acommunity\//, + ] + + options[:attribution] = <<-HTML + Copyright 2011–present Encode OSS Ltd
+ Licensed under the BSD License. + HTML + + private + + def handle_response(response) + # Some scrapped urls don't have ending slash + # which leads to page duplication + if !response.url.path.ends_with?('/') && !response.url.path.ends_with?('index.html') + response.url.path << '/' + end + super + end + end +end diff --git a/public/icons/docs/rest_framework/16.png b/public/icons/docs/rest_framework/16.png new file mode 100644 index 0000000000000000000000000000000000000000..e2e33539d613144de4e7e708cf760355cdc835e5 GIT binary patch literal 1166 zcmb7Du}T9$6nrN~Vj_W9s1OfC?8QpOCMXw@Q;Z^lhzQ0H5J3@ZJ6o;AfCM|i#zL@2 zYas~!fnsYRVj(2Xc(>>6?G`%W-R{o3H~Tir9xvCcg?uS5B88yluV5@PdNcTrymuQI zGTRGPU%LD??=DZly(#h$t>eKkh>FjVF^`}T!NMCvBW=<{H3jaRET&8&a?~`~ z4A{2XIhjmO9VzB*2A@znamMe295oF#gBCJgwXASKj+zFW0TXu+F9`0+;GCRcY}^8p z1UEtB5@LgE;HN9xfN?Zi2Vuz{zeWeq?n|gYL9h!#9qT&Xg{=b?vDiaUXKB literal 0 HcmV?d00001 diff --git a/public/icons/docs/rest_framework/16@2x.png b/public/icons/docs/rest_framework/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..e4ea76baed6b8a7a5b5b2d4157d5294e4bf72b93 GIT binary patch literal 4254 zcmeAS@N?(olHy`uVBq!ia0vp^3LwnE1|*BCs=fdzwj^(N7l!{JxM1({$v_d#0*}aI z1_nK45N51cYF`EvWH0gbb!C6dA|o!MdcMQDhk-#r+tbA{B!ZJsLE!#-po)P7jO19q zm;vZ*EFhMFfn@VY(tyiibhnV=5TYGGtmQDr5bFpgn1+F(i3$NyeGl^tF`+QvAu#HI z0S^J95+Nv2k(v;RNq``8hfWBP5(>on9%LrbQ2M6Q1&(!1