From c38856c5285444d66d2aaef3ac2f0c88e7edff7c Mon Sep 17 00:00:00 2001 From: Enoc Date: Tue, 22 Jun 2021 22:29:44 -0600 Subject: [PATCH] Improve url scraping in rails --- assets/stylesheets/application.css.scss | 1 + assets/stylesheets/pages/_rails.scss | 9 ++ docs/file-scrapers.md | 1 - lib/docs/filters/rails/clean_html.rb | 67 +++++++++++++++ lib/docs/filters/rails/clean_html_guides.rb | 37 --------- lib/docs/filters/rails/entries.rb | 92 ++++++--------------- lib/docs/scrapers/rails.rb | 52 +++++++++--- 7 files changed, 140 insertions(+), 119 deletions(-) create mode 100644 assets/stylesheets/pages/_rails.scss create mode 100644 lib/docs/filters/rails/clean_html.rb delete mode 100644 lib/docs/filters/rails/clean_html_guides.rb diff --git a/assets/stylesheets/application.css.scss b/assets/stylesheets/application.css.scss index 0243afeb..cc6c0291 100644 --- a/assets/stylesheets/application.css.scss +++ b/assets/stylesheets/application.css.scss @@ -97,6 +97,7 @@ 'pages/pygame', 'pages/python', 'pages/qt', + 'pages/rails', 'pages/ramda', 'pages/rdoc', 'pages/react_native', diff --git a/assets/stylesheets/pages/_rails.scss b/assets/stylesheets/pages/_rails.scss new file mode 100644 index 00000000..acf3eae2 --- /dev/null +++ b/assets/stylesheets/pages/_rails.scss @@ -0,0 +1,9 @@ +._rails { + .title.method-title { + @extend %block-label, %label-blue; + } + + h2 { + @extend %block-heading; + } +} diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index c25db042..aa2dace0 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -207,7 +207,6 @@ done ### Nokogiri ### Ruby / Minitest -### Ruby on Rails ### Ruby Download the tarball of Ruby from https://www.ruby-lang.org/en/downloads/, extract it, run `./configure && make html` in your terminal (while your are in the ruby directory) and move diff --git a/lib/docs/filters/rails/clean_html.rb b/lib/docs/filters/rails/clean_html.rb new file mode 100644 index 00000000..8a6e4c6d --- /dev/null +++ b/lib/docs/filters/rails/clean_html.rb @@ -0,0 +1,67 @@ +module Docs + class Rails + class CleanHtmlFilter < Filter + def call + + if current_url.to_s.match?('guides') + css('img, textarea, button, .anchorlink').remove + + at_css('#mainCol').prepend_child at_css('#feature .wrapper').children + @doc = at_css('#mainCol') + + container = Nokogiri::XML::Node.new 'div', doc + container['class'] = '_rails' + container.children = doc.children + doc << container + + css('h2, h3, h4, h5, h6').each do |node| + node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } + end + + doc.prepend_child at_css('h1') + + if version == '6.1' || version == '6.0' + css('pre').each do |node| + code = node.at_css('code') + language = code['class'][/highlight ?(\w+)/, 1] + node['data-language'] = language unless language == 'plain' + code.remove_attribute('class') + node.content = node.content.strip + end + end + + else + title = at_css('h2') + title.name = 'h1' + + @doc = at_css('#content') + @doc.prepend_child(title) + + css('table td').each do |node| + node.remove if node.content.empty? + end + + css('.permalink').remove + + css('.sectiontitle').each do |node| + node.name = 'h2' + end + + css('pre').each do |node| + node['data-language'] = 'ruby' + end + + # move 'source on github' to the end of the source code + css('.sourcecode').each do |node| + github_url = node.at_css('.github_url') + github_url.content = "Source on Github" + node.at_css('.source-link').content = 'Source:' + node.at_css('.dyn-source').after(github_url) + end + end + + doc + end + end + end +end diff --git a/lib/docs/filters/rails/clean_html_guides.rb b/lib/docs/filters/rails/clean_html_guides.rb deleted file mode 100644 index 1f81ed58..00000000 --- a/lib/docs/filters/rails/clean_html_guides.rb +++ /dev/null @@ -1,37 +0,0 @@ -module Docs - class Rails - class CleanHtmlGuidesFilter < Filter - def call - return doc unless root_url.to_s.match?('guides') - - at_css('#mainCol').prepend_child at_css('#feature .wrapper').children - @doc = at_css('#mainCol') - - container = Nokogiri::XML::Node.new 'div', doc - container['class'] = '_simple' - container.children = doc.children - doc << container - - css('h2, h3, h4, h5, h6').each do |node| - node.name = node.name.sub(/\d/) { |i| i.to_i - 1 } - end - - doc.prepend_child at_css('h1') - - css('#subCol', '.code_container').each do |node| - node.before(node.children).remove - end - - css('pre').each do |node| - code = node.at_css('code') - language = code['class'][/highlight ?(\w+)/, 1] - node['data-language'] = language unless language == 'plain' - code.remove_attribute('class') - node.content = node.content.strip - end - - doc - end - end - end -end diff --git a/lib/docs/filters/rails/entries.rb b/lib/docs/filters/rails/entries.rb index b83b4c87..e48fe349 100644 --- a/lib/docs/filters/rails/entries.rb +++ b/lib/docs/filters/rails/entries.rb @@ -1,88 +1,44 @@ module Docs class Rails class EntriesFilter < Docs::EntriesFilter# Docs::Rdoc::EntriesFilter - TYPE_BY_NAME_MATCHES = { - /Assertions|::Test|Fixture/ => 'Testing', - /\AActiveRecord.+mysql/i => 'ActiveRecord/MySQL', - /\AActiveRecord.+postgresql/i => 'ActiveRecord/PostgreSQL', - /\AActiveRecord.+sqlite/i => 'ActiveRecord/SQLite', - /\AActiveRecord.+Assoc/ => 'ActiveRecord/Associations', - /\AActiveRecord.+Attribute/ => 'ActiveRecord/Attributes', - /\AActiveRecord.+ConnectionAdapters/ => 'ActiveRecord/Connection', - /\AActiveSupport.+(Subscriber|Notifications)/ => 'ActiveSupport/Instrumentation' } - - TYPE_BY_NAME_STARTS_WITH = { - 'ActionController::Parameters' => 'ActionController/Parameters', - 'ActionDispatch::Integration' => 'Testing', - 'ActionDispatch::Request' => 'ActionDispatch/Request', - 'ActionDispatch::Response' => 'ActionDispatch/Response', - 'ActionDispatch::Routing' => 'ActionDispatch/Routing', - 'ActionView::Helpers' => 'ActionView/Helpers', - 'ActiveModel::Errors' => 'ActiveModel/Validation', - 'ActiveModel::Valid' => 'ActiveModel/Validation', - 'ActiveRecord::Batches' => 'ActiveRecord/Query', - 'ActiveRecord::Calculations' => 'ActiveRecord/Query', - 'ActiveRecord::Connection' => 'ActiveRecord/Connection', - 'ActiveRecord::FinderMethods' => 'ActiveRecord/Query', - 'ActiveRecord::Migra' => 'ActiveRecord/Migration', - 'ActiveRecord::Query' => 'ActiveRecord/Query', - 'ActiveRecord::Relation' => 'ActiveRecord/Relation', - 'ActiveRecord::Result' => 'ActiveRecord/Connection', - 'ActiveRecord::Scoping' => 'ActiveRecord/Query', - 'ActiveRecord::SpawnMethods' => 'ActiveRecord/Query', - 'ActiveSupport::Cach' => 'ActiveSupport/Caching', - 'ActiveSupport::Inflector' => 'ActiveSupport/Inflector', - 'ActiveSupport::Time' => 'ActiveSupport/TimeZones', - 'Rails::Application' => 'Rails/Application', - 'Rails::Engine' => 'Rails/Engine', - 'Rails::Generators' => 'Rails/Generators', - 'Rails::Railtie' => 'Rails/Railtie' } - def get_name - if slug.start_with?('guides') - name = at_css('#feature h2').content.strip - name.remove! %r{\s\(.+\)\z} - return name + if current_url.to_s.match?('guides') + at_css('h2').content + else + name = at_css('h2').to_html.scan(/<\/span>.*?", '') + name.sub!('<', '') + end + + name.strip end - - super end def get_type - return 'Guides' if root_url.to_s.match?('guides') - - parent = at_css('.meta-parent').try(:content).to_s - - if [name, parent].any? { |str| str.end_with?('Error') || str.end_with?('Exception') } - return 'Errors' - end - - TYPE_BY_NAME_MATCHES.each_pair do |key, value| - return value if name =~ key - end + return 'Guides' if current_url.to_s.match?('guides') + return 'Ruby files' if name =~ /.rb/ - TYPE_BY_NAME_STARTS_WITH.each_pair do |key, value| - return value if name.start_with?(key) - end + name.split('::')[0] - super end - def include_default_entry? - return true if root_url.to_s.match?('guides') + def additional_entries + return [] if current_url.to_s.match?('guides') - super && !skip? - end + entries = [] - def additional_entries - return [] if root_url.to_s.match?('guides') + css('.title.method-title').each do |node| + entry_name = node.at_css('b').content + entries << [name+"##{entry_name}", node['id']] + end - skip? ? [] : super + entries end - def skip? - @skip ||= !css('p').any? { |node| node.content.present? } - end end end end diff --git a/lib/docs/scrapers/rails.rb b/lib/docs/scrapers/rails.rb index 8789a7d3..56c55bb0 100644 --- a/lib/docs/scrapers/rails.rb +++ b/lib/docs/scrapers/rails.rb @@ -1,10 +1,9 @@ module Docs class Rails < UrlScraper - # include FixInternalUrlsBehavior include MultipleBaseUrls self.name = 'Ruby on Rails' - self.type = 'rdoc' + self.type = 'rails' self.slug = 'rails' self.links = { @@ -12,10 +11,7 @@ module Docs code: 'https://github.com/rails/rails' } - # html_filters.replace 'container', 'rails/container' - html_filters.push 'rails/entries', 'rdoc/clean_html', 'rails/clean_html_guides' - - options[:skip_rdoc_filters?] = ->(filter) { filter.root_url.to_s.match?('guides/') } + html_filters.push 'rails/entries', 'rails/clean_html' options[:root_title] = 'Ruby on Rails' @@ -88,35 +84,65 @@ module Docs self.release = '6.1.3.2' self.base_urls = [ - 'https://api.rubyonrails.org/', - 'https://guides.rubyonrails.org/' + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/' ] options[:skip_patterns] << /v.*\..*\// end version '6.0' do - self.release = '6.0.0' + self.release = '6.1.3.2' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/' + ] end version '5.2' do - self.release = '5.2.2' + self.release = '5.2.5' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.2/' + ] end version '5.1' do - self.release = '5.1.6' + self.release = '5.1.7' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.1/' + ] end version '5.0' do - self.release = '5.0.7' + self.release = '5.0.7.2' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v5.0/' + ] end version '4.2' do - self.release = '4.2.11' + self.release = '4.2.11.3' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v4.2/' + ] end version '4.1' do self.release = '4.1.16' + + self.base_urls = [ + 'https://api.rubyonrails.org/', + 'https://guides.rubyonrails.org/v4.1/' + ] end def get_latest_version(opts)