Improve url scraping in rails

pull/1580/head
Enoc 4 years ago
parent 6d6805972b
commit c38856c528

@ -97,6 +97,7 @@
'pages/pygame',
'pages/python',
'pages/qt',
'pages/rails',
'pages/ramda',
'pages/rdoc',
'pages/react_native',

@ -0,0 +1,9 @@
// Styles scoped to content wrapped in an element with the `_rails` class.
._rails {
// Elements carrying both `title` and `method-title` classes reuse the
// %block-label and %label-blue placeholders (defined outside this file).
.title.method-title {
@extend %block-label, %label-blue;
}
// Second-level headings reuse the %block-heading placeholder
// (defined outside this file).
h2 {
@extend %block-heading;
}
}

@ -207,7 +207,6 @@ done
### Nokogiri
### Ruby / Minitest
### Ruby on Rails
### Ruby
Download the tarball of Ruby from https://www.ruby-lang.org/en/downloads/, extract it, run
`./configure && make html` in your terminal (while you are in the ruby directory) and move

@ -0,0 +1,67 @@
module Docs
  class Rails
    # Cleans up scraped Ruby on Rails pages.
    #
    # Pages whose URL contains "guides" are processed as guides pages; every
    # other page goes through the API-reference clean-up. Page-level containers
    # (#mainCol, #feature .wrapper, #content, the title heading) are accessed
    # without guards on purpose so that a layout change fails loudly, while
    # per-node optional elements inside loops are guarded so a single unusual
    # node does not abort the whole page.
    class CleanHtmlFilter < Filter
      # Guides versions whose <pre> blocks are tagged with a data-language
      # attribute taken from the nested <code> element's "highlight <lang>"
      # class and then flattened to plain text.
      HIGHLIGHTED_GUIDES_VERSIONS = %w[6.1 6.0].freeze

      # Entry point of the filter.
      #
      # Returns the cleaned document node.
      def call
        if current_url.to_s.match?('guides')
          clean_guides_page
        else
          clean_api_page
        end

        doc
      end

      private

      # Rebuilds a guides page around #mainCol, wraps it in a `._rails`
      # container and promotes every heading by one level.
      def clean_guides_page
        css('img, textarea, button, .anchorlink').remove

        # Move the children of "#feature .wrapper" to the top of the main
        # column, then make the main column the whole document.
        at_css('#mainCol').prepend_child at_css('#feature .wrapper').children
        @doc = at_css('#mainCol')

        container = Nokogiri::XML::Node.new 'div', doc
        container['class'] = '_rails'
        container.children = doc.children
        doc << container

        # h2 -> h1, h3 -> h2, ... h6 -> h5
        css('h2, h3, h4, h5, h6').each do |node|
          node.name = node.name.sub(/\d/) { |level| (level.to_i - 1).to_s }
        end

        # The (promoted) title goes first, outside the container.
        doc.prepend_child at_css('h1')

        highlight_guides_code if HIGHLIGHTED_GUIDES_VERSIONS.include?(version)
      end

      # Tags each <pre> with the language found in its <code> child's class
      # and replaces the block's markup with its stripped text.
      def highlight_guides_code
        css('pre').each do |node|
          code = node.at_css('code')
          # A <pre> without a <code> child, or a <code> without a class
          # attribute, simply yields no language instead of raising.
          language = code && code['class'].to_s[/highlight ?(\w+)/, 1]
          node['data-language'] = language if language && language != 'plain'

          # Replacing the content with its stripped text discards the <code>
          # wrapper (and its class attribute) together with any nested markup.
          node.content = node.content.strip
        end
      end

      # Cleans up an API reference page: uses the first h2 as the page title
      # inside #content, drops empty cells and permalinks, and normalizes
      # section titles and code blocks.
      def clean_api_page
        title = at_css('h2')
        title.name = 'h1'
        @doc = at_css('#content')
        doc.prepend_child(title)

        css('table td').each do |node|
          node.remove if node.content.empty?
        end

        css('.permalink').remove

        css('.sectiontitle').each do |node|
          node.name = 'h2'
        end

        css('pre').each do |node|
          node['data-language'] = 'ruby'
        end

        css('.sourcecode').each { |node| relabel_source(node) }
      end

      # Relabels the source toggle of one `.sourcecode` block and moves its
      # GitHub link after the source code. Each piece is optional: blocks
      # lacking a link or a source listing are left as they are.
      def relabel_source(node)
        source_link = node.at_css('.source-link')
        source_link.content = 'Source:' if source_link

        github_url = node.at_css('.github_url')
        return unless github_url

        github_url.content = "Source on Github"

        # move 'source on github' to the end of the source code
        dyn_source = node.at_css('.dyn-source')
        dyn_source.after(github_url) if dyn_source
      end
    end
  end
end

@ -1,37 +0,0 @@
module Docs
  class Rails
    # Cleans up Rails guides pages. Does nothing (returns the document
    # untouched) unless the root URL contains "guides".
    class CleanHtmlGuidesFilter < Filter
      # Entry point of the filter.
      #
      # Returns the cleaned document node.
      def call
        return doc unless root_url.to_s.match?('guides')

        # Move the children of "#feature .wrapper" to the top of the main
        # column, then make the main column the whole document.
        at_css('#mainCol').prepend_child at_css('#feature .wrapper').children
        @doc = at_css('#mainCol')

        container = Nokogiri::XML::Node.new 'div', doc
        container['class'] = '_simple'
        container.children = doc.children
        doc << container

        # h2 -> h1, h3 -> h2, ... h6 -> h5
        css('h2, h3, h4, h5, h6').each do |node|
          node.name = node.name.sub(/\d/) { |level| (level.to_i - 1).to_s }
        end

        # The (promoted) title goes first, outside the container.
        doc.prepend_child at_css('h1')

        # Unwrap these containers: keep their children, drop the wrapper.
        css('#subCol', '.code_container').each do |node|
          node.before(node.children).remove
        end

        css('pre').each do |node|
          code = node.at_css('code')
          # A <pre> without a <code> child, or a <code> without a class
          # attribute, simply yields no language instead of raising.
          language = code && code['class'].to_s[/highlight ?(\w+)/, 1]
          node['data-language'] = language if language && language != 'plain'

          # Replacing the content with its stripped text discards the <code>
          # wrapper (and its class attribute) together with any nested markup.
          node.content = node.content.strip
        end

        doc
      end
    end
  end
end

@ -1,88 +1,44 @@
module Docs
class Rails
class EntriesFilter < Docs::EntriesFilter# Docs::Rdoc::EntriesFilter
TYPE_BY_NAME_MATCHES = {
/Assertions|::Test|Fixture/ => 'Testing',
/\AActiveRecord.+mysql/i => 'ActiveRecord/MySQL',
/\AActiveRecord.+postgresql/i => 'ActiveRecord/PostgreSQL',
/\AActiveRecord.+sqlite/i => 'ActiveRecord/SQLite',
/\AActiveRecord.+Assoc/ => 'ActiveRecord/Associations',
/\AActiveRecord.+Attribute/ => 'ActiveRecord/Attributes',
/\AActiveRecord.+ConnectionAdapters/ => 'ActiveRecord/Connection',
/\AActiveSupport.+(Subscriber|Notifications)/ => 'ActiveSupport/Instrumentation' }
TYPE_BY_NAME_STARTS_WITH = {
'ActionController::Parameters' => 'ActionController/Parameters',
'ActionDispatch::Integration' => 'Testing',
'ActionDispatch::Request' => 'ActionDispatch/Request',
'ActionDispatch::Response' => 'ActionDispatch/Response',
'ActionDispatch::Routing' => 'ActionDispatch/Routing',
'ActionView::Helpers' => 'ActionView/Helpers',
'ActiveModel::Errors' => 'ActiveModel/Validation',
'ActiveModel::Valid' => 'ActiveModel/Validation',
'ActiveRecord::Batches' => 'ActiveRecord/Query',
'ActiveRecord::Calculations' => 'ActiveRecord/Query',
'ActiveRecord::Connection' => 'ActiveRecord/Connection',
'ActiveRecord::FinderMethods' => 'ActiveRecord/Query',
'ActiveRecord::Migra' => 'ActiveRecord/Migration',
'ActiveRecord::Query' => 'ActiveRecord/Query',
'ActiveRecord::Relation' => 'ActiveRecord/Relation',
'ActiveRecord::Result' => 'ActiveRecord/Connection',
'ActiveRecord::Scoping' => 'ActiveRecord/Query',
'ActiveRecord::SpawnMethods' => 'ActiveRecord/Query',
'ActiveSupport::Cach' => 'ActiveSupport/Caching',
'ActiveSupport::Inflector' => 'ActiveSupport/Inflector',
'ActiveSupport::Time' => 'ActiveSupport/TimeZones',
'Rails::Application' => 'Rails/Application',
'Rails::Engine' => 'Rails/Engine',
'Rails::Generators' => 'Rails/Generators',
'Rails::Railtie' => 'Rails/Railtie' }
def get_name
if slug.start_with?('guides')
name = at_css('#feature h2').content.strip
name.remove! %r{\s\(.+\)\z}
return name
if current_url.to_s.match?('guides')
at_css('h2').content
else
name = at_css('h2').to_html.scan(/<\/span>.*?</)[0]
if name.nil?
name = at_css('h2').content
else
name.sub!("<\/span>", '')
name.sub!('<', '')
end
name.strip
end
super
end
def get_type
return 'Guides' if root_url.to_s.match?('guides')
parent = at_css('.meta-parent').try(:content).to_s
if [name, parent].any? { |str| str.end_with?('Error') || str.end_with?('Exception') }
return 'Errors'
end
TYPE_BY_NAME_MATCHES.each_pair do |key, value|
return value if name =~ key
end
return 'Guides' if current_url.to_s.match?('guides')
return 'Ruby files' if name =~ /.rb/
TYPE_BY_NAME_STARTS_WITH.each_pair do |key, value|
return value if name.start_with?(key)
end
name.split('::')[0]
super
end
def include_default_entry?
return true if root_url.to_s.match?('guides')
def additional_entries
return [] if current_url.to_s.match?('guides')
super && !skip?
end
entries = []
def additional_entries
return [] if root_url.to_s.match?('guides')
css('.title.method-title').each do |node|
entry_name = node.at_css('b').content
entries << [name+"##{entry_name}", node['id']]
end
skip? ? [] : super
entries
end
def skip?
@skip ||= !css('p').any? { |node| node.content.present? }
end
end
end
end

@ -1,10 +1,9 @@
module Docs
class Rails < UrlScraper
# include FixInternalUrlsBehavior
include MultipleBaseUrls
self.name = 'Ruby on Rails'
self.type = 'rdoc'
self.type = 'rails'
self.slug = 'rails'
self.links = {
@ -12,10 +11,7 @@ module Docs
code: 'https://github.com/rails/rails'
}
# html_filters.replace 'container', 'rails/container'
html_filters.push 'rails/entries', 'rdoc/clean_html', 'rails/clean_html_guides'
options[:skip_rdoc_filters?] = ->(filter) { filter.root_url.to_s.match?('guides/') }
html_filters.push 'rails/entries', 'rails/clean_html'
options[:root_title] = 'Ruby on Rails'
@ -88,35 +84,65 @@ module Docs
self.release = '6.1.3.2'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/'
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/'
]
options[:skip_patterns] << /v.*\..*\//
end
version '6.0' do
self.release = '6.0.0'
self.release = '6.1.3.2'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/'
]
end
version '5.2' do
self.release = '5.2.2'
self.release = '5.2.5'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/v5.2/'
]
end
version '5.1' do
self.release = '5.1.6'
self.release = '5.1.7'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/v5.1/'
]
end
version '5.0' do
self.release = '5.0.7'
self.release = '5.0.7.2'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/v5.0/'
]
end
version '4.2' do
self.release = '4.2.11'
self.release = '4.2.11.3'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/v4.2/'
]
end
version '4.1' do
self.release = '4.1.16'
self.base_urls = [
'https://api.rubyonrails.org/',
'https://guides.rubyonrails.org/v4.1/'
]
end
def get_latest_version(opts)

Loading…
Cancel
Save