sequelize: finish scraper and filters

pull/1107/head
Jasper van Merle 5 years ago
parent 0631b3f7e9
commit 089aa6158a

@ -2,16 +2,37 @@ module Docs
class Sequelize class Sequelize
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
@doc = at_css('.content')
# Clean up the home page # Clean up the home page
if root_page? if root_page? || subpath == "index.html"
# Remove logo # Remove logo
css('.manual-user-index > div > div.logo').remove css('.manual-user-index > div > div.logo').remove
# Convert title to proper H1 element
# Convert title to proper h1 element
at_css('.manual-user-index > div > div.sequelize').name = 'h1' at_css('.manual-user-index > div > div.sequelize').name = 'h1'
# Remove badges (NPM, Travis, test coverage, etc.) # Remove badges (NPM, Travis, test coverage, etc.)
css('.manual-user-index > p:nth-child(4)').remove css('.manual-user-index > p:nth-child(4)').remove
# Remove image cards pointing to entries of the manual # Remove image cards pointing to entries of the manual
css('.manual-cards').remove css('.manual-cards').remove
# Pull the header out of its container
header = at_css('h1')
header.parent.parent.parent.add_previous_sibling header
else
# Pull the header out of its container
header = at_css('h1')
header.parent.add_previous_sibling header
end
# Remove header notice
css('.header-notice').remove
# Change td in thead to th
css('table > thead > tr > td').each do |node|
node.name = 'th'
end end
# Add syntax highlighting to code blocks # Add syntax highlighting to code blocks

@ -9,17 +9,24 @@ module Docs
# Assign the pages to main categories # Assign the pages to main categories
def get_type def get_type
if path.start_with?('manual/') if path.start_with?('manual/')
type = 'Manual' 'Manual'
elsif path.start_with?('file/lib/') elsif path.include?('lib/data-types')
type = 'Source files' 'datatypes'
elsif path.include?('lib/errors/validation')
'errors/validation'
elsif path.include?('lib/errors/database')
'errors/database'
elsif path.include?('lib/errors/connection')
'errors/connection'
elsif path.include?('lib/errors')
'errors'
elsif path.include?('lib/associations')
'associations'
elsif path.include?('master/variable')
'variables'
else else
# API Reference pages. The `path` for most of these starts with 'class/lib/', 'classes'
# but there's also 'variable/index' (pseudo-classes), and 'identifiers' (the main index)
# so we use an unqualified `else` as a catch-all.
type = 'Reference'
end end
type
end end
end end
end end

@ -3,18 +3,18 @@ module Docs
self.name = 'Sequelize' self.name = 'Sequelize'
self.slug = 'sequelize' self.slug = 'sequelize'
self.type = 'simple' self.type = 'simple'
self.release = '5.19.6' self.release = '5.21.1'
self.base_url = 'https://sequelize.org/master/' self.base_url = 'https://sequelize.org/master/'
self.links = { self.links = {
home: 'https://sequelize.org/', home: 'https://sequelize.org/',
code: 'https://github.com/sequelize/sequelize/' code: 'https://github.com/sequelize/sequelize'
} }
# List of content filters (to be applied sequentially) # List of content filters (to be applied sequentially)
html_filters.push 'sequelize/entries', 'sequelize/clean_html' html_filters.push 'sequelize/entries', 'sequelize/clean_html'
# Wrapper element that holds the main content # Skip the source files, the license page and the "Who's using Sequelize" page
options[:container] = '.content' options[:skip_patterns] = [/\.js\.html/, /manual\/legal\.html/, /manual\/whos-using\.html/]
# License information that appears at the bottom of the entry page # License information that appears at the bottom of the entry page
options[:attribution] = <<-HTML options[:attribution] = <<-HTML

Loading…
Cancel
Save