mirror of https://github.com/freeCodeCamp/devdocs
commit
c8380bb228
@ -0,0 +1,7 @@
|
||||
// MariaDB documentation pages: built on the shared %simple placeholder;
// the .graybox and .product boxes are rendered with the shared %note style.
._mariadb {
  @extend %simple;

  .graybox,
  .product {
    @extend %note;
  }
}
|
@ -0,0 +1,67 @@
|
||||
require 'cgi'

module Docs
  class Mariadb
    # Reduces a scraped page to its `#content` element and tidies it up:
    # drops navigation, table of contents and comment sections, tags code
    # blocks as SQL, upgrades image URLs to https and converts `ul.listing`
    # lists into two-column tables.
    class CleanHtmlFilter < Filter
      # @return the cleaned document (or the untouched empty document when the
      #   page was blanked earlier in the pipeline)
      def call
        # Return the empty doc if the EraseInvalidPagesFilter detected this page shouldn't be scraped
        return doc if doc.inner_html == ''

        # Extract main content
        @doc = at_css('#content')

        # Remove navigation at the bottom
        css('.simple_section_nav').remove

        # Remove table of contents
        css('.table_of_contents').remove

        # Add code highlighting and remove nested tags
        css('pre').each do |node|
          # Re-assigning the text content replaces any child elements with plain text
          node.content = node.content
          node['data-language'] = 'sql'
        end

        # Fix images: only rewrite a leading http: scheme, and skip images without a src
        css('img').each do |node|
          src = node['src']
          next if src.nil?

          node['src'] = src.sub(/\Ahttp:/, 'https:')
        end

        # Remove navigation items containing only numbers
        css('.node_comments').each do |node|
          node.remove if node.content.scan(/\D/).empty?
        end

        # Convert listings (pages like https://mariadb.com/kb/en/library/documentation/sql-statements-structure/) into tables
        css('ul.listing').each do |node|
          rows = node.css('li:not(.no_data)').map do |li|
            # `content` and attribute values are decoded text, so they must be
            # escaped again before being interpolated into an HTML string
            name = CGI.escapeHTML(li.at_css('.media-heading').content)
            description = CGI.escapeHTML(li.at_css('.blurb').content)
            url = CGI.escapeHTML(li.at_css('a')['href'].to_s)
            "<tr><td><a href=\"#{url}\">#{name}</a></td><td>#{description}</td></tr>"
          end

          table = "<table><thead><tr><th>Title</th><th>Description</th></tr></thead><tbody>#{rows.join('')}</tbody></table>"
          node.replace(table)
        end

        # Turn note titles into <strong> tags
        css('.product_title').each do |node|
          node.name = 'strong'
        end

        # Remove comments and questions
        css('.related_questions, #comments').remove
        css('h2').each do |node|
          node.remove if node.content == 'Comments'
        end

        doc
      end
    end
  end
end
|
@ -0,0 +1,24 @@
|
||||
module Docs
  class Mariadb
    # Derives the index entry (name and type) for a page. Pages whose
    # document has been emptied get literal placeholder values and no entry.
    class EntriesFilter < Docs::EntriesFilter
      # Entry name: the stripped text of the page's top-level heading.
      def get_name
        if doc.inner_html == ''
          'Name'
        else
          at_css('#content > h1').content.strip
        end
      end

      # Entry type: text of the fourth breadcrumb link, falling back to the
      # third one when there is no fourth.
      def get_type
        return 'Type' if doc.inner_html == ''

        crumb = at_css('#breadcrumbs > a:nth-child(4)') || at_css('#breadcrumbs > a:nth-child(3)')
        crumb.content
      end

      # Don't add an entry for this page if the EraseInvalidPagesFilter
      # detected this page shouldn't be scraped (its document is empty).
      def entries
        doc.inner_html == '' ? [] : super
      end
    end
  end
end
|
@ -0,0 +1,34 @@
|
||||
module Docs
  class Mariadb
    # Blanks out pages that should not be part of the scraped documentation.
    class EraseInvalidPagesFilter < Filter
      # URLs of the pages processed so far, shared by every filter instance
      @@seen_urls = {}

      def call
        # The MariaDB documentation uses urls like mariadb.com/kb/en/*
        # This means there is no way to detect if a page should be scraped based on its url
        # We run this filter before the internal_urls filter scrapes all internal urls
        # If this page should not be scraped, we erase its contents in here so that the internal urls are not picked up
        # The entries filter will make sure that no entry is saved for this page

        documentation_crumb = at_css('a.crumb[href="https://mariadb.com/kb/en/documentation/"]')
        doc.inner_html = '' unless documentation_crumb

        page_link = at_css('a.crumb.node_link')
        if page_link
          page_url = page_link['href']

          # Some links lead to the same page
          # Only parse the page one time
          doc.inner_html = '' if @@seen_urls.key?(page_url)

          @@seen_urls[page_url] = true
        end

        doc
      end
    end
  end
end
|
@ -0,0 +1,36 @@
|
||||
module Docs
  # Scraper definition for the MariaDB Knowledge Base documentation.
  class Mariadb < UrlScraper
    self.name = 'MariaDB'
    self.type = 'mariadb'
    self.release = '10.4.8'
    self.base_url = 'https://mariadb.com/kb/en/'
    self.root_path = 'library/documentation/'
    self.links = {
      home: 'https://mariadb.com/',
      code: 'https://github.com/MariaDB/server'
    }

    # Pages must be blanked before 'internal_urls' runs so that links on
    # unwanted pages are not picked up.
    html_filters.insert_before 'internal_urls', 'mariadb/erase_invalid_pages'
    html_filters.push 'mariadb/entries', 'mariadb/clean_html'

    options[:rate_limit] = 200

    # Patterns passed as the scraper's :skip_patterns option
    options[:skip_patterns] = [
      /\+/,
      /\/ask\//,
      /-release-notes\//,
      /-changelog\//,
      /^documentation\//,
      /^mariadb-server-documentation\//,
    ]

    options[:attribution] = <<-HTML
      © 2019 MariaDB<br>
      Licensed under the Creative Commons Attribution 3.0 Unported License and the GNU Free Documentation License.
    HTML

    # Reads the latest version from the first option of the server version
    # selector on the downloads page, keeping the text before the first '-'.
    def get_latest_version(opts)
      downloads_page = fetch_doc('https://mariadb.com/downloads/', opts)
      newest_option = downloads_page.at_css('[data-version-id="mariadb_server-versions"] option')
      newest_option.content.split('-').first
    end
  end
end
|
After Width: | Height: | Size: 936 B |
After Width: | Height: | Size: 1.5 KiB |
@ -0,0 +1 @@
|
||||
https://mariadb.org/about/logos/
|
Loading…
Reference in new issue