mirror of https://github.com/freeCodeCamp/devdocs
parent
facb0a44d2
commit
93582d3b82
@ -1,7 +1,7 @@
|
||||
// Styles scoped to the ._mariadb container.
._mariadb {
  @extend %simple;

  // Both .graybox and .product elements take the %note placeholder styling.
  .graybox, .product {
    @extend %note;
  }
}
|
||||
|
@ -0,0 +1,34 @@
|
||||
module Docs
  class Mariadb
    # Blanks out pages that should not be part of the scraped MariaDB docs.
    #
    # The MariaDB documentation uses URLs like mariadb.com/kb/en/*, which means
    # there is no way to tell from its URL alone whether a page should be
    # scraped. This filter is meant to run before the internal_urls filter
    # collects internal URLs: when a page should not be scraped, its contents
    # are erased here so that none of its links are picked up. The entries
    # filter is expected to make sure that no entry is saved for such a page.
    class EraseInvalidPagesFilter < Filter
      # Breadcrumb link that points at the documentation root.
      DOCUMENTATION_CRUMB = 'a.crumb[href="https://mariadb.com/kb/en/documentation/"]'.freeze

      # Breadcrumb link whose href is used as the identity of the current page.
      CURRENT_PAGE_CRUMB = 'a.crumb.node_link'.freeze

      # URLs already processed. A class variable, so the record is shared by
      # every instance of this filter rather than reset per instance.
      @@seen_urls = {}

      # Erases the document when it lacks the documentation breadcrumb or when
      # its breadcrumb URL has been seen before, and records the URL.
      #
      # @return the (possibly emptied) document
      def call
        # Without a breadcrumb to the documentation root the page is not kept.
        doc.inner_html = '' unless at_css(DOCUMENTATION_CRUMB)

        current_page = at_css(CURRENT_PAGE_CRUMB)
        if current_page
          url = current_page['href']

          # Some links lead to the same page; only parse each page one time.
          doc.inner_html = '' if @@seen_urls.key?(url)

          @@seen_urls[url] = true
        end

        doc
      end
    end
  end
end
|
Loading…
Reference in new issue