You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
devdocs/lib/docs/scrapers/python.rb

94 lines
2.4 KiB

module Docs
  # Scraper definition for the Python documentation.
  #
  # Configures the scraper type, root page, project links, path filters,
  # attribution HTML, and one +version+ block per documented Python release
  # line. Each version block sets its release number, its base URL on
  # docs.python.org, and the HTML filters to apply.
  class Python < FileScraper
    self.type = 'python'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://www.python.org/',
      code: 'https://github.com/python/cpython'
    }

    # Path-prefix patterns handed to the scraper as :only_patterns
    # (presumably an allow-list of pages to keep -- confirm against the
    # scraper base class). Commented-out entries are deliberately disabled.
    options[:only_patterns] = [
      # /\Ac-api/,
      /\Adistributing/,
      # /\Adistutils/,
      /\Aextending/,
      /\Afaq/,
      /\Ahowto/,
      # The dot is escaped so it matches a literal "." rather than any character.
      /\Aindex\.html/,
      # /\Ainstall/,
      /\Ainstalling/,
      /\Alibrary/,
      /\Areference/,
      /\Atutorial/,
      /\Ausing/,
    ]

    # Individual pages handed to the scraper as :skip.
    options[:skip] = %w[
      library/2to3.html
      library/formatter.html
      library/intro.html
      library/undoc.html
      library/unittest.mock-examples.html
      library/sunau.html
    ]

    # Squiggly heredoc strips the source indentation, so the resulting HTML
    # string carries no leading whitespace.
    options[:attribution] = <<~HTML
      &copy; 2001&ndash;2021 Python Software Foundation<br>
      Licensed under the PSF License.
    HTML

    # Every version block derives its base URL from +self.version+ so the
    # version label and the URL cannot drift apart.
    version '3.10' do
      self.release = '3.10.1'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    version '3.9' do
      self.release = '3.9.4'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    version '3.8' do
      self.release = '3.8.6'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    version '3.7' do
      self.release = '3.7.9'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    version '3.6' do
      self.release = '3.6.12'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    version '3.5' do
      self.release = '3.5.9'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
    end

    # The 2.7 line uses the v2 entries filter instead of the v3 one.
    version '2.7' do
      self.release = '2.7.17'
      self.base_url = "https://docs.python.org/#{self.version}/"

      html_filters.push 'python/entries_v2', 'sphinx/clean_html', 'python/clean_html'
    end

    # Looks up the latest documented Python version.
    #
    # Fetches https://docs.python.org/ through +fetch_doc+ and returns the
    # first whitespace-separated token of the page's <title> text.
    #
    # @param opts passed through unchanged to +fetch_doc+
    # @return [String] the first word of the page title
    def get_latest_version(opts)
      doc = fetch_doc('https://docs.python.org/', opts)
      doc.at_css('title').content.split(' ').first
    end
  end
end