Refactor PHP scraper with new initial_paths option

pull/29/head
Thibaut 11 years ago
parent ca06cc7ad9
commit b66d6d93d6

@ -106,7 +106,7 @@ module Docs
end
# Predicate whose value is that of its last expression.
# NOTE(review): this listing looks like a diff rendering with the +/- markers
# stripped, so the two expressions below are presumably the removed and added
# versions of the same line — confirm against the commit. Read literally as
# Ruby, the first expression's value is discarded and only the second is returned.
def include_default_entry?
# Presumably the pre-refactor condition: subpath is not listed in
# Php::INDEX_PATHS and doc.at_css finds a node for one of the selectors.
Php::INDEX_PATHS.exclude?(subpath) && doc.at_css('.reference', '.refentry', '.sect1')
# Presumably the post-refactor condition: the page is not an initial page
# and doc.at_css finds a node for one of the selectors.
!initial_page? && doc.at_css('.reference', '.refentry', '.sect1')
end
end
end

@ -5,18 +5,7 @@ module Docs
self.version = 'up to 5.5.6'
self.base_url = 'http://www.php.net/manual/en/'
self.root_path = 'index.html'
# Downloaded from php.net/download-docs.php
self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
text_filters.push 'php/fix_urls'
options[:title] = false
options[:root_title] = 'PHP: Hypertext Preprocessor'
INDEX_PATHS = %w(
index.html
self.initial_paths = %w(
funcref.html
refs.database.html
set.mysqlinfo.html
@ -25,11 +14,15 @@ module Docs
reserved.interfaces.html
reserved.variables.html)
# NOTE(review): this run of class-level statements appears to interleave
# removed and added diff lines (the +/- markers are missing); which lines
# belong to which side should be confirmed against the commit.
# Lambda stored under :skip_links; true when filter.subpath is not in INDEX_PATHS.
# Presumably the pre-refactor version, superseded by the one-line lambda further down.
options[:skip_links] = ->(filter) do
INDEX_PATHS.exclude?(filter.subpath)
end
# Downloaded from php.net/download-docs.php
# NOTE(review): absolute path into one user's home directory.
self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
# Append the PHP-specific filters to the HTML and text filter lists.
html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
text_filters.push 'php/fix_urls'
# Seeds :only with a copy of INDEX_PATHS (dup, so later mutation of
# options[:only] does not touch the constant). Presumably a removed line — confirm.
options[:only] = INDEX_PATHS.dup
options[:title] = false
options[:root_title] = 'PHP: Hypertext Preprocessor'
# Lambda stored under :skip_links; true when filter.initial_page? is falsy.
options[:skip_links] = ->(filter) { !filter.initial_page? }
options[:only_patterns] = [
/\Aclass\./,
@ -49,7 +42,7 @@ module Docs
sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish
xml xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib)
options[:only].concat BOOKS.map { |s| "book.#{s}.html" }
options[:only] = BOOKS.map { |s| "book.#{s}.html" }
options[:skip] = %w(
control-structures.intro.html

Loading…
Cancel
Save