Refactor PHP scraper with new initial_paths option

pull/29/head
Thibaut 11 years ago
parent ca06cc7ad9
commit b66d6d93d6

@ -106,7 +106,7 @@ module Docs
end end
def include_default_entry? def include_default_entry?
Php::INDEX_PATHS.exclude?(subpath) && doc.at_css('.reference', '.refentry', '.sect1') !initial_page? && doc.at_css('.reference', '.refentry', '.sect1')
end end
end end
end end

@ -5,18 +5,7 @@ module Docs
self.version = 'up to 5.5.6' self.version = 'up to 5.5.6'
self.base_url = 'http://www.php.net/manual/en/' self.base_url = 'http://www.php.net/manual/en/'
self.root_path = 'index.html' self.root_path = 'index.html'
self.initial_paths = %w(
# Downloaded from php.net/download-docs.php
self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
text_filters.push 'php/fix_urls'
options[:title] = false
options[:root_title] = 'PHP: Hypertext Preprocessor'
INDEX_PATHS = %w(
index.html
funcref.html funcref.html
refs.database.html refs.database.html
set.mysqlinfo.html set.mysqlinfo.html
@ -25,11 +14,15 @@ module Docs
reserved.interfaces.html reserved.interfaces.html
reserved.variables.html) reserved.variables.html)
options[:skip_links] = ->(filter) do # Downloaded from php.net/download-docs.php
INDEX_PATHS.exclude?(filter.subpath) self.dir = '/Users/Thibaut/DevDocs/Docs/PHP'
end
html_filters.push 'php/internal_urls', 'php/entries', 'php/clean_html', 'title'
text_filters.push 'php/fix_urls'
options[:only] = INDEX_PATHS.dup options[:title] = false
options[:root_title] = 'PHP: Hypertext Preprocessor'
options[:skip_links] = ->(filter) { !filter.initial_page? }
options[:only_patterns] = [ options[:only_patterns] = [
/\Aclass\./, /\Aclass\./,
@ -49,7 +42,7 @@ module Docs
sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish
xml xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib) xml xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib)
options[:only].concat BOOKS.map { |s| "book.#{s}.html" } options[:only] = BOOKS.map { |s| "book.#{s}.html" }
options[:skip] = %w( options[:skip] = %w(
control-structures.intro.html control-structures.intro.html

Loading…
Cancel
Save