From 44e1f216c43f4adc734eea5f89e6769a4bccd4be Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Sun, 7 Feb 2016 12:56:01 -0500 Subject: [PATCH] Update and improve PHP documentation (7.0.3) --- .../templates/pages/about_tmpl.coffee | 2 +- assets/stylesheets/pages/_php.scss | 1 + lib/docs/core/scraper.rb | 61 +++++++++++++++++++ lib/docs/filters/php/clean_html.rb | 12 ++++ lib/docs/filters/php/entries.rb | 11 +++- lib/docs/filters/php/fix_urls.rb | 2 +- lib/docs/filters/php/internal_urls.rb | 3 + lib/docs/scrapers/php.rb | 38 ++++++++---- 8 files changed, 113 insertions(+), 17 deletions(-) diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index fcb8d168..15702eae 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -311,7 +311,7 @@ credits = [ 'https://raw.githubusercontent.com/phoenixframework/phoenix/master/LICENSE.md' ], [ 'PHP', - '1997-2015 The PHP Documentation Group', + '1997-2016 The PHP Documentation Group', 'CC BY', 'https://creativecommons.org/licenses/by/3.0/' ], [ diff --git a/assets/stylesheets/pages/_php.scss b/assets/stylesheets/pages/_php.scss index 4386bd57..495bf19a 100644 --- a/assets/stylesheets/pages/_php.scss +++ b/assets/stylesheets/pages/_php.scss @@ -4,6 +4,7 @@ @extend %lined-heading; } + h2 { @extend %block-heading; } h3.title { @extend %block-heading; } .verinfo { diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb index 836734f0..0ca8059b 100644 --- a/lib/docs/core/scraper.rb +++ b/lib/docs/core/scraper.rb @@ -186,5 +186,66 @@ module Docs Typhoeus.stub(root_url.to_s).and_return(response) end end + + module FixInternalUrlsBehavior + def self.included(base) + base.extend ClassMethods + end + + module ClassMethods + attr_reader :internal_urls + + def store_pages(store) + instrument 'info.doc', msg: 'Building internal urls...' + with_internal_urls do + instrument 'info.doc', msg: 'Building pages...' + super + end + end + + private + + def with_internal_urls + @internal_urls = new.fetch_internal_urls + yield + ensure + @internal_urls = nil + end + end + + def fetch_internal_urls + result = [] + build_pages do |page| + result << base_url.subpath_to(page[:response_url]) if page[:entries].present? + end + result + end + + def initial_urls + return super unless self.class.internal_urls + @initial_urls ||= self.class.internal_urls.map(&method(:url_for)).freeze + end + + private + + def additional_options + if self.class.internal_urls + { + only: self.class.internal_urls.to_set, + only_patterns: nil, + skip: nil, + skip_patterns: nil, + skip_links: nil, + fixed_internal_urls: true + } + else + {} + end + end + + def process_response(response) + super.merge! response_url: response.url + end + end end end diff --git a/lib/docs/filters/php/clean_html.rb b/lib/docs/filters/php/clean_html.rb index 3d89418c..d6791c4f 100644 --- a/lib/docs/filters/php/clean_html.rb +++ b/lib/docs/filters/php/clean_html.rb @@ -25,6 +25,18 @@ module Docs node.inner_html = node.inner_html.gsub(br, "\n") node.content = node.content end + + css('> h2:first-child.title').each do |node| + node.name = 'h1' + end + + css('div.partintro', 'div.section').each do |node| + node.before(node.children).remove + end + + css('.title + .verinfo + .title').each do |node| + node.after(node.previous_element) + end end end end diff --git a/lib/docs/filters/php/entries.rb b/lib/docs/filters/php/entries.rb index 090210e7..1863127c 100644 --- a/lib/docs/filters/php/entries.rb +++ b/lib/docs/filters/php/entries.rb @@ -25,6 +25,7 @@ module Docs 'tidy' => 'Tidy', 'Worker' => 'pthreads', 'XsltProcessor' => 'XSLT', + 'Yar' => 'Yar', 'ZipArchive' => 'Zip' } %w(APC Directory DOM Event Gearman Gmagick Imagick mysqli OAuth PDO Reflection @@ -46,13 +47,16 @@ module Docs end REPLACE_TYPES = { + 'Error' => 'Errors', 'Exceptions' => 'SPL/Exceptions', + 'finfo' => 'File System', 'GD and Image' => 'Image', 'Gmagick' => 'Image/GraphicsMagick', 'Imagick' => 'Image/ImageMagick', 'Interfaces' => 'SPL/Interfaces', 'Iterators' => 'SPL/Iterators', 'mysqli' => 'Database/MySQL', + 'PCRE Patterns' => 'PCRE Reference', 'PostgreSQL' => 'Database/PostgreSQL', 'Session' => 'Sessions', 'Session PgSQL' => 'Database/PostgreSQL', @@ -62,7 +66,7 @@ module Docs 'Yaml' => 'YAML' } TYPE_GROUPS = { - 'Classes and Functions' => ['Classes/Object', 'Function handling', 'Predefined Interfaces and Classes', 'runkit'], + 'Classes and Functions' => ['Classes/Object', 'Function handling', 'Predefined Interfaces and Classes', 'runkit', 'Throwable'], 'Encoding' => ['Gettext', 'iconv', 'Multibyte String'], 'Compression' => ['Bzip2', 'Zip', 'Zlib'], 'Cryptography' => ['Hash', 'Mcrypt', 'OpenSSL', 'Password Hashing'], @@ -90,6 +94,9 @@ module Docs end def get_type + return 'Language Reference' if subpath.start_with?('language.') + return 'PCRE Reference' if subpath.start_with?('regexp.') + type = at_css('.up').content.strip type = 'SPL/Iterators' if type.end_with? 'Iterator' type.remove! ' Functions' @@ -108,7 +115,7 @@ module Docs end def include_default_entry? - !initial_page? && doc.at_css('.reference', '.refentry', '.sect1') + !initial_page? && doc.at_css('.reference', '.refentry', '.sect1', '.simpara', '.para') end end end diff --git a/lib/docs/filters/php/fix_urls.rb b/lib/docs/filters/php/fix_urls.rb index 392f27cf..c2d96d75 100644 --- a/lib/docs/filters/php/fix_urls.rb +++ b/lib/docs/filters/php/fix_urls.rb @@ -3,7 +3,7 @@ module Docs class FixUrlsFilter < Filter def call html.gsub! File.join(Php.base_url, Php.root_path), Php.base_url - html.gsub! %r{http://www\.php\.net/manual/en/([^"']+?)\.html}, 'http://www.php.net/manual/en/\1.php' + html.gsub! %r{https://secure\.php\.net/manual/en/([^"']+?)\.html}, 'https://secure.php.net/manual/en/\1.php' html end end diff --git a/lib/docs/filters/php/internal_urls.rb b/lib/docs/filters/php/internal_urls.rb index d5dc384b..fe75130c 100644 --- a/lib/docs/filters/php/internal_urls.rb +++ b/lib/docs/filters/php/internal_urls.rb @@ -2,9 +2,12 @@ module Docs class Php class InternalUrlsFilter < Filter def call + return doc if context[:fixed_internal_urls] + if subpath.start_with?('book.') || subpath.start_with?('class.') result[:internal_urls] = internal_urls end + doc end diff --git a/lib/docs/scrapers/php.rb b/lib/docs/scrapers/php.rb index 6f0eba76..6477fbdf 100644 --- a/lib/docs/scrapers/php.rb +++ b/lib/docs/scrapers/php.rb @@ -1,19 +1,28 @@ module Docs class Php < FileScraper + include FixInternalUrlsBehavior + self.name = 'PHP' self.type = 'php' - self.release = 'up to 5.6.13' - self.base_url = 'http://www.php.net/manual/en/' + self.release = 'up to 7.0.3' + self.base_url = 'https://secure.php.net/manual/en/' self.root_path = 'index.html' self.initial_paths = %w( funcref.html + langref.html refs.database.html set.mysqlinfo.html language.control-structures.html + reference.pcre.pattern.syntax.html reserved.exceptions.html reserved.interfaces.html reserved.variables.html) + self.links = { + home: 'https://secure.php.net/', + code: 'https://github.com/php/php-src' + } + # Downloaded from php.net/download-docs.php self.dir = '/Users/Thibaut/DevDocs/Docs/PHP' @@ -25,34 +34,37 @@ module Docs options[:skip_links] = ->(filter) { !filter.initial_page? } options[:only_patterns] = [ + /\Alanguage\./, /\Aclass\./, /\Afunction\./, /\Acontrol-structures/, + /\Aregexp\./, /\Areserved\.exceptions/, /\Areserved\.interfaces/, /\Areserved\.variables/] - BOOKS = %w(apache apc array bc bzip2 calendar classobj ctype curl datetime - dba dir dom eio errorfunc event exec fileinfo filesystem filter ftp funchand - gearman geoip gettext gmagick hash http iconv iisfunc image imagick imap - info inotify intl json ldap libevent libxml mail mailparse math mbstring - mcrypt memcached misc mysqli network oauth openssl outcontrol password - pcre pdo pgsql posix pthreads regex runkit reflection session - session-pgsql simplexml soap sockets solr sphinx spl spl-types sqlite3 - sqlsrv ssh2 stats stream strings taint tidy uodbc url var varnish xml - xmlreader xmlrpc xmlwriter xsl yaf yaml zip zlib) + BOOKS = %w(apache apc array bc bzip2 calendar csprng classobj ctype curl + datetime dba dir dom eio errorfunc event exec fileinfo filesystem filter + ftp funchand gearman geoip gettext gmagick gmp hash iconv iisfunc image + imagick imap info inotify intl json ldap libevent libxml mail mailparse + math mbstring mcrypt memcached misc mysqli network oauth openssl + outcontrol password pcre pdo pgsql posix pthreads regex runkit reflection + sca session session-pgsql simplexml soap sockets solr sphinx spl + spl-types sqlite3 sqlsrv ssh2 stats stream strings taint tidy uodbc url + var varnish xml xmlreader xmlrpc xmlwriter xsl yaf yar yaml zip zlib) options[:only] = BOOKS.map { |s| "book.#{s}.html" } options[:skip] = %w( control-structures.intro.html control-structures.alternative-syntax.html - function.mssql-select-db.html) + function.mssql-select-db.html + pthreads.modifiers.html) options[:skip_patterns] = [/mysqlnd/] options[:attribution] = <<-HTML - © 1997–2015 The PHP Documentation Group
+ © 1997–2016 The PHP Documentation Group
Licensed under the Creative Commons Attribution License v3.0 or later. HTML end