diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md index 33a58145..f58a422b 100644 --- a/docs/file-scrapers.md +++ b/docs/file-scrapers.md @@ -129,7 +129,14 @@ bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/ ``` ## PHP +Click the link under the "Many HTML files" column on https://www.php.net/download-docs.php, extract the tarball, change its name to `php` and put it in `/path/to/devdocs/docs/`. +Or run the following commands in your terminal: + +```sh +curl https://www.php.net/distributions/manual/php_manual_en.tar.gz > php.tar; \ +tar -xf php.tar; mv php-chunked-xhtml/ path/to/devdocs/docs/php/ +``` ## Python ### Versions 3.6+ diff --git a/lib/docs/filters/php/clean_html.rb b/lib/docs/filters/php/clean_html.rb index 6fd29bc2..91074640 100644 --- a/lib/docs/filters/php/clean_html.rb +++ b/lib/docs/filters/php/clean_html.rb @@ -1,6 +1,8 @@ +# coding: utf-8 module Docs class Php class CleanHtmlFilter < Filter + def call root_page? ? root : other doc @@ -11,24 +13,11 @@ module Docs end def other - css('.manualnavbar:first-child', '.manualnavbar .up', '.manualnavbar .home', 'hr').remove + # css('.manualnavbar:first-child', '.manualnavbar .up', '.manualnavbar .home', 'hr').remove - nav = at_css('.manualnavbar').remove + css('#breadcrumbs').remove - if prev_link = nav.at_css('.prev a') - prev_link.content = "← #{prev_link.content}" - end - - if next_link = nav.at_css('.next a') - next_link.content = "#{next_link.content} →" - end - - # Remove top-level
- if doc.elements.length == 1 - @doc = doc.first_element_child - end - - doc << nav + css('.nav').remove # Remove code highlighting br = //i @@ -50,7 +39,9 @@ module Docs css('.title + .verinfo + .title').each do |node| node.after(node.previous_element) end + end + end end end diff --git a/lib/docs/filters/php/entries.rb b/lib/docs/filters/php/entries.rb index c95508f8..751f844f 100644 --- a/lib/docs/filters/php/entries.rb +++ b/lib/docs/filters/php/entries.rb @@ -12,7 +12,6 @@ module Docs 'Exception' => 'Predefined Exceptions', 'Http' => 'HTTP', 'Json' => 'JSON', - 'Lua' => 'Lua', 'Mutex' => 'pthreads', 'php_user_filter' => 'Stream', 'Pool' => 'pthreads', @@ -31,11 +30,14 @@ module Docs 'Weak' => 'Weakref', 'Worker' => 'pthreads', 'XsltProcessor' => 'XSLT', - 'Yar' => 'Yar', - 'ZipArchive' => 'Zip' } + 'ZipArchive' => 'Zip', + 'Rar' => 'Rar', + 'Direct IO' => 'Dio', + 'Zoo' => 'Zookeeper' + } %w(APC Directory DOM Event Gearman Gmagick Imagick mysqli OAuth PDO Phar Reflection - Session SimpleXML Solr Sphinx SQLite3 Varnish XSLT Yaf).each do |str| + Session SimpleXML Solr Sphinx SQLite3 Varnish XSLT Yaf OpenAL Blenc Componere OPcache phpdbg runkit7 Uopz WinCache Xhprof Yac Radius Ncurses Readline Lzf Mhash Sodium SVM dbx FPM xattr xdiff Enchant Pspell Parle Recode FDF GnuPG ssdeep Yar Lua Stomp SPL zookeeper SDO).each do |str| TYPE_BY_NAME_STARTS_WITH[str] = str end @@ -76,7 +78,7 @@ module Docs TYPE_GROUPS = { 'Classes and Functions' => ['Classes/Object', 'Function handling', 'Predefined Interfaces and Classes', 'runkit', 'Throwable'], 'Encoding' => ['Gettext', 'iconv', 'Multibyte String'], - 'Compression' => ['Bzip2', 'Zip', 'Zlib'], + 'Compression' => ['Bzip2', 'Zip', 'Zlib', 'Rar'], 'Cryptography' => ['Hash', 'Mcrypt', 'OpenSSL', 'Password Hashing'], 'Database' => ['DBA', 'ODBC', 'PDO'], 'Date and Time' => ['Calendar', 'Date/Time'], @@ -94,7 +96,13 @@ module Docs def get_name return 'IntlException' if slug == 'class.intlexception' - name = css('> .sect1 > .title', 'h1', 'h2').first.content + + if at_css('h1') + name = at_css('h1').content.strip + else + name = at_css('h2').content.strip + end + name.remove! 'The ' name.sub! ' class', ' (class)' name.sub! ' interface', ' (interface)' @@ -102,10 +110,10 @@ module Docs end def get_type - return 'Language Reference' if subpath.start_with?('language.') || subpath.start_with?('functions.') + return 'Language Reference' if subpath.start_with?('language.') || subpath.start_with?('functions.') || subpath.start_with?('reserved') return 'PCRE Reference' if subpath.start_with?('regexp.') - type = at_css('.up').content.strip + type = at_css('.breadcrumbs-container li ~ li').content.strip type = 'SPL/Iterators' if type.end_with? 'Iterator' type = 'Ev' if type =~ /\AEv[A-Z]/ type.remove! ' Functions' @@ -167,6 +175,7 @@ module Docs def include_default_entry? !initial_page? && doc.at_css('.reference', '.refentry', '.sect1', '.simpara', '.para') end + end end end diff --git a/lib/docs/filters/php/fix_urls.rb b/lib/docs/filters/php/fix_urls.rb index c2d96d75..c8387d08 100644 --- a/lib/docs/filters/php/fix_urls.rb +++ b/lib/docs/filters/php/fix_urls.rb @@ -3,7 +3,7 @@ module Docs class FixUrlsFilter < Filter def call html.gsub! File.join(Php.base_url, Php.root_path), Php.base_url - html.gsub! %r{https://secure\.php\.net/manual/en/([^"']+?)\.html}, 'https://secure.php.net/manual/en/\1.php' + html.gsub! %r{https://www.php\.net/manual/en/([^"']+?)\.html}, 'https://www.php.net/manual/en/\1.php' html end end diff --git a/lib/docs/scrapers/php.rb b/lib/docs/scrapers/php.rb index c6ab0581..2db16d87 100644 --- a/lib/docs/scrapers/php.rb +++ b/lib/docs/scrapers/php.rb @@ -1,13 +1,12 @@ module Docs class Php < FileScraper # Downloaded from php.net/download-docs.php - include FixInternalUrlsBehavior self.name = 'PHP' self.type = 'php' - self.release = '7.2.9' - self.base_url = 'https://secure.php.net/manual/en/' + self.release = '8.0' + self.base_url = 'https://www.php.net/manual/en/' self.root_path = 'index.html' self.initial_paths = %w( funcref.html @@ -21,7 +20,7 @@ module Docs reserved.variables.html) self.links = { - home: 'https://secure.php.net/', + home: 'https://www.php.net/', code: 'https://git.php.net/?p=php-src.git;a=summary' } @@ -42,15 +41,15 @@ module Docs /\Areserved\.interfaces/, /\Areserved\.variables/] - BOOKS = %w(apache apc apcu array bc bzip2 calendar csprng classobj ctype curl - datetime dba dir dom ds eio errorfunc ev event exec exif fileinfo filesystem filter - ftp funchand gearman geoip gettext gmagick gmp hash ibase iconv iisfunc image - imagick imap info inotify intl json judy ldap libevent libxml lua mail mailparse - math mbstring mcrypt memcached misc mysqli network oauth openssl - outcontrol password pcntl pcre pdo pgsql phar posix proctitle pthreads quickhash regex runkit - reflection sca session sem session-pgsql shmop simplexml soap sockets solr sphinx spl - spl-types sqlite3 sqlsrv ssh2 stats stream strings sync taint tidy tokenizer uodbc url - v8js var varnish weakref xml xmlreader xmlrpc xmlwriter xsl yaf yar yaml zip zlib) + BOOKS = %w(apache apc apcu array bc blenc bzip2 calendar csprng componere classobj ctype curl + datetime dba dbx dir dio dom ds eio errorfunc enchant ev event exec exif fileinfo filesystem filter + fdf ftp funchand fpm gearman geoip gettext gmagick gmp gnupg hash ibase iconv iisfunc image + imagick imap info inotify intl iisfunc json judy ldap libevent libxml lua lzf mail mailparse + math mhash mbstring mcrypt memcached misc mysqli ncurses network nsapi oauth openssl openal opcache + outcontrol password parle pcntl phpdbg pcre pdo pgsql phar posix proctitle pspell pthreads quickhash recode regex runkit runkit7 radius rar + reflection readline sca session sem session-pgsql shmop simplexml ssdeep sdo sdodasrel sdo-das-xml sodium soap sockets solr snmp sphinx spl stomp + spl-types sqlite3 sqlsrv ssh2 stats stream strings sync svm svn taint tidy tokenizer uodbc url uopz + v8js var varnish wddx weakref wincache xattr xdiff xhprof xml xmlreader xmlrpc xmlwriter xsl yaf yar yaml yac zip zookeeper zlib) options[:only] = BOOKS.map { |s| "book.#{s}.html" } @@ -63,7 +62,7 @@ module Docs options[:skip_patterns] = [/mysqlnd/, /xdevapi/i] options[:attribution] = <<-HTML - © 1997–2018 The PHP Documentation Group
+ © 1997–2020 The PHP Documentation Group
Licensed under the Creative Commons Attribution License v3.0 or later. HTML @@ -71,5 +70,6 @@ module Docs doc = fetch_doc('https://www.php.net/supported-versions.php', opts) doc.at_css('table > tbody > .stable:last-of-type > td > a').content.strip end + end end