diff --git a/docs/file-scrapers.md b/docs/file-scrapers.md
index 33a58145..f58a422b 100644
--- a/docs/file-scrapers.md
+++ b/docs/file-scrapers.md
@@ -129,7 +129,14 @@ bsdtar --extract --xz --file - --strip-components=6 --directory=docs/openjdk\~8/
```
## PHP
+Click the link under the "Many HTML files" column on https://www.php.net/download-docs.php, extract the tarball, rename the extracted directory to `php`, and move it into `/path/to/devdocs/docs/`.
+Alternatively, run the following commands in your terminal:
+
+```sh
+curl --fail --location --output php.tar.gz https://www.php.net/distributions/manual/php_manual_en.tar.gz && \
+tar -xzf php.tar.gz && mv php-chunked-xhtml/ /path/to/devdocs/docs/php/
+```
## Python
### Versions 3.6+
diff --git a/lib/docs/filters/php/clean_html.rb b/lib/docs/filters/php/clean_html.rb
index 6fd29bc2..91074640 100644
--- a/lib/docs/filters/php/clean_html.rb
+++ b/lib/docs/filters/php/clean_html.rb
@@ -1,6 +1,8 @@
+# coding: utf-8
module Docs
class Php
class CleanHtmlFilter < Filter
+
def call
root_page? ? root : other
doc
@@ -11,24 +13,11 @@ module Docs
end
def other
- css('.manualnavbar:first-child', '.manualnavbar .up', '.manualnavbar .home', 'hr').remove
+ # css('.manualnavbar:first-child', '.manualnavbar .up', '.manualnavbar .home', 'hr').remove
- nav = at_css('.manualnavbar').remove
+ css('#breadcrumbs').remove
- if prev_link = nav.at_css('.prev a')
- prev_link.content = "← #{prev_link.content}"
- end
-
- if next_link = nav.at_css('.next a')
- next_link.content = "#{next_link.content} →"
- end
-
- # Remove top-level
- if doc.elements.length == 1
- @doc = doc.first_element_child
- end
-
- doc << nav
+ css('.nav').remove
# Remove code highlighting
br = /
/i
@@ -50,7 +39,9 @@ module Docs
css('.title + .verinfo + .title').each do |node|
node.after(node.previous_element)
end
+
end
+
end
end
end
diff --git a/lib/docs/filters/php/entries.rb b/lib/docs/filters/php/entries.rb
index c95508f8..751f844f 100644
--- a/lib/docs/filters/php/entries.rb
+++ b/lib/docs/filters/php/entries.rb
@@ -12,7 +12,6 @@ module Docs
'Exception' => 'Predefined Exceptions',
'Http' => 'HTTP',
'Json' => 'JSON',
- 'Lua' => 'Lua',
'Mutex' => 'pthreads',
'php_user_filter' => 'Stream',
'Pool' => 'pthreads',
@@ -31,11 +30,14 @@ module Docs
'Weak' => 'Weakref',
'Worker' => 'pthreads',
'XsltProcessor' => 'XSLT',
- 'Yar' => 'Yar',
- 'ZipArchive' => 'Zip' }
+ 'ZipArchive' => 'Zip',
+ 'Rar' => 'Rar',
+ 'Direct IO' => 'Dio',
+ 'Zoo' => 'Zookeeper'
+ }
%w(APC Directory DOM Event Gearman Gmagick Imagick mysqli OAuth PDO Phar Reflection
- Session SimpleXML Solr Sphinx SQLite3 Varnish XSLT Yaf).each do |str|
+ Session SimpleXML Solr Sphinx SQLite3 Varnish XSLT Yaf OpenAL Blenc Componere OPcache phpdbg runkit7 Uopz WinCache Xhprof Yac Radius Ncurses Readline Lzf Mhash Sodium SVM dbx FPM xattr xdiff Enchant Pspell Parle Recode FDF GnuPG ssdeep Yar Lua Stomp SPL zookeeper SDO).each do |str|
TYPE_BY_NAME_STARTS_WITH[str] = str
end
@@ -76,7 +78,7 @@ module Docs
TYPE_GROUPS = {
'Classes and Functions' => ['Classes/Object', 'Function handling', 'Predefined Interfaces and Classes', 'runkit', 'Throwable'],
'Encoding' => ['Gettext', 'iconv', 'Multibyte String'],
- 'Compression' => ['Bzip2', 'Zip', 'Zlib'],
+ 'Compression' => ['Bzip2', 'Zip', 'Zlib', 'Rar'],
'Cryptography' => ['Hash', 'Mcrypt', 'OpenSSL', 'Password Hashing'],
'Database' => ['DBA', 'ODBC', 'PDO'],
'Date and Time' => ['Calendar', 'Date/Time'],
@@ -94,7 +96,13 @@ module Docs
def get_name
return 'IntlException' if slug == 'class.intlexception'
- name = css('> .sect1 > .title', 'h1', 'h2').first.content
+
+ if at_css('h1')
+ name = at_css('h1').content.strip
+ else
+ name = at_css('h2').content.strip
+ end
+
name.remove! 'The '
name.sub! ' class', ' (class)'
name.sub! ' interface', ' (interface)'
@@ -102,10 +110,10 @@ module Docs
end
def get_type
- return 'Language Reference' if subpath.start_with?('language.') || subpath.start_with?('functions.')
+ return 'Language Reference' if subpath.start_with?('language.') || subpath.start_with?('functions.') || subpath.start_with?('reserved')
return 'PCRE Reference' if subpath.start_with?('regexp.')
- type = at_css('.up').content.strip
+ type = at_css('.breadcrumbs-container li ~ li').content.strip
type = 'SPL/Iterators' if type.end_with? 'Iterator'
type = 'Ev' if type =~ /\AEv[A-Z]/
type.remove! ' Functions'
@@ -167,6 +175,7 @@ module Docs
def include_default_entry?
!initial_page? && doc.at_css('.reference', '.refentry', '.sect1', '.simpara', '.para')
end
+
end
end
end
diff --git a/lib/docs/filters/php/fix_urls.rb b/lib/docs/filters/php/fix_urls.rb
index c2d96d75..c8387d08 100644
--- a/lib/docs/filters/php/fix_urls.rb
+++ b/lib/docs/filters/php/fix_urls.rb
@@ -3,7 +3,7 @@ module Docs
class FixUrlsFilter < Filter
def call
html.gsub! File.join(Php.base_url, Php.root_path), Php.base_url
- html.gsub! %r{https://secure\.php\.net/manual/en/([^"']+?)\.html}, 'https://secure.php.net/manual/en/\1.php'
+ html.gsub! %r{https://www.php\.net/manual/en/([^"']+?)\.html}, 'https://www.php.net/manual/en/\1.php'
html
end
end
diff --git a/lib/docs/scrapers/php.rb b/lib/docs/scrapers/php.rb
index c6ab0581..2db16d87 100644
--- a/lib/docs/scrapers/php.rb
+++ b/lib/docs/scrapers/php.rb
@@ -1,13 +1,12 @@
module Docs
class Php < FileScraper
# Downloaded from php.net/download-docs.php
-
include FixInternalUrlsBehavior
self.name = 'PHP'
self.type = 'php'
- self.release = '7.2.9'
- self.base_url = 'https://secure.php.net/manual/en/'
+ self.release = '8.0'
+ self.base_url = 'https://www.php.net/manual/en/'
self.root_path = 'index.html'
self.initial_paths = %w(
funcref.html
@@ -21,7 +20,7 @@ module Docs
reserved.variables.html)
self.links = {
- home: 'https://secure.php.net/',
+ home: 'https://www.php.net/',
code: 'https://git.php.net/?p=php-src.git;a=summary'
}
@@ -42,15 +41,15 @@ module Docs
/\Areserved\.interfaces/,
/\Areserved\.variables/]
- BOOKS = %w(apache apc apcu array bc bzip2 calendar csprng classobj ctype curl
- datetime dba dir dom ds eio errorfunc ev event exec exif fileinfo filesystem filter
- ftp funchand gearman geoip gettext gmagick gmp hash ibase iconv iisfunc image
- imagick imap info inotify intl json judy ldap libevent libxml lua mail mailparse
- math mbstring mcrypt memcached misc mysqli network oauth openssl
- outcontrol password pcntl pcre pdo pgsql phar posix proctitle pthreads quickhash regex runkit
- reflection sca session sem session-pgsql shmop simplexml soap sockets solr sphinx spl
- spl-types sqlite3 sqlsrv ssh2 stats stream strings sync taint tidy tokenizer uodbc url
- v8js var varnish weakref xml xmlreader xmlrpc xmlwriter xsl yaf yar yaml zip zlib)
+ BOOKS = %w(apache apc apcu array bc blenc bzip2 calendar csprng componere classobj ctype curl
+ datetime dba dbx dir dio dom ds eio errorfunc enchant ev event exec exif fileinfo filesystem filter
+ fdf ftp funchand fpm gearman geoip gettext gmagick gmp gnupg hash ibase iconv iisfunc image
+ imagick imap info inotify intl json judy ldap libevent libxml lua lzf mail mailparse
+ math mhash mbstring mcrypt memcached misc mysqli ncurses network nsapi oauth openssl openal opcache
+ outcontrol password parle pcntl phpdbg pcre pdo pgsql phar posix proctitle pspell pthreads quickhash recode regex runkit runkit7 radius rar
+ reflection readline sca session sem session-pgsql shmop simplexml ssdeep sdo sdodasrel sdo-das-xml sodium soap sockets solr snmp sphinx spl stomp
+ spl-types sqlite3 sqlsrv ssh2 stats stream strings sync svm svn taint tidy tokenizer uodbc url uopz
+ v8js var varnish wddx weakref wincache xattr xdiff xhprof xml xmlreader xmlrpc xmlwriter xsl yaf yar yaml yac zip zookeeper zlib)
options[:only] = BOOKS.map { |s| "book.#{s}.html" }
@@ -63,7 +62,7 @@ module Docs
options[:skip_patterns] = [/mysqlnd/, /xdevapi/i]
options[:attribution] = <<-HTML
- © 1997–2018 The PHP Documentation Group
+ © 1997–2020 The PHP Documentation Group
Licensed under the Creative Commons Attribution License v3.0 or later.
HTML
@@ -71,5 +70,6 @@ module Docs
doc = fetch_doc('https://www.php.net/supported-versions.php', opts)
doc.at_css('table > tbody > .stable:last-of-type > td > a').content.strip
end
+
end
end