diff --git a/lib/docs/filters/postgresql/clean_html.rb b/lib/docs/filters/postgresql/clean_html.rb
index 8c9a9f45..a30e4543 100644
--- a/lib/docs/filters/postgresql/clean_html.rb
+++ b/lib/docs/filters/postgresql/clean_html.rb
@@ -11,6 +11,10 @@ module Docs
end
def other
+ @doc = at_css('#docContent')
+
+ css('.NAVHEADER', '.NAVFOOTER').remove
+
css('a[name]').each do |node|
node.parent['id'] = node['name']
node.before(node.children).remove
diff --git a/lib/docs/filters/postgresql/entries.rb b/lib/docs/filters/postgresql/entries.rb
index b8904484..c39ee4a2 100644
--- a/lib/docs/filters/postgresql/entries.rb
+++ b/lib/docs/filters/postgresql/entries.rb
@@ -11,98 +11,144 @@ module Docs
'System Administration Functions' => 'Administration Functions',
'System Information Functions' => 'Information Functions' }
- def get_name
- name = at_css('h1').content
- clean_heading_name(name)
+ PREPEND_TYPES = [
+ 'Type Conversion',
+ 'Full Text Search',
+ 'Performance Tips',
+ 'Server Configuration',
+ 'Monitoring' ]
+
+ REPLACE_TYPES = {
+ 'Routine Database Maintenance Tasks' => 'Maintenance',
+ 'High Availability, Load Balancing, and Replication' => 'High Availability',
+ 'Monitoring Database Activity' => 'Monitoring',
+ 'Monitoring Disk Usage' => 'Monitoring',
+ 'Reliability and the Write-Ahead Log' => 'Write-Ahead Log' }
+
+ def base_name # cleaned text of the page's <h1>; used by both get_name and get_type
+ @base_name ||= clean_heading_name(at_css('h1').content) # memoized so the heading is read and cleaned once
+ end
- if %w(Overview Introduction).include?(name)
+ def get_name
+ if %w(Overview Introduction).include?(base_name)
result[:pg_chapter_name]
+ elsif PREPEND_TYPES.include?(type)
+ "#{type}: #{base_name}"
else
- name.remove! ' (Common Table Expressions)'
- REPLACE_NAMES[name] || name
+ REPLACE_NAMES[base_name] || base_name
end
end
- def clean_heading_name(name)
- name.remove! %r{\A[\d\.\s]+}
- name.remove! 'Using '
- name.remove! %r{\AThe }
- name
- end
-
def get_type
return if initial_page?
if result[:pg_up_path] == 'sql-commands.html'
'Commands'
- elsif result[:pg_up_path].start_with? 'reference-'
+ elsif result[:pg_up_path].start_with?('reference-')
'Applications'
elsif type = result[:pg_chapter_name]
- if type.start_with?('Func') && (match = name.match(/\A(?!Form|Seq|Set|Enum)(.+) Func/))
+ if type.start_with?('Func') && (match = base_name.match(/\A(?!Form|Seq|Set|Enum)(.+) Func/))
"Functions: #{match[1]}"
else
- type.remove 'SQL '
+ type = type.remove('SQL ') # non-bang: leave result[:pg_chapter_name] untouched, get_name returns that same string
+ REPLACE_TYPES[type] || type
end
end
end
def additional_entries
return [] if skip_additional_entries?
- return get_config_entries if config_page?
+ return config_additional_entries if type && type.include?('Configuration')
+ return data_types_additional_entries if type == 'Data Types'
return get_heading_entries('h3[id]') if slug == 'functions-xml'
- if type == 'Data Types'
- return get_custom_entries case slug
- when 'rangetypes' then 'li > p > .TYPE:first-child'
- when 'datatype-textsearch' then '.SECT2 > .TYPE'
- else '.CALSTABLE td:first-child > .TYPE' end
- end
-
entries = get_heading_entries('h2[id]')
- if slug == 'queries-union'
+ case slug
+ when 'queries-union'
entries.concat get_custom_entries('p > .LITERAL:first-child')
- elsif slug == 'queries-table-expressions'
+ when 'queries-table-expressions'
entries.concat get_heading_entries('h3[id]')
entries.concat get_custom_entries('dt > .LITERAL:first-child')
- elsif slug == 'functions-logical'
+ when 'functions-logical'
entries.concat get_custom_entries('> table td:first-child > code')
- elsif slug == 'functions-formatting'
+ when 'functions-formatting'
entries.concat get_custom_entries('#FUNCTIONS-FORMATTING-TABLE td:first-child > code')
- elsif slug == 'functions-admin'
+ when 'functions-admin'
entries.concat get_custom_entries('.TABLE td:first-child > code')
- elsif slug == 'functions-string'
+ when 'functions-string'
entries.concat get_custom_entries('> div[id^="FUNC"] td:first-child > code')
- elsif type && type.start_with?('Functions')
- entries.concat get_custom_entries('> .TABLE td:first-child > code:first-child')
- entries.concat get_comparison_entries if slug == 'functions-comparison'
+ else
+ if type && type.start_with?('Functions')
+ entries.concat get_custom_entries('> .TABLE td:first-child > code:first-child')
+ entries.concat %w(IS NULL BETWEEN DISTINCT\ FROM).map { |name| ["#{self.name}: #{name}"] } if slug == 'functions-comparison'
+ end
end
entries
end
- def get_config_entries
+ def config_additional_entries
css('.VARIABLELIST dt[id]').map do |node|
name = node.at_css('.VARNAME').content
["Config: #{name}", node['id']]
end
end
+ def data_types_additional_entries # entries for 'Data Types' pages: choose a CSS selector by slug, then delegate to get_custom_entries
+ selector = case slug
+ when 'rangetypes'
+ 'li > p > .TYPE:first-child'
+ when 'datatype-textsearch'
+ '.SECT2 > .TYPE'
+ else # every other slug falls back to the .CALSTABLE first-column selector
+ '.CALSTABLE td:first-child > .TYPE'
+ end
+ get_custom_entries(selector)
+ end
+
+ def include_default_entry? # false on the initial page and on any page that contains a .TOC element
+ !initial_page? && !at_css('.TOC')
+ end
+
+ SKIP_ENTRIES_SLUGS = [
+ 'config-setting',
+ 'applevel-consistency' ]
+
+ SKIP_ENTRIES_TYPES = [
+ 'Localization',
+ 'Type Conversion',
+ 'Full Text Search',
+ 'Performance Tips',
+ 'Client Authentication',
+ 'Managing Databases',
+ 'Maintenance',
+ 'Backup and Restore',
+ 'High Availability',
+ 'Monitoring' ]
+
+ def skip_additional_entries? # when true, additional_entries returns [] before doing any work
+ SKIP_ENTRIES_SLUGS.include?(slug) || SKIP_ENTRIES_TYPES.include?(type) # skipped either by exact slug or by the page's type
+ end
+
+ def clean_heading_name(name) # mutates name in place and returns that same string; get_heading_entries relies on the mutation
+ name.remove! %r{\A[\d\.\s]+} # leading run of digits, dots and whitespace (presumably section numbering)
+ name.remove! 'Using ' # plain-string pattern, so not limited to the start of the heading
+ name.remove! %r{\AThe } # anchored: only a leading "The "
+ name.remove! ' (Common Table Expressions)' # previously stripped only in get_name; now applied to every cleaned heading
+ name
+ end
+
def get_heading_entries(selector)
- css(selector).inject [] do |entries, node|
+ css(selector).each_with_object([]) do |node, entries|
name = node.content
clean_heading_name(name)
-
- unless skip_heading?(name)
- entries << ["#{additional_entry_prefix}: #{name}", node['id']]
- end
-
- entries
+ entries << ["#{additional_entry_prefix}: #{name}", node['id']] unless skip_heading?(name)
end
end
def get_custom_entries(selector)
- css(selector).inject [] do |entries, node|
+ css(selector).each_with_object([]) do |node, entries|
name = node.content
name.remove! %r{\(.*?\)}m
name.remove! %r{\[.*?\]}m
@@ -117,14 +163,6 @@ module Docs
node['id'] = id
entries << [name, id]
end
-
- entries
- end
- end
-
- def get_comparison_entries
- %w(IS NULL BETWEEN DISTINCT\ FROM).map do |name|
- ["#{self.name}: #{name}"]
end
end
@@ -132,22 +170,10 @@ module Docs
type.dup.gsub!('Functions: ', '') || self.name
end
- def skip_additional_entries?
- slug == 'config-setting' || %w(Concurrency\ Control Localization).include?(type)
- end
-
def skip_heading?(name)
%w(Usage\ Patterns Portability Caveats Overview).include?(name) ||
(type.start_with?('Functions') && slug != 'functions-xml' && name.split.first.upcase!)
end
-
- def include_default_entry?
- !(initial_page? || at_css('.TOC') || config_page?)
- end
-
- def config_page?
- slug.start_with? 'runtime-config'
- end
end
end
end
diff --git a/lib/docs/filters/postgresql/clean_nav.rb b/lib/docs/filters/postgresql/extract_metadata.rb
similarity index 87%
rename from lib/docs/filters/postgresql/clean_nav.rb
rename to lib/docs/filters/postgresql/extract_metadata.rb
index 0f6c7090..50e15d87 100644
--- a/lib/docs/filters/postgresql/clean_nav.rb
+++ b/lib/docs/filters/postgresql/extract_metadata.rb
@@ -1,10 +1,9 @@
module Docs
class Postgresql
- class CleanNavFilter < Filter
+ class ExtractMetadataFilter < Filter
def call
extract_up_path
extract_chapter
- css('.NAVHEADER', '.NAVFOOTER').remove
doc
end
diff --git a/lib/docs/scrapers/postgresql.rb b/lib/docs/scrapers/postgresql.rb
index 909036fd..fdfe07b8 100644
--- a/lib/docs/scrapers/postgresql.rb
+++ b/lib/docs/scrapers/postgresql.rb
@@ -1,14 +1,13 @@
module Docs
- class Postgresql < FileScraper
+ class Postgresql < UrlScraper
self.name = 'PostgreSQL'
self.type = 'postgres'
- self.version = 'up to 9.3.2'
- self.dir = '/Users/Thibaut/DevDocs/Docs/PostgreSQL'
- self.base_url = 'http://www.postgresql.org/docs/9.3/static/'
+ self.version = '9.4'
+ self.base_url = "http://www.postgresql.org/docs/#{version}/static/"
self.root_path = 'reference.html'
- self.initial_paths = %w(sql.html runtime-config.html charset.html)
+ self.initial_paths = %w(sql.html admin.html)
- html_filters.insert_before 'normalize_urls', 'postgresql/clean_nav'
+ html_filters.insert_before 'normalize_urls', 'postgresql/extract_metadata'
html_filters.push 'postgresql/clean_html', 'postgresql/entries', 'title'
options[:title] = false
@@ -19,7 +18,6 @@ module Docs
arrays.html
rowtypes.html
rangetypes.html
- mvcc-intro.html
transaction-iso.html
explicit-locking.html
applevel-consistency.html
@@ -27,7 +25,15 @@ module Docs
config-setting.html
locale.html
collation.html
- multibyte.html)
+ multibyte.html
+ using-explain.html
+ planner-stats.html
+ explicit-joins.html
+ populate.html
+ non-durability.html
+ logfile-maintenance.html
+ continuous-archiving.html
+ dynamic-trace.html)
options[:only_patterns] = [
/\Asql\-/,
@@ -37,18 +43,31 @@ module Docs
/\Aqueries\-/,
/\Adatatype\-/,
/\Afunctions\-/,
+ /\Atypeconv\-/,
+ /\Atextsearch\-/,
+ /\Amvcc\-/,
/\Aindexes\-/,
- /\Aruntime\-config\-/]
+ /\Aruntime\-config\-/,
+ /\Aauth\-/,
+ /\Aclient\-authentication/,
+ /\Amanage\-ag/,
+ /\Aroutine/,
+ /\Abackup\-/,
+ /\Amonitoring\-/,
+ /\Awal\-/,
+ /\Adisk/,
+ /role/,
+ /recovery/,
+ /standby/]
options[:skip] = %w(
ddl-others.html
- runtime-config-custom.html
- runtime-config-short.html
functions-event-triggers.html
- functions-trigger.html)
+ functions-trigger.html
+ textsearch-migration.html)
options[:attribution] = <<-HTML
- © 1996–2013 The PostgreSQL Global Development Group
+ © 1996–2014 The PostgreSQL Global Development Group
Licensed under the PostgreSQL License.
HTML
end