From e7048a87f67aabdd72249a4e953114ecfb5e0455 Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Fri, 10 Nov 2017 11:40:21 -0500 Subject: [PATCH] Update PostgreSQL documentation (10.1) --- assets/stylesheets/pages/_postgres.scss | 17 ++++--- lib/docs/filters/postgresql/clean_html.rb | 39 +++++++++++++--- lib/docs/filters/postgresql/entries.rb | 44 ++++++++++--------- .../filters/postgresql/extract_metadata.rb | 18 ++++---- .../postgresql/normalize_class_names.rb | 13 ++++++ lib/docs/scrapers/postgresql.rb | 17 +++++-- 6 files changed, 105 insertions(+), 43 deletions(-) create mode 100644 lib/docs/filters/postgresql/normalize_class_names.rb diff --git a/assets/stylesheets/pages/_postgres.scss b/assets/stylesheets/pages/_postgres.scss index 487f984d..69e202c1 100644 --- a/assets/stylesheets/pages/_postgres.scss +++ b/assets/stylesheets/pages/_postgres.scss @@ -1,19 +1,24 @@ ._postgres { padding-left: 1rem; - h1, h1 ~ p, h1 ~ pre, h1 ~ blockquote, h2, .NAVFOOTER { margin-left: -1rem; } + h1, h1 ~ p, h1 ~ pre, h1 ~ ul, h1 ~ blockquote, h2, .navfooter { margin-left: -1rem; } h2 { @extend %block-heading; } - .VARIABLELIST dt { @extend %block-label, %label-blue; } + .variablelist dt { @extend %block-label, %label-blue; } - blockquote.NOTE, blockquote.IMPORTANT, blockquote.TIP, blockquote.CAUTION { @extend %note; } - blockquote.TIP { @extend %note-green; } - blockquote.CAUTION { @extend %note-orange; } + blockquote.note, blockquote.important, blockquote.tip, blockquote.caution { @extend %note; } + blockquote.tip { @extend %note-green; } + blockquote.caution { @extend %note-orange; } + + blockquote > h3 { + font-size: .875rem; + margin: 0 0 .25rem; + } p > code { @extend %label; } p.c2 { font-weight: $boldFontWeight; } - .NAVFOOTER > table { width: 100%; } + .navfooter > table { width: 100%; } td[align=center] { text-align: center; } td[align=right] { text-align: right; } } diff --git a/lib/docs/filters/postgresql/clean_html.rb b/lib/docs/filters/postgresql/clean_html.rb index 40f0a35b..b3da126c 100644 --- a/lib/docs/filters/postgresql/clean_html.rb +++ b/lib/docs/filters/postgresql/clean_html.rb @@ -13,18 +13,22 @@ module Docs def other @doc = at_css('#docContent') - css('.NAVHEADER', 'hr', '.NAVFOOTER a[accesskey="H"]').remove + css('.navheader', 'hr', '.navfooter a[accesskey="H"]').remove + + unless at_css('h1') + at_css('.refnamediv h2, .titlepage h2').name = 'h1' + end css('a[name]').each do |node| node.parent['id'] = node['name'] node.before(node.children).remove end - css('div.SECT1', 'pre > kbd', 'tt > code', 'h1 > tt', '> .CHAPTER', 'div.NOTE', '.APPENDIX').each do |node| + css('div.sect1', '.refentry', '.refnamediv', '.refentrytitle', '.refsynopsisdiv', 'pre > kbd', 'tt > code', 'h1 > tt', '> .chapter', '.appendix', '.titlepage', 'div:not([class]):not([id])', 'br', 'a.indexterm', 'acronym', '.productname', 'div.itemizedlist', 'span.sect2', 'span.application', 'em.replaceable', 'span.term').each do |node| node.before(node.children).remove end - css('div.CAUTION table.CAUTION').each do |node| + css('div.caution table.caution').each do |node| parent = node.parent title = node.at_css('.c2, .c3, .c4, .c5').content node.replace(node.css('p')) @@ -43,11 +47,27 @@ module Docs node.remove_attribute 'valign' end + css('.sect2 > h3').each do |node| + node.name = 'h2' + end + + css('.sect3 > h4').each do |node| + node.name = 'h3' + end + css('tt').each do |node| node.name = 'code' end - css('.REFSYNOPSISDIV > p').each do |node| + css('div.note', 'div.important', 'div.tip', 'div.caution').each do |node| + if node.at_css('blockquote') + node.before(node.children).remove + else + node.name = 'blockquote' + end + end + + css('.refsynopsisdiv > p').each do |node| node.name = 'pre' node.content = node.content end @@ -56,9 +76,18 @@ module Docs node.before(node.children).remove end - css('pre.SYNOPSIS', 'pre.PROGRAMLISTING').each do |node| + css('code').each do |node| + node.inner_html = node.inner_html.gsub(/\s*\n\s*/, ' ') + end + + css('pre.synopsis', 'pre.programlisting').each do |node| node['data-language'] = 'sql' end + + css('h1', 'ul', 'li', 'pre').each do |node| + node.remove_attribute 'class' + node.remove_attribute 'style' + end end end end diff --git a/lib/docs/filters/postgresql/entries.rb b/lib/docs/filters/postgresql/entries.rb index e222f310..4ddac456 100644 --- a/lib/docs/filters/postgresql/entries.rb +++ b/lib/docs/filters/postgresql/entries.rb @@ -90,33 +90,33 @@ module Docs return config_additional_entries if type && type.include?('Configuration') return data_types_additional_entries if type == 'Data Types' return command_additional_entries if type == 'Commands' - return get_heading_entries('h3[id]') if slug == 'functions-xml' + return get_heading_entries('h3[id], .sect3[id] > h3:first-child') if slug == 'functions-xml' - entries = get_heading_entries('h2[id]') + entries = get_heading_entries('h2[id], .sect2[id] > h2:first-child') case slug when 'queries-union' - entries.concat get_custom_entries('p > .LITERAL:first-child') + entries.concat get_custom_entries('p > .literal:first-child') when 'queries-table-expressions' - entries.concat get_heading_entries('h3[id]') - entries.concat get_custom_entries('dt > .LITERAL:first-child') + entries.concat get_heading_entries('h3[id], .sect3[id] > h3:first-child') + entries.concat get_custom_entries('dt > .literal:first-child') when 'functions-logical' entries.concat get_custom_entries('> table td:first-child > code') when 'functions-formatting' entries.concat get_custom_entries('#FUNCTIONS-FORMATTING-TABLE td:first-child > code') when 'functions-admin' - entries.concat get_custom_entries('.TABLE td:first-child > code') + entries.concat get_custom_entries('.table td:first-child > code') when 'functions-string' entries.concat get_custom_entries('> div[id^="FUNC"] td:first-child > code') entries.concat get_custom_entries('> div[id^="FORMAT"] td:first-child > code') else if type && type.start_with?('Functions') - entries.concat get_custom_entries('> .TABLE td:first-child > code.LITERAL:first-child') - entries.concat get_custom_entries('> .TABLE td:first-child > code.FUNCTION:first-child') - entries.concat get_custom_entries('> .TABLE td:first-child > code:not(.LITERAL):first-child + code.LITERAL') - entries.concat get_custom_entries('> .TABLE td:first-child > p > code.LITERAL:first-child') - entries.concat get_custom_entries('> .TABLE td:first-child > p > code.FUNCTION:first-child') - entries.concat get_custom_entries('> .TABLE td:first-child > p > code:not(.LITERAL):first-child + code.LITERAL') + entries.concat get_custom_entries('> .table td:first-child > code.literal:first-child') + entries.concat get_custom_entries('> .table td:first-child > code.function:first-child') + entries.concat get_custom_entries('> .table td:first-child > code:not(.literal):first-child + code.literal') + entries.concat get_custom_entries('> .table td:first-child > p > code.literal:first-child') + entries.concat get_custom_entries('> .table td:first-child > p > code.function:first-child') + entries.concat get_custom_entries('> .table td:first-child > p > code:not(.literal):first-child + code.literal') if slug == 'functions-comparison' && !at_css('#FUNCTIONS-COMPARISON-PRED-TABLE') # before 9.6 entries.concat %w(IS NULL BETWEEN DISTINCT\ FROM).map { |name| ["#{self.name}: #{name}"] } end @@ -127,8 +127,8 @@ module Docs end def config_additional_entries - css('.VARIABLELIST dt[id]').map do |node| - name = node.at_css('.VARNAME').content + css('.variablelist dt[id]').map do |node| + name = node.at_css('.varname').content ["Config: #{name}", node['id']] end end @@ -136,27 +136,27 @@ module Docs def data_types_additional_entries selector = case slug when 'rangetypes' - 'li > p > .TYPE:first-child' + 'li > p > .type:first-child' when 'datatype-textsearch' - '.SECT2 > .TYPE' + '.title > .type, .sect2 > .type' else - '.CALSTABLE td:first-child > .TYPE' + '.table-contents td:first-child > .type, .calstable td:first-child > .type' end get_custom_entries(selector) end def command_additional_entries - css('.REFSECT2[id^="SQL"]').each_with_object([]) do |node, entries| + css('.refsect2[id^="SQL"]').each_with_object([]) do |node, entries| next unless heading = node.at_css('h3') next unless heading.content.strip =~ /[A-Z_\-]+ Clause/ - name = heading.at_css('.LITERAL').content + name = heading.at_css('.literal').content name.prepend "#{self.name} ... " entries << [name, node['id']] end end def include_default_entry? - !initial_page? && !at_css('.TOC') && type + !initial_page? && (!at_css('.toc') || at_css('.sect2, .variablelist, .refsect1')) && type end SKIP_ENTRIES_SLUGS = [ @@ -199,7 +199,9 @@ module Docs css(selector).each_with_object([]) do |node, entries| name = node.content clean_heading_name(name) - entries << ["#{additional_entry_prefix}: #{name}", node['id']] unless skip_heading?(name) + id = node['id'] || node.parent['id'] + raise "missing ids for selector #{selector}" unless id + entries << ["#{additional_entry_prefix}: #{name}", id] unless skip_heading?(name) end end diff --git a/lib/docs/filters/postgresql/extract_metadata.rb b/lib/docs/filters/postgresql/extract_metadata.rb index ba313dca..260d05d0 100644 --- a/lib/docs/filters/postgresql/extract_metadata.rb +++ b/lib/docs/filters/postgresql/extract_metadata.rb @@ -8,19 +8,21 @@ module Docs end def extract_up_path - if node = at_css('.NAVHEADER a[accesskey="U"]') + if node = at_css('.navheader a[accesskey="u"], .navheader a[accesskey="U"]') result[:pg_up_path] = node['href'] end end def extract_chapter - return unless text = at_css('.NAVHEADER td[align="center"]').content - if match = text.match(/\AChapter (\d+)\. (.+)\z/) - result[:pg_chapter] = match[1].to_i - result[:pg_chapter_name] = match[2].strip - elsif match = text.match(/\AAppendix ([A-Z])\. (.+)\z/) - result[:pg_appendix] = match[1] - result[:pg_appendix_name] = match[2].strip + css('.navheader td[align="center"], .navheader th[align="center"]').each do |node| + text = node.content.strip + if match = text.match(/\AChapter (\d+)\. (.+)\z/) + result[:pg_chapter] = match[1].to_i + result[:pg_chapter_name] = match[2].strip + elsif match = text.match(/\AAppendix ([A-Z])\. (.+)\z/) + result[:pg_appendix] = match[1] + result[:pg_appendix_name] = match[2].strip + end end end end diff --git a/lib/docs/filters/postgresql/normalize_class_names.rb b/lib/docs/filters/postgresql/normalize_class_names.rb new file mode 100644 index 00000000..01c6cfa7 --- /dev/null +++ b/lib/docs/filters/postgresql/normalize_class_names.rb @@ -0,0 +1,13 @@ +module Docs + class Postgresql + class NormalizeClassNamesFilter < Filter + def call + doc.css('*').each do |node| + node['class'] = node['class'].downcase if node['class'].present? + end + + doc + end + end + end +end diff --git a/lib/docs/scrapers/postgresql.rb b/lib/docs/scrapers/postgresql.rb index cb810c6b..5cb71f03 100644 --- a/lib/docs/scrapers/postgresql.rb +++ b/lib/docs/scrapers/postgresql.rb @@ -55,19 +55,30 @@ module Docs Licensed under the PostgreSQL License. HTML + version '10' do + self.release = '10.1' + self.base_url = 'https://www.postgresql.org/docs/10/static/' + end + version '9.6' do - self.release = '9.6.5' + self.release = '9.6.6' self.base_url = 'https://www.postgresql.org/docs/9.6/static/' + + html_filters.insert_before 'postgresql/extract_metadata', 'postgresql/normalize_class_names' end version '9.5' do - self.release = '9.5.9' + self.release = '9.5.10' self.base_url = 'https://www.postgresql.org/docs/9.5/static/' + + html_filters.insert_before 'postgresql/extract_metadata', 'postgresql/normalize_class_names' end version '9.4' do - self.release = '9.4.14' + self.release = '9.4.15' self.base_url = 'https://www.postgresql.org/docs/9.4/static/' + + html_filters.insert_before 'postgresql/extract_metadata', 'postgresql/normalize_class_names' end end end