From 14aa61a798799e33d7cc1fe8ba391ce7af3f43dd Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 6 Nov 2018 21:17:18 +0100 Subject: [PATCH 1/4] Add GnuCOBOL documentation --- .../templates/pages/about_tmpl.coffee | 5 ++ lib/docs/filters/gnu_cobol/clean_html.rb | 56 +++++++++++++++++++ lib/docs/filters/gnu_cobol/entries.rb | 50 +++++++++++++++++ lib/docs/scrapers/gnu_cobol.rb | 20 +++++++ 4 files changed, 131 insertions(+) create mode 100644 lib/docs/filters/gnu_cobol/clean_html.rb create mode 100644 lib/docs/filters/gnu_cobol/entries.rb create mode 100644 lib/docs/scrapers/gnu_cobol.rb diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index c7e5a414..e0ef245f 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -290,6 +290,11 @@ credits = [ '2005-2018 Linus Torvalds and others', 'GPLv2', 'https://raw.githubusercontent.com/git/git/master/COPYING' + ], [ + 'GnuCOBOL', + 'Free Software Foundation', + 'GFDL', + 'https://www.gnu.org/licenses/fdl-1.3.en.html' ], [ 'Go', 'Google, Inc.', diff --git a/lib/docs/filters/gnu_cobol/clean_html.rb b/lib/docs/filters/gnu_cobol/clean_html.rb new file mode 100644 index 00000000..4e0d9ef2 --- /dev/null +++ b/lib/docs/filters/gnu_cobol/clean_html.rb @@ -0,0 +1,56 @@ +module Docs + class GnuCobol + class CleanHtmlFilter < Filter + def call + # Replace the title + at_css('.settitle').content = 'GnuCOBOL' + + # Remove the Table of Contents + # It's huge and the DevDocs sidebar is basically a direct copy + css('.contents, .contents-heading').remove + + # Remove the changelog + at_css('p').remove + at_css('ol').remove + + # Remove everything after Appendix B + # This includes the license text, the document changelog, the compiler changelog and the footnote + start_element = at_css('a[name="Appendix-C-_002d-GNU-Free-Documentation-License"]').previous_element + next_element = start_element.next_element + until start_element.nil? + start_element.remove + start_element = next_element + next_element = start_element.nil? ? nil : start_element.next_element + end + + # Make headers bigger + css('h4').each {|node| node.name = 'h3'} + + # Remove the newlines + # All paragraphs are inside

tags already anyways + css('br').remove + + # The original document contains sub-headers surrounded by equal signs + # Convert that to actual header elements + css('div[align="center"]').each do |node| + if node.content.include?('=' * 50) + node.replace('


') + else + node.remove_attribute('align') + node.name = 'h4' + end + end + + # Remove all hr's after h4's + css('h4').each do |node| + next_element = node.next_element + if !next_element.nil? && next_element.name == 'hr' + next_element.remove + end + end + + doc + end + end + end +end diff --git a/lib/docs/filters/gnu_cobol/entries.rb b/lib/docs/filters/gnu_cobol/entries.rb new file mode 100644 index 00000000..a11e2edd --- /dev/null +++ b/lib/docs/filters/gnu_cobol/entries.rb @@ -0,0 +1,50 @@ +module Docs + class GnuCobol + class EntriesFilter < Docs::EntriesFilter + # The entire reference is one big page, so get_name and get_type are not necessary + + def additional_entries + entries = [] + + css('.contents > ul > li:not(:last-child)').each do |node| + parent = node.at_css('a') + + entries << create_entry(parent, parent) + + node.css('ul a').each do |link| + entries << create_entry(parent, link) + end + end + + entries.compact + end + + def create_entry(parent_link, current_link) + name = current_link.content + id = current_link['href'][1..-1] + type = parent_link.content + + # The navigation link don't actually navigate to the correct header + # Instead, it references an `a` tag above it + # The `a` tag it is referencing is removed by a filter further down the pipeline + # This adds the id to the correct header element + target_node = at_css("a[name='#{id}']") + target_node.next_element.next_element['id'] = id + + if name.start_with?('Appendix') + type = 'Appendices' + end + + # Everything after Appendix B is removed by the clean_html filter + ignored_names = [ + 'Appendix C - GNU Free Documentation License', + 'Appendix D - Summary of Document Changes', + 'Appendix E - Summary of Compiler Changes since 2009 and version v1-1', + 'Index' + ] + + ignored_names.include?(name) ? nil : [name, id, type] + end + end + end +end diff --git a/lib/docs/scrapers/gnu_cobol.rb b/lib/docs/scrapers/gnu_cobol.rb new file mode 100644 index 00000000..75d939a9 --- /dev/null +++ b/lib/docs/scrapers/gnu_cobol.rb @@ -0,0 +1,20 @@ +module Docs + class GnuCobol < UrlScraper + self.name = 'GnuCOBOL' + self.slug = 'gnu_cobol' + self.type = 'simple' + self.release = '2.2' + self.base_url = 'https://open-cobol.sourceforge.io/HTML/gnucobpg.html' + self.links = { + home: 'https://sourceforge.net/projects/open-cobol/', + code: 'https://sourceforge.net/p/open-cobol/code/HEAD/tree/trunk/' + } + + html_filters.push 'gnu_cobol/entries', 'gnu_cobol/clean_html' + + options[:attribution] = <<-HTML + Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
+ Licensed under the GNU Free Documentation License. + HTML + end +end From e61f3f7202f749ea60c1c9761394c5f154ecd98e Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Tue, 13 Aug 2019 23:11:50 +0200 Subject: [PATCH 2/4] gnu_cobol: implement get_latest_version --- lib/docs/scrapers/gnu_cobol.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/docs/scrapers/gnu_cobol.rb b/lib/docs/scrapers/gnu_cobol.rb index 75d939a9..9965359d 100644 --- a/lib/docs/scrapers/gnu_cobol.rb +++ b/lib/docs/scrapers/gnu_cobol.rb @@ -16,5 +16,11 @@ module Docs Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
Licensed under the GNU Free Documentation License. HTML + + def get_latest_version(opts) + doc = fetch_doc('https://open-cobol.sourceforge.io/HTML/gnucobpg.html', opts) + title = doc.at_css('h1').content + title.scan(/([0-9.]+)/)[0][0] + end end end From 054fde33035e1fcc031a63995b02d4f600f0748d Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Wed, 28 Aug 2019 21:23:12 +0200 Subject: [PATCH 3/4] gnu_cobol: process review comments --- lib/docs/filters/gnu_cobol/clean_html.rb | 36 +++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/lib/docs/filters/gnu_cobol/clean_html.rb b/lib/docs/filters/gnu_cobol/clean_html.rb index 4e0d9ef2..0ca5552f 100644 --- a/lib/docs/filters/gnu_cobol/clean_html.rb +++ b/lib/docs/filters/gnu_cobol/clean_html.rb @@ -13,6 +13,9 @@ module Docs at_css('p').remove at_css('ol').remove + # Remove horizontal lines + css('hr').remove + # Remove everything after Appendix B # This includes the license text, the document changelog, the compiler changelog and the footnote start_element = at_css('a[name="Appendix-C-_002d-GNU-Free-Documentation-License"]').previous_element @@ -25,27 +28,40 @@ module Docs # Make headers bigger css('h4').each {|node| node.name = 'h3'} + css('h3.unnumberedsec').each {|node| node.name = 'h2'} # Remove the newlines # All paragraphs are inside

tags already anyways css('br').remove # The original document contains sub-headers surrounded by equal signs - # Convert that to actual header elements + # Convert those to actual header elements css('div[align="center"]').each do |node| if node.content.include?('=' * 50) - node.replace('


') - else - node.remove_attribute('align') - node.name = 'h4' + previous = node.previous_element + if !previous.nil? && previous.name == 'div' && previous['align'] == 'center' + previous.name = 'h4' + end + + node.remove end end - # Remove all hr's after h4's - css('h4').each do |node| - next_element = node.next_element - if !next_element.nil? && next_element.name == 'hr' - next_element.remove + # Remove align="center" attributes + css('[align="center"]').remove_attribute('align') + + # Convert tt tags into inline code blocks and remove any surrounding quotes + css('tt').each do |node| + node.name = 'code' + + previous_node = node.previous + if !previous_node.nil? && previous_node.text? + previous_node.content = previous_node.content.sub(/([^"]?")\Z/, '') + end + + next_node = node.next + if !next_node.nil? && next_node.text? + next_node.content = next_node.content.sub(/\A("[^"]?)/, '') end end From 873f92d3c27c5cf1a2e15be8849cdef9e7f8b639 Mon Sep 17 00:00:00 2001 From: Jasper van Merle Date: Sun, 1 Sep 2019 03:24:11 +0200 Subject: [PATCH 4/4] gnu_cobol: add logo and fix review comments in clean html filter --- lib/docs/filters/gnu_cobol/clean_html.rb | 14 ++++++++------ public/icons/docs/gnu_cobol/16.png | Bin 0 -> 661 bytes public/icons/docs/gnu_cobol/16@2x.png | Bin 0 -> 1977 bytes public/icons/docs/gnu_cobol/SOURCE | 1 + 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 public/icons/docs/gnu_cobol/16.png create mode 100644 public/icons/docs/gnu_cobol/16@2x.png create mode 100644 public/icons/docs/gnu_cobol/SOURCE diff --git a/lib/docs/filters/gnu_cobol/clean_html.rb b/lib/docs/filters/gnu_cobol/clean_html.rb index 0ca5552f..6ac3e7b3 100644 --- a/lib/docs/filters/gnu_cobol/clean_html.rb +++ b/lib/docs/filters/gnu_cobol/clean_html.rb @@ -16,14 +16,16 @@ module Docs # Remove horizontal lines css('hr').remove + # Remove acronym tags but keep the content + css('acronym').each {|node| node.name = 'span'} + # Remove everything after Appendix B # This includes the license text, the document changelog, the compiler changelog and the footnote - start_element = at_css('a[name="Appendix-C-_002d-GNU-Free-Documentation-License"]').previous_element - next_element = start_element.next_element - until start_element.nil? - start_element.remove - start_element = next_element - next_element = start_element.nil? ? nil : start_element.next_element + current_element = at_css('a[name="Appendix-C-_002d-GNU-Free-Documentation-License"]').previous + until current_element.nil? + next_element = current_element.next + current_element.remove + current_element = next_element end # Make headers bigger diff --git a/public/icons/docs/gnu_cobol/16.png b/public/icons/docs/gnu_cobol/16.png new file mode 100644 index 0000000000000000000000000000000000000000..24a558f127eb9e0944f2bfdd31e5f673fd7036e9 GIT binary patch literal 661 zcmV;G0&4waZ?B(Pdf&}S?HCeFNhHtZgXCFJuxcl3ZZnJ5cElD?W z&NoP21~4T#4I%teO1VOE?(P$kQ#0EIU{zJ!IyySqtg7nyDCrYP9f0$are^jSzz9GI zA#6+PBn3(5%xpJ=@cDc`-vO}G_dTM!XOaN$6u<<)G07(9{2qXc-EEW$2 zZUKM{z=I+ECz3|8A^APYI{>}|@Z&&z9zwW3fVLChYinzlQc4d~N?)gxZl{#)w{82< z=^us=z8H;0)3$A|oU&8bb!}WM7QfbY{cS1bnwfni=`E7)jK||YNjk}Mec!JGFp`s| zX|@1d1+aB+aNtZS-7>Q`NghiY1Nhy{lBA0ye*>`U`~Iex9h=z?X0{FBg;L7vB>#@? z{%I-Y#Tet=f%Y2!?yXj<%OQkKl8-_ND|c_)eNt7`lbrL*l3sQ9bv8|N9l%w0zZheD z?CvzPza(9dv~>3fU}9!dk~<_PIp<66z8*sO=A;WGv9YmH@9ph9C+Wnms_Lj|ntr)l vj?7H%ZnN3!5J3NL%firuvr6=T69C{JlQSgw-@t&`00000NkvXXu0mjfUzs;) literal 0 HcmV?d00001 diff --git a/public/icons/docs/gnu_cobol/16@2x.png b/public/icons/docs/gnu_cobol/16@2x.png new file mode 100644 index 0000000000000000000000000000000000000000..b87c38a4b5228e02ee8608546ed56b67bb6bb2c8 GIT binary patch literal 1977 zcmV;q2S)gbP)OLX+AgZ9yQ)7`uU>uLJ9{q&eIFJrO&ox_laq6E@4091 zz4p56>{yLa!v+}vEVl(GZBeo5!t{nnJy36jS< zoz7$ZDBdq)?YJ>BGjmrdjV`i%WdJ(ajQhE}=xrkUG*_Jc{-~h>k0Ayyx-8Yk5uBz(gh!~SJ5fMx&y;(|m zB_h6%b5_pz5qE!p05Cp2-fFkow*pWp<+A|3BB=lxSn^H86NSHnSkLjY&G0QhH9(W&k~s(iD-Zf+hx86F<~T0}G#7Z(p%v)R1E-S088rvW?zplfDbl3htjQZJ=+rrxmW?zaHQ zIp=9JTTUrWMMRV2c>u-T_et7eX0s8oljPx)($6}b&as*|wzRbLx8>#KL#SE2AHW!Z zL~?*6B)OzP0AXf}BxlWR)ZJU|ehtYXcW+7B3ZPLle!9-Mur+M1Yjh4W;WaF^>&l& zkqk4N1F$WnbkyD7!oMFtZy!WP@_0lvB<&=51i-wRbt9q`5u42HoTL$VZ$-pd4WpAh zPjU%BhvdAOt<`(>YG*?LPOYx4PG9VFy;-x_oFzFAU=zuGB%iFIH2^DSHfLt@Ip;U( zceVj&0T``Ix6R!TO4>v+NEVXQ0IG<12^VzZl8T$>=jUgd&E{`PDc_WId(QcRQp#jX zX-h=V-S+|*NGbg#rSuJwnAJjlGB5OgO9GPth`(HZbGa@D=?UA%%W)A`=lG+i`cU}Q#)N>!&Ome~9A*ms0o4dc< zFY`Kw0bCHijT<-a1fYxyOn+U{8p$?*36fWs*>_S(e~*Z30X#l2F)@4U)TzzwcKgiw z#0LfjcCW3iy-BibW@~+MBH5`O+VJr3ZT;*^DfcWbEuD~bORLp-CIe8X(>XmjIQS5N zyCqFWL@~3=Nq#IMx@J~pW@f%?W{qaE*_8CBl+qD*|B{*QXfzs6bi3WV04%4JRwH6D z=X~|h(9m>5ES6I4%sIa*X&1=@02Tn8l5{IaM@KhB#3X=y5pk`gW<*?8Rn?1TR`q(l zosu3mvl~ecOB#uYJ!bZg8iiGoC%WD4=OW^&ob!v4wpUg4MEzwPk+hIfI_>T^M#Rk| z&*DN~a5$Gzz9;DpNi#X;lkR?HO6hl{lyLx`G_xaBRh^dfguCCCb3P&IIRIzPY(UbY zyI)4KWoBp0tnKbzO1ZA8s_z5XB5AbFexBs?0grzCB6_q`FZ3BUmW!vKP1ad@k}Nsr91+c06XqkL-D0+7eE7k{P>wntyZ;~b@Areh7}fRb{=e*>tNx9L<|r7J00000 LNkvXXu0mjfV}HJP literal 0 HcmV?d00001 diff --git a/public/icons/docs/gnu_cobol/SOURCE b/public/icons/docs/gnu_cobol/SOURCE new file mode 100644 index 00000000..9aa7d9a2 --- /dev/null +++ b/public/icons/docs/gnu_cobol/SOURCE @@ -0,0 +1 @@ +https://sourceforge.net/p/open-cobol/icon