From 98aba4a05587c569addc0b3701aa6eaf3ed552a0 Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Wed, 21 May 2014 10:58:28 +0200 Subject: [PATCH 1/7] start adding haskell scraper --- lib/docs/scrapers/haskell.rb | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 lib/docs/scrapers/haskell.rb diff --git a/lib/docs/scrapers/haskell.rb b/lib/docs/scrapers/haskell.rb new file mode 100755 index 00000000..5ba2fb83 --- /dev/null +++ b/lib/docs/scrapers/haskell.rb @@ -0,0 +1,9 @@ +module Docs + class Haskell < UrlScraper + self.name = 'Haskell' + self.slug = 'haskell' + self.version = '7.8.2' + self.base_url = 'http://www.haskell.org/ghc/docs/7.8.2/html/libraries' + + end +end From 073dbf1ab772ab5c57155a0932a63c8c4d886b58 Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Wed, 21 May 2014 15:03:11 +0200 Subject: [PATCH 2/7] get the thing working for haskell --- lib/docs/filters/haskell/clean_html.rb | 32 +++++++++++++++ lib/docs/filters/haskell/entries.rb | 56 ++++++++++++++++++++++++++ lib/docs/scrapers/haskell.rb | 15 +++++++ 3 files changed, 103 insertions(+) create mode 100644 lib/docs/filters/haskell/clean_html.rb create mode 100644 lib/docs/filters/haskell/entries.rb diff --git a/lib/docs/filters/haskell/clean_html.rb b/lib/docs/filters/haskell/clean_html.rb new file mode 100644 index 00000000..d722b45a --- /dev/null +++ b/lib/docs/filters/haskell/clean_html.rb @@ -0,0 +1,32 @@ +module Docs + class Haskell + class CleanHtmlFilter < Filter + def call + + # remove unwanted elements + css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.empty', '.package').remove + + # turn captions into real headers + css('.caption').each do |node| + node.name = 'h2' + end + + css('table .caption').each do |node| + node.name = 'h3' + end + + # # turn source listing in to pre + css('.src').each do |node| + node.name = 'pre' + end + + + if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries' + css('h1').remove + end + + doc + end + end + end +end diff --git a/lib/docs/filters/haskell/entries.rb b/lib/docs/filters/haskell/entries.rb new file mode 100644 index 00000000..b42a5710 --- /dev/null +++ b/lib/docs/filters/haskell/entries.rb @@ -0,0 +1,56 @@ +module Docs + class Haskell + class EntriesFilter < Docs::EntriesFilter + + # gets name and type in one fell swoop + # + # eg. + # Control.Monad > [Monad, Control] + # Control.Concurrent.Mvar > [Concurrent.MVar, Control] + # Array > [Array, nil] + def get_name_and_type + if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries' + name = 'Haskell' + type = nil + else + # find full module identifier + caption = at_css('#module-header .caption') + + if caption + # split the module path + parts = caption.content.split('.') + + if parts.length > 1 + # if more than one part then the + # first is the type and the rest is the name + type = parts[0] + name = parts.drop(1).join('.') + else + # if only one part, this is the name + name = parts[0] + type = nil + end + else + # no caption found -> no type / no name + name = 'no-name' + type = 'no-type' + end + end + [name, type] + end + + # get the name + def get_name + n, t = get_name_and_type() + n + end + + # get the type + def get_type + n, t = get_name_and_type() + t + end + + end + end +end diff --git a/lib/docs/scrapers/haskell.rb b/lib/docs/scrapers/haskell.rb index 5ba2fb83..7692f8a6 100755 --- a/lib/docs/scrapers/haskell.rb +++ b/lib/docs/scrapers/haskell.rb @@ -2,8 +2,23 @@ module Docs class Haskell < UrlScraper self.name = 'Haskell' self.slug = 'haskell' + self.type = 'haskell' self.version = '7.8.2' self.base_url = 'http://www.haskell.org/ghc/docs/7.8.2/html/libraries' + self.initial_paths = ['/index.html'] + + html_filters.push 'haskell/entries' + html_filters.push 'haskell/clean_html' + html_filters.push 'title' + + + options[:container] = '#content' + options[:skip_patterns] = [/src/, /index/, /haskell2010/] # skip source listings and index files + + options[:attribution] = <<-HTML + © The University Court of the University of Glasgow.
+ All rights reserved. See here for more info + HTML end end From 5e18b70afdb900a32aead0545742561b0ca2d615 Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Wed, 21 May 2014 15:05:14 +0200 Subject: [PATCH 3/7] add haskell logos --- public/icons/docs/haskell/16.png | Bin 0 -> 994 bytes public/icons/docs/haskell/16@2x.png | Bin 0 -> 1287 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 public/icons/docs/haskell/16.png create mode 100644 public/icons/docs/haskell/16@2x.png diff --git a/public/icons/docs/haskell/16.png b/public/icons/docs/haskell/16.png new file mode 100644 index 0000000000000000000000000000000000000000..97f907327a1712d03ddb9a35d275f42fdbe8ea8e GIT binary patch literal 994 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbKJOS+@4BLl<6e(pbstU$g(vPY0F z14ES>14Ba#1H&(%P{RubhEf9thF1v;3|2E37{m+a>h>wg}@|4a}Uh}Ot495iJBiR3^LL>x$NRATs(g+&~w<)jtE|6COHK*8712*Lkb zIQ}2w0@-zx3vS>h6^6e+1G5?aw==_Kk=1~V-HgzPtapnV%y6K+2cafI6ugN7y98)# z9}B`*n0NkfL^2RA2xkCw6cKQo5{k-&Be^c&CbZdb5dmDa-jW;N#5=*P2HRSy8tTTc*4`gF+}2Wa)JWK4NHriJ1Q%FN+dGjoE9&dw5e;=tX*sZ8Ab+%nVGL&yfS?uEg>Z-J$nYz)EP6qy&@S* zBcmd3N8Dy&o$=<4tjt`gIk7Q!@7%R*V4COPyl0QJR87s_KYy7YSTQwAH90#SJ|sEA z!XeGg#m)8jkz+@VHyfy^n5n5QU$JD_(sYxQu8o@vQ&OKldG=IX#K=fY^z^hTMn+Sn zhKGcOUN!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+081LO4opAc7|g0=Dt|BrG3 zNo2HIj^Teb!~Yx<1Xr+6AG`P(psr$s6kPpsDTe=fSj6Gx099hui=y!;7to!^PCaM{ zGc$?d|8b}+ni>>i7qG!)mrKH&l*RD>3^&BsgGMj|Gf)hKdt?RBBZW{kK*2-CF!ldM z;U+_UnhG-k7=}yO5GKQ{1F8qP0UpA@@Hq`~2FSs1K`>*#0lElK0nm0dWl#}d5dHr@ zL4!9C7_~D?g8YJkv{FTVU6l%FWa$Xh)97Atya5^LF?YpTv~J_ZPG zo5$56XB8uMcmhlLRTY=F{vsOcLfhwZwVsx8xbxz9rS-F~QQ}AXxTdvTQFi^{BY%Dl zciSmx=P8qY6aT)@(S6ywgS*{_DRX+I+exX2uRUB`Pu_be9DS)0)Nw*aLM^(VS?YK< zm$%nM>CShx8MXFoan{%SE&(0KnB?v5GN*9QA$1^!v%n*=n1Ml08H5=tmfDvA6|{M} zIEGl9PEL^EYM9wNwYPU}^W^T??d{X~yP4`5XF4cosOT7FU(*RR)X_Sk+Q6lYVsb*=aAV;|P62`W9K{(yMgfjqSy5IeCh;h< zshRZONNMstaf)rGfwqadxxT#|&jK|AF_q@K0USll0wSW*r!3mRcvM^=AZ%X$8iu6C zSXUE|bw(l{A)(i=TswG}QEJjx0e>@-rYWYT+atC!FvUoKoMo#GO%?$veaGW+rsbp>6sF0dr|zj5%IJj>~p z0k;D~&PkOH)icK?tNZIccrfRXb*a{QlNBp3r!!RE*?Z-bcp XtHiCLV4K!9paup{S3j3^P6 Date: Sat, 24 May 2014 17:07:15 +0200 Subject: [PATCH 4/7] clean up complexity notation and deprecated, better caption handling --- assets/stylesheets/pages/_haskell.scss | 61 ++++++++++++++++ lib/docs/filters/haskell/clean_html.rb | 96 ++++++++++++++++++++++++-- lib/docs/filters/haskell/entries.rb | 12 ++++ 3 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 assets/stylesheets/pages/_haskell.scss diff --git a/assets/stylesheets/pages/_haskell.scss b/assets/stylesheets/pages/_haskell.scss new file mode 100644 index 00000000..dd8e661e --- /dev/null +++ b/assets/stylesheets/pages/_haskell.scss @@ -0,0 +1,61 @@ +._icon-haskell:before { + background-image: image-url('/icons/docs/haskell/16.png'); + background-size: cover; + background-repeat: no-repeat; +} + +td.src { + font-family: $monoFont; + font-weight: normal; + font-style: normal; + background: #f8f8f8; + width: 20%; +} + +// warnings are red +.warning { + @extend %note; + @extend %note-red; +} + + +// complexity classes are blue boxes +.with-complexity { + display: flex; + justify-content: space-between; + align-items: flex-start; + align-content: stretch; + flex-direction: row; +} + +.complexity { + @extend %note; + @extend %note-blue; + margin: 0; + margin-left: 1em; + margin-bottom: 0.75em; + font-style: italic; + white-space: nowrap; + flex-shrink: 0; + order:2; +} + +.complexity + span { + order: 1; +} + +.added { + @extend %note; + @extend %note-gold; +} + +.top { + margin-bottom: 3em; +} + +.example { + -webkit-flex: none; + background: #faf9e2; + border: 1px solid; + border-color: #dddaaa #dddaaa #d7d7a9; +} diff --git a/lib/docs/filters/haskell/clean_html.rb b/lib/docs/filters/haskell/clean_html.rb index d722b45a..66c2e917 100644 --- a/lib/docs/filters/haskell/clean_html.rb +++ b/lib/docs/filters/haskell/clean_html.rb @@ -4,29 +4,115 @@ module Docs def call # remove unwanted elements - css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.empty', '.package').remove + css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.show .empty', '.package').remove + + css('pre').each do |node| + node.add_css_class('example') + end + + # cpations in tables are h3 + css('table .caption').each do |node| + node.name = 'h3' + end # turn captions into real headers css('.caption').each do |node| + node.name = 'h1' + end + + css('.top > .caption').each do |node| node.name = 'h2' end - css('table .caption').each do |node| + # subsections + css('.top > .subs > .caption', '.fields > .caption').each do |node| node.name = 'h3' end - # # turn source listing in to pre - css('.src').each do |node| - node.name = 'pre' + # subsubsections + css('.top > .subs > .subs > .caption').each do |node| + node.name = 'h4' end + css('.top > .subs > .subs > .subs > .caption').each do |node| + node.name = 'h5' + end + + css('.top > .subs > .subs > .subs > .subs > .caption').each do |node| + node.name = 'h6' + end + + # turn source listing in to pre + css('.src').each do |node| + if node.name == "td" + # pre = doc.create_element 'pre' + # pre.children = node.children + # node.children = [pre] + else + node.name = 'pre' + end + end if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries' css('h1').remove end + css('a').each do |node| + if node['name'] + node['id'] = node['name'] + end + end + + css('.caption').each do |node| + if node.content == 'Arguments' + node.remove + end + end + + # add some informational boxes + css('em').each do |node| + if node.content == 'Deprecated.' + # Make deprecated messages red. + node.parent.add_css_class('warning') + elsif node.content =~ /O\(.*\)/ + # this is big_O notation, but only apply the class if this is not + # inside running text (it must be at the start of a paragraph) + # from: + #

O(n). Koel ok

+ # to: + #

+ # O(n) + # Koel ok + #

+ if node.previous == nil + node.add_css_class('complexity') # add css class + node.name="span" # just make it div + node.next.content = node.next.content.gsub(/^. /, "") # remove . if directly after em + node.content = node.content.gsub(/\.$/, "") # remove trailing . if it's inside em + + # reparent the nodes + cont = doc.document.create_element "p", :class => "with-complexity" + node.parent.previous = cont + par = node.parent + node.parent = cont + par.parent = cont + par.name = "span" + end + elsif node.content =~ /Since: .*/ + # add box to 'Since:' annotations + node.add_css_class('added') + end + end + doc end end end end + +class Nokogiri::XML::Node + def add_css_class( *classes ) + existing = (self['class'] || "").split(/\s+/) + self['class'] = existing.concat(classes).uniq.join(" ") + end +end diff --git a/lib/docs/filters/haskell/entries.rb b/lib/docs/filters/haskell/entries.rb index b42a5710..07de7c40 100644 --- a/lib/docs/filters/haskell/entries.rb +++ b/lib/docs/filters/haskell/entries.rb @@ -51,6 +51,18 @@ module Docs t end + # def additional_entries + # css('a').inject [] do |entries, node| + # name = node.content + # id = node['name'] + # if id + # puts id + # entries << [name, id, nil] + # end + # entries + # end + # end + end end end From de0e3ab80f79c914489730bdf7da27bb4f019c88 Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Sat, 24 May 2014 19:34:14 +0200 Subject: [PATCH 5/7] fix tables to work everywhere, add comments --- assets/stylesheets/pages/_haskell.scss | 59 +++++++++++++++++-- lib/docs/filters/haskell/clean_html.rb | 79 +++++++++++++++++++------- lib/docs/filters/haskell/entries.rb | 14 +---- 3 files changed, 115 insertions(+), 37 deletions(-) diff --git a/assets/stylesheets/pages/_haskell.scss b/assets/stylesheets/pages/_haskell.scss index dd8e661e..4abf9474 100644 --- a/assets/stylesheets/pages/_haskell.scss +++ b/assets/stylesheets/pages/_haskell.scss @@ -4,12 +4,36 @@ background-repeat: no-repeat; } +.empty-table .empty { + display: none; +} + +.arguments td.src { + background: #faf9e2; + width: 30%; +} + +th.src, td.src { font-family: $monoFont; font-weight: normal; font-style: normal; background: #f8f8f8; - width: 20%; +} + +caption { + font-weight: bold; + text-align: left; + font-style: italic; + font-size: 1.1em; +} + +// remove margin in descript listing +dd > pre { + @extend %pre; + margin: 0; + background: #faf9e2; + border-color: #dddaaa #dddaaa #d7d7a9; } // warnings are red @@ -44,18 +68,43 @@ td.src { order: 1; } +// add box type to "since: ..." .added { @extend %note; @extend %note-gold; } -.top { - margin-bottom: 3em; +.added-cell { + @extend %note-gold; +} + +.fields h3 { + display: none; +} + +// separate types more +.src { + margin-top: 2.5em; +} + +h1 + .top .src, +h2 + .top .src, +h3 + .top .src, +.caption + .top .src { + margin-top: 0; +} + +// but not for first type +h1 + .top, +h2 + .top, +h3 + .top, +h4 + .top { + margin-top: 0; } +// change color of example code .example { - -webkit-flex: none; - background: #faf9e2; border: 1px solid; + background: #faf9e2; border-color: #dddaaa #dddaaa #d7d7a9; } diff --git a/lib/docs/filters/haskell/clean_html.rb b/lib/docs/filters/haskell/clean_html.rb index 66c2e917..5f6105a7 100644 --- a/lib/docs/filters/haskell/clean_html.rb +++ b/lib/docs/filters/haskell/clean_html.rb @@ -4,11 +4,7 @@ module Docs def call # remove unwanted elements - css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.show .empty', '.package').remove - - css('pre').each do |node| - node.add_css_class('example') - end + css('#footer', '#package-header', '#module-header', '#synopsis', '.link', '#table-of-contents', '.package').remove # cpations in tables are h3 css('table .caption').each do |node| @@ -20,6 +16,7 @@ module Docs node.name = 'h1' end + # section css('.top > .caption').each do |node| node.name = 'h2' end @@ -34,39 +31,79 @@ module Docs node.name = 'h4' end + # ... css('.top > .subs > .subs > .subs > .caption').each do |node| node.name = 'h5' end + # ...... css('.top > .subs > .subs > .subs > .subs > .caption').each do |node| node.name = 'h6' end + # all pre's are examples + css('pre').each do |node| + node.add_css_class('example') + end + # turn source listing in to pre css('.src').each do |node| - if node.name == "td" - # pre = doc.create_element 'pre' - # pre.children = node.children - # node.children = [pre] - else + if node.name != "td" node.name = 'pre' end end - if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries' - css('h1').remove + # check if second column of table is totally empty. + # and remove it if it is + css('table').each do |table| + empty = true + table.css('td + td').each do |snd| + empty = empty && snd['class'] =~ /empty/ + end + if empty + # remove empty column + table.css('td + td').remove + end end + # move table captions into the tables + css(".caption + table").each do |table| + caption = table.previous + caption.name = "caption" + caption.parent = table + end + + css(".caption + .show table").each do |table| + caption = table.parent.parent.css('.caption')[0] + caption.name = 'caption' + caption.parent = table + end + + # better arguments display: + css('.src + .arguments table').each do |table| + src = table.parent.previous # the function name + row = doc.document.create_element('tr') + table.css('tr')[0].before(row) + src.parent = row + src.name = "th" + src['colspan'] = 2 + end + + # remove root page title + if root_page? + at_css('h1').remove + end + + # add id to links (based on name) css('a').each do |node| if node['name'] node['id'] = node['name'] end end - css('.caption').each do |node| - if node.content == 'Arguments' - node.remove - end + # make code in description into proper pre + css('dd > code').each do |node| + node.name = 'pre' end # add some informational boxes @@ -86,8 +123,8 @@ module Docs #

if node.previous == nil node.add_css_class('complexity') # add css class - node.name="span" # just make it div - node.next.content = node.next.content.gsub(/^. /, "") # remove . if directly after em + node.name="span" # just make it div + node.next.content = node.next.content.gsub(/^. /, "") # remove . if directly after em node.content = node.content.gsub(/\.$/, "") # remove trailing . if it's inside em # reparent the nodes @@ -100,7 +137,11 @@ module Docs end elsif node.content =~ /Since: .*/ # add box to 'Since:' annotations - node.add_css_class('added') + if node.parent.parent.name == "td" + node.parent.parent.add_css_class('added-cell') + else + node.add_css_class('added') + end end end diff --git a/lib/docs/filters/haskell/entries.rb b/lib/docs/filters/haskell/entries.rb index 07de7c40..c0fbcbb6 100644 --- a/lib/docs/filters/haskell/entries.rb +++ b/lib/docs/filters/haskell/entries.rb @@ -10,6 +10,7 @@ module Docs # Array > [Array, nil] def get_name_and_type if at_css('h1') && at_css('h1').content == 'Haskell Hierarchical Libraries' + puts 'ok' name = 'Haskell' type = nil else @@ -50,19 +51,6 @@ module Docs n, t = get_name_and_type() t end - - # def additional_entries - # css('a').inject [] do |entries, node| - # name = node.content - # id = node['name'] - # if id - # puts id - # entries << [name, id, nil] - # end - # entries - # end - # end - end end end From f4c9b6942d6e87e4dacfc6a0c34d9db569510cdb Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Sat, 24 May 2014 19:34:30 +0200 Subject: [PATCH 6/7] fix correct index --- lib/docs/scrapers/haskell.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/docs/scrapers/haskell.rb b/lib/docs/scrapers/haskell.rb index 7692f8a6..9fbb7af0 100755 --- a/lib/docs/scrapers/haskell.rb +++ b/lib/docs/scrapers/haskell.rb @@ -4,7 +4,7 @@ module Docs self.slug = 'haskell' self.type = 'haskell' self.version = '7.8.2' - self.base_url = 'http://www.haskell.org/ghc/docs/7.8.2/html/libraries' + self.base_url = 'http://www.haskell.org/ghc/docs/7.8.2/html/libraries/' self.initial_paths = ['/index.html'] html_filters.push 'haskell/entries' @@ -13,7 +13,7 @@ module Docs options[:container] = '#content' - options[:skip_patterns] = [/src/, /index/, /haskell2010/] # skip source listings and index files + options[:skip_patterns] = [/src/, /index/, /haskell2010/, /ghc-/, /Cabal-/] # skip source listings and index files options[:attribution] = <<-HTML © The University Court of the University of Glasgow.
From 923fc900ee9d46258d23fe2e649d73c8cbfcce42 Mon Sep 17 00:00:00 2001 From: Romeo Van Snick Date: Sat, 24 May 2014 19:39:23 +0200 Subject: [PATCH 7/7] make flex work with webkit --- assets/stylesheets/pages/_haskell.scss | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/assets/stylesheets/pages/_haskell.scss b/assets/stylesheets/pages/_haskell.scss index 4abf9474..15b026b6 100644 --- a/assets/stylesheets/pages/_haskell.scss +++ b/assets/stylesheets/pages/_haskell.scss @@ -46,10 +46,16 @@ dd > pre { // complexity classes are blue boxes .with-complexity { display: flex; + display: -webkit-flex; + justify-content: space-between; + -webkit-justify-content: space-between; + align-items: flex-start; + -webkit-align-items: flex-start; + align-content: stretch; - flex-direction: row; + -webkit-align-content: stretch; } .complexity { @@ -60,12 +66,17 @@ dd > pre { margin-bottom: 0.75em; font-style: italic; white-space: nowrap; + flex-shrink: 0; - order:2; + -webkit-flex-shrink: 0; + + order: 2; + -webkit-order: 2; } .complexity + span { order: 1; + -webkit-order: 1; } // add box type to "since: ..."