From c5ace7effd0bbb7f226b1cd90ae74976a02fc09b Mon Sep 17 00:00:00 2001 From: Thibaut Date: Sun, 15 Mar 2015 22:42:43 -0400 Subject: [PATCH] Improve JavaScript scraper --- lib/docs/filters/javascript/entries.rb | 4 ++-- lib/docs/filters/mdn/clean_html.rb | 2 +- lib/docs/scrapers/mdn/javascript.rb | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/docs/filters/javascript/entries.rb b/lib/docs/filters/javascript/entries.rb index b8ed5eb1..dcb79804 100644 --- a/lib/docs/filters/javascript/entries.rb +++ b/lib/docs/filters/javascript/entries.rb @@ -8,14 +8,14 @@ module Docs def get_name if slug.start_with? 'Global_Objects/' - name, method = *slug.sub('Global_Objects/', '').split('/') + name, method, *rest = *slug.sub('Global_Objects/', '').split('/') name.prepend 'Intl.' if INTL_OBJECTS.include?(name) if method unless method == method.upcase || method == 'NaN' method = method[0].downcase + method[1..-1] # e.g. Trim => trim end - name << ".#{method}" + name << ".#{([method] + rest).join('.')}" end name diff --git a/lib/docs/filters/mdn/clean_html.rb b/lib/docs/filters/mdn/clean_html.rb index 1bc61e48..80a3c675 100644 --- a/lib/docs/filters/mdn/clean_html.rb +++ b/lib/docs/filters/mdn/clean_html.rb @@ -20,7 +20,7 @@ module Docs node.before(node.children).remove end - css('h2[style]', 'pre[style]').remove_attr('style') + css('h2[style]', 'pre[style]', 'th[style]', 'div[style*="line-height"]').remove_attr('style') doc end diff --git a/lib/docs/scrapers/mdn/javascript.rb b/lib/docs/scrapers/mdn/javascript.rb index 3ba4f8b1..9cd67f74 100644 --- a/lib/docs/scrapers/mdn/javascript.rb +++ b/lib/docs/scrapers/mdn/javascript.rb @@ -35,6 +35,8 @@ module Docs /Operators /Statements) + options[:skip_patterns] = [/additional_examples/i, /noSuchMethod/i] + options[:fix_urls] = ->(url) do url.sub! 'https://developer.mozilla.org/en-US/docs/JavaScript/Reference', Javascript.base_url url.sub! 'https://developer.mozilla.org/en/JavaScript/Reference', Javascript.base_url