From 2f5ed72fb23988c1aecc29ee763fa3e4f6db3fcf Mon Sep 17 00:00:00 2001 From: MasterEnoc Date: Thu, 17 Dec 2020 10:35:28 -0600 Subject: [PATCH] Typescript: add 'tsconfig' section and update scraper to 4.1.3 --- lib/docs/filters/typescript/clean_html.rb | 16 ++++++++++++++- lib/docs/filters/typescript/entries.rb | 20 ++++++++++++++++-- lib/docs/scrapers/typescript.rb | 25 ++++++++++++++++++----- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/lib/docs/filters/typescript/clean_html.rb b/lib/docs/filters/typescript/clean_html.rb index de52765a..b0a62a51 100644 --- a/lib/docs/filters/typescript/clean_html.rb +++ b/lib/docs/filters/typescript/clean_html.rb @@ -1,8 +1,16 @@ module Docs class Typescript class CleanHtmlFilter < Filter + def call - root_page? ? root : other + if slug.include?('index') + root + elsif slug == ('tsconfig/') + tsconfig + else + other + end + doc end @@ -27,6 +35,12 @@ module Docs node.remove_attribute('class') end end + + def tsconfig + css('h2 a', 'h3 a').remove + css('svg').remove + end + end end end diff --git a/lib/docs/filters/typescript/entries.rb b/lib/docs/filters/typescript/entries.rb index eec6439e..98d42a78 100644 --- a/lib/docs/filters/typescript/entries.rb +++ b/lib/docs/filters/typescript/entries.rb @@ -3,7 +3,7 @@ module Docs class EntriesFilter < Docs::EntriesFilter def get_name - return at_css('h2').content + at_css('h1') ? at_css('h1').content : at_css('h2').content end def get_type @@ -11,9 +11,25 @@ module Docs end def additional_entries - css('h2').each_with_object [] do |node,entries| + entries = [] + + css('h2').each do |node| + + if slug == 'tsconfig/' + node.css('a').remove + end + entries << [node.content, node['id'], name] end + + if slug == 'tsconfig/' + css('h3').each do |node| + node.css('a').remove + entries << [node.content, node['id'], name] + end + end + + entries end end diff --git a/lib/docs/scrapers/typescript.rb b/lib/docs/scrapers/typescript.rb index 18a09ec6..9da3c021 100644 --- a/lib/docs/scrapers/typescript.rb +++ b/lib/docs/scrapers/typescript.rb @@ -2,9 +2,13 @@ module Docs class Typescript < UrlScraper self.name = 'TypeScript' self.type = 'simple' - self.release = '4.1.2' - self.base_url = 'https://www.typescriptlang.org/docs/handbook/' - self.root_path = 'index.html' + self.release = '4.1.3' + self.base_url = 'https://www.typescriptlang.org/' + self.root_path = 'docs/handbook/index.html' + self.initial_paths = [ + 'tsconfig/' + ] + self.links = { home: 'https://www.typescriptlang.org', code: 'https://github.com/Microsoft/TypeScript' @@ -15,14 +19,24 @@ module Docs options[:container] = 'main' options[:skip] = [ - 'react-&-webpack.html', + 'docs/handbook/react-&-webpack.html' ] options[:skip_patterns] = [ /2/, - /release-notes/, + /release-notes/ + ] + + options[:only_patterns] = [ + /docs\/handbook\//, + /tsconfig\// ] + options[:fix_urls] = -> (url) do + url.gsub!(/docs\/handbook\/index.html/, "index.html") + url + end + options[:attribution] = <<-HTML © 2012-2020 Microsoft
Licensed under the Apache License, Version 2.0. @@ -31,5 +45,6 @@ module Docs def get_latest_version(opts) get_latest_github_release('Microsoft', 'TypeScript', opts) end + end end