Add a Terraform scraper

pull/822/merge
Doug Fitzmaurice 7 years ago committed by Thibaut Courouble
parent eca9d7ef09
commit 716fc26af1

@ -96,6 +96,7 @@
'pages/support_tables',
'pages/tcl_tk',
'pages/tensorflow',
'pages/terraform',
'pages/underscore',
'pages/vue',
'pages/webpack',

@ -0,0 +1,4 @@
// Styles for pages rendered under the ._terraform class.
._terraform {
  // Inherit the rules of the %simple placeholder.
  @extend %simple;

  // .note and .alert descendants both extend the %note placeholder.
  .note,
  .alert {
    @extend %note;
  }
}

@ -0,0 +1,28 @@
module Docs
  class Terraform
    # Cleans up a scraped Terraform documentation page: narrows the document
    # to the `#inner` element, removes rules and anchor links, converts alerts
    # to blockquotes and tags code blocks with a highlighting language.
    class CleanHtmlFilter < Filter
      # Runs the clean-up and returns the resulting document (`doc`).
      def call
        @doc = at_css('#inner')

        css('hr', 'a.anchor').remove

        css('.alert').each do |node|
          node.name = 'blockquote'
        end

        css('pre').each do |node|
          # A <pre> without a class attribute yields nil here; normalising to
          # a string keeps the pattern lookups below from raising.
          css_class = node['class'].to_s

          if (language = css_class[/(json|shell|ruby)/, 1])
            node['data-language'] = language
          end

          # HCL isn't currently supported by Prism, Ruby syntax does an acceptable job for now
          node['data-language'] = 'ruby' if css_class[/(hcl)/, 1]

          # Re-assign the node's own text as its content; presumably this
          # collapses any nested markup to plain text (Nokogiri semantics —
          # verify against the Filter base class).
          node.content = node.content
        end

        doc
      end
    end
  end
end

@ -0,0 +1,76 @@
module Docs
  class Terraform
    # Derives the entry name and entry type (grouping) for each scraped
    # Terraform documentation page.
    class EntriesFilter < Docs::EntriesFilter
      # Some providers have non-trivial mappings between the directory they live in and their name.
      # Anything *not* in this list will be capitalized instead.
      PROVIDER_NAME_MAP = {
        'aws' => 'AWS',
        'azure' => 'Azure (Legacy)',
        'azurerm' => 'Azure',
        'centurylinkcloud' => 'CenturyLinkCloud',
        'cloudscale' => 'CloudScale.ch',
        'cloudstack' => 'CloudStack',
        'dme' => 'DNSMadeEasy',
        'dns' => 'DNS',
        'dnsimple' => 'DNSimple',
        'do' => 'DigitalOcean',
        'github' => 'GitHub',
        'google' => 'Google Cloud',
        'http' => 'HTTP',
        'mysql' => 'MySQL',
        'newrelic' => 'New Relic',
        'oneandone' => '1&1',
        'opentelekomcloud' => 'OpenTelekomCloud',
        'opsgenie' => 'OpsGenie',
        'opc' => 'Oracle Public Cloud',
        'oraclepaas' => 'Oracle Cloud Platform',
        'ovh' => 'OVH',
        'pagerduty' => 'PagerDuty',
        'panos' => 'Palo Alto Networks',
        'postgresql' => 'PostgreSQL',
        'powerdns' => 'PowerDNS',
        'profitbricks' => 'ProfitBricks',
        'rabbitmq' => 'RabbitMQ',
        'softlayer' => 'SoftLayer',
        'statuscake' => 'StatusCake',
        'tls' => 'TLS',
        'ultradns' => 'UltraDNS',
        'vcd' => 'VMware vCloud Director',
        'nsxt' => 'VMware NSX-T',
        'vsphere' => 'VMware vSphere',
      }.freeze

      # Some providers have a lot (> 100) of entries, which makes browsing them unwieldy.
      # Any provider present in the list below gets an extra set of types, breaking its
      # pages out by the product they belong to.
      LARGE_PROVIDERS = {
        "aws" => true,
        "azurerm" => true,
        "google" => true,
      }.freeze

      # Returns the page's `#inner h1` text with the "» " marker and the
      # "Data Source: " prefix stripped out.
      def get_name
        at_css('#inner h1').content.gsub('» ', '').gsub('Data Source: ', '')
      end

      # Returns the type (group) the page is listed under.
      #
      # The provider key is the second slug segment when the slug has at least
      # four segments, otherwise the first. It is mapped through
      # PROVIDER_NAME_MAP, falling back to the capitalized key. For providers in
      # LARGE_PROVIDERS the text of the element preceding the active menu
      # item's list is appended as "<provider>: <text>" when it can be found.
      def get_type
        category, subcategory, _subfolder, page = slug.split('/')
        provider = page ? subcategory : category
        nice_provider = PROVIDER_NAME_MAP[provider] || provider.capitalize
        return nice_provider unless LARGE_PROVIDERS[provider]

        category_node = at_css('ul > li > ul > li.active')
        # The active item's list may have no preceding sibling element; fall
        # back to the plain provider name instead of raising on nil.
        heading_node = category_node.parent.previous_element if category_node
        heading_node ? "#{nice_provider}: #{heading_node.content}" : nice_provider
      end
    end
  end
end

@ -0,0 +1,23 @@
module Docs
  # Scraper configuration for the Terraform documentation
  # (release 0.11.7, served from https://www.terraform.io/docs/).
  class Terraform < UrlScraper
    self.name = 'Terraform'
    self.type = 'terraform'
    self.release = '0.11.7'
    self.base_url = 'https://www.terraform.io/docs/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://www.terraform.io/',
      code: 'https://github.com/hashicorp/terraform'
    }

    html_filters.push 'terraform/entries', 'terraform/clean_html'

    # /enterprise/ also matches "enterprise-legacy" paths, so a single
    # pattern covers both.
    options[:skip_patterns] = [/enterprise/]

    options[:attribution] = <<-HTML
      Copyright &copy; 2018 HashiCorp<br>
      Licensed under the MPL 2.0 License.
    HTML
  end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 806 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Loading…
Cancel
Save