Improve MDN scrapers

pull/142/merge
Thibaut 10 years ago
parent a677417665
commit e4ac0e8757

@ -11,10 +11,12 @@ module Docs
'Geolocation' => 'Geolocation',
'Media Capture' => 'Media',
'Media Source' => 'Media',
'MediaStream' => 'MediaRecorder',
'MediaStream' => 'Media',
'Navigation Timing' => 'Navigation Timing',
'Network Information' => 'Network Information',
'Service Workers' => 'Service Workers',
'Web Audio' => 'Web Audio',
'Web Storage' => 'Web Storage',
'Web Workers' => 'Web Workers',
'WebRTC' => 'WebRTC' }
@ -44,6 +46,7 @@ module Docs
'Range' => 'Range',
'RTC' => 'WebRTC',
'Selection' => 'Selection',
'Storage' => 'Web Storage',
'StyleSheet' => 'CSS',
'Stylesheet' => 'CSS',
'SVG' => 'SVG',
@ -55,10 +58,15 @@ module Docs
'XMLHttpRequest' => 'XMLHTTPRequest' }
TYPE_BY_NAME_INCLUDES = {
'IndexedDB' => 'IndexedDB',
'udio' => 'Web Audio',
'WebGL' => 'Canvas',
'Worker' => 'Web Workers' }
'ImageData' => 'Canvas',
'IndexedDB' => 'IndexedDB',
'MediaStream' => 'Media',
'Path2D' => 'Canvas',
'ServiceWorker' => 'Service Workers',
'TextMetrics' => 'Canvas',
'udio' => 'Web Audio',
'WebGL' => 'Canvas',
'Worker' => 'Web Workers' }
TYPE_BY_NAME_MATCHES = {}

@ -34,7 +34,7 @@ module Docs
'Statements'
elsif slug.start_with? 'Operators'
'Operators'
elsif slug.start_with?('Functions_and_function_scope') || slug.start_with?('Functions')
elsif slug.start_with?('Functions_and_function_scope') || slug.start_with?('Functions') || slug.include?('GeneratorFunction')
'Function'
elsif slug.start_with? 'Global_Objects'
object, method = *slug.remove('Global_Objects/').split('/')
@ -59,7 +59,8 @@ module Docs
return true unless node && node.parent == doc && !node.previous_element
!node.content.include?('not on a standards track') &&
!node.content.include?('removed from the Web')
!node.content.include?('removed from the Web') &&
!node.content.include?('could be removed at any time')
end
end
end

@ -16,6 +16,12 @@ module Docs
node.name = 'th'
end
css('nobr').each do |node|
node.before(node.children).remove
end
css('h2[style]', 'pre[style]').remove_attr('style')
doc
end
end

@ -118,6 +118,8 @@ module Docs
url.sub! "#{Dom.base_url}/Selection/", "#{Dom.base_url}/Selection."
url.sub! "#{Dom.base_url}/windowTimers", "#{Dom.base_url}/window"
url.sub! "#{Dom.base_url}/windowEventHandlers", "#{Dom.base_url}/window"
url.sub! %r{\/windowLocalStorage(\.localStorage)?}i, "/window.localStorage"
url.sub! %r{\/windowSessionStorage(\.sessionStorage)?}i, "/window.sessionStorage"
url.sub! "#{Dom.base_url}/Screen.", "#{Dom.base_url}/window.screen"
url
end

@ -21,6 +21,7 @@ module Docs
end
end
options[:skip] = ['/Element/shadow']
options[:only_patterns] = [/\A\/Element/]
options[:replace_paths] = {

@ -25,7 +25,9 @@ module Docs
/Functions/rest_parameters
/Methods_Index
/Properties_Index
/Strict_mode/Transitioning_to_strict_mode)
/Strict_mode/Transitioning_to_strict_mode
/Operators/Legacy_generator_function
/Statements/Legacy_generator_function)
# Duplicates
options[:skip].concat %w(

Loading…
Cancel
Save