Improve DOM scraper

pull/200/head
Thibaut 10 years ago
parent 018628ea7d
commit 60f6c284a0

@ -5,8 +5,11 @@ module Docs
'Battery Status' => 'Battery Status',
'Canvas ' => 'Canvas',
'CSS Object Model' => 'CSS',
'Cryptography' => 'Web Cryptography',
'Device Orientation' => 'Device Orientation',
'Encoding' => 'Encoding',
'Encrypted Media Extensions' => 'Encrypted Media',
'Fetch' => 'Fetch',
'File API' => 'File',
'Geolocation' => 'Geolocation',
'Media Capture' => 'Media',
@ -14,30 +17,35 @@ module Docs
'MediaStream' => 'Media',
'Navigation Timing' => 'Navigation Timing',
'Network Information' => 'Network Information',
'Push API' => 'Push',
'Service Workers' => 'Service Workers',
'Web Animations' => 'Animation',
'Web Audio' => 'Web Audio',
'Web Messaging' => 'Web Messaging',
'Web Storage' => 'Web Storage',
'Web Workers' => 'Web Workers',
'WebRTC' => 'WebRTC' }
TYPE_BY_NAME_STARTS_WITH = {
'Audio' => 'Web Audio',
'Broadcast' => 'Broadcast Channel',
'Canvas' => 'Canvas',
'ChildNode' => 'Node',
'Console' => 'Console',
'CSS' => 'CSS',
'ChildNode' => 'Node',
'console' => 'Console',
'document' => 'Document',
'DocumentFragment' => 'DocumentFragment',
'DOM' => 'DOM',
'element' => 'Element',
'event' => 'Event',
'Event' => 'Event',
'Fetch' => 'Fetch',
'File' => 'File',
'GlobalEventHandlers' => 'GlobalEventHandlers',
'history' => 'History',
'HTML' => 'Elements',
'IDB' => 'IndexedDB',
'Location' => 'Location',
'location' => 'Location',
'navigator' => 'Navigator',
'MediaQuery' => 'MediaQuery',
'Node' => 'Node',
@ -45,6 +53,7 @@ module Docs
'ParentNode' => 'Node',
'Range' => 'Range',
'RTC' => 'WebRTC',
'screen' => 'Screen',
'Selection' => 'Selection',
'Storage' => 'Web Storage',
'StyleSheet' => 'CSS',
@ -54,13 +63,17 @@ module Docs
'TreeWalker' => 'TreeWalker',
'Uint' => 'Typed Arrays',
'URL' => 'URL',
'window' => 'window',
'window' => 'Window',
'Window' => 'Window',
'XMLHttpRequest' => 'XMLHTTPRequest' }
TYPE_BY_NAME_INCLUDES = {
'ChildNode' => 'Node',
'Crypto' => 'Web Cryptography',
'ImageData' => 'Canvas',
'IndexedDB' => 'IndexedDB',
'MediaStream' => 'Media',
'NodeList' => 'Node',
'Path2D' => 'Canvas',
'ServiceWorker' => 'Service Workers',
'TextMetrics' => 'Canvas',
@ -129,10 +142,19 @@ module Docs
end
end
# Substrings searched for in the text of a page's `.overheadIndicator`
# element. include_default_entry? consults this list to decide whether the
# page's default entry is kept (pages of type 'Console' are exempt there).
# Frozen so the shared list cannot be modified at runtime.
SKIP_CONTENT = [
  'not on a standards track',
  'removed from the Web',
  'not on a current W3C standards track',
  'This feature is not built into all browsers',
  'not currently supported in any browser'
].freeze
# Decides whether the default entry for the current page is included.
#
# Returns true when:
#   * the page's type is 'Console', or
#   * the document has no `.overheadIndicator` element, or
#   * the indicator's text contains none of the SKIP_CONTENT substrings.
# Returns false otherwise.
def include_default_entry?
  return true if type == 'Console'

  # Single lookup of the banner element; nil means there is nothing to check.
  return true unless (node = doc.at_css('.overheadIndicator'))

  content = node.content
  SKIP_CONTENT.none? { |str| content.include?(str) }
end
end
end

@ -2,53 +2,34 @@ module Docs
class Dom < Mdn
self.name = 'DOM'
self.base_url = 'https://developer.mozilla.org/en-US/docs/Web/API'
self.fix_redirections = true
html_filters.push 'dom/clean_html', 'dom/entries', 'title'
options[:root_title] = 'DOM'
# Don't want
options[:skip] = %w(
/App
/CallEvent
/CanvasPixelArray
/ChromeWorker
/document.createProcessingInstruction
/document.documentURIObject
/document.loadOverlay
/document.tooltipNode
/Reference
/Index
/Document_Object_Model
/document/createProcessingInstruction
/document/documentURIObject
/document/loadOverlay
/document/tooltipNode
/DOMErrorHandler
/DOMLocator
/DOMObject
/DOMStringList
/Event/Comparison_of_Event_Targets
/FMRadio
/IDBDatabaseException
/IndexedDB_API/Using_JavaScript_Generators_in_Firefox
/NamedNodeMap
/Node.baseURIObject
/Node.nodePrincipal
/Notation
/PowerManager
/PushManager
/ProcessingInstruction
/TCPServerSocket
/TCPSocket
/TypeInfo
/Using_the_Browser_API
/Web_Video_Text_Tracks_Format
/WifiManager
/window.controllers
/window.crypto
/window.getAttention
/window.messageManager
/window.navigator.addIdleObserver
/window.navigator.getDeviceStorage
/window.navigator.getDeviceStorages
/window.navigator.removeIdleObserver
/window.navigator.requestWakeLock
/window.updateCommands
/window.pkcs11)
/window/getAttention
/window/messageManager
/window/updateCommands
/window/pkcs11)
options[:skip_patterns] = [
/NS/,
@ -58,69 +39,42 @@ module Docs
/webkit/i,
/gamepad/i,
/UserData/,
/Bluetooth/,
/FMRadio/i,
/XDomainRequest/i,
/\A\/Camera/,
/\A\/Data_Store_API/,
/\A\/DataStore/,
/\A\/DeviceStorage/,
/\A\/DocumentTouch/,
/\A\/document\.xml/,
/\A\/document\/xml/,
/\A\/XMLDocument/,
/\A\/DOMCursor/,
/\A\/DOMRequest/,
/\A\/element\.on/,
/\A\/Entity/,
/\A\/navigator\.id/i,
/\A\/Settings/,
/\A\/Telephony/,
/Bluetooth/,
/\A\/Window\.\w+bar/i,
/\A\/Window\/\w+bar/i,
/\A\/Apps/,
/\A\/Contact/,
/\A\/L10n/,
/\A\/Permission/]
# Broken / Empty
options[:skip].concat %w(
/Attr.isId
/document.nodePrincipal
/Event/UIEvent
/Extensions
/StyleSheetList
/SVGPoint
/Window.dispatchEvent
/Window.restore
/Window.routeEvent
/Window.QueryInterface)
# Duplicates
options[:skip].concat %w(
/Reference
/Index
/form.elements
/select.type
/table.rows
/XMLHttpRequest/FormData
/Performance.now
/Document_Object_Model)
# Rewrites a URL so that variant spellings of a page path map onto a single
# form under Dom.base_url. The url string is mutated in place (String#sub!)
# and returned. URLs containing '_' or '?' are not rewritten and the lambda
# returns nil for them (presumably interpreted by the caller; confirm how nil
# is handled).
options[:fix_urls] = ->(url) do
return if url.include?('_') || url.include?('?')
# Map the older /en-US/docs/DOM/ and /en/DOM/ prefixes onto the base URL.
url.sub! 'https://developer.mozilla.org/en-US/docs/DOM/', "#{Dom.base_url}/"
url.sub! 'https://developer.mozilla.org/en/DOM/', "#{Dom.base_url}/"
# Change the case/form of the path segment that directly follows base_url.
# Each sub! replaces only the first match, and the statements are
# order-dependent: later patterns can match text produced by earlier ones
# (e.g. 'Window' -> 'window' runs before the 'window.navigator' rewrite).
url.sub! "#{Dom.base_url}/Document\.", "#{Dom.base_url}/document."
url.sub! "#{Dom.base_url}/Console", "#{Dom.base_url}/console"
url.sub! "#{Dom.base_url}/Document\/", "#{Dom.base_url}/document\/"
url.sub! "#{Dom.base_url}/Element", "#{Dom.base_url}/element"
url.sub! "#{Dom.base_url}/History", "#{Dom.base_url}/history"
url.sub! "#{Dom.base_url}/Location", "#{Dom.base_url}/location"
url.sub! "#{Dom.base_url}/Navigator", "#{Dom.base_url}/navigator"
url.sub! "#{Dom.base_url}/Screen", "#{Dom.base_url}/screen"
url.sub! "#{Dom.base_url}/Window\/", "#{Dom.base_url}/window\/"
url.sub! "#{Dom.base_url}/notification", "#{Dom.base_url}/Notification"
url.sub! "#{Dom.base_url}/range", "#{Dom.base_url}/Range"
url.sub! "#{Dom.base_url}/Window", "#{Dom.base_url}/window"
url.sub! "#{Dom.base_url}/window.navigator", "#{Dom.base_url}/navigator"
url.sub! "#{Dom.base_url}/Selection/", "#{Dom.base_url}/Selection."
url.sub! "#{Dom.base_url}/windowTimers", "#{Dom.base_url}/window"
url.sub! "#{Dom.base_url}/windowEventHandlers", "#{Dom.base_url}/window"
# These two patterns are case-insensitive and not anchored to base_url, so
# they match the first occurrence anywhere in the URL.
url.sub! %r{\/windowLocalStorage(\.localStorage)?}i, "/window.localStorage"
url.sub! %r{\/windowSessionStorage(\.sessionStorage)?}i, "/window.sessionStorage"
# NOTE(review): the earlier 'Screen' -> 'screen' rewrite appears to leave
# nothing for this pattern to match — confirm whether it is still needed.
url.sub! "#{Dom.base_url}/Screen.", "#{Dom.base_url}/window.screen"
url.sub! "#{Dom.base_url}/event", "#{Dom.base_url}/Event"
url
end
end

@ -11,7 +11,7 @@ module Docs
options[:trailing_slash] = false
options[:skip_link] = ->(link) { link['title'].try(:include?, 'hasn\'t been written yet'.freeze) }
options[:skip_link] = ->(link) { link['title'].try(:include?, 'written'.freeze) }
options[:attribution] = <<-HTML
&copy; 2015 Mozilla Contributors<br>

Loading…
Cancel
Save