class MediaWiki::Gateway
Constants
- USER_AGENT
Attributes
Public Class Methods
Set up a MediaWiki::Gateway for a given MediaWiki installation
- url
-
Path to API of target MediaWiki (eg. 'en.wikipedia.org/w/api.php')
- options
-
Hash of options
- http_options
-
Hash of options for RestClient::Request (via #http_send)
Options:
- :bot
-
When set to true, executes API queries with the bot parameter (see www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.
- :ignorewarnings
-
Log API warnings and invalid page titles, instead throwing MediaWiki::APIError
- :limit
-
Maximum number of results returned per search (see www.mediawiki.org/wiki/API:Query_-_Lists#Limits), defaults to the MediaWiki default of 500.
- :logdevice
-
Log device to use. Defaults to STDERR
- :loglevel
-
Log level to use, defaults to Logger::WARN. Set to Logger::DEBUG to dump every request and response to the log.
- :maxlag
-
Maximum allowed server lag (see www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.
- :retry_count
-
Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries).
- :retry_delay
-
Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds.
- :user_agent
-
User-Agent header to send with requests, defaults to ::default_user_agent or nil.
# File lib/media_wiki/gateway.rb, line 34 def initialize(url, options = {}, http_options = {}) @options = { bot: false, limit: 500, logdevice: STDERR, loglevel: Logger::WARN, max_results: 500, maxlag: 5, retry_count: 3, retry_delay: 10, user_agent: self.class.default_user_agent }.merge(options) @log = Logger.new(@options[:logdevice]) @log.level = @options[:loglevel] @http_options, @wiki_url, @cookies, @headers = http_options, url, {}, { 'User-Agent' => [@options[:user_agent], USER_AGENT].compact.join(' '), 'Accept-Encoding' => 'gzip' } end
Public Instance Methods
Make generic request to API
- form_data
-
hash of attributes to post
- continue_xpath
-
XPath selector for query continue parameter
Returns XML document
# File lib/media_wiki/gateway.rb, line 64 def send_request(form_data, continue_xpath = nil) make_api_request(form_data, continue_xpath).first end
Private Instance Methods
Get API XML response If there are errors or warnings, raise APIError Otherwise return XML root
# File lib/media_wiki/gateway.rb, line 188 def get_response(res) begin res = res.force_encoding('UTF-8') if res.respond_to?(:force_encoding) doc = REXML::Document.new(res).root rescue REXML::ParseException raise MediaWiki::Exception.new('Response is not XML. Are you sure you are pointing to api.php?') end log.debug("RES: #{doc}") unless %w[api mediawiki].include?(doc.name) raise MediaWiki::Exception.new("Response does not contain Mediawiki API XML: #{res}") end if error = doc.elements['error'] raise APIError.new(*error.attributes.values_at(*%w[code info])) end if warnings = doc.elements['warnings'] warning("API warning: #{warnings.children.map(&:text).join(', ')}") end doc end
Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect')
# File lib/media_wiki/gateway.rb, line 71 def get_token(type, page_titles) res = send_request( 'action' => 'query', 'prop' => 'info', 'intoken' => type, 'titles' => page_titles ) unless token = res.elements['query/pages/page'].attributes[type + 'token'] raise Unauthorized.new "User is not permitted to perform this operation: #{type}" end token end
Execute the HTTP request using either GET or POST as appropriate
# File lib/media_wiki/gateway.rb, line 175 def http_send url, form_data, headers, &block opts = @http_options.merge(url: url, headers: headers) opts[:method] = form_data['action'] == 'query' ? :get : :post opts[:method] == :get ? headers[:params] = form_data : opts[:payload] = form_data log.debug("#{opts[:method].upcase}: #{form_data.inspect}, #{@cookies.inspect}") RestClient::Request.execute(opts, &block) end
Iterate over query results
- list
-
list name to query
- res_xpath
-
XPath selector for results
- attr
-
attribute name to extract, if any
- param
-
parameter name to continue query
- options
-
additional query options
Yields each attribute value, or, if attr
is nil, each
REXML::Element.
# File lib/media_wiki/gateway.rb, line 95 def iterate_query(list, res_xpath, attr, param, options, &block) items, block = [], lambda { |item| items << item } unless block attribute_names = %w[from continue].map { |name| "name()='#{param[0, 2]}#{name}'" } req_xpath = "//query-continue/#{list}/@*[#{attribute_names.join(' or ')}]" res_xpath = "//query/#{list}/#{res_xpath}" unless res_xpath.start_with?('/') options, continue = options.merge('action' => 'query', 'list' => list), nil loop { res, continue = make_api_request(options, req_xpath) REXML::XPath.match(res, res_xpath).each { |element| block[attr ? element.attributes[attr] : element] } continue ? options[param] = continue : break } items end
Make generic request to API
- form_data
-
hash of attributes to post
- continue_xpath
-
XPath selector for query continue parameter
- retry_count
-
Counter for retries
Returns array of XML document and query continue parameter.
# File lib/media_wiki/gateway.rb, line 127 def make_api_request(form_data, continue_xpath = nil, retry_count = 1) form_data.update('format' => 'xml', 'maxlag' => @options[:maxlag]) http_send(@wiki_url, form_data, @headers.merge(cookies: @cookies)) { |response, &block| if response.code == 503 && retry_count < @options[:retry_count] log.warn("503 Service Unavailable: #{response.body}. Retry in #{@options[:retry_delay]} seconds.") sleep(@options[:retry_delay]) make_api_request(form_data, continue_xpath, retry_count + 1) end # Check response for errors and return XML unless response.code >= 200 && response.code < 300 raise MediaWiki::Exception.new("Bad response: #{response}") end doc = get_response(response.dup) # login and createaccount actions require a second request with a token received on the first request if %w[login createaccount].include?(action = form_data['action']) action_result = doc.elements[action].attributes['result'] @cookies.update(response.cookies) case action_result.downcase when 'success' return [doc, false] when 'needtoken' token = doc.elements[action].attributes['token'] if action == 'login' return make_api_request(form_data.merge('lgtoken' => token)) elsif action == 'createaccount' return make_api_request(form_data.merge('token' => token)) end else if action == 'login' raise Unauthorized.new("Login failed: #{action_result}") elsif action == 'createaccount' raise Unauthorized.new("Account creation failed: #{action_result}") end end end return [doc, (continue_xpath && doc.elements['query-continue']) ? REXML::XPath.first(doc, continue_xpath) : nil] } end
# File lib/media_wiki/gateway.rb, line 221 def valid_page?(page) page && !page.attributes['missing'] && (!page.attributes['invalid'] || warning("Invalid title '#{page.attributes['title']}'")) end
# File lib/media_wiki/gateway.rb, line 213 def validate_options(options, valid_options) options.each_key { |opt| unless valid_options.include?(opt.to_s) raise ArgumentError, "Unknown option '#{opt}'", caller(1) end } end
# File lib/media_wiki/gateway.rb, line 226 def warning(msg) raise APIError.new('warning', msg) unless @options[:ignorewarnings] log.warn(msg) false end