Merge pull request #250 from afro88/website_agent_receive

Updated WebsiteAgent to receive events
Andrew Cantino, 11 years ago
parent commit 48cfbb8a81
2 changed files, 34 additions and 7 deletions
  1. app/models/agents/website_agent.rb (+20 -6)
  2. spec/models/agents/website_agent_spec.rb (+14 -1)

+ 20 - 6
app/models/agents/website_agent.rb

@@ -4,7 +4,6 @@ require 'date'
 
 module Agents
   class WebsiteAgent < Agent
-    cannot_receive_events!
 
     default_schedule "every_12h"
 
@@ -46,6 +45,8 @@ module Agents
       Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance).  This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
 
       Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
+
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload.
     MD
 
     event_description do
@@ -105,19 +106,23 @@ module Agents
     end
 
     def check
-      hydra = Typhoeus::Hydra.new
       log "Fetching #{options['url']}"
+      check_url options['url']
+    end
+
+    def check_url(in_url)
+      hydra = Typhoeus::Hydra.new
       request_opts = { :followlocation => true }
       request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
 
       requests = []
 
-      if options['url'].kind_of?(Array)
-        options['url'].each do |url|
+      if in_url.kind_of?(Array)
+        in_url.each do |url|
            requests.push(Typhoeus::Request.new(url, request_opts))
         end
       else
-        requests.push(Typhoeus::Request.new(options['url'], request_opts))
+        requests.push(Typhoeus::Request.new(in_url, request_opts))
       end
 
       requests.each do |request|
@@ -185,7 +190,7 @@ module Agents
               options['extract'].keys.each do |name|
                 result[name] = output[name][index]
                 if name.to_s == 'url'
-                  result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
+                  result[name] = URI.join(request.base_url, result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
                 end
               end
 
@@ -202,6 +207,13 @@ module Agents
       end
     end
 
+    def receive(incoming_events)
+      incoming_events.each do |event|
+        url_to_scrape = event.payload['url']
+        check_url(url_to_scrape) if url_to_scrape =~ /^https?:\/\//i
+      end
+    end
+
     private
 
     # This method returns true if the result should be stored as a new event.
@@ -275,5 +287,7 @@ module Agents
         false
       end
     end
+
   end
+
 end
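
Taken together, the hunks above split the scheduled path (`check`) from a reusable `check_url`, and add a `receive` hook that only scrapes absolute http(s) URLs. A minimal sketch of the new behavior, assuming `agent` is an already-configured WebsiteAgent (the event setup here is illustrative, not part of this diff):

    # Hypothetical wiring; only `receive`, `check_url`, and the payload
    # shape come from the diff above.
    event = Event.new
    event.payload = { 'url' => 'http://example.com/comics' }

    # receive scrapes each payload's `url` when it matches /^https?:\/\//i ...
    agent.receive([event])   # dispatches to check_url("http://example.com/comics")

    # ... and silently skips anything that is not an absolute http(s) URL.
    event.payload = { 'url' => 'ftp://example.com/file' }
    agent.receive([event])   # no scrape performed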

+ 14 - 1
spec/models/agents/website_agent_spec.rb

@@ -331,6 +331,19 @@ describe Agents::WebsiteAgent do
         end
       end
     end
+
+    describe "#receive" do
+      it "should scrape from the url element in incoming event payload" do
+        @event = Event.new
+        @event.agent = agents(:bob_rain_notifier_agent)
+        @event.payload = { 'url' => "http://xkcd.com" }
+
+        lambda {
+          @checker.options = @site
+          @checker.receive([@event])
+        }.should change { Event.count }.by(1)
+      end
+    end
   end
 
   describe "checking with http basic auth" do
@@ -361,4 +374,4 @@ describe Agents::WebsiteAgent do
       end
     end
   end
-end
+end
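
One subtle fix rides along in website_agent.rb: relative `url` extractions are now resolved against `request.base_url` (the page actually fetched) instead of the static `options['url']`, which matters once the scraped URL can arrive via an event. A quick illustration of the resolution, with hypothetical values:

    require 'uri'

    # The base for relative links must be the fetched page itself,
    # not whatever happens to sit in options['url'].
    fetched = 'http://xkcd.com/614/'        # stands in for request.base_url
    URI.join(fetched, '/archive').to_s      # => "http://xkcd.com/archive"
    URI.join(fetched, 'info.0.json').to_s   # => "http://xkcd.com/614/info.0.json"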