Browse Source

adding a basic RSS agent

Andrew Cantino 10 years ago
parent
commit
f4df522f2f

+ 1 - 0
Gemfile

@@ -72,6 +72,7 @@ gem 'em-http-request', '~> 1.1.2'
 gem 'weibo_2', '~> 0.1.4'
 gem 'hipchat', '~> 1.2.0'
 gem 'xmpp4r',  '~> 0.5.6'
+gem 'feed-normalizer'
 gem 'slack-notifier', '~> 0.5.0'
 
 gem 'therubyracer', '~> 0.12.1'

+ 6 - 0
Gemfile.lock

@@ -116,6 +116,9 @@ GEM
       multipart-post (>= 1.2, < 3)
     faraday_middleware (0.9.1)
       faraday (>= 0.7.4, < 0.10)
+    feed-normalizer (1.5.2)
+      hpricot (>= 0.6)
+      simple-rss (>= 1.1)
     ffi (1.9.3)
     forecast_io (2.0.0)
       faraday
@@ -144,6 +147,7 @@ GEM
     hike (1.2.3)
     hipchat (1.2.0)
       httparty
+    hpricot (0.8.6)
     http (0.5.1)
       http_parser.rb
     http_parser.rb (0.6.0)
@@ -265,6 +269,7 @@ GEM
       faraday (>= 0.9.0.rc5)
       jwt (>= 0.1.5)
       multi_json (>= 1.0.0)
+    simple-rss (1.3.1)
     simple_oauth (0.2.0)
     simplecov (0.8.2)
       docile (~> 1.1.0)
@@ -359,6 +364,7 @@ DEPENDENCIES
   em-http-request (~> 1.1.2)
   faraday (~> 0.9.0)
   faraday_middleware
+  feed-normalizer
   forecast_io (~> 2.0.0)
   foreman (~> 0.63.0)
   geokit (~> 1.8.4)

+ 61 - 0
app/concerns/web_request_concern.rb

@@ -0,0 +1,61 @@
+# Shared HTTP plumbing for Agents that fetch remote resources: validation of
+# the web-request options, a memoized Faraday connection, and readers for the
+# `headers` and `basic_auth` options.
+#
+# The including class must provide `options`, `interpolated` and `errors`.
+module WebRequestConcern
+  extend ActiveSupport::Concern
+
+  # Adds an error on :base for each malformed web-request option
+  # (`user_agent`, `headers`, `basic_auth`).  Reads the raw `options`, not
+  # the interpolated ones.
+  def validate_web_request_options!
+    agent_string = options['user_agent']
+    if agent_string.present? && !agent_string.is_a?(String)
+      errors.add(:base, "user_agent must be a string")
+    end
+
+    header_value = headers(options['headers'])
+    errors.add(:base, "if provided, headers must be a hash") unless header_value.is_a?(Hash)
+
+    begin
+      basic_auth_credentials(options['basic_auth'])
+    rescue ArgumentError => e
+      errors.add(:base, e.message)
+    end
+  end
+
+  # Returns the Faraday connection for this object, building it on first use
+  # from the interpolated `headers`, `user_agent` and `basic_auth` options.
+  def faraday
+    @faraday ||= Faraday.new do |builder|
+      custom_headers = headers
+      builder.headers = custom_headers if custom_headers.length > 0
+
+      user_agent = interpolated['user_agent']
+      builder.headers[:user_agent] = user_agent if user_agent.present?
+
+      builder.use FaradayMiddleware::FollowRedirects
+      builder.request :url_encoded
+
+      credentials = basic_auth_credentials
+      builder.request :basic_auth, *credentials if credentials
+
+      backend = faraday_backend
+      # Load the typhoeus adapter only when that backend is selected.
+      require 'typhoeus/adapters/faraday' if backend == :typhoeus
+      builder.adapter backend
+    end
+  end
+
+  # Returns `value` unchanged when it is present, otherwise an empty Hash.
+  # Defaults to the interpolated `headers` option.
+  def headers(value = interpolated['headers'])
+    value.present? ? value : {}
+  end
+
+  # Normalizes a `basic_auth` value into a [user, password] pair.
+  #
+  # Returns nil for nil or an empty string, the value itself for a
+  # two-element Array, and the two halves of a "user:password" value split at
+  # the first colon.  Anything else raises ArgumentError.
+  def basic_auth_credentials(value = interpolated['basic_auth'])
+    return nil if value.nil? || '' == value
+    return value if value.is_a?(Array) && value.size == 2
+    return value.split(/:/, 2) if /:/ === value
+
+    raise ArgumentError, "bad value for basic_auth: #{value.inspect}"
+  end
+
+  # Name of the Faraday adapter to use, taken from the FARADAY_HTTP_BACKEND
+  # environment variable and falling back to :typhoeus.
+  def faraday_backend
+    backend_name = ENV.fetch('FARADAY_HTTP_BACKEND') { 'typhoeus' }
+    backend_name.to_sym
+  end
+end

+ 89 - 0
app/models/agents/rss_agent.rb

@@ -0,0 +1,89 @@
+require 'rss'
+require 'feed-normalizer'
+
+module Agents
+  # Fetches an RSS/Atom feed on a schedule and emits one event for every feed
+  # entry whose id has not been seen before.  Seen ids are remembered in the
+  # agent's `memory` under 'seen_ids'.
+  class RssAgent < Agent
+    include WebRequestConcern
+
+    cannot_receive_events!
+    default_schedule "every_1d"
+
+    description do
+      <<-MD
+        This Agent consumes RSS feeds and emits events when they change.
+
+        (If you want to *output* an RSS feed, use the DataOutputAgent.  Also, you can technically parse RSS and XML feeds
+        with the WebsiteAgent as well.  See [this example](https://github.com/cantino/huginn/wiki/Agent-configuration-examples#itunes-trailers).)
+
+        Options:
+
+          * `url` - The URL of the RSS feed.
+          * `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s `clean!` method to clean up HTML in the feed.  Set to `true` to use.
+          * `expected_update_period_in_days` - How often you expect this RSS feed to change.  If more than this amount of time passes without an update, the Agent will mark itself as not working.
+      MD
+    end
+
+    # Options a newly created agent starts with.
+    def default_options
+      {
+        'expected_update_period_in_days' => "5",
+        'clean' => 'false',
+        'url' => "https://github.com/cantino/huginn/commits/master.atom"
+      }
+    end
+
+    # True when an event was created within the expected update period (10
+    # days if the option interpolates to a blank value) and there are no
+    # recent error logs.
+    def working?
+      event_created_within?((interpolated['expected_update_period_in_days'].presence || 10).to_i) && !recent_error_logs?
+    end
+
+    # Requires `url` and a positive `expected_update_period_in_days`, then
+    # validates the shared web-request options.
+    def validate_options
+      errors.add(:base, "url is required") unless options['url'].present?
+
+      unless options['expected_update_period_in_days'].present? && options['expected_update_period_in_days'].to_i > 0
+        errors.add(:base, "Please provide 'expected_update_period_in_days' to indicate how many days can pass without an update before this Agent is considered to not be working")
+      end
+
+      validate_web_request_options!
+    end
+
+    # Fetches the feed and creates an event for each entry not seen before.
+    # A failed request or an unparseable body is reported through `error`
+    # and no events are created.
+    def check
+      url = interpolated['url']
+      response = faraday.get(url)
+      unless response.success?
+        error "Failed to fetch #{url}: #{response.inspect}"
+        return
+      end
+
+      # FeedNormalizer returns nil when none of its parsers can handle the
+      # body; report that instead of failing with NoMethodError on nil.
+      feed = FeedNormalizer::FeedNormalizer.parse(response.body)
+      if feed.nil?
+        error "Failed to parse #{url}: the response body is not a recognizable feed"
+        return
+      end
+
+      # Compare via to_s so a boolean true works as well as the string 'true'.
+      feed.clean! if interpolated['clean'].to_s == 'true'
+
+      created_event_count = 0
+      feed.entries.each do |entry|
+        next unless check_and_track(entry.id)
+
+        created_event_count += 1
+        create_event(:payload => {
+          :id => entry.id,
+          :date_published => entry.date_published,
+          :last_updated => entry.last_updated,
+          :urls => entry.urls,
+          :description => entry.description,
+          :content => entry.content,
+          :title => entry.title,
+          :authors => entry.authors,
+          :categories => entry.categories
+        })
+      end
+      log "Fetched #{url} and created #{created_event_count} event(s)."
+    end
+
+    protected
+
+    # Returns false if `entry_id` is already in memory['seen_ids'].
+    # Otherwise records it at the front of the list, drops the last id once
+    # the list grows past 500, and returns true.
+    def check_and_track(entry_id)
+      memory['seen_ids'] ||= []
+      if memory['seen_ids'].include?(entry_id)
+        false
+      else
+        memory['seen_ids'].unshift entry_id
+        memory['seen_ids'].pop if memory['seen_ids'].length > 500
+        true
+      end
+    end
+  end
+end

+ 2 - 55
app/models/agents/website_agent.rb

@@ -5,6 +5,7 @@ require 'date'
 
 module Agents
   class WebsiteAgent < Agent
+    include WebRequestConcern
 
     default_schedule "every_12h"
 
@@ -109,19 +110,7 @@ module Agents
         end
       end
 
-      if options['user_agent'].present?
-        errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String)
-      end
-
-      unless headers.is_a?(Hash)
-        errors.add(:base, "if provided, headers must be a hash")
-      end
-
-      begin
-        basic_auth_credentials()
-      rescue => e
-        errors.add(:base, e.message)
-      end
+      validate_web_request_options!
     end
 
     def check
@@ -291,47 +280,5 @@ module Agents
         false
       end
     end
-
-    def faraday
-      @faraday ||= Faraday.new { |builder|
-        builder.headers = headers if headers.length > 0
-
-        if (user_agent = interpolated['user_agent']).present?
-          builder.headers[:user_agent] = user_agent
-        end
-
-        builder.use FaradayMiddleware::FollowRedirects
-        builder.request :url_encoded
-        if userinfo = basic_auth_credentials()
-          builder.request :basic_auth, *userinfo
-        end
-
-        case backend = faraday_backend
-        when :typhoeus
-          require 'typhoeus/adapters/faraday'
-        end
-        builder.adapter backend
-      }
-    end
-
-    def faraday_backend
-      ENV.fetch('FARADAY_HTTP_BACKEND', 'typhoeus').to_sym
-    end
-
-    def basic_auth_credentials
-      case value = interpolated['basic_auth']
-      when nil, ''
-        return nil
-      when Array
-        return value if value.size == 2
-      when /:/
-        return value.split(/:/, 2)
-      end
-      raise "bad value for basic_auth: #{value.inspect}"
-    end
-
-    def headers
-      interpolated['headers'].presence || {}
-    end
   end
 end

+ 356 - 0
spec/data_fixtures/github_rss.atom

@@ -0,0 +1,356 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xml:lang="en-US">
+  <id>tag:github.com,2008:/cantino/huginn/commits/master</id>
+  <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commits/master"/>
+  <link type="application/atom+xml" rel="self" href="https://github.com/cantino/huginn/commits/master.atom"/>
+  <title>Recent Commits to huginn:master</title>
+  <updated>2014-07-16T22:26:22-07:00</updated>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d0a844662846cf3c83b94c637c1803f03db5a5b0</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"/>
+    <title>
+        Merge pull request #402 from albertsun/safer-liquid-migration
+    </title>
+    <updated>2014-07-16T22:26:22-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #402 from albertsun/safer-liquid-migration
+
+Inline models into migration&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/4a433806eeace44f1e39f02ac61cefdadf3597e2</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/4a433806eeace44f1e39f02ac61cefdadf3597e2"/>
+    <title>
+        inline models into migration
+    </title>
+    <updated>2014-07-16T15:25:08-04:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/382862?s=30"/>
+    <author>
+      <name>albertsun</name>
+      <uri>https://github.com/albertsun</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>inline models into migration&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6ffa528ab0af7f9f5bb4b68437e7613e74fdb8c4"/>
+    <title>
+        Merge pull request #398 from knu/imap_use_uid
+    </title>
+    <updated>2014-07-15T19:47:37-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #398 from knu/imap_use_uid
+
+Use &quot;last seen UID&quot; in ImapFolderAgent&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c7e29492c98652cc9738c374d02dcbb7c9bdeac6"/>
+    <title>
+        Merge pull request #391 from theofpa/master
+    </title>
+    <updated>2014-07-12T15:19:56-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #391 from theofpa/master
+
+Ignore xmlns when evaluating xpath&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/f3552ece2e9af187bd5e613783dd27810b63c32f</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/f3552ece2e9af187bd5e613783dd27810b63c32f"/>
+    <title>
+        ImapFolderAgent: Emit a log message when creating an event or skipping it.
+    </title>
+    <updated>2014-07-11T19:19:12+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Emit a log message when creating an event or skipping it.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d144d3797d2db362943357c6d85238ec657cfa06</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d144d3797d2db362943357c6d85238ec657cfa06"/>
+    <title>
+        ImapFolderAgent: Enable notification of mails already marked as read.
+    </title>
+    <updated>2014-07-11T19:08:55+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Enable notification of mails already marked as read.
+
+Add a condition key &quot;is_unread&quot; to allow user to select mails based on
+the read status.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d1196a35ada22418bf0cf8b0d5947c2164e983e6"/>
+    <title>
+        ImapFolderAgent: &quot;conditions&quot; must not actually be nil.
+    </title>
+    <updated>2014-07-11T18:02:09+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: &quot;conditions&quot; must not actually be nil.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/280c09415ea8114d8a128cd7c2583ae0e0aa480d"/>
+    <title>
+        ImapFolderAgent: Do not fail when port is blank.
+    </title>
+    <updated>2014-07-11T18:02:09+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Do not fail when port is blank.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/045fb957b2370d80190fa8dc036863076d8806fb</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/045fb957b2370d80190fa8dc036863076d8806fb"/>
+    <title>
+        ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
+    </title>
+    <updated>2014-07-11T18:02:09+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent now recognizes &quot;true&quot;/&quot;false&quot; as boolean option values.
+
+Add a utility method Agent#boolify to make it easier to handle boolean
+option values.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/c1b9caa8ccb0c8b8f6103fc80b90fba57a822435"/>
+    <title>
+        ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.
+    </title>
+    <updated>2014-07-11T18:01:26+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Unstringify integer keys of a hash saved in JSON.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/6a06a32447721abc4477979610e36db0650e2f92</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/6a06a32447721abc4477979610e36db0650e2f92"/>
+    <title>
+        ImapFolderAgent: Only keep a single UID value for each folder in memory.
+    </title>
+    <updated>2014-07-11T18:01:26+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>ImapFolderAgent: Only keep a single UID value for each folder in memory.
+
+Previously it used to keep a list of the UIDs of unread mails.  Now we
+start to assume that UIDs in a folder identified by a UID VALIDITY value
+are strictly ascending (monotonically increasing) as suggested by RFC
+3501 and 4549 and just keep the highest UID seen in the last run.
+
+This enhancement will help reduce the size of memory typically where
+mails are left unread forever.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/9ed63e45b247c30a02e8e59b4d24fccbe8644876"/>
+    <title>
+        Merge pull request #397 from cantino/update_rails_and_gems
+    </title>
+    <updated>2014-07-05T16:34:29-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #397 from cantino/update_rails_and_gems
+
+upgrade rails and gems&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/87a7abda23a82305d7050ac0bb400ce36c863d01</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"/>
+    <title>
+        upgrade rails and gems
+    </title>
+    <updated>2014-07-05T08:01:36-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>upgrade rails and gems&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/ea7594fa976fe24bb7024b6e3e0d2881dd86033a"/>
+    <title>
+        Merge pull request #396 from knu/show_propagate_immediately
+    </title>
+    <updated>2014-07-03T20:50:40-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #396 from knu/show_propagate_immediately
+
+Make propagate_immediately more visible in agent details and the diagram.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/0e80f5341587aace2c023b06eb9265b776ac4535</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"/>
+    <title>
+        Dashed line in a diagram indicates propagate_immediately being false.
+    </title>
+    <updated>2014-07-04T03:42:52+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Dashed line in a diagram indicates propagate_immediately being false.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/cf9cdfb3ac9d47b7fdf5d7669577c964bee9a186"/>
+    <title>
+        Show the propagate_immediately flag in agent details.
+    </title>
+    <updated>2014-07-04T02:53:31+09:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars2.githubusercontent.com/u/10236?s=30"/>
+    <author>
+      <name>knu</name>
+      <uri>https://github.com/knu</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Show the propagate_immediately flag in agent details.&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/b1128335b8de98afc5cad1b2ca5573e3bab1da1d"/>
+    <title>
+        Merge pull request #389 from dsander/silence_worker_status
+    </title>
+    <updated>2014-07-01T21:47:40-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #389 from dsander/silence_worker_status
+
+Supress logging for requests to the /worker_status&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d25e670b1c040f78eb648120c117853421d522c3</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d25e670b1c040f78eb648120c117853421d522c3"/>
+    <title>
+        Merge pull request #393 from CloCkWeRX/google_calendar
+    </title>
+    <updated>2014-07-01T21:47:16-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Merge pull request #393 from CloCkWeRX/google_calendar
+
+Add Google calendar publish agent&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d7b0e35aaaafec3032d3fe271b426f1e9d3727b4"/>
+    <title>
+        switch to cantino-twitter-stream
+    </title>
+    <updated>2014-07-01T21:36:38-07:00</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/83835?s=30"/>
+    <author>
+      <name>cantino</name>
+      <uri>https://github.com/cantino</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>switch to cantino-twitter-stream&lt;/pre>
+    </content>
+  </entry>
+  <entry>
+    <id>tag:github.com,2008:Grit::Commit/d465158f77dcd9078697e6167b50abbfdfa8b1af</id>
+    <link type="text/html" rel="alternate" href="https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"/>
+    <title>
+        Shift to dev group
+    </title>
+    <updated>2014-07-01T16:37:47+09:30</updated>
+    <media:thumbnail height="30" width="30" url="https://avatars3.githubusercontent.com/u/365751?s=30"/>
+    <author>
+      <name>CloCkWeRX</name>
+      <uri>https://github.com/CloCkWeRX</uri>
+    </author>
+    <content type="html">
+      &lt;pre style='white-space:pre-wrap;width:81ex'>Shift to dev group&lt;/pre>
+    </content>
+  </entry>
+</feed>

+ 81 - 0
spec/models/agents/rss_agent_spec.rb

@@ -0,0 +1,81 @@
+require 'spec_helper'
+
+# Specs for Agents::RssAgent, run against a stubbed copy of the GitHub
+# commits Atom feed stored in spec/data_fixtures/github_rss.atom.
+describe Agents::RssAgent do
+  before do
+    @valid_options = {
+      'expected_update_period_in_days' => "2",
+      'url' => "https://github.com/cantino/huginn/commits/master.atom",
+    }
+
+    # Any request whose URL matches /github.com/ is answered with the fixture.
+    stub_request(:any, /github.com/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")), :status => 200)
+  end
+
+  # A saved agent built from @valid_options and owned by the :bob fixture user.
+  let(:agent) do
+    _agent = Agents::RssAgent.new(:name => "github rss feed", :options => @valid_options)
+    _agent.user = users(:bob)
+    _agent.save!
+    _agent
+  end
+
+  # NOTE(review): the shared examples for WebRequestConcern are not defined
+  # in this file; presumably they live in the spec support directory.
+  it_behaves_like WebRequestConcern
+
+  describe "validations" do
+    it "should validate the presence of url" do
+      agent.options['url'] = "http://google.com"
+      agent.should be_valid
+
+      agent.options['url'] = ""
+      agent.should_not be_valid
+
+      agent.options['url'] = nil
+      agent.should_not be_valid
+    end
+
+    it "should validate the presence and numericality of expected_update_period_in_days" do
+      agent.options['expected_update_period_in_days'] = "5"
+      agent.should be_valid
+
+      agent.options['expected_update_period_in_days'] = "wut?"
+      agent.should_not be_valid
+
+      agent.options['expected_update_period_in_days'] = 0
+      agent.should_not be_valid
+
+      agent.options['expected_update_period_in_days'] = nil
+      agent.should_not be_valid
+
+      agent.options['expected_update_period_in_days'] = ""
+      agent.should_not be_valid
+    end
+  end
+
+  describe "emitting RSS events" do
+    # The stubbed feed is expected to yield 20 events on a first check.
+    it "should emit items as events" do
+      lambda {
+        agent.check
+      }.should change { agent.events.count }.by(20)
+    end
+
+    it "should track ids and not re-emit the same item when seen again" do
+      agent.check
+      agent.memory['seen_ids'].should == agent.events.map {|e| e.payload['id'] }
+
+      newest_id = agent.memory['seen_ids'][0]
+      agent.events.first.payload['id'].should == newest_id
+      agent.memory['seen_ids'] = agent.memory['seen_ids'][1..-1] # forget the newest id
+
+      # Only the forgotten id should produce a new event on the second check.
+      lambda {
+        agent.check
+      }.should change { agent.events.count }.by(1)
+
+      agent.events.first.payload['id'].should == newest_id
+      agent.memory['seen_ids'][0].should == newest_id
+    end
+
+    # 490 placeholder ids plus the 20 ids from the stubbed feed would make
+    # 510, so the list must be trimmed back to 500.
+    it "should truncate the seen_ids in memory at 500 items" do
+      agent.memory['seen_ids'] = ['x'] * 490
+      agent.check
+      agent.memory['seen_ids'].length.should == 500
+    end
+  end
+end

+ 39 - 51
spec/models/agents/website_agent_spec.rb

@@ -4,9 +4,9 @@ describe Agents::WebsiteAgent do
   describe "checking without basic auth" do
     before do
       stub_request(:any, /xkcd/).to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
-      @site = {
+      @valid_options = {
         'name' => "XKCD",
-        'expected_update_period_in_days' => 2,
+        'expected_update_period_in_days' => "2",
         'type' => "html",
         'url' => "http://xkcd.com",
         'mode' => 'on_change',
@@ -16,11 +16,13 @@ describe Agents::WebsiteAgent do
           'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
         }
       }
-      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @site, :keep_events_for => 2)
+      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2)
       @checker.user = users(:bob)
       @checker.save!
     end
 
+    it_behaves_like WebRequestConcern
+
     describe "validations" do
       before do
         @checker.should be_valid
@@ -42,20 +44,6 @@ describe Agents::WebsiteAgent do
         @checker.should be_valid
       end
 
-      it "should validate headers" do
-        @checker.options['headers'] = "blah"
-        @checker.should_not be_valid
-
-        @checker.options['headers'] = ""
-        @checker.should be_valid
-
-        @checker.options['headers'] = {}
-        @checker.should be_valid
-
-        @checker.options['headers'] = { 'foo' => 'bar' }
-        @checker.should be_valid
-      end
-
       it "should validate mode" do
         @checker.options['mode'] = "nonsense"
         @checker.should_not be_valid
@@ -97,16 +85,16 @@ describe Agents::WebsiteAgent do
 
       it "should always save events when in :all mode" do
         lambda {
-          @site['mode'] = 'all'
-          @checker.options = @site
+          @valid_options['mode'] = 'all'
+          @checker.options = @valid_options
           @checker.check
           @checker.check
         }.should change { Event.count }.by(2)
       end
 
       it "should take uniqueness_look_back into account during deduplication" do
-        @site['mode'] = 'all'
-        @checker.options = @site
+        @valid_options['mode'] = 'all'
+        @checker.options = @valid_options
         @checker.check
         @checker.check
         event = Event.last
@@ -114,47 +102,47 @@ describe Agents::WebsiteAgent do
         event.save
 
         lambda {
-          @site['mode'] = 'on_change'
-          @site['uniqueness_look_back'] = 2
-          @checker.options = @site
+          @valid_options['mode'] = 'on_change'
+          @valid_options['uniqueness_look_back'] = 2
+          @checker.options = @valid_options
           @checker.check
         }.should_not change { Event.count }
 
         lambda {
-          @site['mode'] = 'on_change'
-          @site['uniqueness_look_back'] = 1
-          @checker.options = @site
+          @valid_options['mode'] = 'on_change'
+          @valid_options['uniqueness_look_back'] = 1
+          @checker.options = @valid_options
           @checker.check
         }.should change { Event.count }.by(1)
       end
 
       it "should log an error if the number of results for a set of extraction patterns differs" do
-        @site['extract']['url']['css'] = "div"
-        @checker.options = @site
+        @valid_options['extract']['url']['css'] = "div"
+        @checker.options = @valid_options
         @checker.check
         @checker.logs.first.message.should =~ /Got an uneven number of matches/
       end
 
       it "should accept an array for url" do
-        @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
-        @checker.options = @site
+        @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
+        @checker.options = @valid_options
         lambda { @checker.save! }.should_not raise_error;
         lambda { @checker.check }.should_not raise_error;
       end
 
       it "should parse events from all urls in array" do
         lambda {
-          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
-          @site['mode'] = 'all'
-          @checker.options = @site
+          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
+          @valid_options['mode'] = 'all'
+          @checker.options = @valid_options
           @checker.check
         }.should change { Event.count }.by(2)
       end
 
       it "should follow unique rules when parsing array of urls" do
         lambda {
-          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
-          @checker.options = @site
+          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
+          @checker.options = @valid_options
           @checker.check
         }.should change { Event.count }.by(1)
       end
@@ -170,7 +158,7 @@ describe Agents::WebsiteAgent do
           }, :status => 200)
         site = {
           'name' => "Some JSON Response",
-          'expected_update_period_in_days' => 2,
+          'expected_update_period_in_days' => "2",
           'type' => "json",
           'url' => "http://no-encoding.example.com",
           'mode' => 'on_change',
@@ -197,7 +185,7 @@ describe Agents::WebsiteAgent do
           }, :status => 200)
         site = {
           'name' => "Some JSON Response",
-          'expected_update_period_in_days' => 2,
+          'expected_update_period_in_days' => "2",
           'type' => "json",
           'url' => "http://wrong-encoding.example.com",
           'mode' => 'on_change',
@@ -248,11 +236,11 @@ describe Agents::WebsiteAgent do
       end
 
       it "parses XPath" do
-        @site['extract'].each { |key, value|
+        @valid_options['extract'].each { |key, value|
           value.delete('css')
           value['xpath'] = "//*[@id='comic']//img"
         }
-        @checker.options = @site
+        @checker.options = @valid_options
         @checker.check
         event = Event.last
         event.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
@@ -263,7 +251,7 @@ describe Agents::WebsiteAgent do
       it "should turn relative urls to absolute" do
         rel_site = {
           'name' => "XKCD",
-          'expected_update_period_in_days' => 2,
+          'expected_update_period_in_days' => "2",
           'type' => "html",
           'url' => "http://xkcd.com",
           'mode' => "on_change",
@@ -291,7 +279,7 @@ describe Agents::WebsiteAgent do
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
           site = {
             'name' => "Some JSON Response",
-            'expected_update_period_in_days' => 2,
+            'expected_update_period_in_days' => "2",
             'type' => "json",
             'url' => "http://json-site.com",
             'mode' => 'on_change',
@@ -322,7 +310,7 @@ describe Agents::WebsiteAgent do
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
           site = {
             'name' => "Some JSON Response",
-            'expected_update_period_in_days' => 2,
+            'expected_update_period_in_days' => "2",
             'type' => "json",
             'url' => "http://json-site.com",
             'mode' => 'on_change',
@@ -358,7 +346,7 @@ describe Agents::WebsiteAgent do
           stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
           site = {
             'name' => "Some JSON Response",
-            'expected_update_period_in_days' => 2,
+            'expected_update_period_in_days' => "2",
             'type' => "json",
             'url' => "http://json-site.com",
             'mode' => 'on_change'
@@ -382,7 +370,7 @@ describe Agents::WebsiteAgent do
         @event.payload = { 'url' => "http://xkcd.com" }
 
         lambda {
-          @checker.options = @site
+          @checker.options = @valid_options
           @checker.receive([@event])
         }.should change { Event.count }.by(1)
       end
@@ -394,9 +382,9 @@ describe Agents::WebsiteAgent do
       stub_request(:any, /example/).
         with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
         to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
-      @site = {
+      @valid_options = {
         'name' => "XKCD",
-        'expected_update_period_in_days' => 2,
+        'expected_update_period_in_days' => "2",
         'type' => "html",
         'url' => "http://www.example.com",
         'mode' => 'on_change',
@@ -407,7 +395,7 @@ describe Agents::WebsiteAgent do
         },
         'basic_auth' => "user:pass"
       }
-      @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @site)
+      @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
       @checker.user = users(:bob)
       @checker.save!
     end
@@ -425,9 +413,9 @@ describe Agents::WebsiteAgent do
       stub_request(:any, /example/).
         with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ }).
         to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
-      @site = {
+      @valid_options = {
         'name' => "XKCD",
-        'expected_update_period_in_days' => 2,
+        'expected_update_period_in_days' => "2",
         'type' => "html",
         'url' => "http://www.example.com",
         'mode' => 'on_change',
@@ -436,7 +424,7 @@ describe Agents::WebsiteAgent do
           'url' => { 'css' => "#comic img", 'attr' => "src" },
         }
       }
-      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site)
+      @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
       @checker.user = users(:bob)
       @checker.save!
     end

+ 66 - 0
spec/support/shared_examples/web_request_concern.rb

@@ -0,0 +1,66 @@
+require 'spec_helper'
+
+shared_examples_for WebRequestConcern do # shared validation specs; the including spec may set @valid_options before these run
+  let(:agent) do
+    _agent = described_class.new(:name => "some agent", :options => @valid_options || {}) # unsaved instance of the including spec's described_class; empty options when @valid_options is unset
+    _agent.user = users(:jane) # owner is taken from the users(:jane) fixture
+    _agent
+  end
+
+  describe "validations" do
+    it "should be valid" do
+      agent.should be_valid
+    end
+
+    it "should validate user_agent" do
+      agent.options['user_agent'] = nil # nil, an empty String, and a non-empty String are each expected to be accepted
+      agent.should be_valid
+
+      agent.options['user_agent'] = ""
+      agent.should be_valid
+
+      agent.options['user_agent'] = "foo"
+      agent.should be_valid
+
+      agent.options['user_agent'] = ["foo"] # an Array and an Integer are each expected to be rejected
+      agent.should_not be_valid
+
+      agent.options['user_agent'] = 1
+      agent.should_not be_valid
+    end
+
+    it "should validate headers" do
+      agent.options['headers'] = "blah" # a non-empty String is expected to be rejected
+      agent.should_not be_valid
+
+      agent.options['headers'] = "" # an empty String, an empty Hash, and a populated Hash are each expected to be accepted
+      agent.should be_valid
+
+      agent.options['headers'] = {}
+      agent.should be_valid
+
+      agent.options['headers'] = { 'foo' => 'bar' }
+      agent.should be_valid
+    end
+
+    it "should validate basic_auth" do
+      agent.options['basic_auth'] = "foo:bar" # colon-separated String form
+      agent.should be_valid
+
+      agent.options['basic_auth'] = ["foo", "bar"] # two-element Array form
+      agent.should be_valid
+
+      agent.options['basic_auth'] = "" # an empty String and nil are each expected to be accepted
+      agent.should be_valid
+
+      agent.options['basic_auth'] = nil
+      agent.should be_valid
+
+      agent.options['basic_auth'] = "blah" # a String with no ":" and a one-element Array are each expected to be rejected
+      agent.should_not be_valid
+
+      agent.options['basic_auth'] = ["blah"]
+      agent.should_not be_valid
+    end
+  end
+end