Browse Source

Merge pull request #945 from cantino/rss_agent-events_order

Enhance RssAgent with the `events_order` option
Akinori MUSHA 9 years ago
parent
commit
4c82e3a547
3 changed files with 50 additions and 15 deletions
  1. 1 0
      CHANGES.md
  2. 34 15
      app/models/agents/rss_agent.rb
  3. 15 0
      spec/models/agents/rss_agent_spec.rb

+ 1 - 0
CHANGES.md

@@ -1,5 +1,6 @@
 # Changes
 
+* Jul 30, 2015   - RssAgent can configure the order of events created via `events_order`.
 * Jul 29, 2015   - WebsiteAgent can configure the order of events created via `events_order`.
 * Jul 29, 2015   - DataOutputAgent can configure the order of events in the output via `events_order`.
 * Jul 20, 2015   - Control Links (used by the SchedulerAgent) are correctly exported in Scenarios.

+ 34 - 15
app/models/agents/rss_agent.rb

@@ -9,6 +9,8 @@ module Agents
     can_dry_run!
     default_schedule "every_1d"
 
+    DEFAULT_EVENTS_ORDER = [['{{date_published}}', 'time'], ['{{last_updated}}', 'time']]
+
     description do
       <<-MD
         This Agent consumes RSS feeds and emits events when they change.
@@ -29,6 +31,12 @@ module Agents
           * `disable_url_encoding` - Set to `true` to disable url encoding.
           * `user_agent` - A custom User-Agent name (default: "Faraday v#{Faraday::VERSION}").
           * `max_events_per_run` - Limit number of events created (items parsed) per run for feed.
+
+        # Ordering Events
+
+        #{description_events_order}
+
+        In this Agent, the default value for `events_order` is `#{DEFAULT_EVENTS_ORDER.to_json}`.
       MD
     end
 
@@ -70,6 +78,11 @@ module Agents
       end
 
       validate_web_request_options!
+      validate_events_order
+    end
+
+    def events_order
+      super.presence || DEFAULT_EVENTS_ORDER
     end
 
     def check
@@ -77,26 +90,15 @@ module Agents
         response = faraday.get(url)
         if response.success?
           feed = FeedNormalizer::FeedNormalizer.parse(response.body)
-          feed.clean! if interpolated['clean'] == 'true'
+          feed.clean! if boolify(interpolated['clean'])
           max_events = (interpolated['max_events_per_run'].presence || 0).to_i
           created_event_count = 0
-          feed.entries.sort_by { |entry| [entry.date_published, entry.last_updated] }.each.with_index do |entry, index|
+          sort_events(feed_to_events(feed)).each.with_index do |event, index|
             break if max_events && max_events > 0 && index >= max_events
-            entry_id = get_entry_id(entry)
+            entry_id = event.payload[:id]
             if check_and_track(entry_id)
               created_event_count += 1
-              create_event(payload: {
-                id: entry_id,
-                date_published: entry.date_published,
-                last_updated: entry.last_updated,
-                url: entry.url,
-                urls: entry.urls,
-                description: entry.description,
-                content: entry.content,
-                title: entry.title,
-                authors: entry.authors,
-                categories: entry.categories
-              })
+              create_event(event)
             end
           end
           log "Fetched #{url} and created #{created_event_count} event(s)."
@@ -122,5 +124,22 @@ module Agents
         true
       end
     end
+
+    def feed_to_events(feed)
+      feed.entries.map { |entry|
+        Event.new(payload: {
+                    id: get_entry_id(entry),
+                    date_published: entry.date_published,
+                    last_updated: entry.last_updated,
+                    url: entry.url,
+                    urls: entry.urls,
+                    description: entry.description,
+                    content: entry.content,
+                    title: entry.title,
+                    authors: entry.authors,
+                    categories: entry.categories
+                  })
+      }
+    end
   end
 end

+ 15 - 0
spec/models/agents/rss_agent_spec.rb

@@ -66,6 +66,21 @@ describe Agents::RssAgent do
       expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"])
     end
 
+    it "should emit items as events in the order specified in the events_order option" do
+      expect {
+        agent.options['events_order'] = ['{{title | replace_regex: "^[[:space:]]+", "" }}']
+        agent.check
+      }.to change { agent.events.count }.by(20)
+
+      first, *, last = agent.events.last(20)
+      expect(first.payload['title'].strip).to eq('upgrade rails and gems')
+      expect(first.payload['url']).to eq("https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01")
+      expect(first.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01"])
+      expect(last.payload['title'].strip).to eq('Dashed line in a diagram indicates propagate_immediately being false.')
+      expect(last.payload['url']).to eq("https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535")
+      expect(last.payload['urls']).to eq(["https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535"])
+    end
+
     it "should track ids and not re-emit the same item when seen again" do
       agent.check
       expect(agent.memory['seen_ids']).to eq(agent.events.map {|e| e.payload['id'] })