# rss_agent_spec.rb (6.4 KB)
  1. require 'rails_helper'
  2. describe Agents::RssAgent do
  # Shared setup for every example: builds the baseline agent options and
  # stubs outgoing HTTP requests so that each feed URL pattern is answered
  # with a local Atom fixture (status 200) instead of a real network call.
  before do
    @valid_options = {
      'expected_update_period_in_days' => "2",
      'url' => "https://github.com/cantino/huginn/commits/master.atom",
    }

    # [URL pattern, fixture file returned as the response body]
    fixtures_by_pattern = [
      [/github.com/, "spec/data_fixtures/github_rss.atom"],
      [/SlickdealsnetFP/, "spec/data_fixtures/slickdeals.atom"],
      [/onethingwell.org/, "spec/data_fixtures/onethingwell.atom"],
    ]

    fixtures_by_pattern.each do |url_pattern, fixture_path|
      stub_request(:any, url_pattern).to_return(:body => File.read(Rails.root.join(fixture_path)), :status => 200)
    end
  end
  # The agent under test: an RssAgent built from @valid_options, assigned to
  # the :bob user fixture and saved before it is handed to the example.
  let(:agent) do
    Agents::RssAgent.new(:name => "rss feed", :options => @valid_options).tap do |rss_agent|
      rss_agent.user = users(:bob)
      rss_agent.save!
    end
  end

  # Pull in the shared examples registered under WebRequestConcern.
  it_behaves_like WebRequestConcern
  19. describe "validations" do
  # A single URL string or an array of URLs is accepted; a blank or missing
  # url makes the agent invalid.
  it "should validate the presence of url" do
    accepted_urls = ["http://google.com", ["http://google.com", "http://yahoo.com"]]
    rejected_urls = ["", nil]

    accepted_urls.each do |candidate|
      agent.options['url'] = candidate
      expect(agent).to be_valid
    end

    rejected_urls.each do |candidate|
      agent.options['url'] = candidate
      expect(agent).not_to be_valid
    end
  end
  # A numeric string is accepted; non-numeric text, zero, nil and the empty
  # string are all rejected.
  it "should validate the presence and numericality of expected_update_period_in_days" do
    option_key = 'expected_update_period_in_days'

    agent.options[option_key] = "5"
    expect(agent).to be_valid

    ["wut?", 0, nil, ""].each do |bad_period|
      agent.options[option_key] = bad_period
      expect(agent).not_to be_valid
    end
  end
  42. end
  43. describe "emitting RSS events" do
  # One check of the stubbed GitHub feed must create 20 events; the first and
  # last of those events are spot-checked for their commit URL.
  it "should emit items as events" do
    expect { agent.check }.to change { agent.events.count }.by(20)

    head_event, *, tail_event = agent.events.last(20)
    expectations = [
      [head_event, "https://github.com/cantino/huginn/commit/d0a844662846cf3c83b94c637c1803f03db5a5b0"],
      [tail_event, "https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af"],
    ]

    expectations.each do |event, commit_url|
      expect(event.payload['url']).to eq(commit_url)
      expect(event.payload['urls']).to eq([commit_url])
    end
  end
  # With events_order set to the title (leading whitespace removed by the
  # Liquid filter), the 20 emitted events must come out in that order; the
  # first and last are checked by title and commit URL.
  it "should emit items as events in the order specified in the events_order option" do
    agent.options['events_order'] = ['{{title | replace_regex: "^[[:space:]]+", "" }}']
    expect { agent.check }.to change { agent.events.count }.by(20)

    head_event, *, tail_event = agent.events.last(20)
    expectations = [
      [
        head_event,
        'upgrade rails and gems',
        "https://github.com/cantino/huginn/commit/87a7abda23a82305d7050ac0bb400ce36c863d01",
      ],
      [
        tail_event,
        'Dashed line in a diagram indicates propagate_immediately being false.',
        "https://github.com/cantino/huginn/commit/0e80f5341587aace2c023b06eb9265b776ac4535",
      ],
    ]

    expectations.each do |event, title, commit_url|
      expect(event.payload['title'].strip).to eq(title)
      expect(event.payload['url']).to eq(commit_url)
      expect(event.payload['urls']).to eq([commit_url])
    end
  end
  # After a check, memory['seen_ids'] mirrors the ids of the emitted events.
  # Dropping the newest id from memory must cause exactly that one item to be
  # emitted again on the next check, and its id to be remembered again.
  it "should track ids and not re-emit the same item when seen again" do
    agent.check

    emitted_ids = agent.events.map { |event| event.payload['id'] }
    expect(agent.memory['seen_ids']).to eq(emitted_ids)

    newest_id = agent.memory['seen_ids'].first
    expect(agent.events.first.payload['id']).to eq(newest_id)

    # Forget the newest id so the agent treats that item as unseen.
    agent.memory['seen_ids'] = agent.memory['seen_ids'].drop(1)

    expect { agent.check }.to change { agent.events.count }.by(1)

    expect(agent.events.first.payload['id']).to eq(newest_id)
    expect(agent.memory['seen_ids'].first).to eq(newest_id)
  end
  # Memory is pre-filled with 490 placeholder ids; after one check the list
  # must hold exactly 500 entries.
  it "should truncate the seen_ids in memory at 500 items" do
    agent.memory['seen_ids'] = Array.new(490, 'x')

    agent.check

    expect(agent.memory['seen_ids'].size).to eq(500)
  end
  # With two feeds configured, a single check must emit the items of both:
  # 20 from the GitHub fixture plus 79 from the Slickdeals fixture.
  it "should support an array of URLs" do
    github_feed = "https://github.com/cantino/huginn/commits/master.atom"
    slickdeals_feed = "http://feeds.feedburner.com/SlickdealsnetFP?format=atom"

    agent.options['url'] = [github_feed, slickdeals_feed]
    agent.save!

    expect { agent.check }.to change { agent.events.count }.by(20 + 79)
  end
  # max_events_per_run = 1 caps a check at a single emitted event.
  it "should fetch one event per run" do
    feed_urls = ["https://github.com/cantino/huginn/commits/master.atom"]
    agent.options['url'] = feed_urls
    agent.options['max_events_per_run'] = 1

    agent.check

    expect(agent.events.count).to eq(1)
  end
  # A max_events_per_run value <= 0 is expected to be ignored, so all items
  # are fetched: 0 yields the full 20 events, and a following check with -1
  # adds no further events.
  it "should fetch all events per run" do
    feed_urls = ["https://github.com/cantino/huginn/commits/master.atom"]
    agent.options['url'] = feed_urls

    agent.options['max_events_per_run'] = 0
    agent.check
    expect(agent.events.count).to eq(20)

    agent.options['max_events_per_run'] = -1
    expect { agent.check }.not_to change { agent.events.count }
  end
  108. end
  # Uses the Slickdeals fixture feed: the seen ids recorded in memory are
  # expected to equal the MD5 hex digest of each event's content.
  context "when no ids are available" do
    before do
      @valid_options['url'] = 'http://feeds.feedburner.com/SlickdealsnetFP?format=atom'
    end

    it "calculates content MD5 sums" do
      expect { agent.check }.to change { agent.events.count }.by(79)

      content_digests = agent.events.map do |event|
        Digest::MD5.hexdigest(event.payload['content'])
      end
      expect(agent.memory['seen_ids']).to eq(content_digests)
    end
  end
  # Uses the onethingwell fixture feed to check that every category of an
  # item ends up in the event payload.
  context "parsing feeds" do
    before do
      @valid_options['url'] = 'http://onethingwell.org/rss'
    end

    it "captures multiple categories" do
      agent.check

      head_event, *, tail_event = agent.events.take(3)

      expect(head_event.payload['categories']).to eq(["csv", "crossplatform", "utilities"])
      expect(tail_event.payload['categories']).to eq(["web"])
    end
  end
  # When feed parsing raises, the agent must swallow the error and log a
  # message that names the feed URL it was fetching.
  describe 'logging errors with the feed url' do
    it 'includes the feed URL when an exception is raised' do
      # Force the feed parser to fail for any input.
      mock(FeedNormalizer::FeedNormalizer).parse(anything, loose: true) { raise StandardError, "Some error!" }

      # Block form of expect: raise_error is a block matcher, and passing a
      # lambda as a value to it is deprecated by rspec-expectations.
      expect { agent.check }.not_to raise_error

      expect(agent.logs.last.message).to match(%r[Failed to fetch https://github.com])
    end
  end
  140. end