# spec/models/agents/website_agent_spec.rb
require 'rails_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  4. before do
  5. stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
  6. status: 200,
  7. headers: {
  8. 'X-Status-Message' => 'OK'
  9. })
  10. @valid_options = {
  11. 'name' => "XKCD",
  12. 'expected_update_period_in_days' => "2",
  13. 'type' => "html",
  14. 'url' => "http://xkcd.com",
  15. 'mode' => 'on_change',
  16. 'extract' => {
  17. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  18. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  19. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  20. }
  21. }
  22. @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2.days)
  23. @checker.user = users(:bob)
  24. @checker.save!
  25. end
  26. it_behaves_like WebRequestConcern
  27. describe "validations" do
  28. before do
  29. expect(@checker).to be_valid
  30. end
  31. it "should validate the integer fields" do
  32. @checker.options['expected_update_period_in_days'] = "2"
  33. expect(@checker).to be_valid
  34. @checker.options['expected_update_period_in_days'] = "nonsense"
  35. expect(@checker).not_to be_valid
  36. end
  37. it 'should validate the http_success_codes fields' do
  38. @checker.options['http_success_codes'] = [404]
  39. expect(@checker).to be_valid
  40. @checker.options['http_success_codes'] = [404, 404]
  41. expect(@checker).not_to be_valid
  42. @checker.options['http_success_codes'] = [404, "422"]
  43. expect(@checker).to be_valid
  44. @checker.options['http_success_codes'] = [404.0]
  45. expect(@checker).not_to be_valid
  46. @checker.options['http_success_codes'] = ["not_a_code"]
  47. expect(@checker).not_to be_valid
  48. @checker.options['http_success_codes'] = []
  49. expect(@checker).to be_valid
  50. @checker.options['http_success_codes'] = ''
  51. expect(@checker).to be_valid
  52. @checker.options['http_success_codes'] = false
  53. expect(@checker).to be_valid
  54. end
  55. it "should validate uniqueness_look_back" do
  56. @checker.options['uniqueness_look_back'] = "nonsense"
  57. expect(@checker).not_to be_valid
  58. @checker.options['uniqueness_look_back'] = "2"
  59. expect(@checker).to be_valid
  60. end
  61. it "should validate mode" do
  62. @checker.options['mode'] = "nonsense"
  63. expect(@checker).not_to be_valid
  64. @checker.options['mode'] = "on_change"
  65. expect(@checker).to be_valid
  66. @checker.options['mode'] = "all"
  67. expect(@checker).to be_valid
  68. @checker.options['mode'] = ""
  69. expect(@checker).to be_valid
  70. end
  71. it "should validate the force_encoding option" do
  72. @checker.options['force_encoding'] = ''
  73. expect(@checker).to be_valid
  74. @checker.options['force_encoding'] = 'UTF-8'
  75. expect(@checker).to be_valid
  76. @checker.options['force_encoding'] = ['UTF-8']
  77. expect(@checker).not_to be_valid
  78. @checker.options['force_encoding'] = 'UTF-42'
  79. expect(@checker).not_to be_valid
  80. end
  81. context "in 'json' type" do
  82. it "should ensure that all extractions have a 'path'" do
  83. @checker.options['type'] = 'json'
  84. @checker.options['extract'] = {
  85. 'url' => { 'foo' => 'bar' },
  86. }
  87. expect(@checker).to_not be_valid
  88. expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)
  89. @checker.options['type'] = 'json'
  90. @checker.options['extract'] = {
  91. 'url' => { 'path' => 'bar' },
  92. }
  93. expect(@checker).to be_valid
  94. end
  95. end
  96. end
  97. describe "#check" do
  98. it "should check for changes (and update Event.expires_at)" do
  99. expect { @checker.check }.to change { Event.count }.by(1)
  100. event = Event.last
  101. sleep 2
  102. expect { @checker.check }.not_to change { Event.count }
  103. update_event = Event.last
  104. expect(update_event.expires_at).not_to eq(event.expires_at)
  105. end
  106. it "should always save events when in :all mode" do
  107. expect {
  108. @valid_options['mode'] = 'all'
  109. @checker.options = @valid_options
  110. @checker.check
  111. @checker.check
  112. }.to change { Event.count }.by(2)
  113. end
  114. it "should take uniqueness_look_back into account during deduplication" do
  115. @valid_options['mode'] = 'all'
  116. @checker.options = @valid_options
  117. @checker.check
  118. @checker.check
  119. event = Event.last
  120. event.payload = "{}"
  121. event.save
  122. expect {
  123. @valid_options['mode'] = 'on_change'
  124. @valid_options['uniqueness_look_back'] = 2
  125. @checker.options = @valid_options
  126. @checker.check
  127. }.not_to change { Event.count }
  128. expect {
  129. @valid_options['mode'] = 'on_change'
  130. @valid_options['uniqueness_look_back'] = 1
  131. @checker.options = @valid_options
  132. @checker.check
  133. }.to change { Event.count }.by(1)
  134. end
  135. it "should log an error if the number of results for a set of extraction patterns differs" do
  136. @valid_options['extract']['url']['css'] = "div"
  137. @checker.options = @valid_options
  138. @checker.check
  139. expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
  140. end
  141. it "should accept an array for url" do
  142. @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
  143. @checker.options = @valid_options
  144. expect { @checker.save! }.not_to raise_error;
  145. expect { @checker.check }.not_to raise_error;
  146. end
  147. it "should parse events from all urls in array" do
  148. expect {
  149. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  150. @valid_options['mode'] = 'all'
  151. @checker.options = @valid_options
  152. @checker.check
  153. }.to change { Event.count }.by(2)
  154. end
  155. it "should follow unique rules when parsing array of urls" do
  156. expect {
  157. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  158. @checker.options = @valid_options
  159. @checker.check
  160. }.to change { Event.count }.by(1)
  161. end
  162. end
  163. describe 'http_success_codes' do
  164. it 'should allow scraping from a 404 result' do
  165. json = {
  166. 'response' => {
  167. 'version' => 2,
  168. 'title' => "hello!"
  169. }
  170. }
  171. zipped = ActiveSupport::Gzip.compress(json.to_json)
  172. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
  173. site = {
  174. 'name' => "Some JSON Response",
  175. 'expected_update_period_in_days' => "2",
  176. 'type' => "json",
  177. 'url' => "http://gzip.com",
  178. 'mode' => 'on_change',
  179. 'http_success_codes' => [404],
  180. 'extract' => {
  181. 'version' => { 'path' => 'response.version' },
  182. },
  183. # no unzip option
  184. }
  185. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  186. checker.user = users(:bob)
  187. checker.save!
  188. checker.check
  189. event = Event.last
  190. expect(event.payload['version']).to eq(2)
  191. end
  192. end
  193. describe 'unzipping' do
  194. it 'should unzip automatically if the response has Content-Encoding: gzip' do
  195. json = {
  196. 'response' => {
  197. 'version' => 2,
  198. 'title' => "hello!"
  199. }
  200. }
  201. zipped = ActiveSupport::Gzip.compress(json.to_json)
  202. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
  203. site = {
  204. 'name' => "Some JSON Response",
  205. 'expected_update_period_in_days' => "2",
  206. 'type' => "json",
  207. 'url' => "http://gzip.com",
  208. 'mode' => 'on_change',
  209. 'extract' => {
  210. 'version' => { 'path' => 'response.version' },
  211. },
  212. # no unzip option
  213. }
  214. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  215. checker.user = users(:bob)
  216. checker.save!
  217. checker.check
  218. event = Event.last
  219. expect(event.payload['version']).to eq(2)
  220. end
  221. it 'should unzip with unzip option' do
  222. json = {
  223. 'response' => {
  224. 'version' => 2,
  225. 'title' => "hello!"
  226. }
  227. }
  228. zipped = ActiveSupport::Gzip.compress(json.to_json)
  229. stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
  230. site = {
  231. 'name' => "Some JSON Response",
  232. 'expected_update_period_in_days' => "2",
  233. 'type' => "json",
  234. 'url' => "http://gzip.com",
  235. 'mode' => 'on_change',
  236. 'extract' => {
  237. 'version' => { 'path' => 'response.version' },
  238. },
  239. 'unzip' => 'gzip',
  240. }
  241. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  242. checker.user = users(:bob)
  243. checker.save!
  244. checker.check
  245. event = Event.last
  246. expect(event.payload['version']).to eq(2)
  247. end
  248. it 'should either avoid or support a raw deflate stream (#1018)' do
  249. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ }).
  250. to_return(body: 'hello',
  251. status: 200)
  252. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ }).
  253. to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
  254. headers: { 'Content-Encoding' => 'deflate' },
  255. status: 200)
  256. site = {
  257. 'name' => 'Some Response',
  258. 'expected_update_period_in_days' => '2',
  259. 'type' => 'text',
  260. 'url' => 'http://deflate',
  261. 'mode' => 'on_change',
  262. 'extract' => {
  263. 'content' => { 'regexp' => '.+', 'index' => 0 }
  264. }
  265. }
  266. checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
  267. checker.user = users(:bob)
  268. checker.save!
  269. expect {
  270. checker.check
  271. }.to change { Event.count }.by(1)
  272. event = Event.last
  273. expect(event.payload['content']).to eq('hello')
  274. end
  275. end
  276. describe 'encoding' do
  277. it 'should be forced with force_encoding option' do
  278. huginn = "\u{601d}\u{8003}"
  279. stub_request(:any, /no-encoding/).to_return(body: {
  280. value: huginn,
  281. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  282. 'Content-Type' => 'application/json',
  283. }, status: 200)
  284. site = {
  285. 'name' => "Some JSON Response",
  286. 'expected_update_period_in_days' => "2",
  287. 'type' => "json",
  288. 'url' => "http://no-encoding.example.com",
  289. 'mode' => 'on_change',
  290. 'extract' => {
  291. 'value' => { 'path' => 'value' },
  292. },
  293. 'force_encoding' => 'EUC-JP',
  294. }
  295. checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
  296. checker.user = users(:bob)
  297. checker.save!
  298. expect { checker.check }.to change { Event.count }.by(1)
  299. event = Event.last
  300. expect(event.payload['value']).to eq(huginn)
  301. end
  302. it 'should be overridden with force_encoding option' do
  303. huginn = "\u{601d}\u{8003}"
  304. stub_request(:any, /wrong-encoding/).to_return(body: {
  305. value: huginn,
  306. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  307. 'Content-Type' => 'application/json; UTF-8',
  308. }, status: 200)
  309. site = {
  310. 'name' => "Some JSON Response",
  311. 'expected_update_period_in_days' => "2",
  312. 'type' => "json",
  313. 'url' => "http://wrong-encoding.example.com",
  314. 'mode' => 'on_change',
  315. 'extract' => {
  316. 'value' => { 'path' => 'value' },
  317. },
  318. 'force_encoding' => 'EUC-JP',
  319. }
  320. checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
  321. checker.user = users(:bob)
  322. checker.save!
  323. expect { checker.check }.to change { Event.count }.by(1)
  324. event = Event.last
  325. expect(event.payload['value']).to eq(huginn)
  326. end
  327. it 'should be determined by charset in Content-Type' do
  328. huginn = "\u{601d}\u{8003}"
  329. stub_request(:any, /charset-euc-jp/).to_return(body: {
  330. value: huginn,
  331. }.to_json.encode(Encoding::EUC_JP), headers: {
  332. 'Content-Type' => 'application/json; charset=EUC-JP',
  333. }, status: 200)
  334. site = {
  335. 'name' => "Some JSON Response",
  336. 'expected_update_period_in_days' => "2",
  337. 'type' => "json",
  338. 'url' => "http://charset-euc-jp.example.com",
  339. 'mode' => 'on_change',
  340. 'extract' => {
  341. 'value' => { 'path' => 'value' },
  342. },
  343. }
  344. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  345. checker.user = users(:bob)
  346. checker.save!
  347. expect { checker.check }.to change { Event.count }.by(1)
  348. event = Event.last
  349. expect(event.payload['value']).to eq(huginn)
  350. end
  351. it 'should default to UTF-8 when unknown charset is found' do
  352. huginn = "\u{601d}\u{8003}"
  353. stub_request(:any, /charset-unknown/).to_return(body: {
  354. value: huginn,
  355. }.to_json.b, headers: {
  356. 'Content-Type' => 'application/json; charset=unicode',
  357. }, status: 200)
  358. site = {
  359. 'name' => "Some JSON Response",
  360. 'expected_update_period_in_days' => "2",
  361. 'type' => "json",
  362. 'url' => "http://charset-unknown.example.com",
  363. 'mode' => 'on_change',
  364. 'extract' => {
  365. 'value' => { 'path' => 'value' },
  366. },
  367. }
  368. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  369. checker.user = users(:bob)
  370. checker.save!
  371. expect { checker.check }.to change { Event.count }.by(1)
  372. event = Event.last
  373. expect(event.payload['value']).to eq(huginn)
  374. end
  375. end
  376. describe '#working?' do
  377. it 'checks if events have been received within the expected receive period' do
  378. stubbed_time = Time.now
  379. stub(Time).now { stubbed_time }
  380. expect(@checker).not_to be_working # No events created
  381. @checker.check
  382. expect(@checker.reload).to be_working # Just created events
  383. @checker.error "oh no!"
  384. expect(@checker.reload).not_to be_working # There is a recent error
  385. stubbed_time = 20.minutes.from_now
  386. @checker.events.delete_all
  387. @checker.check
  388. expect(@checker.reload).to be_working # There is a newer event now
  389. stubbed_time = 2.days.from_now
  390. expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
  391. end
  392. end
  393. describe "parsing" do
  394. it "parses CSS" do
  395. @checker.check
  396. event = Event.last
  397. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  398. expect(event.payload['title']).to eq("Evolving")
  399. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  400. end
  401. it "parses XPath" do
  402. @valid_options['extract'].each { |key, value|
  403. value.delete('css')
  404. value['xpath'] = "//*[@id='comic']//img"
  405. }
  406. @checker.options = @valid_options
  407. @checker.check
  408. event = Event.last
  409. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  410. expect(event.payload['title']).to eq("Evolving")
  411. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  412. end
  413. it "should turn relative urls to absolute" do
  414. rel_site = {
  415. 'name' => "XKCD",
  416. 'expected_update_period_in_days' => "2",
  417. 'type' => "html",
  418. 'url' => "http://xkcd.com",
  419. 'mode' => "on_change",
  420. 'extract' => {
  421. 'url' => {'css' => "#topLeft a", 'value' => "@href"},
  422. }
  423. }
  424. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  425. rel.user = users(:bob)
  426. rel.save!
  427. rel.check
  428. event = Event.last
  429. expect(event.payload['url']).to eq("http://xkcd.com/about")
  430. end
  431. it "should return an integer value if XPath evaluates to one" do
  432. rel_site = {
  433. 'name' => "XKCD",
  434. 'expected_update_period_in_days' => 2,
  435. 'type' => "html",
  436. 'url' => "http://xkcd.com",
  437. 'mode' => "on_change",
  438. 'extract' => {
  439. 'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
  440. }
  441. }
  442. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  443. rel.user = users(:bob)
  444. rel.save!
  445. rel.check
  446. event = Event.last
  447. expect(event.payload['num_links']).to eq("9")
  448. end
  449. it "should return all texts concatenated if XPath returns many text nodes" do
  450. rel_site = {
  451. 'name' => "XKCD",
  452. 'expected_update_period_in_days' => 2,
  453. 'type' => "html",
  454. 'url' => "http://xkcd.com",
  455. 'mode' => "on_change",
  456. 'extract' => {
  457. 'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
  458. }
  459. }
  460. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  461. rel.user = users(:bob)
  462. rel.save!
  463. rel.check
  464. event = Event.last
  465. expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, and language.")
  466. end
  467. it "should interpolate _response_" do
  468. @valid_options['extract']['response_info'] =
  469. @valid_options['extract']['url'].merge(
  470. 'value' => '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
  471. )
  472. @checker.options = @valid_options
  473. @checker.check
  474. event = Event.last
  475. expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
  476. end
  477. describe "XML" do
  478. before do
  479. stub_request(:any, /github_rss/).to_return(
  480. body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
  481. status: 200
  482. )
  483. @checker = Agents::WebsiteAgent.new(name: 'github', options: {
  484. 'name' => 'GitHub',
  485. 'expected_update_period_in_days' => '2',
  486. 'type' => 'xml',
  487. 'url' => 'http://example.com/github_rss.atom',
  488. 'mode' => 'on_change',
  489. 'extract' => {
  490. 'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
  491. 'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
  492. 'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
  493. }
  494. }, keep_events_for: 2.days)
  495. @checker.user = users(:bob)
  496. @checker.save!
  497. end
  498. it "works with XPath" do
  499. expect {
  500. @checker.check
  501. }.to change { Event.count }.by(20)
  502. event = Event.last
  503. expect(event.payload['title']).to eq('Shift to dev group')
  504. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  505. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  506. end
  507. it "works with XPath with namespaces unstripped" do
  508. @checker.options['use_namespaces'] = 'true'
  509. @checker.save!
  510. expect {
  511. @checker.check
  512. }.to change { Event.count }.by(0)
  513. @checker.options['extract'] = {
  514. 'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
  515. 'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
  516. 'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
  517. }
  518. @checker.save!
  519. expect {
  520. @checker.check
  521. }.to change { Event.count }.by(20)
  522. event = Event.last
  523. expect(event.payload['title']).to eq('Shift to dev group')
  524. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  525. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  526. end
  527. it "works with CSS selectors" do
  528. @checker.options['extract'] = {
  529. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  530. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  531. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  532. }
  533. @checker.save!
  534. expect {
  535. @checker.check
  536. }.to change { Event.count }.by(20)
  537. event = Event.last
  538. expect(event.payload['title']).to be_empty
  539. expect(event.payload['thumbnail']).to be_empty
  540. @checker.options['extract'] = {
  541. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
  542. 'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
  543. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
  544. }
  545. @checker.save!
  546. expect {
  547. @checker.check
  548. }.to change { Event.count }.by(20)
  549. event = Event.last
  550. expect(event.payload['title']).to eq('Shift to dev group')
  551. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  552. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  553. end
  554. it "works with CSS selectors with namespaces stripped" do
  555. @checker.options['extract'] = {
  556. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  557. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  558. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  559. }
  560. @checker.options['use_namespaces'] = 'false'
  561. @checker.save!
  562. expect {
  563. @checker.check
  564. }.to change { Event.count }.by(20)
  565. event = Event.last
  566. expect(event.payload['title']).to eq('Shift to dev group')
  567. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  568. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  569. end
  570. end
  571. describe "XML with cdata" do
  572. before do
  573. stub_request(:any, /cdata_rss/).to_return(
  574. body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
  575. status: 200
  576. )
  577. @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
  578. 'name' => 'CDATA',
  579. 'expected_update_period_in_days' => '2',
  580. 'type' => 'xml',
  581. 'url' => 'http://example.com/cdata_rss.atom',
  582. 'mode' => 'on_change',
  583. 'extract' => {
  584. 'author' => { 'xpath' => '/feed/entry/author/name', 'value' => './/text()'},
  585. 'title' => { 'xpath' => '/feed/entry/title', 'value' => './/text()' },
  586. 'content' => { 'xpath' => '/feed/entry/content', 'value' => './/text()' },
  587. }
  588. }, keep_events_for: 2.days)
  589. @checker.user = users(:bob)
  590. @checker.save!
  591. end
  592. it "works with XPath" do
  593. expect {
  594. @checker.check
  595. }.to change { Event.count }.by(10)
  596. event = Event.last
  597. expect(event.payload['author']).to eq('bill98')
  598. expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
  599. expect(event.payload['content']).to start_with('Can I ')
  600. end
  601. end
  602. describe "JSON" do
  603. it "works with paths" do
  604. json = {
  605. 'response' => {
  606. 'version' => 2,
  607. 'title' => "hello!"
  608. }
  609. }
  610. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  611. site = {
  612. 'name' => "Some JSON Response",
  613. 'expected_update_period_in_days' => "2",
  614. 'type' => "json",
  615. 'url' => "http://json-site.com",
  616. 'mode' => 'on_change',
  617. 'extract' => {
  618. 'version' => {'path' => "response.version"},
  619. 'title' => {'path' => "response.title"}
  620. }
  621. }
  622. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  623. checker.user = users(:bob)
  624. checker.save!
  625. checker.check
  626. event = Event.last
  627. expect(event.payload['version']).to eq(2)
  628. expect(event.payload['title']).to eq("hello!")
  629. end
  630. it "can handle arrays" do
  631. json = {
  632. 'response' => {
  633. 'data' => [
  634. {'title' => "first", 'version' => 2},
  635. {'title' => "second", 'version' => 2.5}
  636. ]
  637. }
  638. }
  639. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  640. site = {
  641. 'name' => "Some JSON Response",
  642. 'expected_update_period_in_days' => "2",
  643. 'type' => "json",
  644. 'url' => "http://json-site.com",
  645. 'mode' => 'on_change',
  646. 'extract' => {
  647. :title => {'path' => "response.data[*].title"},
  648. :version => {'path' => "response.data[*].version"}
  649. }
  650. }
  651. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  652. checker.user = users(:bob)
  653. checker.save!
  654. expect {
  655. checker.check
  656. }.to change { Event.count }.by(2)
  657. (event2, event1) = Event.last(2)
  658. expect(event1.payload['version']).to eq(2.5)
  659. expect(event1.payload['title']).to eq("second")
  660. expect(event2.payload['version']).to eq(2)
  661. expect(event2.payload['title']).to eq("first")
  662. end
  663. it "stores the whole object if :extract is not specified" do
  664. json = {
  665. 'response' => {
  666. 'version' => 2,
  667. 'title' => "hello!"
  668. }
  669. }
  670. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  671. site = {
  672. 'name' => "Some JSON Response",
  673. 'expected_update_period_in_days' => "2",
  674. 'type' => "json",
  675. 'url' => "http://json-site.com",
  676. 'mode' => 'on_change'
  677. }
  678. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  679. checker.user = users(:bob)
  680. checker.save!
  681. checker.check
  682. event = Event.last
  683. expect(event.payload['response']['version']).to eq(2)
  684. expect(event.payload['response']['title']).to eq("hello!")
  685. end
  686. end
  687. describe "text parsing" do
  688. before do
  689. stub_request(:any, /text-site/).to_return(body: <<-EOF, status: 200)
  690. water: wet
  691. fire: hot
  692. EOF
  693. site = {
  694. 'name' => 'Some Text Response',
  695. 'expected_update_period_in_days' => '2',
  696. 'type' => 'text',
  697. 'url' => 'http://text-site.com',
  698. 'mode' => 'on_change',
  699. 'extract' => {
  700. 'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
  701. 'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
  702. }
  703. }
  704. @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
  705. @checker.user = users(:bob)
  706. @checker.save!
  707. end
  708. it "works with regexp with named capture" do
  709. @checker.options = @checker.options.merge('extract' => {
  710. 'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
  711. 'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
  712. })
  713. expect {
  714. @checker.check
  715. }.to change { Event.count }.by(2)
  716. event1, event2 = Event.last(2)
  717. expect(event1.payload['word']).to eq('water')
  718. expect(event1.payload['property']).to eq('wet')
  719. expect(event2.payload['word']).to eq('fire')
  720. expect(event2.payload['property']).to eq('hot')
  721. end
  722. it "works with regexp" do
  723. expect {
  724. @checker.check
  725. }.to change { Event.count }.by(2)
  726. event1, event2 = Event.last(2)
  727. expect(event1.payload['word']).to eq('water')
  728. expect(event1.payload['property']).to eq('wet')
  729. expect(event2.payload['word']).to eq('fire')
  730. expect(event2.payload['property']).to eq('hot')
  731. end
  732. end
  733. end
  734. describe "#receive" do
  735. describe "with a url or url_from_event" do
  736. before do
  737. @event = Event.new
  738. @event.agent = agents(:bob_rain_notifier_agent)
  739. @event.payload = {
  740. 'url' => 'http://foo.com',
  741. 'link' => 'Random'
  742. }
  743. end
  744. it "should use url_from_event as the url to scrape" do
  745. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  746. @checker.options = @valid_options.merge(
  747. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  748. )
  749. @checker.receive([@event])
  750. expect(stub).to have_been_requested
  751. end
  752. it "should use the Agent's `url` option if url_from_event is not set" do
  753. expect {
  754. @checker.options = @valid_options
  755. @checker.receive([@event])
  756. }.to change { Event.count }.by(1)
  757. end
  758. it "should allow url_from_event to be an array of urls" do
  759. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  760. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')
  761. @checker.options = @valid_options.merge(
  762. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  763. )
  764. @checker.receive([@event])
  765. expect(stub1).to have_been_requested
  766. expect(stub2).to have_been_requested
  767. end
  768. it "should interpolate values from incoming event payload" do
  769. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  770. expect {
  771. @valid_options['url_from_event'] = '{{ url }}'
  772. @valid_options['extract'] = {
  773. 'from' => {
  774. 'xpath' => '*[1]',
  775. 'value' => '{{url | to_xpath}}'
  776. },
  777. 'to' => {
  778. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  779. 'value' => '@href'
  780. },
  781. }
  782. @checker.options = @valid_options
  783. @checker.receive([@event])
  784. }.to change { Event.count }.by(1)
  785. expect(Event.last.payload).to eq({
  786. 'from' => 'http://foo.com',
  787. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  788. })
  789. end
  790. it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
  791. @checker.options['mode'] = 'merge'
  792. @event.payload.delete('url')
  793. expect {
  794. @checker.receive([@event])
  795. }.to change { Event.count }.by(1)
  796. expect(Event.last.payload['title']).to eq('Evolving')
  797. expect(Event.last.payload['link']).to eq('Random')
  798. end
  799. it "should interpolate values from incoming event payload and _response_" do
  800. @event.payload['title'] = 'XKCD'
  801. expect {
  802. @valid_options['extract'] = {
  803. 'response_info' => @valid_options['extract']['url'].merge(
  804. 'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
  805. )
  806. }
  807. @checker.options = @valid_options
  808. @checker.receive([@event])
  809. }.to change { Event.count }.by(1)
  810. expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  811. end
  812. it "should support merging of events" do
  813. expect {
  814. @checker.options = @valid_options
  815. @checker.options[:mode] = "merge"
  816. @checker.receive([@event])
  817. }.to change { Event.count }.by(1)
  818. last_payload = Event.last.payload
  819. expect(last_payload['link']).to eq('Random')
  820. end
  821. end
  822. describe "with a data_from_event" do
  823. describe "with json data" do
  824. before do
  825. @event = Event.new
  826. @event.agent = agents(:bob_rain_notifier_agent)
  827. @event.payload = {
  828. 'something' => 'some value',
  829. 'some_object' => {
  830. 'some_data' => { hello: 'world' }.to_json
  831. }
  832. }
  833. @event.save!
  834. @checker.options = @valid_options.merge(
  835. 'type' => 'json',
  836. 'data_from_event' => '{{ some_object.some_data }}',
  837. 'extract' => {
  838. 'value' => { 'path' => 'hello' }
  839. }
  840. )
  841. end
  842. it "should extract from the event data in the incoming event payload" do
  843. expect {
  844. @checker.receive([@event])
  845. }.to change { Event.count }.by(1)
  846. expect(@checker.events.last.payload).to eq({ 'value' => 'world' })
  847. end
  848. it "should support merge mode" do
  849. @checker.options['mode'] = "merge"
  850. expect {
  851. @checker.receive([@event])
  852. }.to change { Event.count }.by(1)
  853. expect(@checker.events.last.payload).to eq(@event.payload.merge('value' => 'world'))
  854. end
  855. it "should output an error when nothing can be found at the path" do
  856. @checker.options = @checker.options.merge(
  857. 'data_from_event' => '{{ some_object.mistake }}'
  858. )
  859. expect {
  860. @checker.receive([@event])
  861. }.to_not change { Event.count }
  862. expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
  863. end
  864. it "should output an error when the data cannot be parsed" do
  865. @event.update_attribute :payload, @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })
  866. expect {
  867. @checker.receive([@event])
  868. }.to_not change { Event.count }
  869. expect(@checker.logs.last.message).to match(/Error when handling event data:/)
  870. end
  871. end
  872. describe "with HTML data" do
  873. before do
  874. @event = Event.new
  875. @event.agent = agents(:bob_rain_notifier_agent)
  876. @event.payload = {
  877. 'url' => 'http://xkcd.com',
  878. 'some_object' => {
  879. 'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
  880. }
  881. }
  882. @event.save!
  883. @checker.options = @valid_options.merge(
  884. 'type' => 'html',
  885. 'data_from_event' => '{{ some_object.some_data }}',
  886. 'extract' => {
  887. 'title' => { 'css' => ".title", 'value' => ".//text()" },
  888. 'body' => { 'css' => "div span.body", 'value' => ".//text()" }
  889. }
  890. )
  891. end
  892. it "should extract from the event data in the incoming event payload" do
  893. expect {
  894. @checker.receive([@event])
  895. }.to change { Event.count }.by(1)
  896. expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
  897. end
  898. end
  899. end
  900. end
  901. end
  902. describe "checking with http basic auth" do
  903. before do
  904. @valid_options = {
  905. 'name' => "XKCD",
  906. 'expected_update_period_in_days' => "2",
  907. 'type' => "html",
  908. 'url' => "http://www.example.com",
  909. 'mode' => 'on_change',
  910. 'extract' => {
  911. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  912. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  913. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  914. },
  915. 'basic_auth' => "user:pass"
  916. }
  917. @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
  918. @checker.user = users(:bob)
  919. @checker.save!
  920. case @checker.faraday_backend
  921. when :typhoeus
  922. # Webmock's typhoeus adapter does not read the Authorization
  923. # header: https://github.com/bblimke/webmock/pull/592
  924. stub_request(:any, "www.example.com").
  925. with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m0')}" })
  926. else
  927. stub_request(:any, "user:pass@www.example.com")
  928. end.to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  929. end
  930. describe "#check" do
  931. it "should check for changes" do
  932. expect { @checker.check }.to change { Event.count }.by(1)
  933. expect { @checker.check }.not_to change { Event.count }
  934. end
  935. end
  936. end
  937. describe "checking with headers" do
  938. before do
  939. stub_request(:any, /example/).
  940. with(headers: { 'foo' => 'bar' }).
  941. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  942. @valid_options = {
  943. 'name' => "XKCD",
  944. 'expected_update_period_in_days' => "2",
  945. 'type' => "html",
  946. 'url' => "http://www.example.com",
  947. 'mode' => 'on_change',
  948. 'headers' => { 'foo' => 'bar' },
  949. 'extract' => {
  950. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  951. }
  952. }
  953. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  954. @checker.user = users(:bob)
  955. @checker.save!
  956. end
  957. describe "#check" do
  958. it "should check for changes" do
  959. expect { @checker.check }.to change { Event.count }.by(1)
  960. end
  961. end
  962. end
  963. describe "checking urls" do
  964. before do
  965. stub_request(:any, /example/).
  966. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), :status => 200)
  967. @valid_options = {
  968. 'name' => "Url Test",
  969. 'expected_update_period_in_days' => "2",
  970. 'type' => "html",
  971. 'url' => "http://www.example.com",
  972. 'mode' => 'all',
  973. 'extract' => {
  974. 'url' => { 'css' => "a", 'value' => "@href" },
  975. }
  976. }
  977. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  978. @checker.user = users(:bob)
  979. @checker.save!
  980. end
  981. describe "#check" do
  982. before do
  983. expect { @checker.check }.to change { Event.count }.by(7)
  984. @events = Event.last(7)
  985. end
  986. it "should check hostname" do
  987. event = @events[0]
  988. expect(event.payload['url']).to eq("http://google.com")
  989. end
  990. it "should check unescaped query" do
  991. event = @events[1]
  992. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  993. end
  994. it "should check properly escaped query" do
  995. event = @events[2]
  996. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  997. end
  998. it "should check unescaped unicode url" do
  999. event = @events[3]
  1000. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1001. end
  1002. it "should check unescaped unicode query" do
  1003. event = @events[4]
  1004. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1005. end
  1006. it "should check properly escaped unicode url" do
  1007. event = @events[5]
  1008. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1009. end
  1010. it "should check properly escaped unicode query" do
  1011. event = @events[6]
  1012. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1013. end
  1014. end
  1015. end
  1016. end