twitter_search_agent.rb 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  module Agents
    # Agent that runs a Twitter search on a schedule and emits one event per
    # matching tweet. It never consumes events from other agents.
    #
    # State kept in +memory+:
    # * 'since_id' - the highest tweet id emitted so far; it is sent back to
    #   the search API on the next run so already-seen tweets are skipped.
    class TwitterSearchAgent < Agent
      include TwitterConcern

      cannot_receive_events!

      description <<-MD
        The Twitter Search Agent performs and emits the results of a specified Twitter search.

        #{twitter_dependencies_missing if dependencies_missing?}

        If you want realtime data from Twitter about frequent terms, you should definitely use the Twitter Stream Agent instead.

        To be able to use this Agent you need to authenticate with Twitter in the [Services](/services) section first.

        You must provide the desired `search`.

        Set `result_type` to specify which [type of search results](https://dev.twitter.com/rest/reference/get/search/tweets) you would prefer to receive. Options are "mixed", "recent", and "popular". (default: `mixed`)

        Set `max_results` to limit the amount of results to retrieve per run (default: `500`). The API rate limit is ~18,000 per 15 minutes. [Click here to learn more about rate limits](https://dev.twitter.com/rest/public/rate-limiting).

        Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.

        Set `starting_at` to the date/time (eg. `Mon Jun 02 00:38:12 +0000 2014`) you want to start receiving tweets from (default: agent's `created_at`)
      MD

      event_description <<-MD
        Events are the raw JSON provided by the [Twitter API](https://dev.twitter.com/rest/reference/get/search/tweets). Should look something like:

            {
               ... every Tweet field, including ...
              "text": "something",
              "user": {
                "name": "Mr. Someone",
                "screen_name": "Someone",
                "location": "Vancouver BC Canada",
                "description":  "...",
                "followers_count": 486,
                "friends_count": 1983,
                "created_at": "Mon Aug 29 23:38:14 +0000 2011",
                "time_zone": "Pacific Time (US & Canada)",
                "statuses_count": 3807,
                "lang": "en"
              },
              "retweet_count": 0,
              "entities": ...
              "lang": "en"
            }
      MD

      default_schedule "every_1h"

      # True when an event was created within the configured
      # `expected_update_period_in_days` window (as judged by
      # `event_created_within?`) and `recent_error_logs?` reports none.
      def working?
        event_created_within?(interpolated['expected_update_period_in_days']) && !recent_error_logs?
      end

      # Options a freshly created agent starts with.
      def default_options
        {
          'search' => 'freebandnames',
          'expected_update_period_in_days' => '2'
        }
      end

      # Adds validation errors when `search` or
      # `expected_update_period_in_days` is missing, or when a supplied
      # `starting_at` cannot be parsed by Time.parse.
      def validate_options
        errors.add(:base, "search is required") unless options['search'].present?
        unless options['expected_update_period_in_days'].present?
          errors.add(:base, "expected_update_period_in_days is required")
        end
        return unless options[:starting_at].present?

        begin
          Time.parse(interpolated[:starting_at])
        rescue StandardError
          # Any parse failure (bad format, non-string value) is reported the
          # same way; the underlying exception is intentionally not surfaced.
          errors.add(:base, "Error parsing starting_at")
        end
      end

      # Earliest tweet timestamp this agent will emit.
      #
      # Returns the parsed `starting_at` option, or the agent's `created_at`
      # when the option is blank or cannot be parsed.
      def starting_at
        return created_at unless interpolated[:starting_at].present?

        begin
          Time.parse(interpolated[:starting_at])
        rescue StandardError
          # Best-effort: an unparseable value silently falls back to
          # created_at (validate_options is where the user gets told).
          created_at
        end
      end

      # Maximum number of tweets to take from a single search.
      #
      # Returns the `max_results` option as an Integer, 500 when it is blank.
      def max_results
        (interpolated['max_results'].presence || 500).to_i
      end

      # Runs the search and emits an event for every returned tweet whose
      # `created_at` is at or after `starting_at`. For each emitted tweet the
      # highest id seen is recorded in memory['since_id'], which is passed as
      # `since_id` on the next run. Persists the agent via `save!` afterwards.
      def check
        opts = { include_entities: true, tweet_mode: 'extended' }
        opts[:result_type] = interpolated[:result_type] if interpolated[:result_type].present?
        opts[:since_id] = memory['since_id'] unless memory['since_id'].nil?

        # http://www.rubydoc.info/gems/twitter/Twitter/REST/Search
        tweets = twitter.search(interpolated['search'], opts).take(max_results)

        # Resolve the cutoff once: starting_at re-interpolates and re-parses
        # on every call, and its inputs do not change while we iterate.
        cutoff = starting_at

        tweets.each do |tweet|
          next if tweet.created_at < cutoff

          memory['since_id'] = tweet.id if !memory['since_id'] || tweet.id > memory['since_id']
          create_event payload: tweet.attrs
        end

        save!
      end
    end
  end