# Source file: twitter_search_agent.rb (3.3 KB as originally published)

  module Agents
    # Agent that runs a Twitter search on its schedule and emits one event per
    # matching tweet.
    #
    # Options read by this class: `search`, `result_type`, `max_results`,
    # `expected_update_period_in_days` and `starting_at`. The highest tweet ID
    # emitted so far is kept in `memory[:since_id]` and sent as `since_id` with
    # the next search.
    #
    # NOTE(review): `twitter`, `format_tweet`, `tweet_event_description`,
    # `twitter_dependencies_missing` and `dependencies_missing?` are not defined
    # here; presumably they come from TwitterConcern / Agent -- confirm.
    class TwitterSearchAgent < Agent
      include TwitterConcern

      can_dry_run!
      cannot_receive_events!

      # Paragraphs are separated by blank lines so that Markdown renders them
      # as distinct paragraphs rather than one run-on block.
      description <<~MD
        The Twitter Search Agent performs and emits the results of a specified Twitter search.

        #{twitter_dependencies_missing if dependencies_missing?}

        If you want realtime data from Twitter about frequent terms, you should definitely use the Twitter Stream Agent instead.

        To be able to use this Agent you need to authenticate with Twitter in the [Services](/services) section first.

        You must provide the desired `search`.

        Set `result_type` to specify which [type of search results](https://dev.twitter.com/rest/reference/get/search/tweets) you would prefer to receive. Options are "mixed", "recent", and "popular". (default: `mixed`)

        Set `max_results` to limit the amount of results to retrieve per run (default: `500`). The API rate limit is ~18,000 per 15 minutes. [Click here to learn more about rate limits](https://dev.twitter.com/rest/public/rate-limiting).

        Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.

        Set `starting_at` to the date/time (eg. `Mon Jun 02 00:38:12 +0000 2014`) you want to start receiving tweets from (default: agent's `created_at`)
      MD

      event_description <<~MD
        Events are the raw JSON provided by the [Twitter API v1.1](https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets) with slight modifications. They should look something like this:

        #{tweet_event_description('full_text')}
      MD

      default_schedule "every_1h"

      # Health check: an event must have been created within
      # `expected_update_period_in_days` and there must be no recent error logs.
      #
      # @return [Boolean]
      def working?
        event_created_within?(interpolated[:expected_update_period_in_days]) && !recent_error_logs?
      end

      # Options used to pre-fill a newly created agent.
      #
      # @return [Hash{String => String}]
      def default_options
        {
          'search' => 'freebandnames',
          'expected_update_period_in_days' => '2'
        }
      end

      # Adds a validation error for each missing required option and for a
      # `starting_at` value that `Time.parse` cannot read.
      #
      # @return [void]
      def validate_options
        errors.add(:base, "search is required") if options[:search].blank?

        if options[:expected_update_period_in_days].blank?
          errors.add(:base, "expected_update_period_in_days is required")
        end

        return if options[:starting_at].blank?

        begin
          # Parse the interpolated value: that is what #starting_at uses at run time.
          Time.parse(interpolated[:starting_at])
        rescue StandardError
          errors.add(:base, "Error parsing starting_at")
        end
      end

      # Earliest tweet timestamp this agent will emit.
      #
      # Falls back to the agent's `created_at`, and finally to the current time
      # when `created_at` is nil (for dry-running).
      #
      # @return [Time]
      def starting_at
        configured = interpolated[:starting_at].presence

        parsed =
          if configured
            begin
              Time.parse(configured)
            rescue StandardError
              # Deliberately best-effort: #validate_options already reports an
              # unparsable value, so a run simply uses the fallbacks below.
              nil
            end
          end

        parsed || created_at || Time.now
      end

      # Maximum number of tweets to take from one search.
      #
      # @return [Integer] the `max_results` option converted with `to_i`, or
      #   500 when the option is blank
      def max_results
        (interpolated[:max_results].presence || 500).to_i
      end

      # Runs the search and creates an event for every returned tweet that is
      # not older than #starting_at, recording the highest emitted tweet ID in
      # `memory[:since_id]`.
      #
      # @return [Object] whatever `save!` returns
      def check
        # Blank `result_type` / `since_id` values become nil and are dropped by
        # `compact`, so they are not sent to the API at all.
        opts = {
          include_entities: true,
          tweet_mode: 'extended',
          result_type: interpolated[:result_type].presence,
          since_id: memory[:since_id].presence,
        }.compact

        # http://www.rubydoc.info/gems/twitter/Twitter/REST/Search
        tweets = twitter.search(interpolated[:search], opts).take(max_results)

        # Resolve the cutoff once: it does not change during a run, and this
        # avoids re-parsing the option (or re-reading the clock) per tweet.
        earliest = starting_at

        tweets.each do |tweet|
          next unless tweet.created_at >= earliest

          # The splat yields nothing when no since_id is stored yet, so `max`
          # then sees only the current tweet's ID.
          memory[:since_id] = [tweet.id, *memory[:since_id]].max
          create_event(payload: format_tweet(tweet))
        end

        save!
      end
    end
  end