imap_folder_agent.rb 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  6. cannot_receive_events!
  7. can_dry_run!
  8. default_schedule "every_30m"
  9. description <<-MD
  10. The Imap Folder Agent checks an IMAP server in specified folders and creates Events based on new mails found since the last run. In the first visit to a folder, this agent only checks for the initial status and does not create events.
  11. Specify an IMAP server to connect with `host`, and set `ssl` to true if the server supports IMAP over SSL. Specify `port` if you need to connect to a port other than standard (143 or 993 depending on the `ssl` value).
  12. Specify login credentials in `username` and `password`.
  13. List the names of folders to check in `folders`.
  14. To narrow mails by conditions, build a `conditions` hash with the following keys:
  15. - `subject`
  16. - `body`
  17. Specify a regular expression to match against the decoded subject/body of each mail.
  18. Use the `(?i)` directive for case-insensitive search. For example, a pattern `(?i)alert` will match "alert", "Alert" or "ALERT". You can also make only a part of a pattern work case-insensitively: `Re: (?i:alert)` will match either "Re: Alert" or "Re: alert", but not "RE: alert".
  19. When a mail has multiple non-attachment text parts, they are prioritized according to the `mime_types` option (which see below) and the first part that matches a "body" pattern, if specified, will be chosen as the "body" value in a created event.
  20. Named captures will appear in the "matches" hash in a created event.
  21. - `from`, `to`, `cc`
  22. Specify a shell glob pattern string that is matched against mail addresses extracted from the corresponding header values of each mail.
  23. Patterns match addresses in case insensitive manner.
  24. Multiple pattern strings can be specified in an array, in which case a mail is selected if any of the patterns matches. (i.e. patterns are OR'd)
  25. - `mime_types`
  26. Specify an array of MIME types to tell which non-attachment part of a mail among its text/* parts should be used as mail body. The default value is `['text/plain', 'text/enriched', 'text/html']`.
  27. - `is_unread`
  28. Setting this to true or false means only mails that are marked as unread or read, respectively, are selected.
  29. If this key is unspecified or set to null, it is ignored.
  30. - `has_attachment`
  31. Setting this to true or false means only mails that do or do not have an attachment, respectively, are selected.
  32. If this key is unspecified or set to null, it is ignored.
  33. Set `mark_as_read` to true to mark found mails as read.
  34. Set `include_raw_mail` to true to add to each created event a raw unencoded mail text, in the so-called "RFC822" format defined in the [IMAP4 standard](https://tools.ietf.org/html/rfc3501).
  35. Each agent instance memorizes the highest UID of mails that are found in the last run for each watched folder, so even if you change a set of conditions so that it matches mails that are missed previously, or if you alter the flag status of already found mails, they will not show up as new events.
  36. Also, in order to avoid duplicated notification it keeps a list of Message-Id's of 100 most recent mails, so if multiple mails of the same Message-Id are found, you will only see one event out of them.
  37. MD
  38. event_description <<-MD
  39. Events look like this:
  40. {
  41. "message_id": "...(Message-Id without angle brackets)...",
  42. "folder": "INBOX",
  43. "subject": "...",
  44. "from": "Nanashi <nanashi.gombeh@example.jp>",
  45. "to": ["Jane <jane.doe@example.com>"],
  46. "cc": [],
  47. "date": "2014-05-10T03:47:20+0900",
  48. "mime_type": "text/plain",
  49. "body": "Hello,\n\n...",
  50. "matches": {
  51. }
  52. }
  53. Additionally, "raw_mail" will be included if the `include_raw_mail` option is set.
  54. MD
  # Number of Message-Ids the Notified cache is sized by.
  IDCACHE_SIZE = 100

  # Flags handed to File.fnmatch? for address glob matching. Each flag is
  # OR'd in only when this Ruby's File actually defines the constant.
  FNM_FLAGS = %i[FNM_CASEFOLD FNM_EXTGLOB]
    .select { |name| File.const_defined?(name) }
    .map { |name| File.const_get(name) }
    .inject(0) { |acc, bit| acc | bit }
  # The agent counts as working when an event was created within the
  # configured `expected_update_period_in_days` and there are no recent
  # error logs. The error-log check is only reached when the first check
  # passes.
  def working?
    period = interpolated['expected_update_period_in_days']
    recently_active = event_created_within?(period)
    recently_active && !recent_error_logs?
  end
  # Options a freshly created agent starts with: a Gmail IMAP endpoint over
  # SSL, placeholder credentials, the INBOX folder and no conditions.
  def default_options
    defaults = {}
    defaults['expected_update_period_in_days'] = "1"
    defaults['host'] = 'imap.gmail.com'
    defaults['ssl'] = true
    defaults['username'] = 'your.account'
    defaults['password'] = 'your.password'
    defaults['folders'] = ['INBOX']
    defaults['conditions'] = {}
    defaults
  end
  # Validates the agent options, adding a message to `errors` on :base for
  # every problem found. Checks run in this order: required string options,
  # port, boolean flags, mime_types, folders, conditions, and finally
  # expected_update_period_in_days.
  def validate_options
    %w[host username password].each do |name|
      next if String === options[name]

      errors.add(:base, '%s is required and must be a string' % name)
    end

    if options['port'].present? && !is_positive_integer?(options['port'])
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read include_raw_mail].each do |name|
      next unless options[name].present?
      next unless boolify(options[name]).nil?

      errors.add(:base, '%s must be a boolean value' % name)
    end

    # mime_types is optional; when given it must be a non-empty array of
    # "text/..." strings.
    type_list = options['mime_types']
    if nil === type_list
      # not configured: nothing to validate
    elsif Array === type_list
      text_only = type_list.all? { |entry| String === entry && entry.start_with?('text/') }
      errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
      errors.add(:base, 'mime_types should not be empty') if type_list.empty?
    else
      errors.add(:base, 'mime_types must be an array')
    end

    # folders is optional as well; when given it must be a non-empty array
    # of strings.
    folder_list = options['folders']
    if nil === folder_list
      # not configured: nothing to validate
    elsif Array === folder_list
      strings_only = folder_list.all? { |entry| String === entry }
      errors.add(:base, 'folders may only contain strings') unless strings_only
      errors.add(:base, 'folders should not be empty') if folder_list.empty?
    else
      errors.add(:base, 'folders must be an array')
    end

    rules = options['conditions']
    if Hash === rules
      rules.each do |field, expected|
        # Blank values are not validated at all.
        next unless expected.present?

        case field
        when 'subject', 'body'
          if String === expected
            begin
              Regexp.new(expected)
            rescue StandardError
              errors.add(:base, 'conditions.%s contains an invalid regexp' % field)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % field)
          end
        when 'from', 'to', 'cc'
          Array(expected).each do |glob|
            if String === glob
              begin
                # Matching against an empty string is only a probe to see
                # whether the pattern itself raises.
                glob_match?(glob, '')
              rescue StandardError
                errors.add(:base, 'conditions.%s contains an invalid glob pattern' % field)
              end
            else
              errors.add(:base, 'conditions.%s contains a non-string object' % field)
            end
          end
        when 'is_unread', 'has_attachment'
          unless [true, false].include?(boolify(expected))
            errors.add(:base, 'conditions.%s must be a boolean value or null' % field)
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Walks every mail yielded by each_unread_mail and creates one event per
  # mail that satisfies all entries of the `conditions` option.
  #
  # Condition handling:
  # - `subject` / `body`: regexp match against the scrubbed text; named
  #   captures are collected into the event's "matches" hash. For `body`,
  #   the first body part that matches becomes the event body.
  # - `from` / `to` / `cc`: any address of that header must match any of
  #   the glob patterns.
  # - `has_attachment`: compared with the mail's attachment status. A value
  #   that boolify maps to nil (e.g. null) is ignored rather than rejecting
  #   every mail, which is what the agent description documents.
  # - `is_unread`: always passes here because each_unread_mail has already
  #   filtered on it.
  # - any other key is logged and ignored.
  #
  # Mails whose Message-Id is already in `notified` are logged and skipped.
  # When `mark_as_read` is set, every mail that passed the conditions is
  # marked as read (except in a dry run), whether or not an event was
  # emitted for it.
  def check
    each_unread_mail { |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      interpolated['conditions'].all? { |key, value|
        case key
        when 'subject'
          value.present? or next true
          re = Regexp.new(value)
          if m = re.match(mail.scrubbed(:subject))
            m.names.each { |name|
              matches[name] = m[name]
            }
            true
          else
            false
          end
        when 'body'
          value.present? or next true
          re = Regexp.new(value)
          # The block result is the matching part itself (nil when no part
          # matches), which doubles as the truth value for all?.
          matched_part = body_parts.find { |part|
            if m = re.match(part.scrubbed(:decoded))
              m.names.each { |name|
                matches[name] = m[name]
              }
              true
            else
              false
            end
          }
        when 'from', 'to', 'cc'
          value.present? or next true
          begin
            # Mail::Field really needs to define respond_to_missing?
            # so we could use try(:addresses) here.
            addresses = mail.header[key].addresses
          rescue NoMethodError
            next false
          end
          addresses.any? { |address|
            Array(value).any? { |pattern|
              glob_match?(pattern, address)
            }
          }
        when 'has_attachment'
          expected = boolify(value)
          # nil means "not specified": do not filter on attachments at all.
          expected.nil? || expected == mail.has_attachment?
        when 'is_unread'
          true # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      } or next

      if notified.include?(message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        # Without a body condition, fall back to the highest-priority part.
        matched_part ||= body_parts.first

        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.scrubbed(:decoded)
        else
          mime_type = 'text/plain'
          body = ''
        end

        log 'Emitting an event for mail: %s' % message_id

        payload = {
          'message_id' => message_id,
          'folder' => mail.folder,
          'subject' => mail.scrubbed(:subject),
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }

        if boolify(interpolated['include_raw_mail'])
          payload['raw_mail'] = mail.raw_mail
        end

        create_event payload: payload

        notified << message_id if message_id
      end

      if boolify(interpolated['mark_as_read'])
        log 'Marking as read'
        mail.mark_as_read unless dry_run?
      end
    }
  end
  # Connects to the IMAP server, logs in and visits each configured folder,
  # yielding `(mail, notified)` for every mail whose UID is greater than the
  # last seen UID recorded for the folder's UIDVALIDITY and whose read state
  # matches the `is_unread` condition (when one is set).
  #
  # On the first visit to a folder (no last seen UID recorded) only the UID
  # of the last existing mail is recorded and nothing is yielded.
  #
  # After all folders are processed the notified list and the last seen
  # UIDs are stored and the agent is saved. 'Connection closed' is logged on
  # the way out, whether or not an error was raised.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
    use_ssl = boolify(ssl)
    port_number = port.present? ? Integer(port) : nil

    log "Connecting to #{host}#{':%d' % port_number if port_number}#{' via SSL' if use_ssl}"

    Client.open(host, port: port_number, ssl: use_ssl) do |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])

      # `previous` maps uidvalidity => last seen UID from the prior run;
      # `current` collects the values to store for the next run.
      previous = lastseen
      current = make_seen

      # Message-ids of the most recently notified mails.
      notified_ids = notified

      interpolated['folders'].each do |folder|
        log "Selecting the folder: %s" % folder
        imap.select(folder)

        validity = imap.uidvalidity
        last_uid = previous[validity]

        if last_uid.nil?
          total = imap.responses['EXISTS'].last
          log "Recording the initial status: %s" % pluralize(total, 'existing mail')
          current[validity] = imap.fetch(total, 'UID').last.attr['UID'] if total > 0
          next
        end

        current[validity] = last_uid
        want_unread = boolify(interpolated['conditions']['is_unread'])

        fresh_uids = []
        imap.uid_fetch((last_uid + 1)..-1, 'FLAGS').each do |data|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          current[validity] = uid
          # Skip anything the server returns that is not actually newer.
          next if uid <= last_uid

          if want_unread.nil? || want_unread == !flags.include?(:Seen)
            fresh_uids << uid
          end
        end

        noun =
          if want_unread == true
            'new unread mail'
          elsif want_unread == false
            'new read mail'
          else
            'new mail'
          end
        log pluralize(fresh_uids.size, noun)

        next if fresh_uids.empty?

        imap.uid_fetch_mails(fresh_uids).each do |mail|
          yield mail, notified_ids
        end
      end

      self.notified = notified_ids
      self.lastseen = current
      save!
    end
  ensure
    log 'Connection closed'
  end
  # MIME types used to pick and order body parts: the `mime_types` option
  # when set, otherwise text/plain, text/enriched and text/html.
  def mime_types
    configured = interpolated['mime_types']
    return configured if configured

    %w[text/plain text/enriched text/html]
  end
  # Returns a Seen built from the 'lastseen' entry of the agent memory.
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end

  # Stores `value` as the new 'lastseen' memory entry and removes the
  # obsolete 'seen' key.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory['lastseen'] = value
  end

  # Returns an empty Seen to collect the UIDs observed during a run.
  def make_seen
    Seen.new
  end
  # Returns a Notified built from the 'notified' entry of the agent memory.
  def notified
    stored = memory['notified']
    Notified.new(stored)
  end

  # Stores `value` as the new 'notified' memory entry.
  def notified=(value)
    memory['notified'] = value
  end
  330. private
  # Returns true when `value` converts with Integer() to a number strictly
  # greater than zero, and false otherwise, including when the conversion
  # raises (nil, non-numeric strings, ...).
  #
  # Zero is rejected: the callers report "must be a positive integer", and
  # the previous `>= 0` comparison let 0 through.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue
    false
  end
  # Tests `value` against the shell glob `pattern` using the FNM_FLAGS
  # matching flags.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats a count followed by the noun, where the noun is inflected by
  # String#pluralize(count).
  def pluralize(count, noun)
    inflected = noun.pluralize(count)
    format("%d %s", count, inflected)
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, never returns nil from fetch calls, and can wrap fetched
  # headers in Message objects.
  class Client < ::Net::IMAP
    class << self
      # Creates a client, yields it, and disconnects it afterwards, also
      # when the block raises. Returns the block's value.
      #
      # Keyword options (e.g. `port:`, `ssl:`) are forwarded as keywords
      # rather than being collapsed into a positional Hash, so they reach
      # an initializer that declares keyword parameters under Ruby 3's
      # argument separation.
      def open(host, *args, **options)
        imap = new(host, *args, **options)
        yield imap
      ensure
        # imap is nil when the constructor itself raised.
        imap.disconnect unless imap.nil?
      end
    end

    # UIDVALIDITY reported for the most recently selected folder.
    attr_reader :uidvalidity

    # Selects `folder`, remembering its name and the last UIDVALIDITY
    # response. Returns whatever Net::IMAP#select returns.
    def select(folder)
      @folder = folder
      ret = super(folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Same as Net::IMAP#fetch, but returns [] where the parent returns nil.
    def fetch(*args)
      super || []
    end

    # Same as Net::IMAP#uid_fetch, but returns [] where the parent returns
    # nil.
    def uid_fetch(*args)
      super || []
    end

    # Fetches the RFC822 headers for the UIDs in `set` and wraps each result
    # in a Message tagged with the current folder and UIDVALIDITY.
    def uid_fetch_mails(set)
      uid_fetch(set, 'RFC822.HEADER').map { |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      }
    end
  end
  # Hash of { uidvalidity => uid } whose values only ever grow: assigning a
  # UID smaller than the stored one is ignored.
  class Seen < Hash
    # Builds the hash from `hash` when given, converting each key with to_i
    # (keys deserialized from JSON are strings).
    def initialize(hash = nil)
      super()
      return unless hash

      hash.each do |key, uid|
        self[key.to_i] = uid
      end
    end

    # Stores `uid` unless a larger UID is already recorded for
    # `uidvalidity`.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      return unless current.nil? || current <= uid

      super
    end
  end
  # Array of recently notified Message-Ids that holds at most IDCACHE_SIZE
  # entries after an append.
  class Notified < Array
    # Starts empty, or with the contents of `array` when given.
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends `value`, first dropping the oldest entries so the result
    # holds at most IDCACHE_SIZE items. The previous code trimmed only when
    # the size already exceeded the limit and did so before appending, so
    # the list settled at IDCACHE_SIZE + 1 entries. Returns self.
    def <<(value)
      overflow = size - IDCACHE_SIZE + 1
      shift(overflow) if overflow > 0
      super
    end
  end
  # A mail fetched by header. It delegates to a Mail object parsed from the
  # RFC822 header and fetches further data (BODYSTRUCTURE, full text) from
  # the IMAP client on demand.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # Adds `scrubbed`, which memoizes the result of calling `method` on the
    # receiver and passing it through `scrub`, with invalid byte sequences
    # replaced by their hex dump in angle brackets.
    module Scrubbed
      def scrubbed(method)
        (@scrubbed ||= {})[method.to_sym] ||=
          __send__(method).try(:scrub) { |bytes| "<#{bytes.unpack('H*')[0]}>" }
      end
    end

    include Scrubbed

    # client     - the IMAP client used for follow-up fetches.
    # fetch_data - fetch result whose `attr` holds 'UID' and
    #              'RFC822.HEADER'.
    # props      - extra values stored as instance variables of the same
    #              name (the visible caller passes folder: and
    #              uidvalidity:).
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the BODYSTRUCTURE of this mail indicates an attachment.
    #
    # The answer is cached after the first call. A `defined?` guard is used
    # instead of `||=` because `||=` never caches a false result and would
    # repeat the server fetch on every call.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment =
        if data
          struct_has_attachment?(data.attr['BODYSTRUCTURE'])
        else
          false
        end
    end

    # Full text of the mail fetched with BODY.PEEK[], or '' when the server
    # returns nothing for this UID. Cached after the first call.
    def raw_mail
      @raw_mail ||=
        if data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          data.attr['BODY[]']
        else
          ''
        end
    end

    # The whole mail parsed from raw_mail, cached after the first call.
    def fetch
      @parsed ||= Mail.read_from_string(raw_mail)
    end

    # Returns the non-attachment text parts whose MIME type is listed in
    # `mime_types`. For multipart mails the parts are first sorted with
    # `mime_types` as the sort order. Every returned part is extended with
    # Scrubbed.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      if mail.multipart?
        mail.body.set_sort_order(mime_types)
        mail.body.sort_parts!
        mail.all_parts
      else
        [mail]
      end.select { |part|
        if part.multipart? || part.attachment? || !part.text? ||
           !mime_types.include?(part.mime_type)
          false
        else
          part.extend(Scrubbed)
          true
        end
      }
    end

    # Adds the :Seen flag to this mail on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # True when `struct` is multipart and either its subtype is 'MIXED' or
    # any of its parts satisfies the same test.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  465. end
  466. end