imap_folder_agent.rb 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  6. cannot_receive_events!
  7. can_dry_run!
  8. default_schedule "every_30m"
  9. description <<-MD
  10. The ImapFolderAgent checks an IMAP server in specified folders
  11. and creates Events based on new mails found since the last run.
  12. In the first visit to a folder, this agent only checks for the
  13. initial status and does not create events.
  14. Specify an IMAP server to connect with `host`, and set `ssl` to
  15. true if the server supports IMAP over SSL. Specify `port` if
  16. you need to connect to a port other than standard (143 or 993
  17. depending on the `ssl` value).
  18. Specify login credentials in `username` and `password`.
  19. List the names of folders to check in `folders`.
  20. To narrow mails by conditions, build a `conditions` hash with
  21. the following keys:
  22. - "subject"
  23. - "body"
  24. Specify a regular expression to match against the decoded
  25. subject/body of each mail.
  26. Use the `(?i)` directive for case-insensitive search. For
  27. example, a pattern `(?i)alert` will match "alert", "Alert"
  28. or "ALERT". You can also make only a part of a pattern to
  29. work case-insensitively: `Re: (?i:alert)` will match either
  30. "Re: Alert" or "Re: alert", but not "RE: alert".
  31. When a mail has multiple non-attachment text parts, they are
  32. prioritized according to the `mime_types` option (which see
  33. below) and the first part that matches a "body" pattern, if
  34. specified, will be chosen as the "body" value in a created
  35. event.
  36. Named captures will appear in the "matches" hash in a
  37. created event.
  38. - "from", "to", "cc"
  39. Specify a shell glob pattern string that is matched against
  40. mail addresses extracted from the corresponding header
  41. values of each mail.
  42. Patterns match addresses in a case-insensitive manner.
  43. Multiple pattern strings can be specified in an array, in
  44. which case a mail is selected if any of the patterns
  45. matches. (i.e. patterns are OR'd)
  46. - "mime_types"
  47. Specify an array of MIME types to tell which non-attachment
  48. part of a mail among its text/* parts should be used as mail
  49. body. The default value is `['text/plain', 'text/enriched',
  50. 'text/html']`.
  51. - "is_unread"
  52. Setting this to true or false means only mails that are
  53. marked as unread or read, respectively, are selected.
  54. If this key is unspecified or set to null, it is ignored.
  55. - "has_attachment"
  56. Setting this to true or false means only mails that do or do
  57. not have an attachment, respectively, are selected.
  58. If this key is unspecified or set to null, it is ignored.
  59. Set `mark_as_read` to true to mark found mails as read.
  60. Each agent instance memorizes the highest UID of mails that are
  61. found in the last run for each watched folder, so even if you
  62. change a set of conditions so that it matches mails that are
  63. missed previously, or if you alter the flag status of already
  64. found mails, they will not show up as new events.
  65. Also, in order to avoid duplicated notification it keeps a list
  66. of Message-Id's of 100 most recent mails, so if multiple mails
  67. of the same Message-Id are found, you will only see one event
  68. out of them.
  69. MD
  70. event_description <<-MD
  71. Events look like this:
  72. {
  73. "folder": "INBOX",
  74. "subject": "...",
  75. "from": "Nanashi <nanashi.gombeh@example.jp>",
  76. "to": ["Jane <jane.doe@example.com>"],
  77. "cc": [],
  78. "date": "2014-05-10T03:47:20+0900",
  79. "mime_type": "text/plain",
  80. "body": "Hello,\n\n...",
  81. "matches": {
  82. }
  83. }
  84. MD
  # Number of Message-Ids the Notified cache is trimmed to.
  IDCACHE_SIZE = 100

  # Flags handed to File.fnmatch? when matching address globs. A flag is
  # OR'd in only when this Ruby's File actually defines the constant.
  FNM_FLAGS = %i[FNM_CASEFOLD FNM_EXTGLOB]
    .select { |name| File.const_defined?(name) }
    .map { |name| File.const_get(name) }
    .reduce(0) { |acc, bit| acc | bit }
  # Health check: truthy when an event was created within the configured
  # `expected_update_period_in_days` and there are no recent error logs.
  # The error-log lookup is skipped when no recent event exists.
  def working?
    period = interpolated['expected_update_period_in_days']
    recently_active = event_created_within?(period)
    recently_active && !recent_error_logs?
  end
  # Option set a newly created agent starts with. A fresh hash is built on
  # every call, in the same key order as the options are documented.
  def default_options
    connection = { 'host' => 'imap.gmail.com', 'ssl' => true }
    credentials = { 'username' => 'your.account', 'password' => 'your.password' }

    { 'expected_update_period_in_days' => "1" }
      .merge(connection)
      .merge(credentials)
      .merge('folders' => ['INBOX'], 'conditions' => {})
  end
  # Validates the raw `options` hash and records one message on `errors`
  # (under :base) per problem found. Checks run in a fixed order:
  # credentials, port, boolean switches, mime_types, folders, conditions,
  # and finally expected_update_period_in_days.
  def validate_options
    %w[host username password].each do |key|
      next if options[key].is_a?(String)
      errors.add(:base, '%s is required and must be a string' % key)
    end

    port = options['port']
    if port.present? && !is_positive_integer?(port)
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read].each do |key|
      flag = options[key]
      next unless flag.present? && boolify(flag).nil?
      errors.add(:base, '%s must be a boolean value' % key)
    end

    case (types = options['mime_types'])
    when nil
      # Optional; nothing to validate.
    when Array
      unless types.all? { |type| type.is_a?(String) && type.start_with?('text/') }
        errors.add(:base, 'mime_types may only contain strings that match "text/*".')
      end
      errors.add(:base, 'mime_types should not be empty') if types.empty?
    else
      errors.add(:base, 'mime_types must be an array')
    end

    case (names = options['folders'])
    when nil
      # Optional; nothing to validate.
    when Array
      unless names.all? { |name| name.is_a?(String) }
        errors.add(:base, 'folders may only contain strings')
      end
      errors.add(:base, 'folders should not be empty') if names.empty?
    else
      errors.add(:base, 'folders must be an array')
    end

    case (conditions = options['conditions'])
    when Hash
      conditions.each do |key, value|
        # Blank values are not validated.
        next unless value.present?

        case key
        when 'subject', 'body'
          if value.is_a?(String)
            begin
              # Compiled only to find out whether the pattern is well-formed.
              Regexp.new(value)
            rescue
              errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
          end
        when 'from', 'to', 'cc'
          Array(value).each do |pattern|
            unless pattern.is_a?(String)
              errors.add(:base, 'conditions.%s contains a non-string object' % key)
              next
            end
            begin
              # Trial match against an empty string to surface pattern errors.
              glob_match?(pattern, '')
            rescue
              errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
            end
          end
        when 'is_unread', 'has_attachment'
          unless [true, false].include?(boolify(value))
            errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Walks the mails yielded by each_unread_mail, keeps those that satisfy
  # every entry of the `conditions` option, and creates one event per
  # selected mail whose Message-Id has not been notified yet.
  #
  # Condition handling:
  # - "subject"/"body": regexp match; named captures go to "matches".
  # - "from"/"to"/"cc": any address of the header matches any glob pattern.
  #   A mail that lacks the header does not match.
  # - "has_attachment": compared with the mail's attachment status; a value
  #   that does not boolify to true/false (e.g. null) is ignored, as the
  #   agent description promises.
  # - "is_unread": already applied by each_unread_mail.
  # Blank values for the regexp and glob conditions are ignored.
  #
  # When `mark_as_read` is set, every selected mail is flagged as read
  # (skipped in dry-run mode), including mails suppressed as duplicates.
  def check
    each_unread_mail do |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}
      # Copies the named captures of a successful regexp match into `matches`.
      remember = lambda { |m| m.names.each { |name| matches[name] = m[name] } }

      selected = interpolated['conditions'].all? { |key, value|
        case key
        when 'subject'
          next true unless value.present?
          m = Regexp.new(value).match(mail.scrubbed(:subject))
          remember.call(m) if m
          !m.nil?
        when 'body'
          next true unless value.present?
          re = Regexp.new(value)
          # The first part that matches becomes the event body.
          matched_part = body_parts.find { |part|
            m = re.match(part.scrubbed(:decoded))
            remember.call(m) if m
            m
          }
        when 'from', 'to', 'cc'
          next true unless value.present?
          # NOTE(review): assumes header[key] is nil when the mail has no
          # such header — confirm against the mail library.
          field = mail.header[key]
          next false if field.nil?
          patterns = Array(value)
          field.addresses.any? { |address|
            patterns.any? { |pattern| glob_match?(pattern, address) }
          }
        when 'has_attachment'
          wanted = boolify(value)
          # nil means "unspecified": do not filter on attachments at all.
          wanted.nil? || wanted == mail.has_attachment?
        when 'is_unread'
          true # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      }
      next unless selected

      if notified.include?(message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        # Without a "body" condition, fall back to the first body part.
        matched_part ||= body_parts.first
        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.scrubbed(:decoded)
        else
          mime_type = 'text/plain'
          body = ''
        end

        log 'Emitting an event for mail: %s' % message_id
        create_event :payload => {
          'folder' => mail.folder,
          'subject' => mail.scrubbed(:subject),
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }
        # Mails without a Message-Id cannot be deduplicated.
        notified << message_id if message_id
      end

      if boolify(interpolated['mark_as_read'])
        log 'Marking as read'
        mail.mark_as_read unless dry_run?
      end
    end
  end
  # Connects to the configured IMAP server, visits every folder listed in
  # the `folders` option and yields `(mail, notified)` for each mail whose
  # UID is above the one recorded for that folder's UIDVALIDITY in the
  # previous run. `mail` is whatever Client#uid_fetch_mails returns and
  # `notified` is the Message-Id cache shared across all folders.
  #
  # The first visit to a folder (no UID recorded for its UIDVALIDITY) only
  # records the current highest UID and yields nothing.
  #
  # The updated `notified` and `lastseen` values are stored and saved only
  # after every folder has been processed inside the block; an exception
  # raised earlier skips that step. 'Connection closed' is logged from the
  # `ensure` clause, so it is emitted whether or not the run succeeded.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
    ssl = boolify(ssl)
    # nil when no port is configured, so the client picks its own default.
    port = (Integer(port) if port.present?)
    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
    Client.open(host, port: port, ssl: ssl) { |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])
      # 'lastseen' keeps a hash of { uidvalidity => lastseenuid, ... }
      # 'seen' starts empty and collects the values to persist for this run.
      lastseen, seen = self.lastseen, self.make_seen
      # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
      # most recent notified mails.
      notified = self.notified
      interpolated['folders'].each { |folder|
        log "Selecting the folder: %s" % folder
        imap.select(folder)
        uidvalidity = imap.uidvalidity
        lastseenuid = lastseen[uidvalidity]
        if lastseenuid.nil?
          # First visit: remember the UID of the last existing mail and move on.
          maxseq = imap.responses['EXISTS'].last
          log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')
          if maxseq > 0
            seen[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID']
          end
          next
        end
        # Carry the previous value over in case no newer mail is found.
        seen[uidvalidity] = lastseenuid
        is_unread = boolify(interpolated['conditions']['is_unread'])
        uids = imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS').
               each_with_object([]) { |data, ret|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          # Seen#[]= only ever raises the stored UID, so this tracks the maximum.
          seen[uidvalidity] = uid
          # NOTE(review): presumably guards against the server returning a
          # mail below the requested open-ended UID range — confirm.
          next if uid <= lastseenuid
          # Select when is_unread is unspecified (nil) or equals the mail's
          # actual unread status.
          case is_unread
          when nil, !flags.include?(:Seen)
            ret << uid
          end
        }
        log pluralize(uids.size,
                      case is_unread
                      when true
                        'new unread mail'
                      when false
                        'new read mail'
                      else
                        'new mail'
                      end)
        next if uids.empty?
        imap.uid_fetch_mails(uids).each { |mail|
          yield mail, notified
        }
      }
      self.notified = notified
      self.lastseen = seen
      save!
    }
  ensure
    log 'Connection closed'
  end
  # MIME types eligible as the mail body: the interpolated `mime_types`
  # option when set, otherwise the built-in text types.
  def mime_types
    configured = interpolated['mime_types']
    configured || ['text/plain', 'text/enriched', 'text/html']
  end
  # Wraps the `lastseen` entry persisted in memory in a Seen map.
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end

  # Stores +value+ as the new `lastseen` entry, dropping the obsolete
  # `seen` key first.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory.store('lastseen', value)
  end

  # Returns a new, empty Seen map.
  def make_seen
    Seen.new
  end
  # Wraps the `notified` entry persisted in memory in a Notified list.
  def notified
    stored = memory['notified']
    Notified.new(stored)
  end

  # Stores +value+ as the new `notified` entry in memory.
  def notified=(value)
    memory.store('notified', value)
  end
  348. private
  # Tells whether +value+ converts with Kernel#Integer to a number strictly
  # greater than zero. Zero is rejected: the callers report "must be a
  # positive integer". Any value Integer() cannot convert yields false.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue
    false
  end
  # Matches +value+ against the shell glob +pattern+ using FNM_FLAGS.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats "<count> <noun>", inflecting +noun+ for +count+ through
  # String#pluralize.
  def pluralize(count, noun)
    label = noun.pluralize(count)
    "%d %s" % [count, label]
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, returns [] instead of nil from the fetch calls, and can
  # wrap fetched headers in Message objects.
  class Client < ::Net::IMAP
    class << self
      # Opens a connection, yields it, and disconnects afterwards even when
      # the block raises. Returns the block's value.
      #
      # Keyword options (e.g. `port:`, `ssl:`) are forwarded to `new` as
      # keywords rather than being collected into `*args` and re-passed as
      # a positional hash.
      def open(host, *args, **options)
        imap = new(host, *args, **options)
        yield imap
      ensure
        # imap is nil when `new` itself failed.
        imap.disconnect unless imap.nil?
      end
    end

    # UIDVALIDITY reported for the most recently selected folder.
    attr_reader :uidvalidity

    # Selects +folder+, remembering its name and UIDVALIDITY for later
    # Message construction. Returns what Net::IMAP#select returns.
    def select(folder)
      ret = super(@folder = folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Like Net::IMAP#fetch, but an empty array replaces a nil result.
    def fetch(*args)
      super || []
    end

    # Like Net::IMAP#uid_fetch, but an empty array replaces a nil result.
    def uid_fetch(*args)
      super || []
    end

    # Fetches the headers of the mails in +set+ (UIDs) and wraps each in a
    # Message bound to this client, the current folder and its UIDVALIDITY.
    def uid_fetch_mails(set)
      uid_fetch(set, 'RFC822.HEADER').map { |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      }
    end
  end
  # Hash of { uidvalidity => highest seen UID } whose entries never decrease.
  class Seen < Hash
    # hash - optional { uidvalidity => uid } mapping, typically restored
    #        from JSON and therefore keyed by strings; keys are converted
    #        with #to_i.
    def initialize(hash = nil)
      super()
      return unless hash

      hash.each do |key, uid|
        self[key.to_i] = uid
      end
    end

    # Stores +uid+ only when no value is recorded for +uidvalidity+ yet or
    # the recorded value does not exceed +uid+; otherwise does nothing.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      return unless current.nil? || current <= uid

      super
    end
  end
  # Array of recently notified Message-Ids, capped at IDCACHE_SIZE entries.
  class Notified < Array
    # array - optional list of ids to start from (copied, not trimmed).
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends +value+ and then drops the oldest entries so that at most
    # IDCACHE_SIZE remain. Trimming after the append keeps the cache at
    # IDCACHE_SIZE rather than IDCACHE_SIZE + 1. Returns self so appends
    # can be chained.
    def <<(value)
      super
      slice!(0...-IDCACHE_SIZE) if size > IDCACHE_SIZE
      self
    end
  end
  # A mail fetched by header. Delegates to the object parsed from the
  # RFC822 header and fetches the body or the body structure from the
  # server only when asked for.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # Adds #scrubbed, a memoized accessor that replaces invalid byte
    # sequences in a string attribute with "<hex>" markers.
    module Scrubbed
      # method - name of a reader on the receiver (e.g. :subject, :decoded).
      #
      # Returns the scrubbed string, or nil when the reader returns nil
      # (instead of raising NoMethodError on nil).
      def scrubbed(method)
        cache = (@scrubbed ||= {})
        cache[method.to_sym] ||= begin
          raw = __send__(method)
          raw && raw.scrub { |bytes| "<#{bytes.unpack('H*')[0]}>" }
        end
      end
    end

    include Scrubbed

    # client     - object used for follow-up fetches and flag updates.
    # fetch_data - fetch result whose attr holds 'UID' and 'RFC822.HEADER'.
    # props      - extra attributes, each stored in the instance variable
    #              of the same name (the agent passes folder and uidvalidity).
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Tells whether the BODYSTRUCTURE of the mail indicates an attachment;
    # false when the server returns no data. The answer is cached whatever
    # its value, so a negative result does not trigger another fetch.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment = data ? struct_has_attachment?(data.attr['BODYSTRUCTURE']) : false
    end

    # Fetches and parses the whole mail once; parses an empty string when
    # the server returns no data.
    def fetch
      @parsed ||=
        if data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          Mail.read_from_string(data.attr['BODY[]'])
        else
          Mail.read_from_string('')
        end
    end

    # Returns the non-multipart, non-attachment text parts whose MIME type
    # is listed in +mime_types+. For a multipart mail the parts are sorted
    # with +mime_types+ as the sort order first. Each returned part is
    # extended with Scrubbed.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      candidates =
        if mail.multipart?
          mail.body.set_sort_order(mime_types)
          mail.body.sort_parts!
          mail.all_parts
        else
          [mail]
        end

      candidates.select { |part|
        usable = !part.multipart? && !part.attachment? && part.text? &&
                 mime_types.include?(part.mime_type)
        part.extend(Scrubbed) if usable
        usable
      }
    end

    # Adds the :Seen flag to the mail on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # A structure counts as having an attachment when it is multipart and
    # either its subtype is MIXED or one of its parts (recursively) does.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  480. end
  481. end