imap_folder_agent.rb 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  # Class-level declarations made through macros inherited from Agent
  # (their implementation is outside this file).
  cannot_receive_events!
  default_schedule 'every_30m'
  8. description <<-MD
  9. The ImapFolderAgent checks an IMAP server in specified folders
  10. and creates Events based on new unread mails.
  11. Specify an IMAP server to connect with `host`, and set `ssl` to
  12. true if the server supports IMAP over SSL. Specify `port` if
  13. you need to connect to a port other than standard (143 or 993
  14. depending on the `ssl` value).
  15. Specify login credentials in `username` and `password`.
  16. List the names of folders to check in `folders`.
  17. To narrow mails by conditions, build a `conditions` hash with
  18. the following keys:
  19. - "subject"
  20. - "body"
  21. Specify a regular expression to match against the decoded
  22. subject/body of each mail.
  23. Use the `(?i)` directive for case-insensitive search. For
  24. example, a pattern `(?i)alert` will match "alert", "Alert"
  25. or "ALERT". You can also make only a part of a pattern to
  26. work case-insensitively: `Re: (?i:alert)` will match either
  27. "Re: Alert" or "Re: alert", but not "RE: alert".
  28. When a mail has multiple non-attachment text parts, they are
  29. prioritized according to the `mime_types` option (which see
  30. below) and the first part that matches a "body" pattern, if
  31. specified, will be chosen as the "body" value in a created
  32. event.
  33. Named captures will appear in the "matches" hash in a created
  34. event.
  35. - "from", "to", "cc"
  36. Specify a shell glob pattern string that is matched against
  37. mail addresses extracted from the corresponding header
  38. values of each mail.
  39. Patterns match addresses in a case-insensitive manner.
  40. Multiple pattern strings can be specified in an array, in
  41. which case a mail is selected if any of the patterns
  42. matches. (i.e. patterns are OR'd)
  43. - "mime_types"
  44. Specify an array of MIME types to tell which non-attachment
  45. part of a mail among its text/* parts should be used as mail
  46. body. The default value is `['text/plain', 'text/enriched',
  47. 'text/html']`.
  48. - "has_attachment"
  49. Setting this to true or false means only mails that do or do
  50. not have an attachment are selected.
  51. If this key is unspecified or set to null, it is ignored.
  52. Set `mark_as_read` to true to mark found mails as read.
  53. Each agent instance memorizes a list of unread mails that are
  54. found in the last run, so even if you change a set of conditions
  55. so that it matches mails that are missed previously, they will
  56. not show up as new events. Also, in order to avoid duplicated
  57. notification it keeps a list of Message-Id's of 100 most recent
  58. mails, so if multiple mails of the same Message-Id are found,
  59. you will only see one event out of them.
  60. MD
  61. event_description <<-MD
  62. Events look like this:
  63. {
  64. "folder": "INBOX",
  65. "subject": "...",
  66. "from": "Nanashi <nanashi.gombeh@example.jp>",
  67. "to": ["Jane <jane.doe@example.com>"],
  68. "cc": [],
  69. "date": "2014-05-10T03:47:20+0900",
  70. "mime_type": "text/plain",
  71. "body": "Hello,\n\n...",
  72. "matches": {
  73. }
  74. }
  75. MD
  # Maximum number of Message-Ids kept in memory['notified'].
  IDCACHE_SIZE = 100

  # Flag bitmask handed to File.fnmatch? by glob_match?. Each listed flag
  # is OR'd in only if this Ruby's File defines it; missing ones are skipped.
  FNM_FLAGS = [:FNM_CASEFOLD, :FNM_EXTGLOB]
    .select { |name| File.const_defined?(name) }
    .map { |name| File.const_get(name) }
    .reduce(0) { |mask, bit| mask | bit }
  # Reports the agent as working when an event was created within the
  # configured 'expected_update_period_in_days' and there are no recent
  # error logs. The error-log check only runs if the first check passes.
  def working?
    period = interpolated['expected_update_period_in_days']
    has_recent_event = event_created_within?(period)
    has_recent_event && !recent_error_logs?
  end
  # Returns the option hash a newly created agent starts with.
  # The credentials are placeholders the user is expected to replace.
  def default_options
    defaults = {}
    defaults['expected_update_period_in_days'] = "1"
    defaults['host'] = 'imap.gmail.com'
    defaults['ssl'] = true
    defaults['username'] = 'your.account'
    defaults['password'] = 'your.password'
    defaults['folders'] = ['INBOX']
    defaults['conditions'] = {}
    defaults
  end
  # Validates the agent options, adding one message to errors[:base] for
  # every problem found:
  #
  # - host / username / password must be strings;
  # - port and expected_update_period_in_days, when present, must pass
  #   is_positive_integer?;
  # - ssl / mark_as_read, when present, must be booleans;
  # - mime_types, when given, must be a non-empty array of "text/*" strings;
  # - folders, when given, must be a non-empty array of strings;
  # - conditions, when given, must be a hash whose non-blank values are
  #   valid for their key (regexp source, glob pattern(s) or boolean).
  def validate_options
    %w[host username password].each do |name|
      next if options[name].is_a?(String)
      errors.add(:base, '%s is required and must be a string' % name)
    end

    port = options['port']
    if port.present? && !is_positive_integer?(port)
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read].each do |name|
      flag = options[name]
      next unless flag.present?
      next if [true, false].include?(flag)
      errors.add(:base, '%s must be a boolean value' % name)
    end

    types = options['mime_types']
    if types.is_a?(Array)
      text_only = types.all? { |type| type.is_a?(String) && type.start_with?('text/') }
      errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
      errors.add(:base, 'mime_types should not be empty') if types.empty?
    elsif !types.nil?
      errors.add(:base, 'mime_types must be an array')
    end

    folder_names = options['folders']
    if folder_names.is_a?(Array)
      strings_only = folder_names.all? { |name| name.is_a?(String) }
      errors.add(:base, 'folders may only contain strings') unless strings_only
      errors.add(:base, 'folders should not be empty') if folder_names.empty?
    elsif !folder_names.nil?
      errors.add(:base, 'folders must be an array')
    end

    conditions = options['conditions']
    if conditions.is_a?(Hash)
      conditions.each do |key, value|
        # Blank values mean "no constraint" and are not validated.
        next unless value.present?

        if %w[subject body].include?(key)
          if value.is_a?(String)
            begin
              # Compiled only to find out whether the source is valid.
              Regexp.new(value)
            rescue
              errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
          end
        elsif %w[from to cc].include?(key)
          # A single pattern and an array of patterns are both accepted.
          Array(value).each do |pattern|
            unless pattern.is_a?(String)
              errors.add(:base, 'conditions.%s contains a non-string object' % key)
              next
            end

            begin
              # Trial match against an empty string to surface bad patterns.
              glob_match?(pattern, '')
            rescue
              errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
            end
          end
        elsif key == 'has_attachment'
          unless [true, false].include?(value)
            errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
          end
        end
      end
    elsif !conditions.nil?
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Walks every unread mail yielded by each_unread_mail and creates one
  # event per mail that
  #
  # * was not already listed as unread on the previous run (memory['seen']),
  # * satisfies every entry of the `conditions` option, and
  # * has a Message-Id that is not in memory['notified'].
  #
  # Matching mails are marked as read when `mark_as_read` is set. At the
  # end, memory['seen'] is replaced by the unread mails seen on this run,
  # memory['notified'] is trimmed to the IDCACHE_SIZE most recent ids, and
  # the agent is saved.
  def check
    # 'seen' keeps a hash of { uidvalidity => uids, ... } which
    # lists unread mails in watched folders.
    seen = memory['seen'] || {}
    new_seen = Hash.new { |hash, key| hash[key] = [] }

    # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
    # most recent notified mails.
    notified = memory['notified'] || []

    # validate_options accepts a nil `conditions`; treat it as "no
    # conditions" instead of calling #all? on nil.
    conditions = interpolated['conditions'] || {}

    each_unread_mail { |mail|
      new_seen[mail.uidvalidity] << mail.uid

      # NOTE(review): if memory is persisted in a format with string-only
      # hash keys (e.g. JSON -- confirm against the Agent base class), the
      # uidvalidity keys come back as strings, so both forms are tried.
      uids = seen[mail.uidvalidity] || seen[mail.uidvalidity.to_s]
      next if uids && uids.include?(mail.uid)

      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      selected = conditions.all? { |key, value|
        case key
        when 'subject'
          value.present? or next true
          re = Regexp.new(value)
          if m = re.match(mail.subject)
            m.names.each { |name|
              matches[name] = m[name]
            }
            true
          else
            false
          end
        when 'body'
          value.present? or next true
          re = Regexp.new(value)
          # The first part (in mime_types priority order) that matches
          # becomes the event body.
          matched_part = body_parts.find { |part|
            if m = re.match(part.decoded)
              m.names.each { |name|
                matches[name] = m[name]
              }
              true
            else
              false
            end
          }
        when 'from', 'to', 'cc'
          value.present? or next true
          # The header lookup yields nil when the mail has no such header
          # (e.g. no Cc); such a mail cannot match an address pattern.
          field = mail.header[key]
          next false if field.nil?
          field.addresses.any? { |address|
            Array(value).any? { |pattern|
              glob_match?(pattern, address)
            }
          }
        when 'has_attachment'
          value == mail.has_attachment?
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      }
      next unless selected

      unless notified.include?(mail.message_id)
        matched_part ||= body_parts.first

        if matched_part
          mime_type = matched_part.mime_type
          body = matched_part.decoded
        else
          # No usable text part: emit an empty plain-text body.
          mime_type = 'text/plain'
          body = ''
        end

        create_event :payload => {
          'folder' => mail.folder,
          'subject' => mail.subject,
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }

        notified << mail.message_id if mail.message_id
      end

      if interpolated['mark_as_read']
        log 'Marking as read'
        mail.mark_as_read
      end
    }

    # Keep only the newest IDCACHE_SIZE message-ids.
    notified.slice!(0...-IDCACHE_SIZE) if notified.size > IDCACHE_SIZE

    memory['seen'] = new_seen
    memory['notified'] = notified
    save!
  end
  # Connects to the configured IMAP server, logs in, and yields a Message
  # for every mail the server reports as UNSEEN in each configured folder.
  # The connection is closed by Client.open when the block finishes or
  # raises; 'Connection closed' is logged in either case.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)

    # `port` is optional. Integer(nil) raises TypeError, so convert only
    # when a value was given; nil is passed through to Client.open
    # (presumably Net::IMAP then uses its standard port -- confirm).
    port = port.present? ? Integer(port) : nil

    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
    Client.open(host, port, ssl) { |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])

      # validate_options accepts a nil `folders`; treat it as an empty list.
      (interpolated['folders'] || []).each { |folder|
        log "Selecting the folder: %s" % folder

        imap.select(folder)

        unseen = imap.search('UNSEEN')

        if unseen.empty?
          log "No unread mails"
          next
        end

        imap.fetch_mails(unseen).each { |mail|
          yield mail
        }
      }
    }
  ensure
    log 'Connection closed'
  end
  # MIME types eligible to become the event body, in priority order:
  # the `mime_types` option when set, otherwise the built-in default list.
  def mime_types
    configured = interpolated['mime_types']
    return configured if configured

    ['text/plain', 'text/enriched', 'text/html']
  end
  298. private
  # Returns true when +value+ converts with Kernel#Integer to a number
  # strictly greater than zero, false otherwise (including when the
  # conversion raises, e.g. for nil or a non-numeric string).
  #
  # Zero is rejected: it is not a positive integer, and the callers'
  # error messages ("port must be a positive integer") promise that.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue StandardError
    false
  end
  # Tests +value+ against the shell glob +pattern+ using the FNM_FLAGS
  # bitmask defined on this class.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, and builds Message objects from fetched headers.
  class Client < ::Net::IMAP
    class << self
      # Opens a connection, yields it, and disconnects afterwards even if
      # the block raises. Returns the block's value.
      def open(host, port, ssl)
        imap = new(host, port, ssl)
        yield imap
      ensure
        # imap is nil when the constructor itself raised.
        imap.disconnect unless imap.nil?
      end
    end

    # Selects +folder+ and records the folder name together with the last
    # UIDVALIDITY response, for use by fetch_mails.
    def select(folder)
      ret = super(@folder = folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Fetches UID and header for the messages in +set+ and wraps each
    # result in a Message tagged with the current folder and UIDVALIDITY.
    # Always returns an array.
    def fetch_mails(set)
      # fetch may return nil instead of an array when the server sends no
      # FETCH data (e.g. the messages vanished after the search).
      fetched = fetch(set, %w[UID RFC822.HEADER]) || []
      fetched.map { |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      }
    end
  end
  # One mail on the server. Delegates to a Mail object parsed from the
  # fetched header only; the full message and the BODYSTRUCTURE are
  # fetched from the server on demand and cached.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # client     - object used for the later uid_fetch / uid_store calls.
    # fetch_data - fetch result whose attr hash holds 'UID' and
    #              'RFC822.HEADER'.
    # props      - extra attributes; each key becomes an instance variable
    #              (Client#fetch_mails passes folder: and uidvalidity:).
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the BODYSTRUCTURE suggests an attachment (see
    # struct_has_attachment?). The structure is fetched once per message.
    def has_attachment?
      # `||=` would not cache a false result and would query the server
      # again on every call, so test for the variable's existence instead.
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment = struct_has_attachment?(data.attr['BODYSTRUCTURE'])
    end

    # Fetches the whole message and parses it, once per message. Uses
    # BODY.PEEK[] rather than BODY[] -- presumably so that fetching does
    # not mark the mail as read (confirm against the IMAP specification).
    def fetch
      @parsed ||=
        begin
          data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          Mail.read_from_string(data.attr['BODY[]'])
        end
    end

    # Returns the non-multipart, non-attachment text parts whose MIME type
    # is listed in +mime_types+. For multipart mails the parts are first
    # sorted using +mime_types+ as the sort order.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      candidates =
        if mail.multipart?
          mail.body.set_sort_order(mime_types)
          mail.body.sort_parts!
          mail.all_parts
        else
          [mail]
        end

      candidates.reject { |part|
        part.multipart? || part.attachment? || !part.text? ||
          !mime_types.include?(part.mime_type)
      }
    end

    # Adds the \Seen flag to this message on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # Heuristic: a structure counts as having an attachment when it is
    # multipart and either its subtype is MIXED or any nested part
    # qualifies in the same way.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  379. end
  380. end