# feedjira_extension.rb
  1. require 'feedjira'
  2. require 'digest'
  3. require 'mail'
  4. module FeedjiraExtension
  # Attribute names shared by the element classes in this module. They are
  # frozen so the lists cannot be altered after the classes have been built
  # from them.
  AUTHOR_ATTRS = %i[name email uri].freeze
  LINK_ATTRS = %i[href rel type hreflang title length].freeze
  ENCLOSURE_ATTRS = %i[url type length].freeze
  # Author record with one member per AUTHOR_ATTRS entry.
  class Author < Struct.new(*AUTHOR_ATTRS)
    # Serializes the author as a single JSON string.
    #
    # Members for which +presence+ returns a value are emitted in member
    # order and joined with single spaces; the email is wrapped in angle
    # brackets and the URI in parentheses. Other members are skipped.
    def to_json(options = nil)
      parts = members.flat_map do |key|
        value = self[key].presence
        # An empty array contributes nothing to the flattened result.
        next [] unless value

        case key
        when :email then "<#{value}>"
        when :uri then "(#{value})"
        else value
        end
      end

      parts.join(' ').to_json(options)
    end
  end
  # Author parsed from an Atom author element: each AUTHOR_ATTRS entry is
  # declared as a SAXMachine child element.
  class AtomAuthor < Author
    include SAXMachine

    AUTHOR_ATTRS.each { |attr| element attr }
  end
  # Author parsed from the text content of an RSS author-like element.
  class RssAuthor < Author
    include SAXMachine

    # Stores the raw text and derives the author fields from it.
    #
    # When Mail::Address accepts the text, its name and address populate
    # +name+ and +email+; when it raises, the whole text becomes the name.
    def content=(content)
      @content = content

      addr =
        begin
          Mail::Address.new(content)
        rescue StandardError
          nil
        end

      if addr
        self.name = addr.name
        self.email = addr.address
      else
        self.name = content
      end
    end

    # Declared after the custom writer above.
    # NOTE(review): presumably so SAXMachine keeps that writer instead of
    # generating its own -- confirm against sax-machine.
    value :content
  end
  # Enclosure element exposing each ENCLOSURE_ATTRS entry as a SAXMachine
  # attribute.
  class Enclosure
    include SAXMachine

    ENCLOSURE_ATTRS.each { |attr| attribute attr }

    # Serializes the attributes that have a truthy value as a JSON object,
    # keyed by attribute name in ENCLOSURE_ATTRS order.
    def to_json(options = nil)
      present = {}
      ENCLOSURE_ATTRS.each do |key|
        value = __send__(key)
        present[key] = value if value
      end
      present.to_json(options)
    end
  end
  # Atom link element exposing each LINK_ATTRS entry as a SAXMachine
  # attribute.
  class AtomLink
    include SAXMachine

    LINK_ATTRS.each { |attr| attribute attr }

    # Serializes the attributes that have a truthy value as a JSON object,
    # keyed by attribute name in LINK_ATTRS order.
    def to_json(options = nil)
      present = {}
      LINK_ATTRS.each do |key|
        value = __send__(key)
        present[key] = value if value
      end
      present.to_json(options)
    end
  end
  # RSS link element; the element's value is exposed as +href+.
  class RssLinkElement
    include SAXMachine

    value :href

    # Serializes the link as a JSON object with a single +href+ key.
    def to_json(options = nil)
      payload = { href: href }
      payload.to_json(options)
    end
  end
  # Mixin that collects author elements into an +authors+ collection.
  module HasAuthors
    # Configures the including class. Classes whose name matches /RSS/ get
    # the RSS author-like elements parsed as RssAuthor; all others parse
    # +author+ elements as AtomAuthor.
    def self.included(mod)
      mod.module_exec do
        if /RSS/ === name
          %w[
            itunes:author
            dc:creator
            author
            managingEditor
          ].each do |tag|
            # Drop whatever is registered for this tag as a top-level
            # element before redeclaring it as part of the collection.
            sax_config.top_level_elements[tag].clear
            elements tag, class: RssAuthor, as: :authors
          end
        else
          elements :author, class: AtomAuthor, as: :authors
        end

        # NOTE(review): alternate_link and url below are the same
        # definitions that HasLinks installs, and both rely on a +links+
        # method this module does not provide. They look redundant here;
        # confirm before removing.

        # First AtomLink with rel "alternate" whose type is "text/html" or
        # unset, or nil when there is none.
        def alternate_link
          links.find do |link|
            next false unless link.is_a?(AtomLink) && link.rel == 'alternate'

            link.type.nil? || link.type == 'text/html'
          end
        end

        # Memoized href of the alternate link, falling back to the first
        # link.
        def url
          @url ||= (alternate_link || links.first).try!(:href)
        end
      end
    end
  end
  # Mixin that parses an +enclosure+ element into an Enclosure and exposes
  # image helpers built on it.
  module HasEnclosure
    def self.included(mod)
      mod.module_exec do
        # Clear the entries registered under 'enclosure' before declaring
        # the element with the Enclosure class.
        sax_config.top_level_elements['enclosure'].clear

        element :enclosure, class: Enclosure

        # The enclosure when its type starts with "image/", otherwise nil.
        def image_enclosure
          enclosure if %r{\Aimage/} === enclosure.try!(:type)
        end

        # Memoized URL of the image enclosure, if any.
        def image
          @image ||= image_enclosure.try!(:url)
        end
      end
    end
  end
  # Mixin that collects link elements into +links+ and derives the
  # canonical +url+ from them.
  module HasLinks
    # Configures the including class according to its name:
    # * names matching /RSS/ parse +link+ elements as RssLinkElement;
    #   those also matching /FeedBurner/ additionally parse the embedded
    #   Atom links and expose both lists through +links+;
    # * all other classes parse +link+ elements as AtomLink.
    def self.included(mod)
      mod.module_exec do
        # Clear the entries registered under 'link' before redeclaring it.
        sax_config.top_level_elements['link'].clear
        sax_config.collection_elements['link'].clear

        case name
        when /RSS/
          elements :link, class: RssLinkElement, as: :rss_links

          case name
          when /FeedBurner/
            # FeedBurner RSS feeds carry their Atom links under the
            # atom10: namespace prefix.
            elements :'atom10:link', class: AtomLink, as: :atom_links

            # RSS links first, then the embedded Atom links.
            def links
              @links ||= [*rss_links, *atom_links]
            end
          else
            alias_method :links, :rss_links
          end
        else
          elements :link, class: AtomLink, as: :links
        end

        # First AtomLink with rel "alternate" whose type is "text/html" or
        # unset, or nil when there is none.
        def alternate_link
          links.find { |link|
            link.is_a?(AtomLink) &&
              link.rel == 'alternate' &&
              (link.type == 'text/html' || link.type.nil?)
          }
        end

        # Memoized href of the alternate link, falling back to the first
        # link.
        def url
          @url ||= (alternate_link || links.first).try!(:href)
        end
      end
    end
  end
  # Mixin that normalizes the +published+ and +updated+ timestamps.
  #
  # The writers fold successive assignments instead of overwriting:
  # +published+ keeps the earliest value seen and +updated+ the latest.
  # Values that cannot be parsed are ignored.
  module HasTimestamps
    attr_reader :published, :updated

    # Keep the "oldest" publish time found.
    # An unparsable value leaves the stored time untouched.
    def published=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @published = parsed if !@published || parsed < @published
    end

    # Keep the most recent update time found.
    # An unparsable value leaves the stored time untouched.
    def updated=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @updated = parsed if !@updated || parsed > @updated
    end

    # ISO 8601 form of +published+, or nil when it is unset.
    def date_published
      published.try(:iso8601)
    end

    # ISO 8601 form of +updated+, falling back to +published+.
    def last_updated
      (updated || published).try(:iso8601)
    end

    private

    # Parses +string+ with DateTime.parse; returns nil when parsing raises.
    def parse_datetime(string)
      DateTime.parse(string)
    rescue StandardError
      nil
    end
  end
  # Extensions applied to feed entry classes.
  module FeedEntryExtensions
    # Pulls the author, enclosure, link and timestamp mixins into the
    # including class, in that order.
    def self.included(mod)
      [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |extension|
        mod.send :include, extension
      end
    end

    # The entry's own id when it has one; otherwise the MD5 hex digest of
    # its content, its summary, or the empty string, whichever comes first.
    def id
      explicit_id = entry_id
      return explicit_id if explicit_id

      Digest::MD5.hexdigest(content || summary || '')
    end
  end
  # Extensions applied to feed (document-level) parser classes.
  module FeedExtensions
    # Configures the including feed class: mixes in the shared behaviours,
    # declares the feed-level elements, adds the RSS-specific element names
    # for classes whose name matches /RSS/, and extends the classes used
    # for the +entries+ collection with FeedEntryExtensions.
    def self.included(mod)
      mod.module_exec do
        [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |extension|
          include extension
        end

        element :id, as: :feed_id
        element :generator
        elements :rights
        element :published
        element :updated
        element :icon

        case name
        when /RSS/
          element :guid, as: :feed_id
          element :copyright
          element :pubDate, as: :published
          element :'dc:date', as: :published
          element :lastBuildDate, as: :updated
          element :image, value: :url, as: :icon

          # Prefer the parsed copyright element; otherwise fall back to
          # the rights-based value from the module.
          def copyright
            @copyright || super
          end
        end

        sax_config.collection_elements.each_value do |candidates|
          candidates.each do |candidate|
            next unless candidate.accessor == 'entries'

            entry_class = candidate.data_class
            # Only Class objects can be extended via include.
            next unless entry_class.is_a?(Class)

            entry_class.send :include, FeedEntryExtensions
          end
        end
      end
    end

    # All +rights+ values joined with newlines, or nil when +presence+
    # rejects the result.
    def copyright
      rights.join("\n").presence
    end
  end
  # Install FeedExtensions into every feed class returned by
  # Feedjira::Feed.feed_classes.
  Feedjira::Feed.feed_classes.each { |feed_class| feed_class.send(:include, FeedExtensions) }
  238. end