feedjira_extension.rb 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. require 'feedjira'
  2. require 'digest'
  3. require 'mail'
  4. module FeedjiraExtension
  # Attribute names shared by the parser classes below. They are frozen so
  # the lists cannot be mutated after classes have been generated from them.
  AUTHOR_ATTRS = %i[name email uri].freeze
  LINK_ATTRS = %i[href rel type hreflang title length].freeze
  ENCLOSURE_ATTRS = %i[url type length].freeze
  # Struct holding one author's name, email and uri (see AUTHOR_ATTRS).
  class Author < Struct.new(*AUTHOR_ATTRS)
    # True when every attribute is nil.
    def empty?
      to_a.compact.empty?
    end

    # Serializes the author as a single JSON string. Attributes appear in
    # struct order, separated by one space; the email is wrapped in angle
    # brackets and the uri in parentheses. Attributes whose value has no
    # `presence` are skipped.
    # NOTE(review): `presence` is not core Ruby -- presumably ActiveSupport.
    def to_json(options = nil)
      parts = each_pair.flat_map do |key, value|
        next [] unless value.presence

        case key
        when :email then "<#{value}>"
        when :uri then "(#{value})"
        else value
        end
      end

      parts.join(' ').to_json(options)
    end
  end
  # Author whose attributes are each declared as a SAXMachine `element`
  # of the same name (one per entry in AUTHOR_ATTRS).
  class AtomAuthor < Author
    include SAXMachine

    AUTHOR_ATTRS.each { |attr| element attr }
  end
  # Author built from the text content of a single element.
  class RssAuthor < Author
    include SAXMachine

    # Stores the raw text, then tries to split it into name and email with
    # Mail::Address. When constructing the address raises, the whole text
    # becomes the name; when reading a part of the address raises, that
    # part is set to nil.
    def content=(content)
      @content = content

      begin
        parsed = Mail::Address.new(content)
      rescue StandardError
        self.name = content
      else
        self.name =
          begin
            parsed.name
          rescue StandardError
            nil
          end
        self.email =
          begin
            parsed.address
          rescue StandardError
            nil
          end
      end
    end

    # Declared after the custom writer above.
    # NOTE(review): presumably SAXMachine only generates accessors that are
    # not already defined, so the order matters -- confirm before moving.
    value :content
  end
  # Author read from `itunes:name` and `itunes:email` elements, mapped
  # onto the inherited name and email attributes.
  class ITunesRssOwner < Author
    include SAXMachine

    %i[name email].each do |attr|
      element :"itunes:#{attr}", as: attr
    end
  end
  # An element whose url, type and length XML attributes are captured
  # (see ENCLOSURE_ATTRS).
  class Enclosure
    include SAXMachine

    ENCLOSURE_ATTRS.each { |attr| attribute attr }

    # Serializes the attributes that have a truthy value as a JSON object,
    # keeping the ENCLOSURE_ATTRS order.
    def to_json(options = nil)
      pairs = ENCLOSURE_ATTRS.map { |key| [key, __send__(key)] }
      present = pairs.select { |_key, value| value }
      Hash[present].to_json(options)
    end
  end
  # A link element whose XML attributes listed in LINK_ATTRS are captured.
  class AtomLink
    include SAXMachine

    LINK_ATTRS.each { |attr| attribute attr }

    # True when none of the link attributes has been set.
    def empty?
      LINK_ATTRS.map { |attr| __send__(attr) }.compact.empty?
    end

    # Serializes the attributes that have a truthy value as a JSON object,
    # keeping the LINK_ATTRS order.
    def to_json(options = nil)
      pairs = LINK_ATTRS.map { |key| [key, __send__(key)] }
      present = pairs.select { |_key, value| value }
      Hash[present].to_json(options)
    end
  end
  # A link element whose text content is the link target (href).
  class RssLinkElement
    include SAXMachine

    value :href

    # True unless href is a String.
    def empty?
      !string_href?
    end

    # Serializes as {"href": ...} when href is a String, otherwise as an
    # empty JSON object.
    def to_json(options = nil)
      # Ignore non-string values, because SaxMachine leaks its
      # internal value :no_buffer when the content of an element
      # is empty.
      payload = string_href? ? { href: href } : {}
      payload.to_json(options)
    end

    private

    # Whether the parsed value is usable link text.
    def string_href?
      href.is_a?(String)
    end
  end
  # Mixin that collects author elements into `_authors` and exposes the
  # non-empty ones through `authors`.
  module HasAuthors
    def self.included(mod)
      mod.module_exec do
        if /RSS/ === name
          # Classes with "RSS" in their name: several tags may carry an
          # author, and all of them are collected as RssAuthor objects.
          %w[itunes:author dc:creator author managingEditor].each do |tag|
            # NOTE(review): presumably clears the mapping the parser class
            # already declared for this tag -- confirm against SAXMachine.
            sax_config.top_level_elements[tag].clear
            elements tag, class: RssAuthor, as: :_authors
          end
        else
          elements :author, class: AtomAuthor, as: :_authors
        end

        # Authors that have at least one attribute set.
        def authors
          _authors.reject { |author| author.empty? }
        end
      end
    end
  end
  # Mixin that parses the `enclosure` element as an Enclosure object and
  # derives an image URL from it.
  module HasEnclosure
    def self.included(mod)
      mod.module_exec do
        # NOTE(review): presumably clears the parser class's own mapping
        # for the tag before redeclaring it -- confirm against SAXMachine.
        sax_config.top_level_elements['enclosure'].clear
        element :enclosure, class: Enclosure

        # The enclosure when its type starts with "image/", otherwise nil.
        # NOTE(review): `try!` is not core Ruby -- presumably ActiveSupport.
        def image_enclosure
          enclosure if %r{\Aimage/} === enclosure.try!(:type)
        end

        # URL of the image enclosure, memoized once a truthy value is found.
        def image
          @image ||= image_enclosure.try!(:url)
        end
      end
    end
  end
  # Mixin that collects link elements into `_links` and derives `links`,
  # `alternate_link` and `url` from them.
  module HasLinks
    def self.included(mod)
      mod.module_exec do
        # NOTE(review): presumably clears the parser class's own mappings
        # for <link> before redeclaring them -- confirm against SAXMachine.
        sax_config.top_level_elements['link'].clear
        sax_config.collection_elements['link'].clear

        case name
        when /RSS/
          elements :link, class: RssLinkElement, as: :rss_links

          case name
          when /FeedBurner/
            # FeedBurner RSS carries its Atom links under the `atom10`
            # prefix. The tag was previously misspelled `atok10:link`,
            # so these links were never collected.
            elements :'atom10:link', class: AtomLink, as: :atom_links

            # RSS links first, followed by the Atom links.
            def _links
              [*rss_links, *atom_links]
            end
          else
            alias_method :_links, :rss_links
          end
        else
          elements :link, class: AtomLink, as: :_links
        end

        # Links that carry at least some data.
        def links
          _links.reject(&:empty?)
        end

        # First AtomLink with rel "alternate" whose type is "text/html" or
        # unset; nil when there is none.
        def alternate_link
          links.find { |link|
            link.is_a?(AtomLink) &&
              link.rel == 'alternate' &&
              (link.type == 'text/html' || link.type.nil?)
          }
        end

        # href of the alternate link, falling back to the first link.
        # Memoized once a truthy value is found.
        # NOTE(review): `try!` is not core Ruby -- presumably ActiveSupport.
        def url
          @url ||= (alternate_link || links.first).try!(:href)
        end
      end
    end
  end
  # Mixin providing `published` / `updated` timestamps. Each setter merges
  # the values it receives rather than overwriting, so it can be fed more
  # than one candidate value.
  module HasTimestamps
    attr_reader :published, :updated

    # Keep the "oldest" publish time found. A value that cannot be parsed
    # is ignored; previously it was compared against the stored time and
    # raised NoMethodError on nil.
    def published=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @published = parsed if @published.nil? || parsed < @published
    end

    # Keep the most recent update time found. A value that cannot be
    # parsed is ignored, for the same reason as in `published=`.
    def updated=(value)
      parsed = parse_datetime(value)
      return unless parsed

      @updated = parsed if @updated.nil? || parsed > @updated
    end

    # ISO 8601 form of the publish time, or nil when there is none.
    # NOTE(review): `try` is not core Ruby -- presumably ActiveSupport.
    def date_published
      published.try(:iso8601)
    end

    # ISO 8601 form of the update time, falling back to the publish time.
    def last_updated
      (updated || published).try(:iso8601)
    end

    private

    # Parses a timestamp, returning nil for anything DateTime.parse
    # rejects (malformed text, nil, non-string values).
    def parse_datetime(string)
      DateTime.parse(string)
    rescue StandardError
      nil
    end
  end
  # Mixin for entry classes: pulls in the author, enclosure, link and
  # timestamp mixins and supplies a fallback `id`.
  module FeedEntryExtensions
    def self.included(mod)
      mod.module_exec do
        [HasAuthors, HasEnclosure, HasLinks, HasTimestamps].each do |extension|
          include extension
        end
      end
    end

    # The entry's own id when it has one; otherwise the MD5 hex digest of
    # its content, falling back to its summary, then to the empty string.
    def id
      explicit_id = entry_id
      return explicit_id if explicit_id

      body = content || summary || ''
      Digest::MD5.hexdigest(body)
    end
  end
  # Mixin for feed classes: pulls in the shared mixins, declares extra
  # feed-level elements, and extends every entry class of the feed with
  # FeedEntryExtensions.
  module FeedExtensions
    def self.included(mod)
      mod.module_exec do
        include HasAuthors
        include HasEnclosure
        include HasLinks
        include HasTimestamps

        element :id, as: :feed_id
        element :generator
        elements :rights
        element :published
        element :updated
        element :icon

        case name
        when /RSS/
          element :guid, as: :feed_id
          element :copyright
          element :pubDate, as: :published
          element :'dc:date', as: :published
          element :lastBuildDate, as: :updated
          element :image, value: :url, as: :icon

          # Prefer the parsed copyright element; otherwise defer to the
          # `copyright` method further up the ancestor chain.
          def copyright
            @copyright || super
          end

          case name
          when /ITunes/
            sax_config.collection_elements['itunes:owner'].clear
            elements :"itunes:owner", as: :_itunes_owners, class: ITunesRssOwner

            private :_itunes_owners

            # Owners that have at least one attribute set.
            def itunes_owners
              _itunes_owners.reject { |owner| owner.empty? }
            end
          end
        else
          unless method_defined?(:description)
            element :subtitle, as: :description
          end
        end

        # Extend each class used for the `entries` collection.
        sax_config.collection_elements.each_value do |configs|
          configs.each do |config|
            next unless config.accessor == 'entries'

            entry_class = config.data_class
            next unless entry_class.is_a?(Class)

            entry_class.send :include, FeedEntryExtensions
          end
        end
      end
    end

    # All rights values joined with newlines.
    # NOTE(review): `presence` is not core Ruby -- presumably ActiveSupport,
    # which would turn a blank result into nil.
    def copyright
      rights.join("\n").presence
    end
  end
  # Mix FeedExtensions into every feed class Feedjira reports.
  Feedjira::Feed.feed_classes.each { |feed_class| feed_class.send :include, FeedExtensions }
  266. end