diff --git a/app/jobs/get_announcement_from_rss.rb b/app/jobs/get_announcement_from_rss.rb
new file mode 100644
index 0000000..267c29a
--- /dev/null
+++ b/app/jobs/get_announcement_from_rss.rb
@@ -0,0 +1,16 @@
+# Resque job that runs the standalone RSS import script lib/rss_ntu_job.rb
+# in a child Ruby process.
+class GetAnnouncementFromRss
+  @queue = :high
+
+  # Runs the import script and returns whatever it printed to stdout.
+  #
+  # Raises RuntimeError when the child process exits with a non-zero status,
+  # so a failed import is reported instead of being silently ignored.
+  def self.perform
+    output = %x(ruby "#{Rails.root}/lib/rss_ntu_job.rb")
+    # %x does not raise when the command exits non-zero; check the status.
+    raise "rss_ntu_job.rb failed with exit status #{$?.exitstatus}" unless $?.success?
+    output
+  end
+end
\ No newline at end of file
diff --git a/config/resque_schedule.yml b/config/resque_schedule.yml
index e73f964..09cc651 100644
--- a/config/resque_schedule.yml
+++ b/config/resque_schedule.yml
@@ -8,4 +8,11 @@ generate_system_summary:
   cron: 0 0 12 * * *
   class: GenerateSystemSummary
   args:
-  description: Generate the system status such as disk free space,package version list for showing at site tab
\ No newline at end of file
+  description: Generate the system status such as disk free space,package version list for showing at site tab
+
+get_announcement_from_rss:
+  # NOTE(review): six-field cron; if the first field is seconds (as the six-field entry above suggests), this fires every minute at second 30 -- confirm the intended frequency.
+  cron: "30 * * * * *"
+  class: GetAnnouncementFromRss
+  args:
+  description: Loop through the announcement RSS until 24h ago
\ No newline at end of file
diff --git a/lib/rss_ntu_job.rb b/lib/rss_ntu_job.rb
new file mode 100644
index 0000000..d7126a1
--- /dev/null
+++ b/lib/rss_ntu_job.rb
@@ -0,0 +1,151 @@
+# encoding: utf-8
+#
+# Standalone import script, launched by the GetAnnouncementFromRss job.
+# It downloads the bulletin RSS feed of every accepted entry in the
+# "announcement_feeds" collection and saves the items published within the
+# last 24 hours as Bulletin documents, skipping links already imported.
+require 'rss'
+require 'mongo'
+require 'mongoid'
+
+# Change this according to local DB
+DB_NAME = "orbit_site_new"
+
+Mongoid.configure do |config|
+  config.master = Mongo::Connection.new("localhost", 27017).db(DB_NAME)
+end
+
+# Bare Mongoid model used here to persist imported bulletins.
+class Bulletin
+  include Mongoid::Document
+  include Mongoid::Timestamps
+  include Mongoid::MultiParameterAttributes
+end
+
+# Bare Mongoid model linking a saved Bulletin to its category.
+class BufferCategory
+  include Mongoid::Document
+  include Mongoid::Timestamps
+end
+
+db = Mongo::Connection.new("localhost", 27017).db(DB_NAME)
+
+# Both lookups are built from the same accepted feeds and keyed by feed name,
+# so a site's category filter can never be paired with another site's link.
+accepted_feeds = db["announcement_feeds"].find(accepted: true).to_a
+SITES = Hash[accepted_feeds.map { |f| [f["name"], f["link"]] }]
+CATEGORIES = Hash[accepted_feeds.map { |f| [f["name"], Array(f["categories"])] }]
+
+yesterday = Time.now - 86400
+
+# title => attributes of every recent item that passes the category filter.
+# NOTE(review): keyed by title, so items sharing a title overwrite each
+# other -- confirm that this de-duplication is intended.
+recent_feed = {}
+
+SITES.each do |name, url|
+  wanted = CATEGORIES[name]
+
+  open("http://#{url}/panel/announcement/front_end/bulletins.rss?inner=true") do |rss|
+    # Giving 'false' parameter is for skipping irregular format of the RSS
+    feed = RSS::Parser.parse(rss, false)
+    # The parser returns nil when the document is not a feed it recognises.
+    next if feed.nil?
+
+    feed.items.each do |item|
+      # Items lacking a title or a publication date cannot be imported.
+      next if item.title.nil? || item.pubDate.nil?
+      next unless item.pubDate > yesterday
+
+      category = item.category.to_s.gsub(/\<(\/)*category\>/, '')
+      # An item passes when the first wanted entry is "全部" or its category is listed.
+      next unless wanted.first == "全部" || wanted.include?(category)
+
+      # NOTE(review): CRLF is replaced by a bare newline here; the replacement
+      # may originally have been an HTML line break -- confirm.
+      recent_feed[item.title.strip] = { date: item.pubDate,
+                                        description: item.description.to_s.gsub("\r\n", "\n").strip,
+                                        link: item.link,
+                                        category: category,
+                                        source: name }
+    end
+  end
+end
+
+# Returns [category_id, categories] for the "rss_<category>" category,
+# creating the category document first when it is not yet in +categories+.
+#
+# category   - category name taken from the feed item
+# categories - Hash of category key => _id; updated in place on creation
+# coll_cat   - the "categories" Mongo collection
+#
+# Raises RuntimeError when no module app with key "announcement" exists.
+def get_category_id(category, categories, coll_cat)
+  key = "rss_#{category}"
+  return [categories[key], categories] if categories.key?(key)
+
+  # Reuse the collection's own database handle instead of opening a new
+  # connection on every call.
+  module_app = coll_cat.db["module_apps"].find_one(key: "announcement")
+  raise "module app 'announcement' not found" if module_app.nil?
+
+  now = Time.now
+  cat = { _type: "Category",
+          key: key,
+          disable: false,
+          title: { :zh_tw => key },
+          created_at: now,
+          updated_at: now,
+          module_app_id: module_app['_id'] }
+  categories[key] = coll_cat.save(cat)
+  [categories[key], categories]
+end
+
+# Returns [categories, coll_bulletin, coll_cat]: a Hash of every existing
+# category key => _id plus the "bulletins" and "categories" collections.
+#
+# db - optional Mongo database handle to reuse; a new connection is opened
+#      when it is omitted.
+def get_mongo_and_categories(db = nil)
+  db ||= Mongo::Connection.new("localhost", 27017).db(DB_NAME)
+  coll_bulletin = db["bulletins"]
+  coll_cat = db["categories"]
+
+  categories = coll_cat.find.to_a.each_with_object({}) do |doc, ids|
+    ids[doc['key']] = doc['_id']
+  end
+
+  [categories, coll_bulletin, coll_cat]
+end
+
+# Connect once and share the collections and the category cache across all
+# items instead of reconnecting for every bulletin.
+categories, coll_bulletin, coll_cat = get_mongo_and_categories(db)
+
+recent_feed.each do |title, item|
+  category_id, categories = get_category_id(item[:category], categories, coll_cat)
+
+  # An item whose link is already stored has been imported before.
+  next if coll_bulletin.find_one(rss_link: item[:link])
+
+  bulletin = Bulletin.new(_type: "Bulletin",
+                          postdate: item[:date],
+                          created_at: item[:date],
+                          updated_at: item[:date],
+                          category_id: category_id,
+                          title: { :zh_tw => title, :en => "" },
+                          text: { :zh_tw => item[:description], :en => "" },
+                          available_for_zh_tw: true,
+                          available_for_en: false,
+                          rss_link: item[:link],
+                          rss_source: item[:source],
+                          is_top: false,
+                          is_hot: false,
+                          is_hidden: false)
+  bulletin.save!
+
+  BufferCategory.new(_type: "BufferCategory",
+                     category_id: category_id,
+                     categorizable_type: "Bulletin",
+                     categorizable_id: bulletin.id).save!
+end
\ No newline at end of file