# encoding: utf-8
#
# Imports recent announcements from remote RSS feeds into the local database.
#
# Steps:
#   1. Read the accepted feeds from the "announcement_feeds" collection.
#   2. Download each feed and keep the items published during the last 24
#      hours whose category is one of the categories configured for that feed
#      (or every item when the feed's first configured category is "全部").
#   3. Save every kept item whose link is not already stored as a bulletin's
#      rss_link, as a Bulletin plus a BufferCategory pointing at its category.

# open-uri provides the `open` method on parsed HTTP URIs used below.
require 'open-uri'
require 'rss'
require 'mongo'
require 'mongoid'

# Change this according to local DB
DB_NAME = "orbit_site_new"

Mongoid.configure do |config|
  config.master = Mongo::Connection.new("localhost", 27017).db(DB_NAME)
end

class Bulletin
  include Mongoid::Document
  include Mongoid::Timestamps
  include Mongoid::MultiParameterAttributes
end

class BufferCategory
  include Mongoid::Document
  include Mongoid::Timestamps
end

# One raw-driver database handle shared by the whole script. It is a constant
# so that the helper methods below can reach it (top-level locals are not
# visible inside `def`), which avoids opening a new connection per call.
DB = Mongo::Connection.new("localhost", 27017).db(DB_NAME)

# Looks up, or creates, the category used for bulletins of one RSS category.
#
# category   - category name taken from the feed item.
# categories - Hash of category key => category id; a newly created category
#              is added to it in place.
# coll_cat   - the "categories" collection.
#
# Returns [category_id, categories].
# Raises RuntimeError when a category must be created but "module_apps" has no
# document whose key is "announcement".
def get_category_id(category, categories, coll_cat)
  key = "rss_#{category}"
  return [categories[key], categories] if categories.key?(key)

  announcement_app = DB["module_apps"].find_one(:key => "announcement")
  if announcement_app.nil?
    raise "no module_apps document with key \"announcement\"; cannot create category #{key}"
  end

  now = Time.now
  cat = {
    _type: "Category",
    key: key,
    disable: false,
    title: {:zh_tw => key},
    created_at: now,
    updated_at: now,
    module_app_id: announcement_app['_id']
  }
  # The value returned by `save` is cached next to the `_id` values loaded by
  # get_mongo_and_categories, so it is presumably the new document's _id.
  categories[key] = coll_cat.save(cat)
  [categories[key], categories]
end

# Loads the existing categories and the collections the import works on.
#
# Returns [categories, coll_bulletin, coll_cat] where `categories` maps each
# stored category's key to its _id.
def get_mongo_and_categories
  coll_bulletin = DB["bulletins"]
  coll_cat = DB["categories"]
  categories = {}
  coll_cat.find.each do |category|
    categories[category['key']] = category['_id']
  end
  [categories, coll_bulletin, coll_cat]
end

# Build the site list and the per-site category lists from the SAME query so
# that SITES and CATEGORIES stay aligned by position: the n-th entry of
# CATEGORIES always belongs to the n-th entry of SITES, even when some feeds
# are not accepted or two feeds share a name.
accepted_feeds = DB["announcement_feeds"].find(accepted: true).entries

SITES = {}
categories_by_site = {}
accepted_feeds.each do |f|
  SITES[f["name"]] = f["link"]
  # A feed without configured categories matches nothing instead of crashing.
  categories_by_site[f["name"]] = f["categories"] || []
end
CATEGORIES = SITES.keys.collect { |name| categories_by_site[name] }

yesterday = Time.now - 24 * 60 * 60

recent_feed = {}
SITES.each_with_index do |(name, url), i|
  wanted = CATEGORIES[i]
  feed_url = "http://#{url}/panel/announcement/front_end/bulletins.rss?inner=true"

  URI.parse(feed_url).open do |rss|
    # Giving 'false' parameter is for skipping irregular format of the RSS
    feed = RSS::Parser.parse(rss, false)
    # Nothing to read when the response could not be parsed as a feed.
    next if feed.nil?

    feed.items.each do |item|
      # Validation is off, so an item may lack a date or a title; skip those
      # rather than aborting the whole run.
      next if item.pubDate.nil? || item.title.nil?
      next unless item.pubDate > yesterday

      # Strip the surrounding <category> tags from the serialized element.
      category = item.category.to_s.gsub(/\<(\/)*category\>/, '')
      next unless wanted[0] == "全部" || wanted.include?(category)

      recent_feed[item.title.strip] = {
        date: item.pubDate,
        description: item.description.to_s.gsub("\r\n", "\n").strip,
        link: item.link,
        category: category,
        source: name
      }
    end
  end
end

# Load the category table once; get_category_id keeps it up to date when it
# creates a new category.
categories, coll_bulletin, coll_cat = get_mongo_and_categories

recent_feed.each do |title, item|
  # Already imported: skip before touching the categories, so no category is
  # created for a bulletin that will not be stored.
  next if coll_bulletin.find_one(rss_link: item[:link])

  category_id, categories = get_category_id(item[:category], categories, coll_cat)

  bulletin = Bulletin.new(
    _type: "Bulletin",
    postdate: item[:date],
    created_at: item[:date],
    updated_at: item[:date],
    category_id: category_id,
    title: {:zh_tw => title, :en => ""},
    text: {:zh_tw => item[:description], :en => ""},
    available_for_zh_tw: true,
    available_for_en: false,
    rss_link: item[:link],
    rss_source: item[:source],
    is_top: false,
    is_hot: false,
    is_hidden: false
  )
  bulletin.save!

  buffer_category = BufferCategory.new(
    _type: "BufferCategory",
    category_id: category_id,
    categorizable_type: "Bulletin",
    categorizable_id: bulletin.id
  )
  buffer_category.save!
end