Orbit/vendor/built_in_modules/announcement/lib/rss_ntu_ga_aggregate.rb

98 lines
3.2 KiB
Ruby

# encoding: utf-8
require 'open-uri'
require 'rss'
require 'mongo'
# Display name of each site => host name its bulletin RSS feed is fetched from.
# Frozen so the lookup table cannot be modified by accident at runtime.
SITES = {
  "總務長室暨總務處秘書室" => "sec.ga.ntu.edu.tw",
  "總務處文書組" => "doc.ga.ntu.edu.tw",
  "總務處事務組" => "general.ga.ntu.edu.tw",
  "總務處保管組" => "property.ga.ntu.edu.tw",
  "總務處營繕組" => "construction.ga.ntu.edu.tw",
  "總務處出納組" => "cashier.ga.ntu.edu.tw",
  "總務處採購組" => "procurement.ga.ntu.edu.tw",
  "總務處經營管理組" => "fss.ga.ntu.edu.tw",
  "總務處駐衛警察隊" => "police.ga.ntu.edu.tw",
  "社會科學院總務分處" => "social.ga.ntu.edu.tw",
  "醫學院總務分處" => "medicine.ga.ntu.edu.tw"
}.freeze

# Name of the MongoDB database the bulletins are written to.
DB_BASE_NAME = "production_0".freeze
# Cut-off for "recent": only items published within the last 24 hours are kept.
yesterday = Time.now - 86400
# NOTE(review): two_weeks_ago is not referenced anywhere in the visible part of
# this script — confirm it is unused before removing it.
two_weeks_ago = Time.new - 60 * 60 * 24 * 14
# Recent items keyed by their stripped title. Because the title is the key, a
# later item with the same title replaces an earlier one.
recent_feed = {}
SITES.each do |name, url|
  feed_url = "http://#{url}/panel/announcement/front_end/bulletins.rss?inner=true"
  # Open through URI (open-uri) rather than Kernel#open: Kernel#open on a URL
  # string is deprecated since Ruby 2.7 and no longer supported on Ruby 3.0+.
  URI.parse(feed_url).open do |rss|
    # Passing false turns validation off so irregularly formatted feeds still parse.
    feed = RSS::Parser.parse(rss, false)
    # presumably nil when the document is not a recognisable feed — skip the site
    next if feed.nil?

    feed.items.each do |item|
      published = item.pubDate
      # With validation off an item may lack a date or a title; such items
      # cannot be filtered or keyed, so they are skipped instead of raising.
      next if published.nil? || item.title.nil?
      next unless published > yesterday

      # item.category.to_s is stripped of its <category> / </category> tags.
      category = item.category.to_s.gsub(/\<(\/)*category\>/, '')
      recent_feed[item.title.strip] = {
        date: published,
        # A missing description becomes an empty string.
        description: item.description.to_s.gsub("\r\n", '<br/>').strip,
        link: item.link,
        category: category,
        source: name
      }
    end
  end
end
# Resolves the id of the bulletin category for an RSS category name, saving a
# new category document when the name is not in the lookup yet.
#
# @param category [String] category name taken from a feed item
# @param categories [Hash] lookup of category key ("rss_<name>") to id; when a
#   category is created its entry is added to this same hash
# @param coll_cat [#save] collection the new category document is saved to;
#   the value returned by its save call is used as the new id
# @return [Array] two elements: the category id and the categories hash
def get_category_id(category, categories, coll_cat)
  key = "rss_#{category}"
  # Hash#key? tests membership directly, without building an array of all keys.
  return [categories[key], categories] if categories.key?(key)

  # A single timestamp, so created_at and updated_at of a new record are equal.
  now = Time.now
  new_category = {
    _type: "BulletinCategory",
    key: key,
    disable: false,
    title: { :zh_tw => category },
    created_at: now,
    updated_at: now
  }
  id = coll_cat.save(new_category)
  categories[key] = id
  [id, categories]
end
# Connects to MongoDB on localhost:27017, selects the DB_BASE_NAME database and
# reads every document of the "bulletin_categories" collection.
#
# @return [Array] three elements: a Hash mapping each category document's
#   'key' to its '_id', the "bulletins" collection, and the
#   "bulletin_categories" collection
def get_mongo_and_categories
  database = Mongo::Connection.new("localhost", 27017).db("#{DB_BASE_NAME}")
  bulletin_collection = database["bulletins"]
  category_collection = database["bulletin_categories"]

  id_by_key = {}
  category_collection.find().to_a.each do |doc|
    id_by_key[doc['key']] = doc['_id']
  end

  [id_by_key, bulletin_collection, category_collection]
end
# Store every recent item that has not been imported yet.
#
# The connection and the category lookup are set up once for the whole run
# instead of once per item; get_category_id adds any category it creates to the
# lookup, so it stays current. Nothing is opened when there is nothing to import.
unless recent_feed.empty?
  categories, coll_bulletin, coll_cat = get_mongo_and_categories

  recent_feed.each do |title, item|
    # A bulletin with the same rss_link was stored by an earlier run; skip it
    # before resolving the category so no category is created for a duplicate.
    next if coll_bulletin.find_one(rss_link: item[:link])

    category_id, categories = get_category_id(item[:category], categories, coll_cat)

    # The feed's publication date is used for all three timestamps.
    document = {
      _type: "Bulletin",
      postdate: item[:date],
      created_at: item[:date],
      updated_at: item[:date],
      is_checked: true,
      is_pending: false,
      is_rejected: false,
      bulletin_category_id: category_id,
      title: { :zh_tw => title },
      text: { :zh_tw => item[:description] },
      available_for_zh_tw: true,
      rss_link: item[:link],
      rss_source: item[:source],
      is_top: false,
      is_hot: false,
      is_hidden: false
    }
    coll_bulletin.save(document)
  end
end