Add a script for NTU GA that grabs all the bulletins

This commit is contained in:
Bernie Chiu 2013-11-08 17:42:48 +08:00
parent b68a413574
commit 6d6650042e
4 changed files with 98 additions and 12 deletions

View File

@ -1,11 +0,0 @@
# Widget controller exposing the announcement bulletins as an RSS feed.
class Panel::Announcement::FrontEnd::BulletinFeedsController < OrbitWidgetController
  # Loads the 50 most recently created bulletins and renders the RSS
  # template without a layout. Only the rss format is handled.
  #
  # NOTE(review): the result is stored in @bulletin (singular) -- confirm the
  # rss template for this action reads that name.
  def rss
    @bulletin = Bulletin.all(order: 'created_at DESC', limit: 50)
    respond_to do |format|
      format.rss do
        render layout: false
      end
    end
  end
end

View File

@ -11,6 +11,7 @@ xml.rss :version => "2.0" do
for bulletin in @bulletins for bulletin in @bulletins
xml.item do xml.item do
xml.title bulletin.title_translations[I18n.locale.to_s] xml.title bulletin.title_translations[I18n.locale.to_s]
xml.category bulletin.bulletin_category.title
xml.pubDate bulletin.postdate.to_s(:rfc822) xml.pubDate bulletin.postdate.to_s(:rfc822)
xml.description bulletin.text_translations[I18n.locale.to_s] xml.description bulletin.text_translations[I18n.locale.to_s]
xml.link url_for(:action=>"show", :controller=>"panel/announcement/front_end/bulletins", :id=>bulletin ,:only_path=>false) xml.link url_for(:action=>"show", :controller=>"panel/announcement/front_end/bulletins", :id=>bulletin ,:only_path=>false)

View File

@ -0,0 +1,96 @@
# encoding: utf-8
require 'open-uri'
require 'rss'
require 'mongo'
# Display name of each NTU General Affairs unit => host name of its site.
# Frozen so the lookup table cannot be modified by accident at runtime.
SITES = {
  "總務長室暨總務處秘書室" => "sec.ga.ntu.edu.tw",
  "總務處文書組" => "doc.ga.ntu.edu.tw",
  "總務處事務組" => "general.ga.ntu.edu.tw",
  "總務處保管組" => "property.ga.ntu.edu.tw",
  "總務處營繕組" => "construction.ga.ntu.edu.tw",
  "總務處出納組" => "cashier.ga.ntu.edu.tw",
  "總務處採購組" => "procurement.ga.ntu.edu.tw",
  "總務處經營管理組" => "fss.ga.ntu.edu.tw",
  "總務處駐衛警察隊" => "police.ga.ntu.edu.tw",
  "社會科學院總務分處" => "social.ga.ntu.edu.tw",
  "醫學院總務分處" => "medicine.ga.ntu.edu.tw"
}.freeze

# Name of the MongoDB database the bulletins are read from and written to.
DB_BASE_NAME = "production_0".freeze
# Collect every bulletin published during the last 24 hours from each site's
# RSS feed. recent_feed maps the stripped bulletin title to a hash with the
# keys :date, :description, :link and :category.
yesterday = Time.now - 86400
recent_feed = {}
SITES.each_value do |host|
  feed_uri = URI.parse("http://#{host}/panel/announcement/front_end/bulletins.rss?inner=true")
  # URI#open (from open-uri) is used instead of Kernel#open: Kernel#open no
  # longer fetches URLs on Ruby 3.0+, while URI#open works on old and new Rubies.
  feed_uri.open do |rss|
    # Giving false parameter is for skipping irregular format of the RSS
    feed = RSS::Parser.parse(rss, false)
    feed.items.each do |item|
      # With validation off an item may lack a title or a publication date;
      # such items cannot be keyed or dated, so they are skipped.
      next if item.title.nil? || item.pubDate.nil?
      next unless item.pubDate > yesterday

      # The category element is serialised and its surrounding tags removed.
      category = item.category.to_s.gsub(%r{</*category>}, '')
      recent_feed[item.title.strip] = {
        date: item.pubDate,
        # to_s guards against items that carry no description element.
        description: item.description.to_s.gsub("\r\n", '<br/>').strip,
        link: item.link,
        category: category
      }
    end
  end
end
# Looks up the id of the bulletin category stored under the key
# "rss_<category>", creating the category when it is not known yet.
#
# category   - category title taken from the feed.
# categories - hash of category key => category id; a newly created id is
#              added to it in place.
# coll_cat   - collection-like object whose #save stores the new category
#              document and returns its id.
#
# Returns a two-element array: [category id, categories].
def get_category_id(category, categories, coll_cat)
  key = "rss_#{category}"
  return [categories[key], categories] if categories.key?(key)

  document = {
    _type: "BulletinCategory",
    key: key,
    disable: false,
    title: { :zh_tw => category },
    created_at: Time.now,
    updated_at: Time.now
  }
  new_id = coll_cat.save(document)
  categories[key] = new_id
  [new_id, categories]
end
# Connects to the MongoDB server on localhost:27017, selects the database
# named by DB_BASE_NAME and indexes the existing bulletin categories.
#
# Returns a three-element array:
#   [hash of category key => category _id,
#    the "bulletins" collection,
#    the "bulletin_categories" collection]
def get_mongo_and_categories
  connection = Mongo::Connection.new("localhost", 27017)
  database = connection.db(DB_BASE_NAME)
  bulletins = database["bulletins"]
  bulletin_categories = database["bulletin_categories"]

  ids_by_key = bulletin_categories.find.to_a.each_with_object({}) do |record, index|
    index[record['key']] = record['_id']
  end

  [ids_by_key, bulletins, bulletin_categories]
end
# Import step for the bulletins collected in recent_feed (title => fields).
#
# The Mongo connection and the category index are set up once, and only when
# there is something to process, instead of once per bulletin.
unless recent_feed.empty?
  categories, coll_bulletin, coll_cat = get_mongo_and_categories
  recent_feed.each do |title, content|
    # The import below is commented out, so this loop currently has no effect.
    # The draft reads each bulletin's fields from `content` and its title from
    # the hash key, which is how recent_feed is populated.
    # category_id, categories = get_category_id(content[:category], categories, coll_cat)
    # unless coll_bulletin.find_one(rss_link: content[:link])
    #   bulletin = { _type: "Bulletin",
    #                postdate: content[:date],
    #                created_at: content[:date],
    #                updated_at: content[:date],
    #                is_checked: true,
    #                is_pending: false,
    #                is_rejected: false,
    #                bulletin_category_id: category_id,
    #                title: {:zh_tw => title},
    #                text: {:zh_tw => content[:description]},
    #                available_for_zh_tw: true,
    #                rss_link: content[:link],
    #                is_top: false,
    #                is_hot: false,
    #                is_hidden: false }
    #   puts bulletin
    # end
  end
end