92 lines
3.1 KiB
Ruby
92 lines
3.1 KiB
Ruby
|
# encoding: utf-8
|
||
|
|
||
|
require 'open-uri'
require 'rss'

require 'mongo'
|
||
|
|
||
|
# Author names accepted from the feed. Feed items whose author is not in this
# list are skipped by the import.
# Frozen so the whitelist cannot be modified by accident at runtime.
AUTHORS = %w[
  學生事務處-學生住宿服務組
  學生事務處-學生職業生涯發展中心
  學生事務處-衛生保健及醫療中心
  學生事務處-課外活動指導組
  學生事務處-學生活動中心管理組
  學生事務處-僑生及陸生輔導組
  學生事務處-生活輔導組
  學生事務處-軍訓組
  學生事務處-學生心理輔導中心
  學生事務處-學生事務處
].freeze
|
||
|
|
||
|
# Prefix of the MongoDB database name; the site number is appended to it as
# "#{DB_BASE_NAME}_#{site_number}". Frozen because it is a constant.
DB_BASE_NAME = "site".freeze
|
||
|
|
||
|
# A feed page holding fewer items than this is taken to be the last page.
RSS_PAGE_SIZE = 100

# Download and parse one page of the announcement feed.
#
# The response body is transcoded from Big5 to UTF-8 (bytes that cannot be
# converted are dropped), the misspelt weekday "Wes," inside <pubDate> is
# corrected to "Wed,", and the XML declaration is rewritten so that it matches
# the new encoding before the text is handed to the RSS parser.
#
# @param page [Integer] value of the `page` query parameter
# @return [Object, nil] the result of RSS::Parser.parse for that page
def fetch_feed_page(page)
  # URI#open (provided by open-uri) replaces Kernel#open, which no longer
  # accepts URLs as of Ruby 3.0.
  URI.parse("http://ann.cc.ntu.edu.tw/asp/rss.asp?page=#{page}").open do |rss|
    xml = rss.read.encode('utf-8', 'big5', invalid: :replace, undef: :replace, replace: '')
    xml = xml.gsub('<pubDate>Wes,', '<pubDate>Wed,')
    xml = xml.gsub(/(encoding=\"big5\")/, 'encoding="utf-8"')
    RSS::Parser.parse(xml)
  end
end

# Convert a feed item into the hash consumed by the import step.
#
# @param item [#author, #title, #link, #pubDate, #category, #description]
# @return [Hash, nil] nil when the item's author is not listed in AUTHORS
def bulletin_from_item(item)
  # Normalise before the whitelist check so surrounding whitespace (or a
  # missing author) cannot make the comparison fail or raise.
  author = item.author.to_s.strip
  return nil unless AUTHORS.include?(author)

  {
    title: item.title.to_s.strip,
    author: author,
    link: item.link.to_s.strip,
    date: item.pubDate,
    # The category is stringified and its <category> tags are removed.
    category: item.category.to_s.gsub(/\<(\/)*category\>/, ''),
    description: item.description.to_s.gsub("\r\n", '<br/>').strip
  }
end

# Walk the feed page by page, collecting the whitelisted items into `all`.
all = []
page = 1
loop do
  feed = fetch_feed_page(page)
  # NOTE(review): a nil parse result is treated as "no more pages" instead of
  # crashing with NoMethodError - confirm this is the desired handling.
  break if feed.nil?

  feed.items.each do |item|
    bulletin = bulletin_from_item(item)
    all << bulletin if bulletin
  end

  # A short page means the feed is exhausted.
  break if feed.items.size < RSS_PAGE_SIZE
  page += 1
end
|
||
|
|
||
|
# Get the id of the category stored under "rss_<category>", creating and
# caching a new category document when none is known yet.
#
# @param category [String] category title taken from the feed
# @param categories [Hash] maps category keys ("rss_...") to ids; a newly
#   created id is added to this hash in place
# @param coll_cat [#save] collection the new category document is saved to;
#   the return value of #save is used as the category id
# @return [Array] two elements: the category id and the (possibly updated)
#   categories hash
def get_category_id(category, categories, coll_cat)
  key = "rss_#{category}"

  # Hash#key? avoids building and scanning a temporary array of all keys.
  unless categories.key?(key)
    # One timestamp so created_at and updated_at are guaranteed to be equal.
    now = Time.now
    cat = {
      _type: "BulletinCategory",
      key: key,
      disable: false,
      title: {:zh_tw => category},
      created_at: now,
      updated_at: now
    }
    categories[key] = coll_cat.save(cat)
  end

  [categories[key], categories]
end
|
||
|
|
||
|
# Connect to the MongoDB database of a site and load its bulletin categories.
#
# The database name is "#{DB_BASE_NAME}_#{site_number}".
#
# @param site_number [String] suffix appended to DB_BASE_NAME
# @param host [String] MongoDB host; defaults to the formerly hard-coded value
# @param port [Integer] MongoDB port; defaults to the formerly hard-coded value
# @return [Array] three elements: a hash mapping each category document's
#   'key' to its '_id', the "bulletins" collection, and the
#   "bulletin_categories" collection
def get_mongo_and_categories(site_number="0", host="localhost", port=27017)
  db = Mongo::Connection.new(host, port).db("#{DB_BASE_NAME}_#{site_number}")
  coll_bulletin = db["bulletins"]
  coll_cat = db["bulletin_categories"]

  # Block parameters are named differently from the outer local so that
  # nothing is shadowed.
  categories = coll_cat.find.each_with_object({}) do |doc, by_key|
    by_key[doc['key']] = doc['_id']
  end

  [categories, coll_bulletin, coll_cat]
end
|
||
|
|
||
|
# Load the known categories together with both collection handles.
categories, coll_bulletin, coll_cat = get_mongo_and_categories

# Save every collected feed entry whose link is not stored yet.
all.each do |entry|
  # The category is resolved (and created when missing) before the duplicate
  # check, so the categories hash is refreshed for every entry.
  category_id, categories = get_category_id(entry[:category], categories, coll_cat)
  next if coll_bulletin.find_one(rss_link: entry[:link])

  # The feed's publication date is used for all three timestamps.
  published_at = entry[:date]
  record = {
    _type: "Bulletin",
    postdate: published_at,
    created_at: published_at,
    updated_at: published_at,
    is_checked: true,
    is_pending: false,
    is_rejected: false,
    bulletin_category_id: category_id,
    title: {:zh_tw => entry[:title]},
    text: {:zh_tw => entry[:description]},
    available_for_zh_tw: true,
    rss_link: entry[:link]
  }
  coll_bulletin.save(record)
end
|
||
|
|
||
|
|