# sync_personal_data_task_for.../lib/tasks/sync_ntu_personal_data_task...
#
# 250 lines
# 8.8 KiB
# Ruby

require "uri"
require "net/http"
namespace :sync_ntu_personal_plugins do
  # Walks every MemberProfile and imports its conference papers, journal
  # papers and books through NtuSyncPlugin. Profiles without a sid are
  # reported and skipped.
  #
  # Invoked as: rake sync_ntu_personal_plugins:sync
  task :sync, [:arg] => :environment do |_task, _args|
    MemberProfile.each do |mp|
      ntuseq = mp.sid
      # to_s guards against a profile without an email; an empty local part
      # falls back to the sid so the log lines still identify the member.
      u_account = mp.email.to_s.split('@')[0]
      u_account = ntuseq if u_account.blank?

      # blank? also treats a whitespace-only sid as missing.
      if ntuseq.blank?
        puts "No sid present for #{u_account}."
        next
      end

      puts "Importing for account #{u_account}."
      NtuSyncPlugin.sync_conference_data(mp, u_account, ntuseq)
      NtuSyncPlugin.sync_journal_paper_data(mp, u_account, ntuseq)
      NtuSyncPlugin.sync_book_data(mp, u_account, ntuseq)
    end
  end
end
module NtuSyncPlugin
# Performs a GET for +uri+ and returns the response.
#
# The request path is normalised first: a leading "/" is ensured, "../"
# segments are collapsed, "//" is replaced by "/" and percent-escapes are
# decoded (spaces are sent as %20). On a 400 the request is retried once with
# the path percent-escaped. If the answer is still 400 and the host matches
# i<digit>.wp.com, the first path segment is used as the host and the request
# is repeated against it.
#
# uri     - URI to fetch. It is modified in place by the wp.com fallback.
# headers - Hash of request headers (default: {}).
#
# Returns the response of the last request, with its #uri set to the URI that
# was requested.
def self.net_http_get_response(uri, headers = {})
  http = Net::HTTP.new(uri.host, uri.port)
  if uri.scheme == "https"
    # NOTE(review): certificate verification is switched off here, so the
    # peer is not authenticated. Behaviour kept; confirm it is still needed.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    http.use_ssl = true
  end
  http.open_timeout = 5
  http.read_timeout = 300

  relative_path = (uri.path.blank? ? "/" : uri.path)
  relative_path = "/" + relative_path unless relative_path.start_with?("http", "/")

  leading_parent = /^\/\.\.\/(\.\.\/)*/
  relative_path = relative_path.sub(leading_parent, '/')
  while relative_path.include?("/../")
    collapsed = relative_path.gsub(/\/[^\/]+\/\.\.\//, '/')
    # No "segment/../" pair is left to collapse: the remaining ".." climbs
    # above the root (e.g. "/a/../../b"). Strip it, and stop if even that
    # changes nothing, so the loop always terminates.
    collapsed = collapsed.sub(leading_parent, '/') if collapsed == relative_path
    break if collapsed == relative_path
    relative_path = collapsed
  end
  relative_path = relative_path.gsub('//', '/')

  # URI.decode / URI.encode were removed in Ruby 3.0; the RFC 2396 parser
  # provides the same escape/unescape behaviour.
  parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  relative_path = parser.unescape(relative_path)
  relative_path += "?#{parser.unescape(uri.query)}" unless uri.query.blank?

  res = http.get(relative_path.gsub(" ", "%20"), headers)
  if res.code == "400"
    # escape encodes spaces itself; escaping an already substituted "%20"
    # would double-encode it to "%2520".
    res = http.get(parser.escape(relative_path), headers)
  end
  res.uri = uri

  if res.code == "400" && uri.host.match(/i[\d]\.wp\.com/)
    segments = uri.path.split("/")
    real_host = segments[1]
    # Without a first path segment there is no host to fall back to.
    unless real_host.blank?
      uri.path = "/" + Array(segments[2..-1]).join("/")
      uri.host = real_host
      # Hand back the retried response instead of the original 400.
      res = net_http_get_response(uri, headers)
    end
  end
  res
end
# Builds a translations hash that carries the same +data+ under every locale
# the site uses.
#
# data - value to store under each locale (the same object is reused for all
#        keys, it is not copied).
#
# Returns a Hash of locale => data. When reading the site's locales raises a
# StandardError, :en and :zh_tw are used instead.
def self.localize_data(data)
  locales = begin
    Site.first.in_use_locales
  rescue StandardError
    [:en, :zh_tw]
  end
  locales.each_with_object({}) do |locale, translations|
    translations[locale] = data
  end
end
# Fetches +uri+ with a curl-like User-Agent and follows at most one redirect.
#
# On a 301, 302, 303, 307 or 308 the Location header is resolved against the
# current URI (so relative targets keep scheme, host and port) and requested
# once, sending the value of Set-Cookie back as a Cookie header.
#
# uri - URI to fetch.
#
# Returns the response of the last request made.
# Raises RuntimeError when a redirect carries no Location header or when the
# last response is a 404.
def self.get_response(uri)
  initial_headers = {
    'User-Agent' => 'curl/7.35.0'
  }
  res = net_http_get_response(uri, initial_headers)

  if %w[301 302 303 307 308].include?(res.code)
    location = res['Location']
    raise "Redirect without Location header: #{uri.to_s}" if location.to_s.empty?

    redirect_headers = initial_headers.merge('Cookie' => res['Set-Cookie'].to_s)
    # URI.join handles absolute targets as well as relative ones.
    uri = URI.join(uri.to_s, location)
    res = net_http_get_response(uri, redirect_headers)
  end

  raise "404 Not found: #{uri.to_s}" if res.code == "404"
  res
end
# Requests the XML export for one sequence id from the NTU achievement
# service and parses it.
#
# ntuseq - value sent as the Seq query parameter.
# type   - value sent as the type query parameter (callers in this module
#          pass "C", "J" or "B").
#
# Returns the response body parsed with Nokogiri::XML.
def self.get_sync_data_xml(ntuseq, type)
  # Form-encode the values so spaces or "&" cannot break the URL or add
  # extra parameters.
  query = URI.encode_www_form("Seq" => ntuseq.to_s, "type" => type.to_s)
  uri = URI.parse("https://my.ntu.edu.tw/achvservice/xmlpaper.asp?#{query}")
  Nokogiri::XML(get_response(uri).body)
end
# Replaces one member's stored conference papers with the papers in the NTU
# feed (type "C").
#
# The feed is requested first; the member's existing WritingConference
# records are destroyed only after the request has returned, so a failed
# request leaves the stored papers untouched.
#
# mp        - member profile the papers belong to (its id is used).
# u_account - account label, used in log output only.
# ntuseq    - sequence id sent to the NTU service.
def self.sync_conference_data(mp, u_account, ntuseq)
  @tmp_conference_paper_ids ||= []
  puts "Sync_Conference... #{u_account}."
  puts ntuseq
  conference_xml = get_sync_data_xml(ntuseq, "C")
  WritingConference.where(member_profile_id: mp.id).destroy

  # Only the fields that are stored below are extracted.
  conference_papers = conference_xml.xpath("//Paper").map do |cp_node|
    {
      author: (cp_node > "Authors").text,
      year: (cp_node > "PublishYear").text,
      title: (cp_node > "PaperTitle").text,
      location: "#{(cp_node > "Country").text} #{(cp_node > "location").text}",
      conference_title: (cp_node > "PublishOn").text,
      remarks: (cp_node > "Remarks").text
    }
  end

  if conference_papers.empty?
    puts "No conference by #{u_account}."
  else
    puts "Starting to import conference papers for #{u_account}."
    conference_papers.each do |b|
      paper_title = b[:title].blank? ? "No Title Present" : b[:title]
      conference_title = b[:conference_title].blank? ? "No Title Present" : b[:conference_title]

      paper = WritingConference.new
      paper.authors_translations = localize_data(b[:author])
      paper.paper_title_translations = localize_data(paper_title)
      paper.conference_title_translations = localize_data(conference_title)
      paper.note = b[:remarks]
      paper.location_translations = localize_data(b[:location])
      paper.year = b[:year]

      # NOTE(review): paper was just built with .new, so its id is presumably
      # never in the list and the first branch looks unreachable - confirm
      # the intended de-duplication.
      if @tmp_conference_paper_ids.include?(paper.id)
        paper.member_profile_id = (Array(paper.member_profile_id) + [mp.id]).uniq
      else
        @tmp_conference_paper_ids << paper.id
        paper.member_profile_id = [mp.id]
      end

      # Report a rejected record instead of dropping it silently.
      unless paper.save
        puts "Could not save conference paper \"#{paper_title}\" for #{u_account}."
      end
    end
  end
end
# Replaces one member's stored journal papers with the papers in the NTU
# feed (type "J").
#
# The feed is requested first; the member's existing JournalPaper records are
# destroyed only after the request has returned, so a failed request leaves
# the stored papers untouched.
#
# mp        - member profile the papers belong to (its id is used).
# u_account - account label, used in log output only.
# ntuseq    - sequence id sent to the NTU service.
def self.sync_journal_paper_data(mp, u_account, ntuseq)
  @tmp_journal_paper_ids ||= []
  puts "Sync_Journal... #{u_account}."
  puts ntuseq
  paper_xml = get_sync_data_xml(ntuseq, "J")
  JournalPaper.where(member_profile_id: mp.id).destroy

  # Only the fields that are stored below are extracted.
  journal_papers = paper_xml.xpath("//Paper").map do |paper_node|
    {
      author: (paper_node > "Authors").text,
      year: (paper_node > "PublishYear").text,
      title: (paper_node > "PaperTitle").text,
      journal: (paper_node > "PublishOn").text,
      volumeno: (paper_node > "VolumeNo").text,
      beginpage: (paper_node > "BeginPage").text,
      endpage: (paper_node > "EndPage").text,
      remarks: (paper_node > "Remarks").text,
      cate: ((paper_node > "subgroup") > "Group").text
    }
  end

  if journal_papers.empty?
    puts "No journal papers for #{u_account}."
  else
    puts "Starting to import journal papers for #{u_account}."
    journal_papers.each do |b|
      paper_title = b[:title].blank? ? "No Title Present" : b[:title]

      paper = JournalPaper.new
      paper.authors_translations = localize_data(b[:author])
      paper.paper_title_translations = localize_data(paper_title)
      paper.journal_title_translations = localize_data(b[:journal])
      paper.note = b[:remarks]
      paper.year = b[:year]
      paper.vol_no = b[:volumeno]
      paper.form_to_start = b[:beginpage]
      paper.form_to_end = b[:endpage]

      # Attach the journal level whose key equals the feed's Group value,
      # when there is one.
      unless b[:cate].blank?
        level = JournalLevel.where(:key => b[:cate]).first
        paper.journal_level_type_ids = [level.id.to_s] if level
      end

      # NOTE(review): paper was just built with .new, so its id is presumably
      # never in the list and the first branch looks unreachable - confirm
      # the intended de-duplication.
      if @tmp_journal_paper_ids.include?(paper.id)
        paper.member_profile_id = (Array(paper.member_profile_id) + [mp.id]).uniq
      else
        @tmp_journal_paper_ids << paper.id
        paper.member_profile_id = [mp.id]
      end

      # Report a rejected record instead of dropping it silently.
      unless paper.save
        puts "Could not save journal paper \"#{paper_title}\" for #{u_account}."
      end
    end
  end
end
# Replaces one member's stored books with the books in the NTU feed
# (type "B").
#
# The feed is requested first; the member's existing Book records are
# destroyed only after the request has returned, so a failed request leaves
# the stored books untouched.
#
# mp        - member profile the books belong to (its id is used).
# u_account - account label, used in log output only.
# ntuseq    - sequence id sent to the NTU service.
def self.sync_book_data(mp, u_account, ntuseq)
  @tmp_book_ids ||= []
  puts "Sync_Book:... #{u_account}."
  puts ntuseq
  books_xml = get_sync_data_xml(ntuseq, "B")
  Book.where(member_profile_id: mp.id).destroy

  books = books_xml.xpath("//Book").map do |book_node|
    {
      author: (book_node > "Authors").text,
      year: (book_node > "PublishYear").text,
      title: (book_node > "DocTitle").text,
      remarks: (book_node > "Remarks").text,
      publisher: (book_node > "Publisher").text,
      book_title: (book_node > "BookTitle").text
    }
  end

  if books.empty?
    puts "No books present for #{u_account}."
  else
    puts "Starting to import books for #{u_account}."
    books.each do |b|
      # "DocTitle,BookTitle" when both exist, whichever one exists otherwise,
      # nil when neither does.
      title_parts = [b[:title], b[:book_title]].reject(&:blank?)
      book_title = title_parts.empty? ? nil : title_parts.join(",")

      book = Book.new
      book.member_profile_id = mp.id
      book.authors_translations = localize_data(b[:author])
      book.book_title_translations = localize_data(book_title)
      book.note = b[:remarks]
      book.publisher_translations = localize_data(b[:publisher])
      book.year = b[:year]

      # NOTE(review): book was just built with .new, so its id is presumably
      # never in the list and the first branch looks unreachable - confirm
      # the intended de-duplication.
      if @tmp_book_ids.include?(book.id)
        book.member_profile_id = (Array(book.member_profile_id) + [mp.id]).uniq
      else
        @tmp_book_ids << book.id
        book.member_profile_id = [mp.id]
      end

      # Report a rejected record instead of dropping it silently.
      unless book.save
        puts "Could not save book \"#{book_title}\" for #{u_account}."
      end
    end
  end
end
end