From 86fbe38431a05c15b876dae7dad6f5bfd60c6e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=82=B1=E5=8D=9A=E4=BA=9E?= Date: Sun, 25 Sep 2022 19:34:29 +0800 Subject: [PATCH] add new feature for rss feed --- app/assets/javascripts/new_channel.js | 9 +- app/controllers/admin/feeds_controller.rb | 165 +++++++++++++--------- app/models/site_feed.rb | 92 ++++++++++-- app/models/site_feed_annc.rb | 4 +- config/routes.rb | 2 +- lib/feed_model/cache.rb | 1 + lib/tasks/feeds_tasks.rake | 35 +---- 7 files changed, 190 insertions(+), 118 deletions(-) diff --git a/app/assets/javascripts/new_channel.js b/app/assets/javascripts/new_channel.js index 6f6028f..23b0a1d 100644 --- a/app/assets/javascripts/new_channel.js +++ b/app/assets/javascripts/new_channel.js @@ -46,11 +46,16 @@ $("#channels").fadeOut(function(){ $("#channels").html(""); displayLoading(true,"Fetching feed list for " + channel.title); + var data_to_send = {"url" : site_url, "channel" : channel.key, "feed_list_url" : channel.url}; + if (channel.feeds){ + data_to_send.feeds = JSON.stringify(channel.feeds) + } + console.log(data_to_send) $.ajax({ url : "/admin/feeds/get_feed_list", - data : {"url" : site_url, "channel" : channel.key, "feed_list_url" : channel.url}, + data : data_to_send, dataType : "json", - type : "get" + type : "post" }).done(function(data){ $.each(data.feeds, function(index,feed){ var klass = (feed.subscribed ? "active" : ""); diff --git a/app/controllers/admin/feeds_controller.rb b/app/controllers/admin/feeds_controller.rb index b075fa0..ef73cef 100644 --- a/app/controllers/admin/feeds_controller.rb +++ b/app/controllers/admin/feeds_controller.rb @@ -3,9 +3,9 @@ class Admin::FeedsController < OrbitAdminController helper_method :search_all_words def compute_layout if action_name== 'annc_content' - false + false else - 'back_end' + 'back_end' end end def index @@ -24,8 +24,8 @@ class Admin::FeedsController < OrbitAdminController @setting.update_attributes(p) Thread.new do sleep 3 - content = "UNICORN_PID=\"`fuser tmp/pids/unicorn.sock tmp/sockets/unicorn.sock tmp/unicorn.sock` `cat tmp/pids/unicorn.pid `\" && kill -s USR2 $UNICORN_PID ; n=20; while (kill -0 $UNICORN_PID > /dev/null 2>&1) && test $n -ge 0; do printf '.' && sleep 1 && n=$(( $n - 1 )); done ; if test $n -lt 0; then kill -s TERM $UNICORN_PID; sleep 3; bundle exec unicorn_rails -c config/unicorn.rb -D -E #{Rails.env}; else kill -s QUIT $UNICORN_PID; fi" - system(content) + content = "UNICORN_PID=\"`fuser tmp/pids/unicorn.sock tmp/sockets/unicorn.sock tmp/unicorn.sock` `cat tmp/pids/unicorn.pid `\" && kill -s USR2 $UNICORN_PID ; n=20; while (kill -0 $UNICORN_PID > /dev/null 2>&1) && test $n -ge 0; do printf '.' && sleep 1 && n=$(( $n - 1 )); done ; if test $n -lt 0; then kill -s TERM $UNICORN_PID; sleep 3; bundle exec unicorn_rails -c config/unicorn.rb -D -E #{Rails.env}; else kill -s QUIT $UNICORN_PID; fi" + system(content) end redirect_to admin_feeds_settings_path end @@ -108,21 +108,21 @@ class Admin::FeedsController < OrbitAdminController tp = (a.blank? || a[I18n.locale].blank?) ? b.gsub(/http:\/\/|https:\/\//,'').gsub(/\./,'-') : a[I18n.locale] tp end.uniq.map{|tp| {:title => tp,:id => tp}} - @filter_fields['feed.module_name'] = SiteFeed.all.map{|s| s.channel_key}.uniq.map do |key| - {:title => I18n.t("module_name.#{key}"),:id => key} - end - @max_content_count = 100 - @page_no = params[:page].to_i - @page_no = 1 if @page_no==0 - @skip_count = (@page_no-1)*@max_content_count - @module_name = params[:filters][:module_name].to_a rescue [] - @module_name = SiteFeed.all.map{|s| s.channel_key}.uniq if @module_name == [] + @filter_fields['feed.module_name'] = SiteFeed.all.map{|s| s.channel_key}.uniq.map do |key| + {:title => I18n.t("module_name.#{key}"),:id => key} + end + @max_content_count = 100 + @page_no = params[:page].to_i + @page_no = 1 if @page_no==0 + @skip_count = (@page_no-1)*@max_content_count + @module_name = params[:filters][:module_name].to_a rescue [] + @module_name = SiteFeed.all.map{|s| s.channel_key}.uniq if @module_name == [] @all_feed_annc = SiteFeedAnnc.where(:channel_key.in => @module_name).order(created_at: 1).to_a rescue [] - @source = params[:filters][:source] rescue [] - @keywords = params[:keywords] rescue nil + @source = params[:filters][:source] rescue [] + @keywords = params[:keywords] rescue nil if request.xhr? - render :partial => "announcements" - end + render :partial => "announcements" + end end def get_category_list app_key = params[:channel] @@ -140,55 +140,56 @@ class Admin::FeedsController < OrbitAdminController end def get_channel_list - puts params['url'] url = URI.decode(params['url']).chomp("/") + "/feeds/channel_lists" - puts url - uri = URI.parse(URI.decode(url).encode) - begin - res = Timeout.timeout(10){Net::HTTP.get_response(uri).body} - rescue - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true if (uri.scheme == "https") - http.verify_mode = OpenSSL::SSL::VERIFY_NONE - res = http.get(uri.request_uri).body rescue nil - puts uri.to_s - puts uri.hostname - end - data = JSON.parse(res) rescue {} - print data - render :json => data.to_json + uri = URI(URI.decode(url).encode) + res_net = http_request(uri) rescue nil + res = res_net ? res_net.body : nil + data = {} + if res_net && res_net['Content-Type'].split(";")[0]=="application/json" + data = JSON.parse(res) rescue {} + else + uri = URI(params['url']) + body = http_request(uri).body + links = get_rss_link(params['url'], body) + if links.length > 0 + data = {channels: [{title: "Announcement", + key: "announcement", + url: params['url'], + feeds: links, + app_icon: "icons-megaphone"}]} + end + end + render :json => data.to_json end def get_feed_list - url = params['url'].chomp("/") + params[:feed_list_url] - uri = URI.parse(url) - begin - res = Net::HTTP.start(uri.host, - uri.port,:use_ssl => uri.scheme == 'https', - open_timeout: 10,read_timeout: 10, - verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| - req = Net::HTTP::Get.new(uri) - http.request(req) - end - res = res.code=='200' ? res.body : nil - rescue - res = nil - puts uri.to_s - puts uri.hostname - end - data = JSON.parse(res) rescue {} - data_to_send = {} - data_to_send["feeds"] = [] - Array(data["feeds"]).each do |feed| - sf = SiteFeed.find_by(:feed_uid => feed["uid"]) rescue nil - if !sf.nil? - feed["subscribed"] = true - else - feed["subscribed"] = false - end - data_to_send["feeds"] << feed - end - render :json => data_to_send.to_json + data_to_send = {} + if params['feeds'] + data = {"feeds"=> JSON.parse(params[:feeds])} + else + url = params['url'].chomp("/") + params[:feed_list_url] + uri = URI.parse(url) + begin + res = http_request(uri) + res = res.code=='200' ? res.body : nil + rescue + res = nil + puts uri.to_s + puts uri.hostname + end + data = JSON.parse(res) rescue {} + end + data_to_send["feeds"] = [] + Array(data["feeds"]).each do |feed| + sf = SiteFeed.find_by(:feed_uid => feed["uid"]) rescue nil + if !sf.nil? + feed["subscribed"] = true + else + feed["subscribed"] = false + end + data_to_send["feeds"] << feed + end + render :json => data_to_send.to_json end def channel_title @@ -207,7 +208,8 @@ class Admin::FeedsController < OrbitAdminController def subscribe site_feed = SiteFeed.new - site_feed.remote_site_url = params[:url].chomp("/") + uri = URI(params[:url]) + site_feed.remote_site_url = uri.to_s[/\A.*(?=#{uri.path}\z)/] site_feed.merge_with_category = params[:category] site_feed.channel_name = params[:channel] site_feed.channel_key = params[:channel_key] @@ -215,6 +217,9 @@ class Admin::FeedsController < OrbitAdminController site_feed.feed_name_translations = params[:feed][:title_translations] site_feed.disabled = false site_feed.feed_url = params[:feed][:url] + if params[:feed][:is_rss] + site_feed.is_rss = params[:feed][:is_rss] + end uri = URI(site_feed.remote_site_url) site_host = uri.host other_site_feeds = SiteFeed.where(:remote_site_url=>/:\/\/#{::Regexp.escape(site_host)}/) @@ -268,9 +273,37 @@ class Admin::FeedsController < OrbitAdminController end def search_all_words(target, word) - target = target.upcase - words = word.upcase.split(' ') - words.select { |value| target.include? value } == words + target = target.upcase + words = word.upcase.split(' ') + words.select { |value| target.include? value } == words end +private + def http_request(uri, open_timeout=10) + Net::HTTP.start(uri.host, uri.port, + :use_ssl => uri.scheme == 'https', + open_timeout: open_timeout,read_timeout: 60, + verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| + req = Net::HTTP::Get.new(uri) + req['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' + http.request(req) + end + end + def get_rss_link(url, body) + html = Nokogiri::HTML(body) + html.search('.data_midlle_rss a').map{|v| + href = URI.join(url, v.attr('href')).to_s + uid = href.split("?n=")[-1] + title = v.text() + title_translations = I18n.available_locales.collect{|v| [v, title]}.to_h + { + url: href, + xml_url: href, + uid: uid, + title_translations: title_translations, + tags: title_translations, + is_rss: true + } + } + end end diff --git a/app/models/site_feed.rb b/app/models/site_feed.rb index 1eb6a7b..a74e9b4 100644 --- a/app/models/site_feed.rb +++ b/app/models/site_feed.rb @@ -18,12 +18,15 @@ class SiteFeed field :feeds_finish_callback field :feeds_update_statuses_callback field :enable_notify, type: Boolean, default: false + field :is_rss, type: Boolean, default: false require 'feed_model/cache' require 'fileutils' include FeedModel::Cache Category.send(:include,FeedModel::Cache) after_create do - self.add_notify + if !self.is_rss + self.add_notify + end crontab_list = `crontab -l`.split("\n") rescue [] site_root = Rails.root.to_s if crontab_list.select{|s| s.include?(site_root) && s.include?("feeds_module:make_cache")} == [] @@ -31,7 +34,9 @@ class SiteFeed end end before_destroy do - self.remove_notify + if !self.is_rss + self.remove_notify + end tmp = SiteFeedAnnc.where(feed_id: self.id) main_directory = File.join("#{Rails.root}","public","site_feeds") feed_directory = File.join(main_directory.to_s, self.id.to_s) @@ -50,8 +55,6 @@ class SiteFeed self.update_remote_site_url end end - else - self.sync_data_to_annc end @site_feed_annc = nil end @@ -80,6 +83,16 @@ class SiteFeed end end def get_annc(force_refresh=false) + def to_I18n(title, have_key=false, key="title_translations") + value = I18n.available_locales.map{|v| [v,title]}.to_h + if have_key + { + key => value + } + else + value + end + end main_directory = File.join("#{Rails.root}","public","site_feeds") feed_directory = File.join(main_directory.to_s, self.id.to_s) if !force_refresh && File.exists?(feed_directory) @@ -87,6 +100,49 @@ class SiteFeed else uri = URI(self.feed_url) res = get_response_body(uri) rescue '' + if self.is_rss + rss_data = Hash.from_xml(res) + category_title = rss_data['rss']['channel']['title'] + category_title_trans = to_I18n(category_title, true) + res_to_json = rss_data['rss']['channel']["item"].collect do |item| + tag_titles = Array(item['類別']).collect{|title| to_I18n(title, true, "name_translations")} + id = item['link'].split("&s=")[-1] + title = Array(item['title'])[0] + text = Array(item['description'])[0] + files = [] + if text.include?('附件') + files = Nokogiri::HTML(get_response_body(URI(item['link']))).search('#RelData1_liFile ol a').collect do |v| + file_title = v.attr('title') + { + "description_translations" => to_I18n(file_title), + "title_translations" => to_I18n(file_title.sub(/^\[(?:(?!\]).)*\]/,'').rpartition(".")[0]), + "url" => v.attr('href') + } + end + end + { + "show_url" => item['link'], + "org_is_top" => 0, + "id" => id, + "title_translations" => to_I18n(title), + "subtitle_translations" => {}, + "text_translations" => to_I18n(text), + "postdate" => DateTime.parse(item['pubDate']).to_s, + "image_description_translations" => {}, + "image" => {}, + "display_img" => false, + "tags" => tag_titles, + "category" => category_title_trans, + "author" => item['author'], + "params" => "#{title}-#{id}", + "bulletin_links" => [], + "bulletin_files" => files, + "bulletin_carousel_images" => [], + "external_link" => nil + } + end + res = JSON.dump({"announcements" => res_to_json}) + end FileUtils.mkdir_p(feed_directory) if !File.exists?(feed_directory) File.open(File.join(feed_directory.to_s,self.feed_uid + ".json"),"w") do |file| res.force_encoding("utf-8") @@ -102,6 +158,7 @@ class SiteFeed open_timeout: 60,read_timeout: 60, verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| req = Net::HTTP::Get.new(uri) + req['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' http.request(req) end if res.code == "302" || res.code == "301" @@ -112,15 +169,22 @@ class SiteFeed else uri = URI.parse(location) end - res = Net::HTTP.start(uri.host, uri.port, - :use_ssl => uri.scheme == 'https', - open_timeout: 60,read_timeout: 60, - verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| - req = Net::HTTP::Get.new(uri) - req['Cookie'] = cookie - http.request(req) + begin + res = Net::HTTP.start(uri.host, uri.port, + :use_ssl => uri.scheme == 'https', + open_timeout: 60,read_timeout: 60, + verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| + req = Net::HTTP::Get.new(uri) + req['Cookie'] = cookie + http.request(req) + end + res_body = res.body + rescue => e + res_body = '' + puts e + puts e.backtrace end - return res.body + return res_body else return res.body end @@ -128,8 +192,8 @@ class SiteFeed def category Category.find(self.merge_with_category) rescue nil end - def channel_title_for_cache - !self[:channel_title].to_s.empty? ? self[:channel_title] : I18n.t("feed.source") + def channel_title_for_cache #empty? must for hash + !self[:channel_title].blank? ? self[:channel_title] : I18n.available_locales.collect{|v| [v,I18n.t("feed.source")]}.to_h end def http_request(http, request) response = http.request(request) diff --git a/app/models/site_feed_annc.rb b/app/models/site_feed_annc.rb index 8e5b9e6..99a2064 100644 --- a/app/models/site_feed_annc.rb +++ b/app/models/site_feed_annc.rb @@ -333,9 +333,9 @@ class SiteFeedAnnc end if tmp["link_to_show"].nil? if !is_widget - tmp["link_to_show"] = UseSourceUrl && tmp["show_url"] ? "#{tmp["source_url"]}/#{locale}#{tmp["show_url"]}" : OrbitHelper.url_to_show(tmp["params"]) rescue '' + tmp["link_to_show"] = UseSourceUrl && tmp["show_url"] ? URI.join("#{tmp["source_url"]}/#{locale}","#{tmp["show_url"]}").to_s : OrbitHelper.url_to_show(tmp["params"]) rescue '' else - tmp["link_to_show"] = UseSourceUrl && tmp["show_url"] ? "#{tmp["source_url"]}/#{locale}#{tmp["show_url"]}" : OrbitHelper.widget_item_url(tmp["params"]) rescue '' + tmp["link_to_show"] = UseSourceUrl && tmp["show_url"] ? URI.join("#{tmp["source_url"]}/#{locale}","#{tmp["show_url"]}").to_s : OrbitHelper.widget_item_url(tmp["params"]) rescue '' end end tmp diff --git a/config/routes.rb b/config/routes.rb index 1928ca8..b65402b 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -66,7 +66,7 @@ Rails.application.routes.draw do scope "(:locale)", locale: Regexp.new(locales.join("|")) do namespace :admin do get "/feeds/get_channel_list", to: 'feeds#get_channel_list' - get "/feeds/get_feed_list", to: 'feeds#get_feed_list' + post "/feeds/get_feed_list", to: 'feeds#get_feed_list' get "/feeds/get_category_list", to: 'feeds#get_category_list' post "/feeds/subscribe", to: 'feeds#subscribe' post "/feeds/unsubscribe", to: 'feeds#unsubscribe' diff --git a/lib/feed_model/cache.rb b/lib/feed_model/cache.rb index cc4d5d4..d2b1d50 100644 --- a/lib/feed_model/cache.rb +++ b/lib/feed_model/cache.rb @@ -16,6 +16,7 @@ module FeedModel tmp = SiteFeedAnnc.new(feed_id: feed.id) end tmp.all_contents_for_feed = tmp.cache_annc(true) + feed.sync_data_to_annc(tmp) tmp.save if feed.feeds_model && feed.feeds_finish_callback feeds_model = feed.feeds_model.constantize diff --git a/lib/tasks/feeds_tasks.rake b/lib/tasks/feeds_tasks.rake index 7520137..870614a 100644 --- a/lib/tasks/feeds_tasks.rake +++ b/lib/tasks/feeds_tasks.rake @@ -3,40 +3,9 @@ namespace :feeds_module do task :make_cache,[:url] => :environment do |task,args| main_directory = File.join("#{Rails.root}","public","site_feeds") FileUtils.mkdir_p(main_directory) if !File.exists?(main_directory) - SiteFeedAnnc + SiteFeedAnnc #Don't remove this, this will auto require SiteFeedAnnc SiteFeed.enabled.each do |site_feed| - feed_directory = File.join(main_directory.to_s, site_feed.id.to_s) - FileUtils.mkdir_p(feed_directory) if !File.exists?(feed_directory) - uri = URI(site_feed.feed_url) - res_net = Net::HTTP.start(uri.host, uri.port, - :use_ssl => uri.scheme == 'https', - open_timeout: 60,read_timeout: 60, - verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| - req = Net::HTTP::Get.new(uri) - http.request(req) - end - if (res_net.code == '301' rescue true) - begin - uri = URI(res_net['location']) - res_net = Net::HTTP.start(uri.host, uri.port, - :use_ssl => uri.scheme == 'https', - open_timeout: 60,read_timeout: 60, - verify_mode: OpenSSL::SSL::VERIFY_NONE) do |http| - req = Net::HTTP::Get.new(uri) - http.request(req) - end - rescue => e - res_net = '' - puts e - puts e.backtrace - end - end - res = res_net.body rescue '' - File.open(File.join(feed_directory.to_s,site_feed.feed_uid + ".json"),"w") do |file| - res = res.force_encoding("utf-8") rescue '' - file.write(res) - end - site_feed.save + FeedModel::Cache.recreate_annc_cache(site_feed) end end end \ No newline at end of file