176 lines
6.3 KiB
Ruby
176 lines
6.3 KiB
Ruby
require 'uri'
|
|
|
|
# Resolves +rel+, a link found on the page at +url+, into an absolute URL.
#
# Cases, in order:
# * +rel+ already carries a scheme ("http://...")  -> returned unchanged
# * protocol-relative reference ("//host/path")    -> base scheme is prefixed
# * root-relative reference ("/path")              -> appended to the base origin
# * anything else                                  -> appended to the directory
#                                                     of the base page
#
# The result is assembled by plain string concatenation, so references that
# contain unescaped non-ASCII characters (e.g. image file names) pass through
# untouched. Dot segments ("../") are not normalised.
#
# @param rel [String] href/src value taken from the page
# @param url [String] absolute URL of the page the reference was found on
# @return [String] absolute URL
def full_url(rel, url)
  # \A anchors at the start of the string; ^ would also match after a newline.
  return rel if rel.match?(%r{\A\w*://})

  base = URI(url)
  return "#{base.scheme}:#{rel}" if rel.start_with?('//')

  origin = "#{base.scheme}://#{base.host}"
  # Keep an explicit, non-default port (e.g. :8080) in the resolved URL.
  origin += ":#{base.port}" if base.port && base.port != base.default_port
  return "#{origin}#{rel}" if rel.start_with?('/')

  # Everything up to and including the last "/" of the base path; a base
  # without any directory part resolves against the site root.
  directory = base.path[%r{\A.*/}] || '/'
  "#{origin}#{directory}#{rel}"
end
|
|
|
|
# Downloads an album index page and extracts the album list from the first
# "table.baseTB" that contains both album title links and cover images.
#
# Every album link found is followed immediately via catch_album_image_data,
# so one call triggers one extra download per album.
#
# @param url [String] absolute URL of the index page
# @param html_flag [Boolean] when true, the parsed document is appended to the
#   result so the caller can inspect it (e.g. for pagination links)
# @return [Array, nil] [album_images, titles, img_url] (plus the parsed
#   document when +html_flag+ is set), or nil when no table qualifies
def catch_data(url, html_flag = false)
  # The URL is passed as its own argv entry instead of being interpolated into
  # a shell command line, so quotes or shell metacharacters in a scraped link
  # are never interpreted by a shell.
  body = IO.popen(['curl', url], &:read)
  html = Nokogiri.HTML(body)

  html.css('table.baseTB').each do |table|
    title_links = table.css('span.ptname a')
    img_url = table.css('a.item-image img').map { |img| full_url(img.attr('src'), url) }
    next unless title_links.count > 0 && img_url.count > 0

    album_images = title_links.map { |link| catch_album_image_data(full_url(link.attr('href'), url)) }
    titles = title_links.map { |link| link.text }

    result = [album_images, titles, img_url]
    result << html if html_flag
    return result
  end

  nil
end
|
|
# Downloads a single album page and returns its images as [alt, link] pairs.
#
# Two page layouts are recognised, tried in this order:
# 1. Script-driven galleries that assign imgLink["N"] = "..." and
#    imgAlt["N"] = "..." inside <script> tags.
# 2. Plain <img> tags inside one of the known gallery containers.
#
# Prints 'type1_out' / 'type2_out' to stdout to show which layout matched.
#
# @param url [String] absolute URL of the album page
# @return [Array<Array>, nil] list of [alt, absolute_image_url] pairs, or nil
#   when neither layout matches
def catch_album_image_data(url)
  # argv form: the URL never passes through a shell, so a quote inside a
  # scraped link cannot break out of the command.
  body = IO.popen(['curl', url], &:read)
  html = Nokogiri.HTML(body)

  scripts = html.css('script').map { |node| node.content }.join
  # Captures per match: [link_key, link, nil, nil] or [nil, nil, alt_key, alt].
  type1_out = scripts.scan(/imgLink\["(\d+)"\] *= *["']((?:(?![\t\n;]).)+)["']|imgAlt\["(\d+)"\] *= *["']((?:(?![\t\n;]).)*)["']/m)
  if type1_out.count > 0
    puts 'type1_out'
    # Pair each link with the alt that shares its index key rather than by
    # position, so a skipped (blank) link or a missing alt cannot shift every
    # following pair. Alts are queued per key so repeated keys still pair up
    # in order of appearance.
    alts_by_key = Hash.new { |hash, key| hash[key] = [] }
    links = []
    type1_out.each do |link_key, link, alt_key, alt|
      if link_key
        # Whitespace-only links are skipped (plain-Ruby stand-in for blank?).
        links << [link_key, full_url(link, url)] unless link.match?(/\A[[:space:]]*\z/)
      else
        alts_by_key[alt_key] << alt
      end
    end
    return links.map { |key, link| [alts_by_key[key].shift, link] }
  end

  type2_out = html.css('#album_show img,ul.ad-thumb-list img,#slider img,div.photoFlow ul img')
                  .map { |img| [img.attr('alt'), full_url(img.attr('src'), url)] }
  if type2_out.count > 0
    puts 'type2_out'
    return type2_out
  end

  nil
end
|
|
# Scrapes an album index starting at +url+ and follows its pagination.
#
# The first page is fetched through catch_data with the parsed document
# requested; the last pagination link ('.pagenum' or '.pager-last a') gives
# the highest page number, read from either "?page=N" or "-N.php". Pages
# 2..N are then fetched by substituting the page number into that link.
#
# A page on which catch_data finds nothing (nil) contributes nothing and does
# not abort the run.
#
# @param url [String] absolute URL of the first index page
# @return [Array(Array, Array, Array)] [all_album_images, all_titles,
#   all_img_url]; index i of each array describes the same album
def get_data_from_index_url(url)
  all_album_images = []
  all_titles = []
  all_img_url = []

  # Appends one page's result; any part may be nil when the page had no data.
  merge_page = lambda do |album_images, titles, img_url|
    all_album_images.concat(album_images) if album_images
    all_titles.concat(titles) if titles
    all_img_url.concat(img_url) if img_url
  end

  album_images, titles, img_url, html = catch_data(url, true)
  merge_page.call(album_images, titles, img_url)

  # html is nil when the first page yielded nothing; then there is no
  # pagination to follow.
  last_link = html&.css('.pagenum,.pager-last a')&.last
  last_page = last_link&.attr('href')

  page_pattern = /\?page=\d+|-\d+\.php/
  page_token = last_page && full_url(last_page, url)[page_pattern]

  if page_token
    last_num = page_token[/\d+/].to_i
    (2..last_num).each do |page|
      page_href = last_page.gsub(page_pattern) { |token| token.gsub(/\d+/, page.to_s) }
      # The pagination href may be relative (e.g. "activities.php?page=2"),
      # so resolve it against the index URL before fetching.
      album_images, titles, img_url = catch_data(full_url(page_href, url))
      merge_page.call(album_images, titles, img_url)
    end
  end

  [all_album_images, all_titles, all_img_url]
end
|
|
# Thin wrapper around get_data_from_index_url: returns its result for +url+
# unchanged.
#
# @param url [String] URL of the album index page
# @return [Array] whatever get_data_from_index_url returns
def create_album(url)
  get_data_from_index_url(url)
end
|
|
# Album index page to scrape.
url = 'https://audslp.asia.edu.tw/files/131-1080-20-1.php?Lang=zh-tw'
# Runs the scrape when the file is loaded; d is
# [album_images, titles, cover_image_urls].
d = get_data_from_index_url(url)
|
|
"""
|
|
d[0][i] => [alt,link]
|
|
d[1][i] => album title
|
|
d[2][i] => 封面img_url
|
|
(改catch_album_image_data)內頁缺card版本(9)
|
|
(改catch_data)封面差 7,8,9
|
|
連結在line
|
|
缺album描述
|
|
類別: $('.module.module-path a.path') or $('#page-title')
|
|
已完成: 1,2,3( 澳門聾人協會演講[無資料], 106.09.24-25 Orientation Camp[無資料]),
|
|
"""
|
|
|
|
|
|
|
|
|
|
# Fetches and parses another index page (presumably scratch code for
# inspecting its markup); the parsed document is left in html.
url = 'https://pt.asia.edu.tw/files/131-1116-45-1.php?Lang=zh-tw'
html = Nokogiri.HTML(%x[curl '#{url}'])
|
|
|
|
|
|
<span class="ptname "><a href="https://mlsb.asia.edu.tw/files/132-1119-1824,r43-1.php?Lang=zh-tw">109-1新生茶會</a></span>
|
|
|
|
|
|
1~6 10 11
|
|
$('table.baseTB').filter(function(i,v){var tp=$(v).find('span.ptname').length>0&$(v).find('img').length>0;return tp})
|
|
|
|
8
|
|
$('ul.list').filter(function(i,v){var tp=$(v).find('div.photo').length>0;return tp})
|
|
|
|
9
|
|
$('div.row').filter(function(i,v){var tp=$(v).find('div.card').length>0&$(v).find('div.row').length==0;return tp})
|
|
|
|
頁數:$('.pagenum')
|
|
=> -頁數.php
|
|
|
|
|
|
7 $('div#content-body') page:$('.item-list')
|
|
=> <li class="pager-last last"><a title="Go to last page" href="activities.php?page=2">last »</a></li>
|
|
|
|
|
|
|
|
內頁圖片:
|
|
1. imgLink\[.*\] = ".*"
|
|
變數
|
|
2. <div id="slider" class="nivoSlider">
|
|
<img alt="" src="/ezfiles/67/1067/gallery/52/1552/gallery_1552_1859031_76526.jpg" data-thumb="/ezfiles/67/1067/gallery/52/1552/gallery_1552_1859031_76526.jpg">
|
|
</div>
|
|
3. 同1
|
|
4. 同2
|
|
5. 同1
|
|
6. <ul class="ad-thumb-list">
|
|
|
|
<li>
|
|
<a href="/ezfiles/23/1023/gallery/29/1729/gallery_1729_5018869_62580.jpg" class="ad-thumb0 ad-active"><img title="" longdesc="/ezfiles/23/1023/gallery/29/1729/gallery_1729_5018869_62580.jpg" src="/ezfiles/23/1023/gallery/29/1729/gallery_1729_5018869_62580.jpg" alt="" style="opacity: 1;"></a>
|
|
</li>
|
|
</ul>
|
|
|
|
7. <div class="photoFlow">
|
|
<ul>
|
|
<li><a target="_blank" style="border: none;"><img id="0" src="images/院系特色成果展暨自辦大博會_0.jpg" alt=""></a></li>
|
|
</ul>
|
|
</div>
|
|
8. <ul id="album_show">
|
|
<li>
|
|
<div class="photo_block">
|
|
<div class="photo2">
|
|
<div style="background-image:url('/uploads/album/2021-03-05/da494b267bfa843213fc8c18edbf79e8.jpg')" class="img">
|
|
<a href="/uploads/album/2021-03-05/da494b267bfa843213fc8c18edbf79e8.jpg" title="" style="opacity:0">
|
|
<img src="/uploads/album/2021-03-05/da494b267bfa843213fc8c18edbf79e8.jpg" alt="" style="visibility: visible; opacity: 1;">
|
|
</a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</li>
|
|
</ul>
|
|
9. <div class="row">
|
|
<div class="card card-flip h-100">
|
|
<div class="card-front">
|
|
<div class="card-body" style="padding:0;">
|
|
<img class="img-fluid card-img-top" src="https://vcd.asia.edu.tw/uploads/ugm_theme_asia/prod/52962_107.png" alt="「眷戀 光復」得獎作品">
|
|
<div class="card-footer" style="background-color: #fff;">
|
|
<h3 class="card-title">「眷戀 光復」得獎作品</h3>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
<div class="card-back">
|
|
<div class="card-body" style="background-color: #587ba2;color:white;">
|
|
<h3 class="card-title">「眷戀 光復」得獎作品</h3>
|
|
<div class="mt-1">
|
|
作品名稱:罩得著<br>設計者:視傳大四學生
|
|
</div>
|
|
<a href="index.php?op=show_prod&sn=107" class="btn btn-secondary view ">
|
|
<i class="fa fa-link" aria-hidden="true"></i>
|
|
</a>
|
|
<a href="https://vcd.asia.edu.tw/uploads/ugm_theme_asia/prod/52962_107.png" data-toggle="lightbox" data-gallery="gallery" class="btn btn-secondary preview" data-title="「眷戀 光復」得獎作品" data-type="image">
|
|
<i class="fa fa-eye" aria-hidden="true"></i>
|
|
</a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
10. 同6
|
|
11. 同6 |