Orbit/lib/tasks/ntu_mongo_files.rake

168 lines
5.0 KiB
Ruby
Raw Permalink Normal View History

# encoding: utf-8
namespace :mongo_files do
# Maps an underscored model name to the attribute that holds its stored file.
# clean_unused resolves every key with `classify.constantize` and reads
# `item.send(attribute).url` (plus `.thumb.url` when available) to learn which
# GridFS filenames are still referenced.
# Frozen so that no task can alter the lookup table by accident.
MODELS = {
  'ad_image' => 'file',
  'archive_file_multiple' => 'file',
  'asset' => 'data',
  'bulletin' => 'image',
  'bulletin_file' => 'file',
  'design' => 'zip_file',
  'design_file' => 'file',
  'gallery_image' => 'file',
  'image' => 'file',
  'lab_file' => 'file',
  'location_info' => 'file',
  'preview_file' => 'file',
  'project_file' => 'file',
  'research_file' => 'file',
  'site' => 'default_image',
  'stylesheet' => 'file_orig',
  'user' => 'avatar',
  'writing_book_file' => 'file',
  'writing_conference_file' => 'file',
  'writing_journal_file' => 'file',
  'writing_patent_file' => 'file'
}.freeze

# Subset of the MODELS keys. Not referenced by any code visible in this file.
# NOTE(review): presumably the models whose uploader generates image
# versions -- confirm before relying on it.
IMAGE_UPLOADER_MODELS = %w[ad_image bulletin image site].freeze
# Full GridFS clean-up. Runs, in this order: clean_duplicates, clean_unused,
# remove_objects.
# The two GridFS collections are stored in instance variables because those
# helper methods read @files and @chunks directly.
task clean: :environment do
  @files, @chunks = ['fs.files', 'fs.chunks'].map { |name| Mongoid.database[name] }
  [:clean_duplicates, :clean_unused, :remove_objects].each { |step| send(step) }
end
# Removes every GridFS file (and its chunks) whose filename is not referenced
# by any record of the models listed in MODELS.
#
# Reads the fs.files / fs.chunks collections from @files and @chunks, which
# the :clean task assigns before calling this method. Prints three counters
# with `p`: stored files, filenames referenced by models, unmatched files.
def clean_unused
  # Keep the _id next to each filename so deleting needs no second lookup.
  stored = @files.find().map { |doc| [doc['filename'], doc['_id']] }
  p "# of files in db: #{stored.size}"

  # get_url_and_thumb already strips the '/gridfs/' prefix and skips the
  # 'sign-in-logo.png' placeholder, for both the file and its thumb version.
  referenced = []
  MODELS.each do |model, attribute|
    model.classify.constantize.all.each do |item|
      referenced.concat(get_url_and_thumb(item.send(attribute)))
    end
  end
  p "# of files from uploaders: #{referenced.size}"

  # Hash lookup keeps the membership test cheap for large collections.
  in_use = {}
  referenced.each { |name| in_use[name] = true }
  orphans = stored.reject { |name, _id| in_use.key?(name) }
  p "# of unmatched files: #{orphans.size}"

  orphans.each do |_name, id|
    @files.remove('_id' => id)
    @chunks.remove('files_id' => id)
  end
end
# Removes duplicate GridFS files: for every filename stored more than once,
# only the first document in descending uploadDate order (the most recent
# upload) is kept; the remaining documents and their chunks are deleted.
#
# Reads @files / @chunks (assigned by the :clean task) and prints the total
# file count and the number of duplicate ids with `p`.
def clean_duplicates
  p "# of files in db: #{@files.count}"

  # Newest first, so the first id recorded for a filename is the one to keep.
  ids_by_name = @files.find().sort({ uploadDate: -1 }).each_with_object({}) do |doc, grouped|
    (grouped[doc['filename']] ||= []) << doc['_id']
  end

  # drop(1) skips the newest id and yields [] for filenames stored only once.
  stale_ids = ids_by_name.values.flat_map { |ids| ids.drop(1) }
  p "# of duplicate ids to delete: #{stale_ids.size}"

  stale_ids.each do |id|
    @files.remove('_id' => id)
    @chunks.remove('files_id' => id)
  end
end
# Prints one line per GridFS file in the form "filename - length", ordered by
# filename (ties broken by length), followed by the number of files.
# Swap the pair to [length, filename] to order the listing by size instead.
task file_size: :environment do
  entries = Mongoid.database['fs.files'].find().map do |doc|
    [doc['filename'], doc['length']]
  end
  entries.sort.each { |name, length| p "#{name} - #{length}" }
  p entries.size
end
# One-off purge: empties the writing_journals and writing_journal_files
# collections, then deletes every GridFS file (and its chunks) whose filename
# contains writing_journal_file, news_bulletin_file or image/image.
#
# Reads @files / @chunks (assigned by the :clean task) and prints the number
# of matching files with `p`.
def remove_objects
  # Destroy writing_journals and writing_journal_files
  Mongoid.database['writing_journals'].remove()
  Mongoid.database['writing_journal_files'].remove()

  # Collect the ids directly so deleting needs no lookup by filename.
  doomed_ids = @files.find().each_with_object([]) do |doc, ids|
    ids << doc['_id'] if doc['filename'] =~ %r{writing_journal_file|news_bulletin_file|image/image}
  end
  p doomed_ids.size

  doomed_ids.each do |id|
    @files.remove('_id' => id)
    @chunks.remove('files_id' => id)
  end
end
# Collects the stored filenames (file and thumb, via get_url_and_thumb) of
# every image of every Design and prints how many there are.
# Nothing is deleted here; the method only reports the count, which is also
# its return value (`p` returns its argument).
#
# An earlier, commented-out variant gathered the same data for ad banners.
# It can be revived in the same shape:
#   AdBanner.all.flat_map do |banner|
#     banner.ad_images.flat_map { |image| get_url_and_thumb(image.file) }
#   end
def remove_unlinked
  # Each filename is collected exactly once. Seeding the inner fold with the
  # running list and then appending its result would count earlier designs'
  # files again for every following design.
  name_array = Design.all.flat_map do |design|
    design.images.flat_map { |image| get_url_and_thumb(image.file) }
  end
  p name_array.size
end
# Returns the stored filenames behind +file+: its own url and, when it can be
# read, the url of its thumb version. Each has the '/gridfs/' prefix removed.
# Missing urls and the 'sign-in-logo.png' placeholder are left out, so the
# result holds zero, one or two strings.
def get_url_and_thumb(file)
  main_url = file.url
  # Any error while reading the thumb (for example no thumb version at all)
  # simply means there is no thumb filename to report.
  thumb_url = begin
    file.thumb.url
  rescue StandardError
    nil
  end

  [main_url, thumb_url]
    .reject { |candidate| !candidate || candidate.eql?('sign-in-logo.png') }
    .map { |candidate| candidate.gsub('/gridfs/', '') }
end
# Destroys the Bulletin records selected by `create_user_id: nil`.
# NOTE(review): the task name suggests these are announcements imported from
# RSS, which would have no creating user -- confirm before running.
task delete_rss_ann: :environment do
  authorless_bulletins = Bulletin.where(create_user_id: nil)
  authorless_bulletins.destroy
end
end