filebrowser/backend/search/indexing.go

280 lines
7.3 KiB
Go
Raw Normal View History

2023-06-15 01:08:09 +00:00
package search
import (
"log"
2023-08-12 16:30:41 +00:00
"math/rand"
"mime"
2023-06-15 01:08:09 +00:00
"os"
"path/filepath"
2023-06-18 15:04:31 +00:00
"sort"
2023-06-15 01:08:09 +00:00
"strings"
2023-06-16 17:29:43 +00:00
"sync"
2023-06-15 01:08:09 +00:00
"time"
)
2023-06-16 17:29:43 +00:00
// Package-level state shared by the indexer and the search entry point.
var (
	// sessionInProgress maps a session identifier to the hash of the most
	// recent search started for that session.
	sessionInProgress sync.Map

	// rootPath is the directory that gets indexed; indexed paths are stored
	// relative to it.
	rootPath = "/srv"

	// indexes holds the indexed relative paths under the keys "dirs" and
	// "files".
	indexes map[string][]string

	// mutex is write-locked while an entry is added to indexes and
	// read-locked for the duration of a search.
	mutex sync.RWMutex

	// lastIndexed is the start time of the most recent completed indexing
	// pass; directories modified before it are skipped when re-indexing.
	lastIndexed time.Time
)
2023-06-15 01:08:09 +00:00
func InitializeIndex(intervalMinutes uint32) {
2023-06-15 01:08:09 +00:00
// Initialize the indexes map
2023-06-16 17:29:43 +00:00
indexes = make(map[string][]string)
2023-08-17 21:46:49 +00:00
indexes["dirs"] = []string{}
indexes["files"] = []string{}
2023-06-16 17:29:43 +00:00
var numFiles, numDirs int
log.Println("Indexing files...")
2023-06-16 17:29:43 +00:00
lastIndexedStart := time.Now()
2023-06-15 01:08:09 +00:00
// Call the function to index files and directories
2023-06-18 15:04:31 +00:00
totalNumFiles, totalNumDirs, err := indexFiles(rootPath, &numFiles, &numDirs)
2023-06-15 01:08:09 +00:00
if err != nil {
log.Fatal(err)
}
2023-06-16 17:29:43 +00:00
lastIndexed = lastIndexedStart
go indexingScheduler(intervalMinutes)
log.Println("Successfully indexed files.")
2023-06-18 15:04:31 +00:00
log.Println("Files found :", totalNumFiles)
log.Println("Directories found :", totalNumDirs)
2023-06-15 01:08:09 +00:00
}
func indexingScheduler(intervalMinutes uint32) {
2023-06-18 15:04:31 +00:00
log.Printf("Indexing scheduler will run every %v minutes", intervalMinutes)
for {
time.Sleep(time.Duration(intervalMinutes) * time.Minute)
2023-06-16 17:29:43 +00:00
var numFiles, numDirs int
lastIndexedStart := time.Now()
2023-06-18 15:04:31 +00:00
totalNumFiles, totalNumDirs, err := indexFiles(rootPath, &numFiles, &numDirs)
if err != nil {
log.Fatal(err)
}
2023-06-16 17:29:43 +00:00
lastIndexed = lastIndexedStart
if totalNumFiles+totalNumDirs > 0 {
log.Println("re-indexing found changes and updated the index.")
}
2023-06-15 01:08:09 +00:00
}
}
// Define a function to recursively index files and directories
2023-06-18 15:04:31 +00:00
func indexFiles(path string, numFiles *int, numDirs *int) (int, int, error) {
2023-06-15 01:08:09 +00:00
// Check if the current directory has been modified since last indexing
dir, err := os.Open(path)
if err != nil {
// directory must have been deleted, remove from index
delete(indexes, path)
}
defer dir.Close()
dirInfo, err := dir.Stat()
if err != nil {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, err
2023-06-15 01:08:09 +00:00
}
// Compare the last modified time of the directory with the last indexed time
2023-06-16 17:29:43 +00:00
if dirInfo.ModTime().Before(lastIndexed) {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, nil
2023-06-15 01:08:09 +00:00
}
// Read the directory contents
files, err := dir.Readdir(-1)
if err != nil {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, err
2023-06-15 01:08:09 +00:00
}
// Iterate over the files and directories
for _, file := range files {
if file.IsDir() {
2023-06-16 17:29:43 +00:00
*numDirs++
2023-07-13 02:23:29 +00:00
addToIndex(path, file.Name(), true)
indexFiles(path+"/"+file.Name(), numFiles, numDirs) // recursive
} else {
*numFiles++
addToIndex(path, file.Name(), false)
2023-06-15 01:08:09 +00:00
}
}
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, nil
2023-06-15 01:08:09 +00:00
}
2023-07-13 02:23:29 +00:00
func addToIndex(path string, fileName string, isDir bool) {
2023-06-16 17:29:43 +00:00
mutex.Lock()
defer mutex.Unlock()
2023-06-18 15:04:31 +00:00
path = strings.TrimPrefix(path, rootPath+"/")
2023-08-12 19:41:59 +00:00
path = strings.TrimSuffix(path, "/")
2023-08-17 21:46:49 +00:00
adjustedPath := path + "/" + fileName
if path == rootPath {
adjustedPath = fileName
}
2023-07-13 02:23:29 +00:00
if isDir {
2023-08-17 21:46:49 +00:00
indexes["dirs"] = append(indexes["dirs"], adjustedPath)
2023-08-12 16:30:41 +00:00
} else {
2023-08-17 21:46:49 +00:00
indexes["files"] = append(indexes["files"], adjustedPath)
2023-06-15 01:08:09 +00:00
}
}
2023-08-17 21:46:49 +00:00
func SearchAllIndexes(search string, scope string, sourceSession string) ([]string, map[string]map[string]bool) {
runningHash := generateRandomHash(4)
sessionInProgress.Store(sourceSession, runningHash) // Store the value in the sync.Map
2023-08-17 21:46:49 +00:00
2023-06-18 15:04:31 +00:00
searchOptions := ParseSearch(search)
2023-06-16 17:29:43 +00:00
mutex.RLock()
defer mutex.RUnlock()
2023-07-13 02:23:29 +00:00
fileListTypes := make(map[string]map[string]bool)
var matching []string
2023-08-17 21:46:49 +00:00
maximum := 100
2023-06-18 15:04:31 +00:00
for _, searchTerm := range searchOptions.Terms {
2023-07-04 23:55:15 +00:00
if searchTerm == "" {
continue
}
2023-06-18 15:04:31 +00:00
// Iterate over the indexes
2023-08-17 21:46:49 +00:00
for _, i := range []string{"dirs", "files"} {
isdir := i == "dirs"
count := 0
for _, path := range indexes[i] {
value, found := sessionInProgress.Load(sourceSession)
if !found || value != runningHash {
return []string{}, map[string]map[string]bool{}
}
if count > maximum {
break
}
pathName := scopedPathNameFilter(path, scope)
if pathName == "" {
continue
}
matches, fileType := containsSearchTerm(path, searchTerm, *searchOptions, isdir)
if !matches {
continue
}
2023-08-17 21:46:49 +00:00
if isdir {
pathName = pathName + "/"
}
matching = append(matching, pathName)
fileListTypes[pathName] = fileType
count++
2023-07-13 02:23:29 +00:00
}
2023-06-15 01:08:09 +00:00
}
}
2023-06-16 17:29:43 +00:00
// Sort the strings based on the number of elements after splitting by "/"
sort.Slice(matching, func(i, j int) bool {
parts1 := strings.Split(matching[i], "/")
parts2 := strings.Split(matching[j], "/")
2023-06-16 17:29:43 +00:00
return len(parts1) < len(parts2)
})
return matching, fileListTypes
2023-06-15 01:08:09 +00:00
}
2023-08-17 21:46:49 +00:00
// scopedPathNameFilter returns pathName with the scope prefix removed, or ""
// when pathName does not start with scope. A single leading "/" on scope is
// ignored before comparing.
func scopedPathNameFilter(pathName string, scope string) string {
	prefix := strings.TrimPrefix(scope, "/")
	if !strings.HasPrefix(pathName, prefix) {
		return ""
	}
	return pathName[len(prefix):]
}
2023-08-12 16:30:41 +00:00
func containsSearchTerm(pathName string, searchTerm string, options SearchOptions, isDir bool) (bool, map[string]bool) {
conditions := options.Conditions
2023-08-12 16:30:41 +00:00
path := getLastPathComponent(pathName)
// Convert to lowercase once
lowerSearchTerm := searchTerm
if !conditions["exact"] {
2023-08-12 19:41:59 +00:00
path = strings.ToLower(path)
2023-08-12 16:30:41 +00:00
lowerSearchTerm = strings.ToLower(searchTerm)
}
2023-08-12 19:41:59 +00:00
if strings.Contains(path, lowerSearchTerm) {
2023-08-12 16:30:41 +00:00
// Reuse the fileTypes map and clear its values
fileTypes := map[string]bool{
"audio": false,
"image": false,
"video": false,
"doc": false,
"archive": false,
"dir": false,
}
// Calculate fileSize only if needed
var fileSize int64
if conditions["larger"] || conditions["smaller"] {
fileSize = getFileSize(pathName)
}
matchesAllConditions := true
2023-08-12 19:41:59 +00:00
extension := filepath.Ext(path)
2023-08-12 16:30:41 +00:00
mimetype := mime.TypeByExtension(extension)
fileTypes["audio"] = strings.HasPrefix(mimetype, "audio")
fileTypes["image"] = strings.HasPrefix(mimetype, "image")
fileTypes["video"] = strings.HasPrefix(mimetype, "video")
fileTypes["doc"] = isDoc(extension)
fileTypes["archive"] = isArchive(extension)
fileTypes["dir"] = isDir
for t, v := range conditions {
if t == "exact" {
continue
}
var matchesCondition bool
switch t {
2023-08-12 16:30:41 +00:00
case "larger":
2023-08-12 19:41:59 +00:00
matchesCondition = fileSize > int64(options.LargerThan)*1000000
2023-08-12 16:30:41 +00:00
case "smaller":
2023-08-12 19:41:59 +00:00
matchesCondition = fileSize < int64(options.SmallerThan)*1000000
2023-08-12 16:30:41 +00:00
default:
matchesCondition = v == fileTypes[t]
}
2023-08-12 16:30:41 +00:00
if !matchesCondition {
2023-07-31 22:20:14 +00:00
matchesAllConditions = false
}
}
2023-07-31 22:20:14 +00:00
return matchesAllConditions, fileTypes
}
2023-08-12 16:30:41 +00:00
// Clear variables and return
return false, map[string]bool{}
2023-07-13 02:23:29 +00:00
}
func isDoc(extension string) bool {
for _, typefile := range documentTypes {
if extension == typefile {
return true
2023-07-04 23:55:15 +00:00
}
2023-07-13 02:23:29 +00:00
}
return false
}
func getFileSize(filepath string) int64 {
2023-08-12 16:30:41 +00:00
fileInfo, err := os.Stat(rootPath + "/" + filepath)
if err != nil {
return 0
}
return fileInfo.Size()
}
2023-07-13 02:23:29 +00:00
func isArchive(extension string) bool {
for _, typefile := range compressedFile {
if extension == typefile {
return true
2023-07-04 23:55:15 +00:00
}
2023-07-13 02:23:29 +00:00
}
return false
2023-06-15 01:08:09 +00:00
}
2023-06-18 15:04:31 +00:00
2023-06-15 01:08:09 +00:00
// getLastPathComponent returns the final element of path as computed by
// filepath.Base.
func getLastPathComponent(path string) string {
	last := filepath.Base(path)
	return last
}
// generateRandomHash returns a string of length characters drawn from the
// lowercase letters a-z and the digits 0-9, using the math/rand global
// generator. A non-positive length yields the empty string.
//
// The result is not cryptographically secure.
func generateRandomHash(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
	if length <= 0 {
		return ""
	}
	// The global generator is used as-is: reseeding it from its own output
	// on every call adds no randomness.
	result := make([]byte, length)
	for i := range result {
		result[i] = charset[rand.Intn(len(charset))]
	}
	return string(result)
}
// stringExistsInArray reports whether target is equal to any element of
// strings.
func stringExistsInArray(target string, strings []string) bool {
	for i := 0; i < len(strings); i++ {
		if strings[i] == target {
			return true
		}
	}
	return false
}