filebrowser/src/backend/search/indexing.go

231 lines
6.0 KiB
Go
Raw Normal View History

2023-06-15 01:08:09 +00:00
package search
import (
"log"
"os"
"path/filepath"
2023-06-18 15:04:31 +00:00
"sort"
2023-06-15 01:08:09 +00:00
"strings"
2023-06-16 17:29:43 +00:00
"sync"
2023-06-15 01:08:09 +00:00
"time"
2023-07-04 23:55:15 +00:00
"mime"
2023-06-15 01:08:09 +00:00
)
2023-06-16 17:29:43 +00:00
// Package-level state shared by the indexer and the search functions.
var (
	// rootPath is the directory whose tree gets indexed.
	rootPath = "/srv"
	// indexes maps a directory key (relative to rootPath, "/" for the root
	// itself) to the names of the entries recorded for that directory.
	indexes map[string][]string
	// mutex guards indexes.
	mutex sync.RWMutex
	// lastIndexed is the start time of the most recent successful indexing pass.
	lastIndexed time.Time
)
2023-06-15 01:08:09 +00:00
func InitializeIndex(intervalMinutes uint32) {
2023-06-15 01:08:09 +00:00
// Initialize the indexes map
2023-06-16 17:29:43 +00:00
indexes = make(map[string][]string)
var numFiles, numDirs int
log.Println("Indexing files...")
2023-06-16 17:29:43 +00:00
lastIndexedStart := time.Now()
2023-06-15 01:08:09 +00:00
// Call the function to index files and directories
2023-06-18 15:04:31 +00:00
totalNumFiles, totalNumDirs, err := indexFiles(rootPath, &numFiles, &numDirs)
2023-06-15 01:08:09 +00:00
if err != nil {
log.Fatal(err)
}
2023-06-16 17:29:43 +00:00
lastIndexed = lastIndexedStart
go indexingScheduler(intervalMinutes)
log.Println("Successfully indexed files.")
2023-06-18 15:04:31 +00:00
log.Println("Files found :", totalNumFiles)
log.Println("Directories found :", totalNumDirs)
2023-06-15 01:08:09 +00:00
}
func indexingScheduler(intervalMinutes uint32) {
2023-06-18 15:04:31 +00:00
log.Printf("Indexing scheduler will run every %v minutes", intervalMinutes)
for {
time.Sleep(time.Duration(intervalMinutes) * time.Minute)
2023-06-16 17:29:43 +00:00
var numFiles, numDirs int
lastIndexedStart := time.Now()
2023-06-18 15:04:31 +00:00
totalNumFiles, totalNumDirs, err := indexFiles(rootPath, &numFiles, &numDirs)
if err != nil {
log.Fatal(err)
}
2023-06-16 17:29:43 +00:00
lastIndexed = lastIndexedStart
if totalNumFiles+totalNumDirs > 0 {
log.Println("re-indexing found changes and updated the index.")
}
2023-06-15 01:08:09 +00:00
}
}
// Define a function to recursively index files and directories
2023-06-18 15:04:31 +00:00
func indexFiles(path string, numFiles *int, numDirs *int) (int, int, error) {
2023-06-15 01:08:09 +00:00
// Check if the current directory has been modified since last indexing
dir, err := os.Open(path)
if err != nil {
// directory must have been deleted, remove from index
delete(indexes, path)
}
defer dir.Close()
dirInfo, err := dir.Stat()
if err != nil {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, err
2023-06-15 01:08:09 +00:00
}
// Compare the last modified time of the directory with the last indexed time
2023-06-16 17:29:43 +00:00
if dirInfo.ModTime().Before(lastIndexed) {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, nil
2023-06-15 01:08:09 +00:00
}
// Read the directory contents
files, err := dir.Readdir(-1)
if err != nil {
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, err
2023-06-15 01:08:09 +00:00
}
// Iterate over the files and directories
for _, file := range files {
if file.IsDir() {
2023-06-16 17:29:43 +00:00
*numDirs++
2023-06-18 15:04:31 +00:00
indexFiles(path+"/"+file.Name(), numFiles, numDirs)
2023-06-15 01:08:09 +00:00
}
2023-06-16 17:29:43 +00:00
*numFiles++
addToIndex(path, file.Name())
2023-06-15 01:08:09 +00:00
}
2023-06-18 15:04:31 +00:00
return *numFiles, *numDirs, nil
2023-06-15 01:08:09 +00:00
}
2023-06-16 17:29:43 +00:00
func addToIndex(path string, fileName string) {
mutex.Lock()
defer mutex.Unlock()
2023-06-18 15:04:31 +00:00
path = strings.TrimPrefix(path, rootPath+"/")
path = strings.TrimSuffix(path, "/")
2023-06-16 17:29:43 +00:00
if path == rootPath {
path = "/"
}
2023-06-15 01:08:09 +00:00
info, exists := indexes[path]
if !exists {
2023-06-16 17:29:43 +00:00
info = []string{}
2023-06-15 01:08:09 +00:00
}
2023-06-16 17:29:43 +00:00
info = append(info, fileName)
2023-06-15 01:08:09 +00:00
indexes[path] = info
}
2023-06-18 15:04:31 +00:00
func SearchAllIndexes(search string, scope string) ([]string, []string) {
searchOptions := ParseSearch(search)
2023-06-16 17:29:43 +00:00
mutex.RLock()
defer mutex.RUnlock()
var matchingFiles []string
var matchingDirs []string
2023-06-18 15:04:31 +00:00
maximum := 100
count := 0
for _, searchTerm := range searchOptions.Terms {
2023-07-04 23:55:15 +00:00
if searchTerm == "" {
continue
}
2023-06-18 15:04:31 +00:00
// Iterate over the indexes
for dirName, v := range indexes {
if count > maximum {
break
}
searchItems := v
// Iterate over the path names
for _, pathName := range searchItems {
if count > maximum {
break
}
if dirName != "/" {
pathName = dirName + "/" + pathName
}
// Check if the path name contains the search term
2023-07-04 23:55:15 +00:00
if !containsSearchTerm(pathName, searchTerm, searchOptions.Conditions) {
2023-06-18 15:04:31 +00:00
continue
}
pathName = scopedPathNameFilter(pathName, scope)
if pathName == "" {
continue
}
count++
matchingFiles = append(matchingFiles, pathName)
2023-06-16 17:29:43 +00:00
}
2023-06-15 01:08:09 +00:00
// Check if the path name contains the search term
2023-07-04 23:55:15 +00:00
if !containsSearchTerm(dirName, searchTerm, searchOptions.Conditions) {
continue
}
2023-06-18 15:04:31 +00:00
pathName := scopedPathNameFilter(dirName, scope)
if pathName == "" {
continue
2023-06-15 01:08:09 +00:00
}
2023-06-18 15:04:31 +00:00
count++
matchingDirs = append(matchingDirs, pathName)
2023-06-15 01:08:09 +00:00
}
}
2023-06-16 17:29:43 +00:00
// Sort the strings based on the number of elements after splitting by "/"
sort.Slice(matchingFiles, func(i, j int) bool {
parts1 := strings.Split(matchingFiles[i], "/")
parts2 := strings.Split(matchingFiles[j], "/")
return len(parts1) < len(parts2)
})
// Sort the strings based on the number of elements after splitting by "/"
sort.Slice(matchingDirs, func(i, j int) bool {
parts1 := strings.Split(matchingDirs[i], "/")
parts2 := strings.Split(matchingDirs[j], "/")
return len(parts1) < len(parts2)
})
2023-06-18 15:04:31 +00:00
return matchingFiles, matchingDirs
2023-06-15 01:08:09 +00:00
}
// scopedPathNameFilter returns pathName relative to scope, or "" when
// pathName does not lie inside scope.
//
// A leading "/" on scope is ignored. The scope must end on a path-component
// boundary: scope "docs" contains "docs/a.txt" but not "docs2/a.txt". A
// pathName equal to the scope itself yields "", and an empty scope (or "/")
// returns pathName unchanged.
func scopedPathNameFilter(pathName string, scope string) string {
	scope = strings.TrimPrefix(scope, "/")
	if !strings.HasPrefix(pathName, scope) {
		return ""
	}
	rest := pathName[len(scope):]
	// When the scope does not end with "/", the remainder has to start a new
	// path component; otherwise the scope only matched part of a name.
	if scope != "" && !strings.HasSuffix(scope, "/") && rest != "" && !strings.HasPrefix(rest, "/") {
		return ""
	}
	return rest
}
2023-07-04 23:55:15 +00:00
func containsSearchTerm(pathName string, searchTerm string, conditions map[string]bool) bool {
path := getLastPathComponent(pathName)
if !conditions["exact"] {
path = strings.ToLower(path)
searchTerm = strings.ToLower(searchTerm)
}
matchesCondition := true
if conditions["audio"] {
extension := filepath.Ext(path)
mimetype := mime.TypeByExtension(extension)
matchesCondition = strings.HasPrefix(mimetype, "audio")
}
if conditions["video"] {
extension := filepath.Ext(path)
mimetype := mime.TypeByExtension(extension)
matchesCondition = strings.HasPrefix(mimetype, "video")
}
if conditions["image"] {
extension := filepath.Ext(path)
mimetype := mime.TypeByExtension(extension)
matchesCondition = strings.HasPrefix(mimetype, "image")
}
if conditions["doc"] {
extension := filepath.Ext(path)
for _, typefile := range documentTypes {
if extension == typefile {
matchesCondition = true
continue
} else {
matchesCondition = false
}
}
}
if conditions["zip"] {
extension := filepath.Ext(path)
for _, typefile := range compressedFile {
if extension == typefile {
matchesCondition = true
continue
} else {
matchesCondition = false
}
}
}
return strings.Contains(path, searchTerm) && matchesCondition
2023-06-15 01:08:09 +00:00
}
2023-06-18 15:04:31 +00:00
2023-06-15 01:08:09 +00:00
// getLastPathComponent returns the final element of path as computed by
// filepath.Base.
func getLastPathComponent(path string) string {
	base := filepath.Base(path)
	return base
}