gosearch/pkg/index/index.go
Alex Wellnitz cf95ee2acb
Some checks failed
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 19s
SonarQube Scan / SonarQube Trigger (push) Failing after -17s
fix: Updated AddDocToIndex function to ignore additional special characters in scanner.Text()
2025-03-26 08:14:11 +01:00

135 lines
2.7 KiB
Go

package index
import (
"bufio"
"bytes"
"encoding/gob"
"errors"
"fmt"
"log"
"os"
"strings"
)
// needs to be saved in a file or database
type InvertedIndex map[string]map[string]int
type DocumentFrequency map[string]int
var Index InvertedIndex
var DocFreq DocumentFrequency
var Documents []string
type SavedIndex struct {
Index InvertedIndex
DocFreq DocumentFrequency
Documents []string
}
func InitIndex(name string) {
createIndex()
// check if index as file exists
// if not, create one and save it
indexFilePath := "/tmp/" + name + ".db"
if _, err := os.Stat(indexFilePath); errors.Is(err, os.ErrNotExist) {
f, err := os.Create(indexFilePath)
if err != nil {
log.Fatal(err)
}
defer f.Close()
} else {
var savedIndex SavedIndex
err = readStructFromFile(indexFilePath, &savedIndex)
if err != nil {
fmt.Println("Fehler beim Lesen:", err)
return
}
Index = savedIndex.Index
DocFreq = savedIndex.DocFreq
Documents = savedIndex.Documents
}
}
func createIndex() {
index := make(InvertedIndex)
docFreq := make(DocumentFrequency)
var docs []string
Documents = docs
Index = index
DocFreq = docFreq
}
func updateIndex(name string) {
savedIndex := SavedIndex{
Index: Index,
DocFreq: DocFreq,
Documents: Documents,
}
indexFilePath := "/tmp/" + name + ".db"
err := writeStructToFile(indexFilePath, savedIndex)
if err != nil {
fmt.Println("Fehler beim Schreiben:", err)
return
}
}
func AddDocToIndex(url string, content string) {
Documents = append(Documents, url)
reader := strings.NewReader(content)
seenTerms := make(map[string]bool) // Track terms in this document
scanner := bufio.NewScanner(reader)
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
word := strings.ToLower(strings.Trim(scanner.Text(), ",.!?&<>;:=§$%{}[]()|"))
if Index[word] == nil {
Index[word] = make(map[string]int)
}
Index[word][url]++
if !seenTerms[word] {
DocFreq[word]++
seenTerms[word] = true
}
}
go updateIndex("default")
}
func writeStructToFile(filename string, data interface{}) error {
var buf bytes.Buffer
enc := gob.NewEncoder(&buf)
err := enc.Encode(data)
if err != nil {
return fmt.Errorf("error when encoding the structs: %w", err)
}
err = os.WriteFile(filename, buf.Bytes(), 0644)
if err != nil {
return fmt.Errorf("error when writing to the file: %w", err)
}
return nil
}
func readStructFromFile(filename string, data interface{}) error {
content, err := os.ReadFile(filename)
if err != nil {
return fmt.Errorf("error reading the file: %w", err)
}
buf := bytes.NewBuffer(content)
dec := gob.NewDecoder(buf)
err = dec.Decode(data)
if err != nil {
return fmt.Errorf("error when decoding the structs: %w", err)
}
return nil
}