diff --git a/internal/controller/search.go b/internal/controller/search.go index 23f4dab..a738db7 100644 --- a/internal/controller/search.go +++ b/internal/controller/search.go @@ -1,9 +1,49 @@ package controller import ( + "fmt" + "os" + "strings" + + "git.dev-null.rocks/alexohneander/gosearch/pkg/search" "github.com/gofiber/fiber/v3" ) func SearchQuery(c fiber.Ctx) error { - return c.SendString("Hello, World!") + query := c.Params("query") + query = strings.TrimSpace(query) + + terms, queryType := parseQuery(query) + results := search.Search(terms, queryType, search.Index, search.DocFreq, len(search.Files)) + + var response string + + response = fmt.Sprintf("Search Results (%s query):\n", queryType) + for _, result := range results { + response = response + "\n" + fmt.Sprintf("- %s (Score: %.4f)\n", result.Document, result.Score) + } + + return c.SendString(response) +} + +// parseQuery parses the query to determine query type and terms +func parseQuery(query string) ([]string, string) { + if strings.Contains(query, "AND") { + return strings.Split(query, " AND "), "AND" + } else if strings.Contains(query, "OR") { + return strings.Split(query, " OR "), "OR" + } + return strings.Fields(query), "SIMPLE" +} + +// phraseMatch checks if all terms appear in the given document in sequence +func phraseMatch(terms []string, doc string) bool { + // Read the full document content + content, err := os.ReadFile(doc) + if err != nil { + return false + } + // Check if the exact phrase (joined terms) is in the document content + phrase := strings.Join(terms, " ") + return strings.Contains(strings.ToLower(string(content)), phrase) } diff --git a/internal/http/router.go b/internal/http/router.go index 566c63b..be3b67b 100644 --- a/internal/http/router.go +++ b/internal/http/router.go @@ -12,7 +12,7 @@ func configureRoutes(app *fiber.App) *fiber.App { app.Get("/test", controller.Index) // Search - app.Get("/api/search/:index/:query", controller.SearchQuery) + app.Get("/api/search/:query", controller.SearchQuery) // Monitor // app.Get("/metrics", monitor.New(monitor.Config{Title: "MyService Metrics Page"})) diff --git a/main.go b/main.go index 78bcf91..04ca71f 100644 --- a/main.go +++ b/main.go @@ -1,8 +1,13 @@ package main -import "git.dev-null.rocks/alexohneander/gosearch/internal/http" +import ( + "git.dev-null.rocks/alexohneander/gosearch/internal/http" + "git.dev-null.rocks/alexohneander/gosearch/pkg/search" +) func main() { + search.TestIndex() + // Start HTTP Server http.StartService() } diff --git a/pkg/search/index.go b/pkg/search/index.go new file mode 100644 index 0000000..7c6dbb0 --- /dev/null +++ b/pkg/search/index.go @@ -0,0 +1,63 @@ +package search + +import ( + "bufio" + "log" + "os" + "strings" +) + +// needs to be saved in a file or database +type InvertedIndex map[string]map[string]int +type DocumentFrequency map[string]int + +var Index InvertedIndex +var DocFreq DocumentFrequency +var Files []string + +// BuildIndex reads files and builds an inverted index. +func BuildIndex(files []string) (InvertedIndex, DocumentFrequency, error) { + index := make(InvertedIndex) + docFreq := make(DocumentFrequency) + + for _, file := range files { + f, err := os.Open(file) + if err != nil { + return nil, nil, err + } + defer f.Close() + + seenTerms := make(map[string]bool) // Track terms in this document + scanner := bufio.NewScanner(f) + scanner.Split(bufio.ScanWords) + + for scanner.Scan() { + word := strings.ToLower(strings.Trim(scanner.Text(), ",.!?")) + + if index[word] == nil { + index[word] = make(map[string]int) + } + index[word][file]++ + + if !seenTerms[word] { + docFreq[word]++ + seenTerms[word] = true + } + } + } + return index, docFreq, nil +} + +func TestIndex() { + // Index files + files := []string{"data/doc1.txt", "data/doc2.txt", "data/doc3.txt"} + + index, docFreq, err := BuildIndex(files) + if err != nil { + log.Fatalf("Error building index: %v", err) + } + + Files = files + Index = index + DocFreq = docFreq +} diff --git a/pkg/search/search.go b/pkg/search/search.go new file mode 100644 index 0000000..05b543f --- /dev/null +++ b/pkg/search/search.go @@ -0,0 +1,87 @@ +package search + +import ( + "math" + "sort" +) + +// SearchResult stores the document and its relevance score. +type SearchResult struct { + Document string + Score float64 +} + +// Search processes different types of queries using TF-IDF scoring. +func Search(terms []string, queryType string, index InvertedIndex, docFreq DocumentFrequency, numDocs int) []SearchResult { + scores := make(map[string]float64) + + if queryType == "AND" { + // Ensure all terms appear in the document (AND logic) + for _, doc := range intersectDocs(terms, index) { + scores[doc] = scoreDoc(terms, doc, index, docFreq, numDocs) + } + } else if queryType == "OR" { + // Include any document that contains at least one of the terms (OR logic) + for _, term := range terms { + for doc := range index[term] { + scores[doc] += scoreDoc([]string{term}, doc, index, docFreq, numDocs) + } + } + } else { + // Simple query - score documents based on TF-IDF for any terms + for _, term := range terms { + for doc := range index[term] { + scores[doc] += scoreDoc([]string{term}, doc, index, docFreq, numDocs) + } + } + } + + return rankResults(scores) +} + +// Helper function to score a single document based on terms +func scoreDoc(terms []string, doc string, index InvertedIndex, docFreq DocumentFrequency, numDocs int) float64 { + score := 0.0 + for _, term := range terms { + tf := float64(index[term][doc]) + idf := math.Log(float64(numDocs) / float64(docFreq[term])) + score += tf * idf + //fmt.Printf("Score: %f64 %f64 %f64\n", tf, idf, score) + } + return score +} + +// Helper function to intersect documents for AND logic +func intersectDocs(terms []string, index InvertedIndex) []string { + if len(terms) == 0 { + return nil + } + docs := make(map[string]bool) + for doc := range index[terms[0]] { + docs[doc] = true + } + for _, term := range terms[1:] { + for doc := range docs { + if _, exists := index[term][doc]; !exists { + delete(docs, doc) + } + } + } + result := []string{} + for doc := range docs { + result = append(result, doc) + } + return result +} + +// rankResults sorts the documents by score +func rankResults(scores map[string]float64) []SearchResult { + results := make([]SearchResult, 0, len(scores)) + for doc, score := range scores { + results = append(results, SearchResult{Document: doc, Score: score}) + } + sort.Slice(results, func(i, j int) bool { + return results[i].Score > results[j].Score + }) + return results +}