17 Commits

Author SHA1 Message Date
64efaf5514 fix: add default log level
Some checks failed
Release / Generate changelog (push) Failing after -16s
2025-03-26 09:32:00 +01:00
8d320f96cd docs: add sonarqube badge
All checks were successful
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 19s
SonarQube Scan / Build and analyze (push) Successful in 11s
2025-03-26 08:33:01 +01:00
75c5e7ea4c fix: add sonarqube properties
All checks were successful
Go / build (push) Successful in -12s
renovate / renovate (push) Successful in 18s
SonarQube Scan / Build and analyze (push) Successful in 20s
2025-03-26 08:29:48 +01:00
e6e668451a ci: add quality gate
Some checks failed
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 19s
SonarQube Scan / Build and analyze (push) Failing after 2s
2025-03-26 08:24:21 +01:00
9528fca452 ci: update sonarqube pipeline
Some checks failed
Go / build (push) Successful in -12s
renovate / renovate (push) Successful in 18s
SonarQube Scan / SonarQube Trigger (push) Failing after 3s
2025-03-26 08:19:30 +01:00
cf95ee2acb fix: Updated AddDocToIndex function to ignore additional special characters in scanner.Text()
Some checks failed
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 19s
SonarQube Scan / SonarQube Trigger (push) Failing after -17s
2025-03-26 08:14:11 +01:00
4a673d1894 Merge branch 'main' of https://git.dev-null.rocks/alexohneander/gosearch
Some checks failed
Go / build (push) Successful in -9s
renovate / renovate (push) Successful in 22s
SonarQube Scan / SonarQube Trigger (push) Failing after -18s
2025-03-26 08:11:10 +01:00
0d22da3643 ci: add sonarqube pipeline 2025-03-26 08:10:56 +01:00
0107f77570 Merge pull request 'chore(deps): update ghcr.io/renovatebot/renovate docker tag to v39.216.1' (#3) from renovate/ghcr.io-renovatebot-renovate-39.x into main
All checks were successful
Go / build (push) Successful in -12s
renovate / renovate (push) Successful in 48s
Reviewed-on: #3
2025-03-26 00:07:43 +00:00
d5f658244b chore(deps): update ghcr.io/renovatebot/renovate docker tag to v39.216.1
All checks were successful
Go / build (pull_request) Successful in -12s
2025-03-26 00:00:58 +00:00
72128f97a3 refactor: search logic to improve performance and reduce false positives by making query case-insensitive for simple queries and using trimmed whitespace in phrases 2025-03-26 00:28:23 +01:00
78e2568e2b docs: update changelog
All checks were successful
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 37s
2025-03-26 00:11:13 +01:00
75ba450d9b feat(index): add save and load functionality for Index using gob encoding/decoding 2025-03-26 00:10:28 +01:00
dd7f0c0a28 docs: add first release in Changelog
All checks were successful
Go / build (push) Successful in -11s
renovate / renovate (push) Successful in 16s
2025-03-25 23:09:54 +01:00
3f072184e0 ci: add release pipeline
Some checks failed
Go / build (push) Successful in -12s
renovate / renovate (push) Successful in 18s
Release / Generate changelog (push) Failing after -16s
2025-03-25 23:06:38 +01:00
9d932ec70e Merge pull request 'chore(deps): update ghcr.io/renovatebot/renovate docker tag to v39.215.2' (#1) from renovate/ghcr.io-renovatebot-renovate-39.x into main
All checks were successful
Go / build (push) Successful in -12s
renovate / renovate (push) Successful in 54s
Reviewed-on: #1
2025-03-25 21:55:29 +00:00
4094141c0b chore(deps): update ghcr.io/renovatebot/renovate docker tag to v39.215.2
All checks were successful
Go / build (pull_request) Successful in -12s
2025-03-25 21:54:14 +00:00
11 changed files with 292 additions and 10 deletions

View File

@@ -0,0 +1,38 @@
name: Release
on:
push:
tags:
- "*"
jobs:
changelog:
name: Generate changelog
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generate changelog
uses: orhun/git-cliff-action@v4
with:
config: cliff.toml
args: --verbose
env:
OUTPUT: CHANGELOG.md
GITHUB_REPO: ${{ github.repository }}
- name: Commit
run: |
git checkout <branch>
git config user.name 'github-actions[bot]'
git config user.email 'github-actions[bot]@users.noreply.github.com'
set +e
git add CHANGELOG.md
git commit -m "Update changelog"
git push https://${{ secrets.GITHUB_TOKEN }}@github.com/${GITHUB_REPOSITORY}.git <branch>

View File

@@ -10,7 +10,7 @@ on:
jobs:
renovate:
runs-on: ubuntu-latest
container: ghcr.io/renovatebot/renovate:39.211.0
container: ghcr.io/renovatebot/renovate:39.216.1
steps:
- uses: actions/checkout@v4
- run: renovate

View File

@@ -0,0 +1,25 @@
on:
push:
branches:
- main
pull_request:
types: [opened, synchronize, reopened]
name: SonarQube Scan
jobs:
build:
name: Build and analyze
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
- uses: SonarSource/sonarqube-scan-action@v4
env:
SONAR_TOKEN: ${{ secrets.SONARQUBE_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONARQUBE_HOST }}
- uses: SonarSource/sonarqube-quality-gate-action@v1
timeout-minutes: 5
env:
SONAR_TOKEN: ${{ secrets.SONARQUBE_TOKEN }}

45
CHANGELOG.md Normal file
View File

@@ -0,0 +1,45 @@
# Changelog
All notable changes to this project will be documented in this file.
## [unreleased]
### 🚀 Features
- *(index)* Add save and load functionality for Index using gob encoding/decoding
### 📚 Documentation
- Add first release in Changelog
## [0.1.0] - 2025-03-25
### 🚀 Features
- Added documentation for gosearch system architecture and updated Go module version to 1.23.5
- Update internal HTTP server to use Fiber v2 with new routes and middleware configuration
- Added new search route and metric endpoint to internal controller
- Update fiber version from 2 to 3 and add health checks
- Added search functionality with TF-IDF scoring and inverted index data structure for efficient document retrieval.
- Update Fiber version to v2 and update internal controller and search logic to use new index module
- Update search.go to include debug logging for relevance scores
- *(doc)* Update diagram explanation for improved clarity
### 🚜 Refactor
- Search controller to convert query to lowercase
- Index and removed old functions
### 📚 Documentation
- Added high-level architecture diagram for Elasticsearch SearchEngine in Go, including components and interactions
### ⚙️ Miscellaneous Tasks
- Update dependencies to latest versions (go.mod, go.sum)
- Added golang workflow for building and testing project
- Update Go version in build-and-test workflow to 1.23
- Added renovate configuration files for Gitea workflow automation
- Add release pipeline
<!-- generated by git-cliff -->

View File

@@ -1,5 +1,7 @@
# gosearch
[![Quality Gate Status](https://sonar.dev-null.rocks/api/project_badges/measure?project=gosearch&metric=alert_status&token=sqb_4d86c3b73f6837027a319df42d2f70ccb46e56a2)](https://sonar.dev-null.rocks/dashboard?id=gosearch)
### Diagram of the Architecture
```mermaid

84
cliff.toml Normal file
View File

@@ -0,0 +1,84 @@
# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
#
# Lines starting with "#" are comments.
# Configuration options are organized into tables and keys.
# See documentation for more information on available options.
[changelog]
# template for the changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{% if version %}\
## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
{% else %}\
## [unreleased]
{% endif %}\
{% for group, commits in commits | group_by(attribute="group") %}
### {{ group | striptags | trim | upper_first }}
{% for commit in commits %}
- {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
{% if commit.breaking %}[**breaking**] {% endif %}\
{{ commit.message | upper_first }}\
{% endfor %}
{% endfor %}\n
"""
# template for the changelog footer
footer = """
<!-- generated by git-cliff -->
"""
# remove the leading and trailing whitespace
trim = true
# postprocessors
postprocessors = [
# { pattern = '<REPO>', replace = "https://github.com/orhun/git-cliff" }, # replace repository URL
]
# render body even when there are no releases to process
# render_always = true
# output file path
# output = "test.md"
[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true
# filter out the commits that are not conventional
filter_unconventional = true
# process each line of a commit as an individual commit
split_commits = false
# regex for preprocessing the commit messages
commit_preprocessors = [
# Replace issue numbers
#{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](<REPO>/issues/${2}))"},
# Check spelling of the commit with https://github.com/crate-ci/typos
# If the spelling is incorrect, it will be automatically fixed.
#{ pattern = '.*', replace_command = 'typos --write-changes -' },
]
# regex for parsing and grouping commits
commit_parsers = [
{ message = "^feat", group = "<!-- 0 -->🚀 Features" },
{ message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
{ message = "^doc", group = "<!-- 3 -->📚 Documentation" },
{ message = "^perf", group = "<!-- 4 -->⚡ Performance" },
{ message = "^refactor", group = "<!-- 2 -->🚜 Refactor" },
{ message = "^style", group = "<!-- 5 -->🎨 Styling" },
{ message = "^test", group = "<!-- 6 -->🧪 Testing" },
{ message = "^chore\\(release\\): prepare for", skip = true },
{ message = "^chore\\(deps.*\\)", skip = true },
{ message = "^chore\\(pr\\)", skip = true },
{ message = "^chore\\(pull\\)", skip = true },
{ message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
{ body = ".*security", group = "<!-- 8 -->🛡️ Security" },
{ message = "^revert", group = "<!-- 9 -->◀️ Revert" },
{ message = ".*", group = "<!-- 10 -->💼 Other" },
]
# filter out the commits that are not matched by commit parsers
filter_commits = false
# sort the tags topologically
topo_order = false
# sort the commits inside sections by oldest/newest order
sort_commits = "oldest"

View File

@@ -12,7 +12,7 @@ import (
func SearchQuery(c *fiber.Ctx) error {
query := c.Params("query")
query = strings.TrimSpace(strings.ToLower(query))
query = strings.TrimSpace(query)
terms, queryType := parseQuery(query)
results := search.Search(terms, queryType, index.Index, index.DocFreq, len(index.Documents))
@@ -29,12 +29,12 @@ func SearchQuery(c *fiber.Ctx) error {
// parseQuery splits a raw search query into lower-cased terms and reports the
// query type, which is one of "AND", "OR" or "SIMPLE".
//
// The boolean operators are recognised only in upper case and only as
// standalone, space-delimited words (" AND ", " OR "). They therefore have to
// be detected on the original query, before any lower-casing: searching a
// lower-cased string for "AND" can never match. Requiring the surrounding
// spaces also keeps words such as "ANDROID" or "ORANGE" from being mistaken
// for operators. When both operators are present, AND takes precedence.
//
// Every returned term is trimmed and lower-cased. Queries without an operator
// are split on whitespace and returned as "SIMPLE".
func parseQuery(query string) ([]string, string) {
	for _, op := range []string{"AND", "OR"} {
		sep := " " + op + " "
		if !strings.Contains(query, sep) {
			continue
		}
		terms := strings.Split(query, sep)
		for i, term := range terms {
			terms[i] = strings.ToLower(strings.TrimSpace(term))
		}
		return terms, op
	}
	return strings.Fields(strings.ToLower(query)), "SIMPLE"
}
// phraseMatch checks if all terms appear in the given document in sequence

View File

@@ -2,6 +2,7 @@ package http
import (
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/log"
"github.com/gofiber/fiber/v2/middleware/logger"
)
@@ -12,6 +13,7 @@ func StartService() {
app.Use(logger.New(logger.Config{
Format: "[${ip}]:${port} ${status} - ${method} ${path}\n",
}))
log.SetLevel(log.LevelInfo)
// Configure Routes
app = configureRoutes(app)

View File

@@ -7,7 +7,7 @@ import (
func main() {
// Initialize Index
index.CreateIndex()
index.InitIndex("default")
// Start HTTP Server
http.StartService()

View File

@@ -2,6 +2,12 @@ package index
import (
"bufio"
"bytes"
"encoding/gob"
"errors"
"fmt"
"log"
"os"
"strings"
)
@@ -13,7 +19,40 @@ var Index InvertedIndex
var DocFreq DocumentFrequency
var Documents []string
func CreateIndex() {
// SavedIndex bundles the package-level index state (Index, DocFreq and
// Documents) into a single value so that it can be written to and read from
// disk in one piece. updateIndex fills it from the package variables before
// saving, and InitIndex copies its fields back after loading.
type SavedIndex struct {
// Index is the snapshot of the package-level inverted index.
Index InvertedIndex
// DocFreq is the snapshot of the package-level document frequency table.
DocFreq DocumentFrequency
// Documents is the snapshot of the package-level document list; AddDocToIndex
// appends each document's URL to that list.
Documents []string
}
// InitIndex initialises the package-level index state and binds it to the
// on-disk snapshot "/tmp/<name>.db".
//
// It always starts from a fresh, empty index (createIndex). Then:
//   - if the snapshot file does not exist, the empty index is written to it so
//     that the file holds a decodable snapshot from the start (a zero-byte
//     file cannot be gob-decoded and would make every later start fail on the
//     read path); failing to write the file is fatal;
//   - otherwise the snapshot is loaded and replaces the in-memory state. A read
//     or decode error is reported on stdout and the empty index is kept.
func InitIndex(name string) {
	createIndex()

	indexFilePath := "/tmp/" + name + ".db"

	if _, err := os.Stat(indexFilePath); errors.Is(err, os.ErrNotExist) {
		initial := SavedIndex{
			Index:     Index,
			DocFreq:   DocFreq,
			Documents: Documents,
		}
		if err := writeStructToFile(indexFilePath, initial); err != nil {
			log.Fatal(err)
		}
		return
	}

	var savedIndex SavedIndex
	if err := readStructFromFile(indexFilePath, &savedIndex); err != nil {
		fmt.Println("Fehler beim Lesen:", err)
		return
	}

	// Only adopt maps that were actually present in the snapshot: a nil map
	// would replace the one made by createIndex, and AddDocToIndex writes into
	// Index, which panics on a nil map.
	if savedIndex.Index != nil {
		Index = savedIndex.Index
	}
	if savedIndex.DocFreq != nil {
		DocFreq = savedIndex.DocFreq
	}
	Documents = savedIndex.Documents
}
func createIndex() {
index := make(InvertedIndex)
docFreq := make(DocumentFrequency)
var docs []string
@@ -23,6 +62,21 @@ func CreateIndex() {
DocFreq = docFreq
}
// updateIndex persists the current package-level index state to
// "/tmp/<name>.db". The three globals are captured in a SavedIndex and handed
// to writeStructToFile; a write failure is only reported on stdout.
func updateIndex(name string) {
	target := "/tmp/" + name + ".db"

	snapshot := SavedIndex{Index: Index, DocFreq: DocFreq, Documents: Documents}

	if writeErr := writeStructToFile(target, snapshot); writeErr != nil {
		fmt.Println("Fehler beim Schreiben:", writeErr)
	}
}
func AddDocToIndex(url string, content string) {
Documents = append(Documents, url)
@@ -33,7 +87,7 @@ func AddDocToIndex(url string, content string) {
scanner.Split(bufio.ScanWords)
for scanner.Scan() {
word := strings.ToLower(strings.Trim(scanner.Text(), ",.!?"))
word := strings.ToLower(strings.Trim(scanner.Text(), ",.!?&<>;:=§$%{}[]()|"))
if Index[word] == nil {
Index[word] = make(map[string]int)
@@ -46,4 +100,35 @@ func AddDocToIndex(url string, content string) {
}
}
go updateIndex("default")
}
// writeStructToFile gob-encodes data into memory and then writes the encoded
// bytes to filename with mode 0644. Nothing is written when encoding fails.
// Both failure modes are returned as wrapped errors.
func writeStructToFile(filename string, data interface{}) error {
	var encoded bytes.Buffer

	if encErr := gob.NewEncoder(&encoded).Encode(data); encErr != nil {
		return fmt.Errorf("error when encoding the structs: %w", encErr)
	}

	if writeErr := os.WriteFile(filename, encoded.Bytes(), 0o644); writeErr != nil {
		return fmt.Errorf("error when writing to the file: %w", writeErr)
	}

	return nil
}
// readStructFromFile reads the whole of filename into memory and gob-decodes
// it into data, which must be a pointer to the destination value. Read and
// decode failures are returned as wrapped errors.
func readStructFromFile(filename string, data interface{}) error {
	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return fmt.Errorf("error reading the file: %w", readErr)
	}

	if decErr := gob.NewDecoder(bytes.NewReader(raw)).Decode(data); decErr != nil {
		return fmt.Errorf("error when decoding the structs: %w", decErr)
	}

	return nil
}

1
sonar-project.properties Normal file
View File

@@ -0,0 +1 @@
sonar.projectKey=gosearch