// Source listing metadata: 54 lines, 1.7 KiB, Rust.
#[cfg(test)]
mod tests {
    //! Tests for the word, sentence and paragraph splitting methods of
    //! `Tokenizer`. Each test builds a tokenizer over a short English text
    //! with the same stopword list and compares the result against a
    //! hand-written expectation.

    use rustysearch::analyze::tokenizer::Tokenizer;

    /// Turns a slice of string literals into owned `String`s, which is the
    /// element type the assertions below compare against.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    /// Stopword list shared by every test in this module.
    fn stopwords() -> Vec<String> {
        owned(&["the", "over"])
    }

    /// `split_into_words` is expected to yield the words of the sentence
    /// without "The"/"the", "over", or the trailing period.
    #[test]
    fn test_split_into_words() {
        let text = "The quick brown fox jumps over the lazy dog.";
        // NOTE(review): the third argument's meaning is not visible from this
        // file; `None` is presumably "use the default" — confirm against
        // `Tokenizer::new`.
        let tokenizer = Tokenizer::new(text, stopwords(), None);

        let words = tokenizer.split_into_words();

        assert_eq!(
            words,
            owned(&["quick", "brown", "fox", "jumps", "lazy", "dog"])
        );
    }

    /// `split_into_sentences` is expected to yield one entry per sentence of
    /// the input, each with the stopwords and the period removed.
    #[test]
    fn test_split_into_sentences() {
        let text = "The quick brown fox jumps over the lazy dog. The end.";
        let tokenizer = Tokenizer::new(text, stopwords(), None);

        let sentences = tokenizer.split_into_sentences();

        assert_eq!(
            sentences,
            owned(&["quick brown fox jumps lazy dog", "end"])
        );
    }

    /// `split_into_paragraphs` is expected to yield one entry per block of
    /// text separated by a blank line (`\n\n`), each with the stopwords and
    /// the period removed.
    #[test]
    fn test_split_into_paragraphs() {
        let text = "The quick brown fox jumps over the lazy dog.\n\nThe end.";
        let tokenizer = Tokenizer::new(text, stopwords(), None);

        let paragraphs = tokenizer.split_into_paragraphs();

        assert_eq!(
            paragraphs,
            owned(&["quick brown fox jumps lazy dog", "end"])
        );
    }
}