Skip to content

Commit

Permalink
feature: make pagefind Rust lib more SDK-like
Browse files Browse the repository at this point in the history
  • Loading branch information
cdxker committed Dec 11, 2024
1 parent 596a56e commit e6ce8ef
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 80 deletions.
1 change: 1 addition & 0 deletions pagefind/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use hashbrown::HashMap;
use index::PagefindIndexes;
pub use options::{PagefindInboundConfig, SearchOptions};
use output::SyntheticFile;
pub use service::api;
use wax::{Glob, WalkEntry};

use crate::index::build_indexes;
Expand Down
177 changes: 177 additions & 0 deletions pagefind/src/service/api.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
use hashbrown::HashMap;
use std::path::PathBuf;

use crate::{
fossick::{parser::DomParserResult, Fossicker},
PagefindInboundConfig, SearchOptions, SearchState,
};
use base64::{engine::general_purpose, Engine as _};

use super::{IndexedFileResponse, SyntheticFileResponse};

/// An in-memory Pagefind search index.
///
/// Wraps the lower-level `SearchState` with an SDK-style API for adding
/// files/records, indexing directories, and building or writing out the
/// resulting index files.
pub struct PagefindIndex {
    // Underlying indexing state that performs fossicking and index builds.
    search_index: SearchState,
}

impl PagefindIndex {
    /// Create a new PagefindIndex instance.
    ///
    /// # Arguments
    /// * `config` - The `PagefindInboundConfig` to load search options from.
    ///
    /// # Returns
    /// `Some(PagefindIndex)` on success, or `None` if the supplied
    /// configuration fails validation in `SearchOptions::load`.
    pub fn new(config: PagefindInboundConfig) -> Option<Self> {
        // The load error is intentionally discarded to keep the Option
        // interface; callers only learn that the config was invalid.
        SearchOptions::load(config).ok().map(|opts| Self {
            search_index: SearchState::new(opts),
        })
    }

    /// Add a file into this search index.
    /// Either a filepath or a URL must be provided.
    ///
    /// # Arguments
    /// * `file_path` - The path to the file to add.
    /// * `url` - The URL to the file to add.
    /// * `file_contents` - The contents of the file to add.
    ///
    /// # Returns
    /// Either the `IndexedFileResponse` of the file added or an error
    /// message if neither `file_path` nor `url` was given, or if indexing
    /// the file fails.
    pub async fn add_file(
        &mut self,
        file_path: Option<String>,
        url: Option<String>,
        file_contents: String,
    ) -> Result<IndexedFileResponse, String> {
        // At least one locator is required so the page gets a resolvable URL.
        if file_path.is_none() && url.is_none() {
            return Err("Either file_path or url must be provided".into());
        }

        let file = Fossicker::new_synthetic(file_path.map(PathBuf::from), url, file_contents);
        self.search_index
            .fossick_one(file)
            .await
            .map(|data| IndexedFileResponse {
                page_word_count: data.fragment.data.word_count as u32,
                page_url: data.fragment.data.url,
                page_meta: data.fragment.data.meta,
            })
            .map_err(|_| "Failed to add file".to_string())
    }

    /// Add a record to the search index.
    /// This is a more manual way to add a record to the search index, allowing for more control
    /// over the data. This is useful for adding records that are not files.
    ///
    /// # Arguments
    /// * `url` - The URL of the record.
    /// * `content` - The content of the record.
    /// * `language` - The language of the record (overridden by any
    ///   `force_language` set in the index options).
    /// * `meta` - Optional metadata to add to the record.
    /// * `filters` - Optional filters to apply to the record.
    /// * `sort` - Optional sorting to apply to the record.
    ///
    /// # Returns
    /// Either the `IndexedFileResponse` of the record added, or an error
    /// message if indexing fails.
    pub async fn add_record(
        &mut self,
        url: String,
        content: String,
        language: String,
        meta: Option<HashMap<String, String>>,
        filters: Option<HashMap<String, Vec<String>>>,
        sort: Option<HashMap<String, String>>,
    ) -> Result<IndexedFileResponse, String> {
        // Build a synthetic parse result as if this content had come from a
        // real parsed document; force_inclusion ensures it is always indexed.
        let data = DomParserResult {
            digest: content,
            filters: filters.unwrap_or_default(),
            sort: sort.unwrap_or_default(),
            meta: meta.unwrap_or_default(),
            anchor_content: HashMap::new(),
            has_custom_body: false,
            force_inclusion: true,
            has_html_element: true,
            has_old_bundle_reference: false,
            // A configured force_language takes precedence over the caller's.
            language: self
                .search_index
                .options
                .force_language
                .clone()
                .unwrap_or(language),
        };
        let file = Fossicker::new_with_data(url, data);
        self.search_index
            .fossick_one(file)
            .await
            .map(|data| IndexedFileResponse {
                page_word_count: data.fragment.data.word_count as u32,
                page_url: data.fragment.data.url,
                page_meta: data.fragment.data.meta,
            })
            // Fixed copy-paste: this path adds a record, not a file.
            .map_err(|_| "Failed to add record".to_string())
    }

    /// Add a directory to the search index with a glob pattern.
    ///
    /// # Arguments
    /// * `path` - The path to the directory to index.
    /// * `glob` - A glob pattern to match files in the directory. If not provided, the default glob pattern will be used.
    ///
    /// # Returns
    /// Either the number of pages indexed or an error message, if it fails to index the directory.
    pub async fn add_dir(&mut self, path: String, glob: Option<String>) -> Result<usize, String> {
        // Only deserialize the default config when no glob was supplied,
        // instead of paying for it unconditionally.
        let glob = glob.unwrap_or_else(|| {
            let defaults: PagefindInboundConfig =
                serde_json::from_str("{}").expect("All fields have serde defaults");
            defaults.glob
        });

        self.search_index
            .fossick_many(PathBuf::from(path), glob)
            .await
            .map_err(|_| "Failed to index directory".to_string())
    }

    /// Build the search index for this instance and hold it in memory.
    pub async fn build_indexes(&mut self) {
        self.search_index.build_indexes().await;
    }

    /// Build the search index for this instance and write the files to disk.
    ///
    /// # Arguments
    /// * `output_path` - The path to write the files to. If not provided, the default output path will be used.
    ///
    /// # Returns
    /// The resolved output path the files were written to.
    pub async fn write_files(&mut self, output_path: Option<String>) -> String {
        self.search_index.build_indexes().await;
        let resolved_output_path = self
            .search_index
            .write_files(output_path.map(Into::into))
            .await;

        resolved_output_path.to_string_lossy().into()
    }

    /// Build the search index for this instance and return the files as a list of
    /// SyntheticFileResponse.
    ///
    /// # Returns
    /// A list of SyntheticFileResponse containing the path and base64-encoded
    /// content of each file.
    pub async fn get_files(&mut self) -> Vec<SyntheticFileResponse> {
        self.search_index.build_indexes().await;
        self.search_index
            .get_files()
            .await
            .into_iter()
            .map(|file| SyntheticFileResponse {
                path: file.filename.to_string_lossy().into(),
                // Contents are base64-encoded for transport over the service
                // protocol.
                content: general_purpose::STANDARD.encode(file.contents),
            })
            .collect()
    }
}
116 changes: 39 additions & 77 deletions pagefind/src/service/mod.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,17 @@
use std::{
io::{BufRead, Write},
path::PathBuf,
};
use std::io::{BufRead, Write};

use api::PagefindIndex;
use base64::{engine::general_purpose, Engine as _};
use hashbrown::HashMap;
use rust_patch::Patch;
use tokio::sync::mpsc;

use crate::{
fossick::{parser::DomParserResult, Fossicker},
PagefindInboundConfig, SearchOptions, SearchState,
};
pub mod api;

use requests::*;
use responses::*;

use crate::PagefindInboundConfig;

mod requests;
mod responses;

Expand All @@ -37,19 +33,18 @@ pub async fn run_service() {
std::process::exit(0);
}

let Ok(decoded) = general_purpose::STANDARD
.decode(buf) else {
parse_error_outgoing_tx
.send(ServiceResponse {
message_id: None,
payload: ResponseAction::Error {
original_message: None,
message: "Unparseable message, not valid base64".into()
},
})
.expect("Channel is open");
return;
};
let Ok(decoded) = general_purpose::STANDARD.decode(buf) else {
parse_error_outgoing_tx
.send(ServiceResponse {
message_id: None,
payload: ResponseAction::Error {
original_message: None,
message: "Unparseable message, not valid base64".into(),
},
})
.expect("Channel is open");
return;
};

match serde_json::from_slice::<ServiceRequest>(&decoded) {
Ok(msg) => {
Expand Down Expand Up @@ -118,10 +113,10 @@ pub async fn run_service() {
};

fn get_index<'a>(
indexes: &'a mut Vec<Option<SearchState>>,
indexes: &'a mut Vec<Option<api::PagefindIndex>>,
index_id: u32,
err: impl FnOnce(&str),
) -> Option<&'a mut SearchState> {
) -> Option<&'a mut api::PagefindIndex> {
match indexes.get_mut(index_id as usize) {
Some(Some(index)) => Some(index),
Some(None) => {
Expand All @@ -138,22 +133,21 @@ pub async fn run_service() {
match msg.payload {
RequestAction::NewIndex { config } => {
let index_id = indexes.len();

let mut service_options: PagefindInboundConfig =
serde_json::from_str("{}").expect("All fields have serde defaults");
service_options.service = true;

if let Some(config) = config {
service_options = config.apply(service_options);
}

match SearchOptions::load(service_options) {
Ok(opts) => {
indexes.insert(index_id, Some(SearchState::new(opts)));
match PagefindIndex::new(service_options) {
Some(index) => {
indexes.insert(index_id, Some(index));
send(ResponseAction::NewIndex {
index_id: index_id as u32,
});
}
Err(_) => {
None => {
err("Invalid config supplied");
}
}
Expand All @@ -165,22 +159,14 @@ pub async fn run_service() {
file_contents,
} => {
if let Some(index) = get_index(&mut indexes, index_id, err) {
if file_path.is_none() && url.is_none() {
return err(
"Either a source path to the file, or an explicit URL must be provided",
);
}

let file =
Fossicker::new_synthetic(file_path.map(PathBuf::from), url, file_contents);
let data = index.fossick_one(file).await;
match data {
let page_fragment = index.add_file(file_path, url, file_contents).await;
match page_fragment {
Ok(data) => send(ResponseAction::IndexedFile {
page_word_count: data.fragment.data.word_count as u32,
page_url: data.fragment.data.url.clone(),
page_meta: data.fragment.data.meta.clone(),
page_word_count: data.page_word_count,
page_url: data.page_url.clone(),
page_meta: data.page_meta.clone(),
}),
Err(_) => err("Failed to add file"),
Err(message) => err(&message),
}
}
}
Expand All @@ -194,25 +180,14 @@ pub async fn run_service() {
sort,
} => {
if let Some(index) = get_index(&mut indexes, index_id, err) {
let data = DomParserResult {
digest: content,
filters: filters.unwrap_or_default(),
sort: sort.unwrap_or_default(),
meta: meta.unwrap_or_default(),
anchor_content: HashMap::new(),
has_custom_body: false,
force_inclusion: true,
has_html_element: true,
has_old_bundle_reference: false,
language: index.options.force_language.clone().unwrap_or(language),
};
let file = Fossicker::new_with_data(url, data);
let data = index.fossick_one(file).await;
let data = index
.add_record(url, content, language, meta, filters, sort)
.await;
match data {
Ok(data) => send(ResponseAction::IndexedFile {
page_word_count: data.fragment.data.word_count as u32,
page_url: data.fragment.data.url.clone(),
page_meta: data.fragment.data.meta.clone(),
page_word_count: data.page_word_count,
page_url: data.page_url.clone(),
page_meta: data.page_meta.clone(),
}),
Err(_) => err("Failed to add file"),
}
Expand All @@ -224,12 +199,7 @@ pub async fn run_service() {
glob,
} => {
if let Some(index) = get_index(&mut indexes, index_id, err) {
let defaults: PagefindInboundConfig =
serde_json::from_str("{}").expect("All fields have serde defaults");
let glob = glob.unwrap_or_else(|| defaults.glob);

let data = index.fossick_many(PathBuf::from(path), glob).await;
match data {
match index.add_dir(path, glob).await {
Ok(page_count) => send(ResponseAction::IndexedDir {
page_count: page_count as u32,
}),
Expand All @@ -251,23 +221,15 @@ pub async fn run_service() {
index.build_indexes().await;
let resolved_output_path = index.write_files(output_path.map(Into::into)).await;
send(ResponseAction::WriteFiles {
output_path: resolved_output_path.to_string_lossy().into(),
output_path: resolved_output_path,
});
}
}
RequestAction::GetFiles { index_id } => {
if let Some(index) = get_index(&mut indexes, index_id, err) {
index.build_indexes().await;
let files = index.get_files().await;
send(ResponseAction::GetFiles {
files: files
.into_iter()
.map(|file| SyntheticFileResponse {
path: file.filename.to_string_lossy().into(),
content: general_purpose::STANDARD.encode(file.contents),
})
.collect(),
});
send(ResponseAction::GetFiles { files });
}
}
RequestAction::DeleteIndex { index_id } => match indexes.get_mut(index_id as usize) {
Expand Down
Loading

0 comments on commit e6ce8ef

Please sign in to comment.