feat: more flexible raw string handling with kdl
This commit is contained in:
@@ -14,6 +14,7 @@ derive_more = "2.0.1"
|
||||
glob = "0.3.2"
|
||||
hf-hub = "0.4.2"
|
||||
hora = "0.1.1"
|
||||
kdl = "6.3.4"
|
||||
serde = "1.0.219"
|
||||
serde_json = "1.0.140"
|
||||
tokenizers = "0.21.1"
|
||||
|
||||
19
src/main.rs
19
src/main.rs
@@ -1,11 +1,13 @@
|
||||
use actix_web::{App, HttpServer, web};
|
||||
use anyhow::{Error as E, Result};
|
||||
use anyhow::{Context, Error as E, Result};
|
||||
use clap::Parser;
|
||||
use hora::core::ann_index::ANNIndex;
|
||||
use hora::index::hnsw_idx::HNSWIndex;
|
||||
use kdl::KdlDocument;
|
||||
use std::collections::HashMap;
|
||||
mod embed;
|
||||
mod v1;
|
||||
// mod v2;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
@@ -48,7 +50,7 @@ async fn main() -> Result<()> {
|
||||
let mut embed = embed::Embed::new(args)?;
|
||||
let mut dict = HashMap::default();
|
||||
|
||||
let paths = glob::glob("./snippets/v1/*/*.json")?;
|
||||
let paths = glob::glob("./snippets/v1/*/*.kdl")?;
|
||||
for path in paths {
|
||||
let path = path?;
|
||||
let parent = path
|
||||
@@ -67,10 +69,17 @@ async fn main() -> Result<()> {
|
||||
HNSWIndex::<f32, String>::new(dimension, ¶ms)
|
||||
});
|
||||
|
||||
let snippet: v1::api::SnippetOnDisk =
|
||||
serde_json::from_str(&std::fs::read_to_string(path)?)?;
|
||||
let doc_str = std::fs::read_to_string(path)?;
|
||||
let doc: KdlDocument = doc_str.parse().context("failed to parse KDL")?;
|
||||
|
||||
let Some(desc) = doc.get_arg("desc").and_then(|v| v.as_string()) else {
|
||||
continue;
|
||||
};
|
||||
let Some(body) = doc.get_arg("body").and_then(|v| v.as_string()) else {
|
||||
continue;
|
||||
};
|
||||
current_lang_index
|
||||
.add(&embed.embed(&snippet.desc)?, snippet.body)
|
||||
.add(&embed.embed(desc)?, body.to_string())
|
||||
.map_err(E::msg)?;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user