20 Commits

Author SHA1 Message Date
dependabot[bot]
2a00534a4f chore(deps): bump tokenizers from 0.21.4 to 0.22.1
Bumps [tokenizers](https://github.com/huggingface/tokenizers) from 0.21.4 to 0.22.1.
- [Release notes](https://github.com/huggingface/tokenizers/releases)
- [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md)
- [Commits](https://github.com/huggingface/tokenizers/compare/v0.21.4...v0.22.1)

---
updated-dependencies:
- dependency-name: tokenizers
  dependency-version: 0.22.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-17 20:37:09 +00:00
Himadri Bhattacharjee
8e045994a8 feat: devShell: add bacon to flake 2025-11-13 19:04:16 +05:30
Himadri Bhattacharjee
48a15defcf feat: add dry-run subcommand to check snippet on sample code 2025-11-13 19:03:59 +05:30
Himadri Bhattacharjee
4fb97f27b0 deps: bump cargo dependencies 2025-11-13 19:03:39 +05:30
Himadri Bhattacharjee
bb6e5fca7e Merge pull request #13 from lavafroth/dependabot/cargo/tracing-subscriber-0.3.20
chore(deps): bump tracing-subscriber from 0.3.19 to 0.3.20
2025-11-13 17:35:05 +05:30
Himadri Bhattacharjee
fd5fb501df Merge pull request #12 from lavafroth/dependabot/cargo/tree-sitter-javascript-0.25.0
chore(deps): bump tree-sitter-javascript from 0.23.1 to 0.25.0
2025-11-13 17:33:55 +05:30
dependabot[bot]
dbeea93010 chore(deps): bump tracing-subscriber from 0.3.19 to 0.3.20
Bumps [tracing-subscriber](https://github.com/tokio-rs/tracing) from 0.3.19 to 0.3.20.
- [Release notes](https://github.com/tokio-rs/tracing/releases)
- [Commits](https://github.com/tokio-rs/tracing/compare/tracing-subscriber-0.3.19...tracing-subscriber-0.3.20)

---
updated-dependencies:
- dependency-name: tracing-subscriber
  dependency-version: 0.3.20
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-08 20:56:25 +00:00
dependabot[bot]
af831852d9 chore(deps): bump tree-sitter-javascript from 0.23.1 to 0.25.0
Bumps [tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) from 0.23.1 to 0.25.0.
- [Release notes](https://github.com/tree-sitter/tree-sitter-javascript/releases)
- [Commits](https://github.com/tree-sitter/tree-sitter-javascript/compare/v0.23.1...v0.25.0)

---
updated-dependencies:
- dependency-name: tree-sitter-javascript
  dependency-version: 0.25.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-08 20:54:52 +00:00
Himadri Bhattacharjee
6eab02575a Merge pull request #10 from lavafroth/dependabot/cargo/clap-4.5.45
chore(deps): bump clap from 4.5.41 to 4.5.45
2025-09-01 05:29:39 +00:00
Himadri Bhattacharjee
80fe8a3f16 Merge pull request #8 from lavafroth/dependabot/cargo/tokenizers-0.21.4
chore(deps): bump tokenizers from 0.21.1 to 0.21.4
2025-09-01 05:24:10 +00:00
Himadri Bhattacharjee
60256c06cc feat: update vscode lsp config 2025-08-26 12:02:58 +05:30
Himadri Bhattacharjee
caf4f51d22 Merge branch 'dump-expression' 2025-08-26 11:59:13 +05:30
Himadri Bhattacharjee
a543e80a04 ver: bump version for next release 2025-08-26 11:58:53 +05:30
Himadri Bhattacharjee
f0c137ade4 fix: reintroduce the root node for anchoring flexibility 2025-08-26 11:57:15 +05:30
Himadri Bhattacharjee
7b0f818d38 feat: parse capture groups with + or * wildcards 2025-08-26 10:41:54 +05:30
dependabot[bot]
4195cbb734 chore(deps): bump clap from 4.5.41 to 4.5.45
Bumps [clap](https://github.com/clap-rs/clap) from 4.5.41 to 4.5.45.
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.41...clap_complete-v4.5.45)

---
updated-dependencies:
- dependency-name: clap
  dependency-version: 4.5.45
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-19 05:05:31 +00:00
Himadri Bhattacharjee
e5602c688c add subcommands for ast quick actions 2025-08-18 16:57:03 +05:30
Himadri Bhattacharjee
89e4c3b5fb docs: document binary releases 2025-08-18 10:29:54 +05:30
Himadri Bhattacharjee
7d9c3a448f feat: cli flag to dump S expression for a source file
TODO: move to being a subcommand
2025-08-17 17:34:43 +05:30
dependabot[bot]
e348b9a830 chore(deps): bump tokenizers from 0.21.1 to 0.21.4
Bumps [tokenizers](https://github.com/huggingface/tokenizers) from 0.21.1 to 0.21.4.
- [Release notes](https://github.com/huggingface/tokenizers/releases)
- [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md)
- [Commits](https://github.com/huggingface/tokenizers/compare/v0.21.1...v0.21.4)

---
updated-dependencies:
- dependency-name: tokenizers
  dependency-version: 0.21.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-12 06:07:01 +00:00
14 changed files with 756 additions and 626 deletions

View File

@@ -1,6 +1,11 @@
[language-server.silos]
command = "silos"
command = "./target/debug/silos"
args = ["lsp"]
[[language]]
name = "go"
language-servers = [ { name = "silos" } ]
language-servers = [ { name = "silos" }, "gopls" ]
[[language]]
name = "rust"
language-servers = [ ]

View File

@@ -9,6 +9,7 @@
],
"command": [
"silos"
"lsp"
]
}
]

1071
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "silos"
version = "5.2.2"
version = "6.0.0"
edition = "2024"
[dependencies]
@@ -8,20 +8,20 @@ anyhow = "1.0.98"
candle-core = "0.9.1"
candle-nn = "0.9.1"
candle-transformers = "0.9.1"
clap = { version = "4.5.41", features = ["derive"] }
clap = { version = "4.5.45", features = ["derive"] }
derive_more = { version = "2.0.1", features = ["display", "error"] }
hf-hub = "0.4.2"
hora = "0.1.1"
kdl = "6.3.4"
serde_json = "1.0.140"
tokenizers = "0.21.1"
tokenizers = "0.22.1"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tracing-subscriber = "0.3.20"
tree-sitter = "0.25.8"
tree-sitter-c = "0.24.1"
tree-sitter-go = "0.23.4"
tree-sitter-rust = "0.24.0"
tokio = { version = "1.45.1", features = ["io-std", "macros", "rt", "rt-multi-thread"] }
tower-lsp = "0.20.0"
tree-sitter-javascript = "0.23.1"
tree-sitter-javascript = "0.25.0"
tree-sitter-cpp = "0.23.4"

View File

@@ -6,9 +6,7 @@ Dumb, proomptable modular snippet search.
## Installation
### Binary releases
There are no binary releases yet.
You can download a binary from the releases tab or build the project from source.
### From source
@@ -86,7 +84,7 @@ This API parses code into an AST (Abstract Syntax Tree) via tree-sitter and can
``` kdl
description "describes the mutation collection"
mutation {
expression "some ((beautiful) @adjective) AST expression"
expression "(some ((beautiful) @adjective) AST expression) @root"
substitute {
literal "hello"
capture "adjective"
@@ -95,7 +93,7 @@ mutation {
}
mutation {
expression "another"
expression "(another) @root"
substitute {
literal "multiple mutations work"
literal "as long as their expression"
@@ -106,14 +104,30 @@ mutation {
- `description`: A textual description of the mutation collection.
- `mutation`: Defines individual code changes.
- `expression`: Uses tree-sitter to match and capture AST nodes with `@` prefixes, The special `@root` node is reserved for the entire expression.
- `expression`: Uses tree-sitter to match and capture AST nodes with `@` prefixes,
- The special `@root` node must specify the expression to be replaced.
- `substitute`: Constructs the modified code using literals and captured arguments.
See the example mutation collection in `./snippets/v2/go/mutations.kdl`.
See the example mutation collection in `./snippets/v2/go/filepath-parent.kdl`.
- The API performs a single-pass substitution based on the closest matching mutation.
- Captured groups are used within the `substitute` block and the mutated code is returned.
> Every capture group must contain the largest atom to be operated on.
For example: if you wish to operate on elements of an array, capture each identifier inside the array
Correct way: Here `array` and `identifier` only hint at where the `root` expression lies.
```
(array (identifier @root))
```
Incorrect way: Here the root expression matches the whole block of array elements inside the braces, not each element.
```
(array ((identifier)*) @entire-block-capture) @root
```
**Further reading**
- [tree-sitter query syntax](https://tree-sitter.github.io/tree-sitter/using-parsers/queries/1-syntax.html) to create mutation expressions.

View File

@@ -13,6 +13,7 @@
packages = with pkgs; [
stdenv.cc.cc
pkg-config
bacon
];
libraries = with pkgs; [

View File

@@ -0,0 +1,13 @@
description "base64 import"
mutation {
expression "(import_spec_list ((import_spec)* @spec)) @root"
substitute {
literal "("
literal "\n"
capture "spec"
literal "\n"
literal #""base64""#
literal "\n"
literal ")"
}
}

View File

@@ -4,7 +4,7 @@ mutation {
(call_expression
function: (_) @func (#eq? @func "filepath.Base")
arguments: (_) @args
)
) @root
"""
substitute {
literal "filepath.Base(filepath.Dir(filepath.Clean"

View File

@@ -1,8 +1,15 @@
use clap::Parser;
use clap::{Args, Parser, Subcommand};
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub(crate) struct Args {
pub(crate) struct Cli {
#[command(subcommand)]
pub command: Command,
}
#[derive(Args, Debug)]
pub(crate) struct Lsp {
/// Run on the Nth GPU device.
#[arg(long)]
pub(crate) gpu: Option<usize>,
@@ -20,7 +27,44 @@ pub(crate) struct Args {
pub(crate) snippets: std::path::PathBuf,
}
impl Args {
// Arguments carried by the `Ast::DumpExpression` subcommand.
// Plain `//` comments are used here on purpose: `///` comments on clap derive
// types are picked up as help text and would change the CLI output.
#[derive(Args, Debug)]
pub struct DumpExpression {
// Source file whose syntax tree is dumped as an S expression.
pub path: PathBuf,
}
// Arguments carried by the `Ast::ShowCaptures` subcommand.
// Plain `//` comments are used here on purpose: `///` comments on clap derive
// types are picked up as help text and would change the CLI output.
#[derive(Args, Debug)]
pub struct ShowCaptures {
// Source file that is read and parsed before the query is run against it.
pub path: PathBuf,
// Query expression that is evaluated against the parsed source file.
pub expression: String,
}
// Arguments carried by the `Ast::DryRun` subcommand.
// Plain `//` comments are used here on purpose: `///` comments on clap derive
// types are picked up as help text and would change the CLI output.
#[derive(Args, Debug)]
pub struct DryRun {
// Sample source file the mutations are applied to.
pub path: PathBuf,
// File holding the mutation collection (edit snippet) to try out.
pub edit_file: PathBuf,
}
// Subcommands nested under `Command::Ast`. Each variant wraps the argument
// struct of the same name. The `///` lines on the variants are user-facing:
// clap's derive turns them into the description shown for each subcommand,
// so they are kept short and grammatical.
#[derive(Subcommand, Debug)]
pub enum Ast {
    /// Dump the S expression for a given source file
    DumpExpression(DumpExpression),
    /// Show which parts of a source file get captured by an S expression
    ShowCaptures(ShowCaptures),
    /// Test your edit snippets on a sample file
    DryRun(DryRun),
}
// Top-level subcommands of the CLI.
// Plain `//` comments are used for the additions here: `///` comments on clap
// derive types are picked up as help text and would change the CLI output.
#[derive(Subcommand, Debug)]
pub enum Command {
/// quick actions to dump, modify and verify abstract syntax trees
// `#[command(subcommand)]` makes `Ast` a nested group of subcommands rather
// than a flat set of arguments.
#[command(subcommand)]
Ast(Ast),
/// spawn a language server for use with a text editor
Lsp(Lsp),
}
impl Lsp {
pub(crate) fn resolve_model_and_revision(&self) -> (String, String) {
let default_model = "sentence-transformers/all-MiniLM-L6-v2".to_string();
let default_revision = "refs/pr/21".to_string();

View File

@@ -60,12 +60,18 @@ impl LanguageServer for Backend {
}
async fn did_open(&self, params: DidOpenTextDocumentParams) {
self.body.lock().await.insert(params.text_document.uri, params.text_document.text);
self.body
.lock()
.await
.insert(params.text_document.uri, params.text_document.text);
}
async fn did_change(&self, params: DidChangeTextDocumentParams) {
if let Some(body) = params.content_changes.into_iter().next() {
self.body.lock().await.insert(params.text_document.uri, body.text);
self.body
.lock()
.await
.insert(params.text_document.uri, body.text);
}
}

View File

@@ -3,7 +3,7 @@ use clap::Parser;
use hora::core::{ann_index::ANNIndex, metrics::Metric::Euclidean};
use hora::index::hnsw_idx::HNSWIndex;
use kdl::KdlDocument;
use state::State;
use state::{State, dump_expression};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
@@ -13,14 +13,49 @@ mod args;
mod embed;
mod lsp;
mod mutation;
mod state;
mod sources;
mod state;
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt::init();
let args = args::Args::parse();
let args = match args::Cli::parse().command {
args::Command::Ast(ast) => {
match ast {
args::Ast::DumpExpression(source_file) => {
println!("{}", dump_expression(&source_file.path)?);
}
args::Ast::ShowCaptures(show_captures) => {
let source_bytes = std::fs::read(&show_captures.path)?;
let langfn = state::lang_from_file_extension(&show_captures.path)?;
let tree = state::parse_into_tree(&source_bytes, &langfn)?;
let root_node = tree.root_node();
let cooked = mutation::query(
root_node,
&show_captures.expression,
&langfn,
&source_bytes,
);
println!("{:#?}", cooked);
}
args::Ast::DryRun(dry_run) => {
let mutation_collection = mutation::from_path(dry_run.edit_file)?;
let source_bytes = std::fs::read(&dry_run.path)?;
let langfn = state::lang_from_file_extension(&dry_run.path)?;
let tree = state::parse_into_tree(&source_bytes, &langfn)?;
let root_node = tree.root_node();
let cooked =
mutation::apply(langfn, &source_bytes, root_node, &mutation_collection)?;
println!("{cooked}");
}
}
return Ok(());
}
args::Command::Lsp(lsp) => lsp,
};
let (model_id, revision) = args.resolve_model_and_revision();
let embed = embed::Embed::new(args.gpu, &model_id, &revision)?;
let mut dict = HashMap::default();
let dimensions = embed.hidden_size;
@@ -64,7 +99,10 @@ async fn main() -> Result<()> {
.or_insert_with(|| HNSWIndex::new(dimensions, &Default::default()));
current_lang_index
.add(&embed.embed(&mutations.description)?, mutations_collection.len())
.add(
&embed.embed(&mutations.description)?,
mutations_collection.len(),
)
.map_err(E::msg)?;
mutations_collection.push(mutations);
}

View File

@@ -72,10 +72,8 @@ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<MutationCollection> {
substitute.push(substitutor);
}
let expression = format!("({expression}) @root");
mutations.push(Mutation {
expression,
expression: expression.to_string(),
substitute,
})
}
@@ -127,7 +125,7 @@ pub fn apply(
}
#[derive(Debug)]
struct QueryCooked {
pub struct QueryCooked {
captures: HashMap<String, String>,
end: usize,
start: usize,
@@ -152,7 +150,7 @@ fn split_at_indices<'a>(c: &'a [u8], idx: &[usize]) -> SplitMap<'a> {
SplitMap { values, indices }
}
fn query<'a>(
pub fn query<'a>(
node: Node<'a>,
expr: &'a str,
lang: &Language,
@@ -164,6 +162,7 @@ fn query<'a>(
let mut query_matches = qc.matches(&query, node, source_bytes);
let capture_names = query.capture_names();
// println!("names: {capture_names:#?}");
let mut cooked = vec![];
@@ -171,19 +170,35 @@ fn query<'a>(
let mut capture_cooked = HashMap::new();
let mut start = 0;
let mut end = 0;
for cap in matcha.captures {
let Some(name) = capture_names.get(cap.index as usize) else {
if matcha.captures.is_empty() {
continue;
}
// println!("match {:#?}", matcha.id());
for (ix, name) in capture_names.iter().enumerate() {
let nodes = matcha.nodes_for_capture_index(ix.try_into().unwrap());
let mut start_pos = None;
let mut end_pos = None;
debug!("matches for {name}");
for node in nodes {
start_pos.get_or_insert(node.start_byte());
end_pos.replace(node.end_byte());
debug!("hit {node:#?}");
}
let (Some(start_pos), Some(end_pos)) = (start_pos, end_pos) else {
continue;
};
if *name == "root" {
start = cap.node.start_byte();
end = cap.node.end_byte();
continue;
start = start_pos;
end = end_pos;
}
capture_cooked.insert(
name.to_string(),
cap.node.utf8_text(source_bytes).unwrap().to_string(),
);
let text_bytes = &source_bytes[start_pos..end_pos];
let text = std::str::from_utf8(text_bytes).unwrap();
// println!("text: {text}");
capture_cooked.insert(name.to_string(), text.to_string());
}
cooked.push(QueryCooked {
start,

View File

@@ -1,23 +1,32 @@
use std::{fs, io, path::{Path, PathBuf}, collections::HashMap};
use std::{
collections::HashMap,
fs, io,
path::{Path, PathBuf},
};
pub fn rule_files<P: AsRef<Path>>(path: P) -> io::Result<HashMap<String, Vec<PathBuf>>> {
let per_language_dirs: Vec<_> = fs::read_dir(path)?
.filter_map(|res| res.ok())
.map(|direntry| direntry.path())
.filter(|dir| dir.is_dir()).collect();
.filter_map(|res| res.ok())
.map(|direntry| direntry.path())
.filter(|dir| dir.is_dir())
.collect();
let mut basename_to_paths = HashMap::new();
for language_dir in per_language_dirs {
let Some(dirname) = language_dir.file_stem().and_then(|v|v.to_str()).map(|v| v.to_string()) else {
let Some(dirname) = language_dir
.file_stem()
.and_then(|v| v.to_str())
.map(|v| v.to_string())
else {
continue;
};
let rule_file_paths: Vec<_> = fs::read_dir(&language_dir)?
.filter_map(|res| res.ok())
.map(|entry| entry.path())
.filter(|file| file.is_file() && file.extension().is_some_and(|ext| ext == "kdl"))
.map(|path| path.to_path_buf())
.collect();
.filter_map(|res| res.ok())
.map(|entry| entry.path())
.filter(|file| file.is_file() && file.extension().is_some_and(|ext| ext == "kdl"))
.map(|path| path.to_path_buf())
.collect();
basename_to_paths.insert(dirname, rule_file_paths);
}
Ok(basename_to_paths)

View File

@@ -4,6 +4,7 @@ use derive_more::Error;
use hora::core::ann_index::ANNIndex;
use hora::index::hnsw_idx::HNSWIndex;
use std::collections::HashMap;
use std::path::Path;
use tree_sitter::Parser;
#[derive(Debug, Display, Error)]
@@ -22,18 +23,6 @@ pub struct Refactor {
}
impl Refactor {
fn get_lang(s: &str) -> Result<tree_sitter::Language, Error> {
Ok(match s {
"go" => tree_sitter_go::LANGUAGE,
"c" | "h" => tree_sitter_c::LANGUAGE,
"cpp" | "hpp" => tree_sitter_cpp::LANGUAGE,
"js" | "ts" => tree_sitter_javascript::LANGUAGE,
"rs" => tree_sitter_rust::LANGUAGE,
_ => return Err(Error::UnknownLang),
}
.into())
}
pub fn search(
&self,
lang: &str,
@@ -41,17 +30,9 @@ impl Refactor {
body: &str,
top_k: usize,
) -> Result<Vec<String>, Error> {
let langfn = Self::get_lang(lang)?;
let mut parser = Parser::new();
parser
.set_language(&langfn)
.map_err(|_| Error::UnknownLang)?;
let source_code = body;
let source_bytes = source_code.as_bytes();
let tree = parser
.parse(source_code, None)
.ok_or(Error::SnippetParsing)?;
let langfn = lang_from_name(lang)?;
let source_bytes = body.as_bytes();
let tree = parse_into_tree(source_bytes, &langfn)?;
let root_node = tree.root_node();
// search for k nearest neighbors
@@ -81,6 +62,48 @@ impl Refactor {
Ok(collected)
}
}
/// Resolves a short language name (or file extension) to its tree-sitter grammar.
///
/// Recognised names are `go`, `c`/`h`, `cpp`/`hpp`, `js`/`ts` and `rs`.
/// Note that `ts` is served by the JavaScript grammar and `h` by the C grammar.
///
/// # Errors
/// Returns `Error::UnknownLang` for any other name.
pub fn lang_from_name(s: &str) -> Result<tree_sitter::Language, Error> {
    let grammar = match s {
        "go" => tree_sitter_go::LANGUAGE,
        "c" | "h" => tree_sitter_c::LANGUAGE,
        "cpp" | "hpp" => tree_sitter_cpp::LANGUAGE,
        "js" | "ts" => tree_sitter_javascript::LANGUAGE,
        "rs" => tree_sitter_rust::LANGUAGE,
        _ => return Err(Error::UnknownLang),
    };
    Ok(grammar.into())
}
pub fn lang_from_file_extension(path: &Path) -> Result<tree_sitter::Language, Error> {
let Some(lang) = path.extension() else {
return Err(Error::UnknownLang);
};
let lang = lang.to_str().ok_or(Error::UnknownLang)?;
lang_from_name(lang)
}
// parses `body` written in the language `langfn` into tree sitter AST
pub fn parse_into_tree(
body: &[u8],
langfn: &tree_sitter::Language,
) -> Result<tree_sitter::Tree, Error> {
let mut parser = Parser::new();
parser
.set_language(langfn)
.map_err(|_| Error::UnknownLang)?;
let tree = parser.parse(body, None).ok_or(Error::SnippetParsing)?;
Ok(tree)
}
/// Reads the file at `path`, parses it with the grammar chosen from its
/// extension, and returns the S expression of the tree's root node.
///
/// # Errors
/// - `Error::SnippetParsing` if the file cannot be read or the parser does not
///   produce a tree.
/// - `Error::UnknownLang` if the extension is missing, not valid UTF-8 or not
///   recognised, or if the parser rejects the grammar.
pub fn dump_expression(path: &Path) -> Result<String, Error> {
    // NOTE(review): a failed read is reported as `SnippetParsing`, which hides
    // the underlying I/O error; kept as-is so callers see the same error variant.
    let source_bytes = std::fs::read(path).map_err(|_| Error::SnippetParsing)?;
    let language = lang_from_file_extension(path)?;
    let tree = parse_into_tree(&source_bytes, &language)?;
    // `to_sexp` already yields an owned `String`; converting it again would
    // only copy the whole dump a second time.
    Ok(tree.root_node().to_sexp())
}
/// Wrapper around a map of HNSW indices keyed by `String`.
pub struct Generate {
/// One `HNSWIndex` per key.
// NOTE(review): keys are presumably language names — confirm against the code that fills `dict`.
pub dict: HashMap<String, HNSWIndex<f32, String>>,
}