32 Commits

Author SHA1 Message Date
dependabot[bot]
92a5a7dc9d chore(deps): bump serde_json from 1.0.140 to 1.0.145
Bumps [serde_json](https://github.com/serde-rs/json) from 1.0.140 to 1.0.145.
- [Release notes](https://github.com/serde-rs/json/releases)
- [Commits](https://github.com/serde-rs/json/compare/v1.0.140...v1.0.145)

---
updated-dependencies:
- dependency-name: serde_json
  dependency-version: 1.0.145
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-15 20:36:30 +00:00
Himadri Bhattacharjee
6eab02575a Merge pull request #10 from lavafroth/dependabot/cargo/clap-4.5.45
chore(deps): bump clap from 4.5.41 to 4.5.45
2025-09-01 05:29:39 +00:00
Himadri Bhattacharjee
80fe8a3f16 Merge pull request #8 from lavafroth/dependabot/cargo/tokenizers-0.21.4
chore(deps): bump tokenizers from 0.21.1 to 0.21.4
2025-09-01 05:24:10 +00:00
Himadri Bhattacharjee
60256c06cc feat: update vscode lsp config 2025-08-26 12:02:58 +05:30
Himadri Bhattacharjee
caf4f51d22 Merge branch 'dump-expression' 2025-08-26 11:59:13 +05:30
Himadri Bhattacharjee
a543e80a04 ver: bump version for next release 2025-08-26 11:58:53 +05:30
Himadri Bhattacharjee
f0c137ade4 fix: reintroduce the root node for anchoring flexibility 2025-08-26 11:57:15 +05:30
Himadri Bhattacharjee
7b0f818d38 feat: parse capture groups with + or * wildcards 2025-08-26 10:41:54 +05:30
dependabot[bot]
4195cbb734 chore(deps): bump clap from 4.5.41 to 4.5.45
Bumps [clap](https://github.com/clap-rs/clap) from 4.5.41 to 4.5.45.
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.41...clap_complete-v4.5.45)

---
updated-dependencies:
- dependency-name: clap
  dependency-version: 4.5.45
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-19 05:05:31 +00:00
Himadri Bhattacharjee
e5602c688c add subcommands for ast quick actions 2025-08-18 16:57:03 +05:30
Himadri Bhattacharjee
89e4c3b5fb docs: document binary releases 2025-08-18 10:29:54 +05:30
Himadri Bhattacharjee
e24e62873f ci: remove macos release currently resulting unusable builds 2025-08-18 09:36:43 +05:30
Himadri Bhattacharjee
0b9ab89f35 ci: release build script
ci: use stable toolchain
2025-08-17 18:13:36 +05:30
Himadri Bhattacharjee
650329206d docs: document new supported languages 2025-08-17 17:37:34 +05:30
Himadri Bhattacharjee
7d9c3a448f feat: cli flag to dump S expression for a source file
TODO: move to being a subcommand
2025-08-17 17:34:43 +05:30
Himadri Bhattacharjee
633c1a206b deps: bump flake 2025-08-13 19:31:42 +05:30
Himadri Bhattacharjee
ab4c62fcf4 lint: clippy 2025-08-13 19:31:33 +05:30
Himadri Bhattacharjee
6ff9ba9d16 feat: add javascript and cpp language support 2025-08-13 19:31:26 +05:30
dependabot[bot]
e348b9a830 chore(deps): bump tokenizers from 0.21.1 to 0.21.4
Bumps [tokenizers](https://github.com/huggingface/tokenizers) from 0.21.1 to 0.21.4.
- [Release notes](https://github.com/huggingface/tokenizers/releases)
- [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md)
- [Commits](https://github.com/huggingface/tokenizers/compare/v0.21.1...v0.21.4)

---
updated-dependencies:
- dependency-name: tokenizers
  dependency-version: 0.21.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-12 06:07:01 +00:00
Himadri Bhattacharjee
d359121afd feat: add support for multiple files in the workdir 2025-08-01 20:29:58 +05:30
Himadri Bhattacharjee
4abd2cffac Merge pull request #5 from lavafroth/dependabot/cargo/clap-4.5.41
chore(deps): bump clap from 4.5.39 to 4.5.41
2025-07-29 07:04:12 +00:00
Himadri Bhattacharjee
daccd63006 feat: remove redundant normalization by token count before l2_norm of embeddings 2025-07-22 19:38:39 +05:30
dependabot[bot]
87e096f0bc Merge pull request #3 from lavafroth/dependabot/cargo/tree-sitter-0.25.8 2025-07-19 14:59:25 +00:00
Himadri Bhattacharjee
91d2640c11 feat: add preview gif 2025-07-19 19:23:58 +05:30
dependabot[bot]
ec3b89f455 chore(deps): bump clap from 4.5.39 to 4.5.41
Bumps [clap](https://github.com/clap-rs/clap) from 4.5.39 to 4.5.41.
- [Release notes](https://github.com/clap-rs/clap/releases)
- [Changelog](https://github.com/clap-rs/clap/blob/master/CHANGELOG.md)
- [Commits](https://github.com/clap-rs/clap/compare/clap_complete-v4.5.39...clap_complete-v4.5.41)

---
updated-dependencies:
- dependency-name: clap
  dependency-version: 4.5.41
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-19 08:07:55 +00:00
Himadri Bhattacharjee
c734c81a04 feat: implement shallow globbing; removed dep glob 2025-07-19 13:36:39 +05:30
dependabot[bot]
e7cae348a1 chore(deps): bump tree-sitter from 0.25.6 to 0.25.8
Bumps [tree-sitter](https://github.com/tree-sitter/tree-sitter) from 0.25.6 to 0.25.8.
- [Release notes](https://github.com/tree-sitter/tree-sitter/releases)
- [Commits](https://github.com/tree-sitter/tree-sitter/compare/v0.25.6...v0.25.8)

---
updated-dependencies:
- dependency-name: tree-sitter
  dependency-version: 0.25.8
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-07-17 07:43:28 +00:00
Himadri Bhattacharjee
faea784d8f ci: add dependabot 2025-07-17 13:12:23 +05:30
Himadri Bhattacharjee
e8970f21ff ci: add pipeline to build and test crate 2025-07-17 07:34:50 +00:00
Himadri Bhattacharjee
a1445b2f03 fmt: cargo fmt 2025-07-14 17:36:43 +05:30
Himadri Bhattacharjee
8f5e618841 fix: reshape tokenized f32 vector dimensions to hidden_size 2025-07-14 17:36:43 +05:30
Himadri Bhattacharjee
996142c8dd feat: custom snippet dir support 2025-07-14 17:18:24 +05:30
28 changed files with 512 additions and 182 deletions

8
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "cargo" # See documentation for possible values
directory: "/" # Location of package manifests
schedule:
interval: "weekly"
open-pull-requests-limit: 4

26
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,26 @@
on:
release:
types: [created]
jobs:
release:
name: release ${{ matrix.target }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- target: x86_64-pc-windows-gnu
archive: zip
- target: x86_64-unknown-linux-musl
archive: tar.zst
steps:
- uses: actions/checkout@master
- name: Compile and release
uses: rust-build/rust-build.action@v1.4.5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
RUSTTARGET: ${{ matrix.target }}
ARCHIVE_TYPES: ${{ matrix.archive }}
TOOLCHAIN_VERSION: stable

22
.github/workflows/rust.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Build and test
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build
run: cargo build --verbose
- name: Run tests
run: cargo test --verbose

View File

@@ -1,6 +1,11 @@
[language-server.silos]
command = "silos"
command = "./target/debug/silos"
args = ["lsp"]
[[language]]
name = "go"
language-servers = [ { name = "silos" } ]
language-servers = [ { name = "silos" }, "gopls" ]
[[language]]
name = "rust"
language-servers = [ ]

View File

@@ -9,6 +9,7 @@
],
"command": [
"silos"
"lsp"
]
}
]

157
Cargo.lock generated
View File

@@ -17,6 +17,20 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"getrandom 0.3.3",
"once_cell",
"serde",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -282,6 +296,15 @@ dependencies = [
"tracing",
]
[[package]]
name = "castaway"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
dependencies = [
"rustversion",
]
[[package]]
name = "cc"
version = "1.2.26"
@@ -299,9 +322,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.39"
version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f"
checksum = "1fc0e74a703892159f5ae7d3aac52c8e6c392f5ae5f359c70b5881d60aaac318"
dependencies = [
"clap_builder",
"clap_derive",
@@ -309,9 +332,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.39"
version = "4.5.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51"
checksum = "b3e7f4214277f3c7aa526a59dd3fbe306a370daee1f8b7b8c987069cd8e888a8"
dependencies = [
"anstream",
"anstyle",
@@ -321,9 +344,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.32"
version = "4.5.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7"
checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6"
dependencies = [
"heck",
"proc-macro2",
@@ -343,6 +366,21 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
[[package]]
name = "compact_str"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
dependencies = [
"castaway",
"cfg-if",
"itoa",
"rustversion",
"ryu",
"serde",
"static_assertions",
]
[[package]]
name = "console"
version = "0.15.11"
@@ -447,6 +485,15 @@ dependencies = [
"syn",
]
[[package]]
name = "dary_heap"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
dependencies = [
"serde",
]
[[package]]
name = "dashmap"
version = "5.5.3"
@@ -1050,12 +1097,6 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "glob"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "h2"
version = "0.4.10"
@@ -1430,18 +1471,9 @@ checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.11.0"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
@@ -2153,12 +2185,12 @@ dependencies = [
[[package]]
name = "rayon-cond"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "059f538b55efd2309c9794130bc149c6a553db90e9d99c2030785c82f0bd7df9"
checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f"
dependencies = [
"either",
"itertools 0.11.0",
"itertools",
"rayon",
]
@@ -2417,18 +2449,28 @@ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
[[package]]
name = "serde"
version = "1.0.219"
version = "1.0.224"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.224"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
version = "1.0.224"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0"
dependencies = [
"proc-macro2",
"quote",
@@ -2437,15 +2479,16 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.140"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"indexmap",
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
]
[[package]]
@@ -2497,7 +2540,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "silos"
version = "3.0.0"
version = "6.0.0"
dependencies = [
"anyhow",
"candle-core",
@@ -2505,11 +2548,9 @@ dependencies = [
"candle-transformers",
"clap",
"derive_more",
"glob",
"hf-hub",
"hora",
"kdl",
"serde",
"serde_json",
"tokenizers",
"tokio",
@@ -2518,7 +2559,9 @@ dependencies = [
"tracing-subscriber",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-javascript",
"tree-sitter-rust",
]
@@ -2576,6 +2619,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "streaming-iterator"
version = "0.1.9"
@@ -2748,23 +2797,25 @@ dependencies = [
[[package]]
name = "tokenizers"
version = "0.21.1"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3169b3195f925496c895caee7978a335d49218488ef22375267fba5a46a40bd7"
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
dependencies = [
"ahash",
"aho-corasick",
"compact_str",
"dary_heap",
"derive_builder",
"esaxx-rs",
"getrandom 0.2.16",
"getrandom 0.3.3",
"indicatif",
"itertools 0.13.0",
"lazy_static",
"itertools",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand 0.8.5",
"rand 0.9.1",
"rayon",
"rayon-cond",
"regex",
@@ -3007,9 +3058,9 @@ dependencies = [
[[package]]
name = "tree-sitter"
version = "0.25.6"
version = "0.25.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0"
checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2"
dependencies = [
"cc",
"regex",
@@ -3029,6 +3080,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-go"
version = "0.23.4"
@@ -3039,6 +3100,16 @@ dependencies = [
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.5"

View File

@@ -1,6 +1,6 @@
[package]
name = "silos"
version = "4.0.0"
version = "6.0.0"
edition = "2024"
[dependencies]
@@ -8,20 +8,20 @@ anyhow = "1.0.98"
candle-core = "0.9.1"
candle-nn = "0.9.1"
candle-transformers = "0.9.1"
clap = { version = "4.5.39", features = ["derive"] }
clap = { version = "4.5.45", features = ["derive"] }
derive_more = { version = "2.0.1", features = ["display", "error"] }
glob = "0.3.2"
hf-hub = "0.4.2"
hora = "0.1.1"
kdl = "6.3.4"
serde = "1.0.219"
serde_json = "1.0.140"
tokenizers = "0.21.1"
serde_json = "1.0.145"
tokenizers = "0.21.4"
tracing = "0.1.41"
tracing-subscriber = "0.3.19"
tree-sitter = "0.25.6"
tree-sitter = "0.25.8"
tree-sitter-c = "0.24.1"
tree-sitter-go = "0.23.4"
tree-sitter-rust = "0.24.0"
tokio = { version = "1.45.1", features = ["io-std", "macros", "rt", "rt-multi-thread"] }
tower-lsp = "0.20.0"
tree-sitter-javascript = "0.23.1"
tree-sitter-cpp = "0.23.4"

View File

@@ -2,11 +2,11 @@
Dumb, proomptable modular snippet search.
![preview](./assets/preview.gif)
## Installation
### Binary releases
There are no binary releases yet.
You can download a binary from the releases tab or build the project from source.
### From source
@@ -76,13 +76,15 @@ This API parses code into an AST (Abstract Syntax Tree) via tree-sitter and can
- C
- Rust
- Go
- Javascript
- C++
### Defining mutation collections
``` kdl
description "describes the mutation collection"
mutation {
expression "some ((beautiful) @adjective) AST expression"
expression "(some ((beautiful) @adjective) AST expression) @root"
substitute {
literal "hello"
capture "adjective"
@@ -91,7 +93,7 @@ mutation {
}
mutation {
expression "another"
expression "(another) @root"
substitute {
literal "multiple mutations work"
literal "as long as their expression"
@@ -102,14 +104,30 @@ mutation {
- `description`: A textual description of the mutation collection.
- `mutation`: Defines individual code changes.
- `expression`: Uses tree-sitter to match and capture AST nodes with `@` prefixes, The special `@root` node is reserved for the entire expression.
- `expression`: Uses tree-sitter to match and capture AST nodes with `@` prefixes.
- The special `@root` node must specify the expression to be replaced.
- `substitute`: Constructs the modified code using literals and captured arguments.
See the example mutation collection in `./snippets/v2/go/mutations.kdl`.
See the example mutation collection in `./snippets/v2/go/filepath-parent.kdl`.
- The API performs a single-pass substitution based on the closest matching mutation.
- Captured groups are used within the `substitute` block and the mutated code is returned.
> Every capture group must contain the largest atom to be operated on.
For example: if you wish to operate on elements of an array, capture each identifier inside the array
Correct way: Here the `array` and `identifier` only hint at where the expression `root` lies.
```
(array (identifier @root))
```
Incorrect way: Here the root expression matches the whole block of array elements inside the braces, not each element.
```
(array ((identifier)*) @entire-block-capture) @root
```
**Further reading**
- [tree-sitter query syntax](https://tree-sitter.github.io/tree-sitter/using-parsers/queries/1-syntax.html) to create mutation expressions.

BIN
assets/preview.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 MiB

6
flake.lock generated
View File

@@ -2,11 +2,11 @@
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1749776303,
"narHash": "sha256-OHibOvVwKqO1qvRg0r3agtd1EagW4THBcoWT7QGgcNo=",
"lastModified": 1755020227,
"narHash": "sha256-gGmm+h0t6rY88RPTaIm3su95QvQIVjAJx558YUG4Id8=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "6e7721e37bf00fa7ea44ac3cfc9d2411284ec3ef",
"rev": "695d5db1b8b20b73292501683a524e0bd79074fb",
"type": "github"
},
"original": {

View File

@@ -0,0 +1,13 @@
description "base64 import"
mutation {
expression "(import_spec_list ((import_spec)* @spec)) @root"
substitute {
literal "("
literal "\n"
capture "spec"
literal "\n"
literal #""base64""#
literal "\n"
literal ")"
}
}

View File

@@ -4,7 +4,7 @@ mutation {
(call_expression
function: (_) @func (#eq? @func "filepath.Base")
arguments: (_) @args
)
) @root
"""
substitute {
literal "filepath.Base(filepath.Dir(filepath.Clean"

View File

@@ -1,8 +1,15 @@
use clap::Parser;
use clap::{Args, Parser, Subcommand};
use std::path::PathBuf;
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
pub(crate) struct Args {
pub(crate) struct Cli {
#[command(subcommand)]
pub command: Command,
}
#[derive(Args, Debug)]
pub(crate) struct Lsp {
/// Run on the Nth GPU device.
#[arg(long)]
pub(crate) gpu: Option<usize>,
@@ -14,9 +21,41 @@ pub(crate) struct Args {
/// Revision or branch.
#[arg(long)]
pub(crate) revision: Option<String>,
/// Path to the directory containing `generate` and `refactor` snippets.
#[arg(long, default_value = "./snippets")]
pub(crate) snippets: std::path::PathBuf,
}
impl Args {
#[derive(Args, Debug)]
pub struct DumpExpression {
pub path: PathBuf,
}
#[derive(Args, Debug)]
pub struct ShowCaptures {
pub path: PathBuf,
pub expression: String,
}
#[derive(Subcommand, Debug)]
pub enum Ast {
/// Dump the S expression for a given source file
DumpExpression(DumpExpression),
/// Show what parts of a source file gets captured by an S expression
ShowCaptures(ShowCaptures),
}
#[derive(Subcommand, Debug)]
pub enum Command {
/// quick actions to dump, modify and verify abstract syntax trees
#[command(subcommand)]
Ast(Ast),
/// spawn a language server for use with a text editor
Lsp(Lsp),
}
impl Lsp {
pub(crate) fn resolve_model_and_revision(&self) -> (String, String) {
let default_model = "sentence-transformers/all-MiniLM-L6-v2".to_string();
let default_revision = "refs/pr/21".to_string();

View File

@@ -7,17 +7,24 @@ use hf_hub::Repo;
use hf_hub::RepoType;
use hf_hub::api::sync::Api;
use std::path::PathBuf;
use tokenizers::Tokenizer;
use tokenizers::TokenizerImpl;
use tokenizers::DecoderWrapper;
use tokenizers::ModelWrapper;
use tokenizers::NormalizerWrapper;
use tokenizers::PreTokenizerWrapper;
use tokenizers::PostProcessorWrapper;
use tokenizers::DecoderWrapper;
use tokenizers::PreTokenizerWrapper;
use tokenizers::Tokenizer;
use tokenizers::TokenizerImpl;
pub struct Embed {
model: BertModel,
tokenizer: TokenizerImpl<ModelWrapper, NormalizerWrapper, PreTokenizerWrapper, PostProcessorWrapper, DecoderWrapper>,
pub hidden_size: usize,
tokenizer: TokenizerImpl<
ModelWrapper,
NormalizerWrapper,
PreTokenizerWrapper,
PostProcessorWrapper,
DecoderWrapper,
>,
}
impl Embed {
@@ -41,9 +48,14 @@ impl Embed {
let tokenizer = tokenizer
.with_padding(None)
.with_truncation(None)
.map_err(E::msg)?.clone();
.map_err(E::msg)?
.clone();
Ok(Embed { model, tokenizer })
Ok(Embed {
model,
tokenizer,
hidden_size: config.hidden_size,
})
}
fn download_model_files(model_id: &str, revision: &str) -> Result<(PathBuf, PathBuf, PathBuf)> {
@@ -58,7 +70,8 @@ impl Embed {
}
pub(crate) fn embed(&self, prompt: &str) -> Result<Vec<f32>> {
let tokens = self.tokenizer
let tokens = self
.tokenizer
.encode(prompt, true)
.map_err(E::msg)?
.get_ids()
@@ -68,9 +81,9 @@ impl Embed {
let token_type_ids = token_ids.zeros_like()?;
let embeddings = self.model.forward(&token_ids, &token_type_ids, None)?;
let (_n_sentence, n_tokens, _hidden_size) = embeddings.dims3()?;
let embeddings = (embeddings.sum(1)? / (n_tokens as f64))?;
let embeddings = normalize_l2(&embeddings)?.reshape(384)?.to_vec1::<f32>()?;
let embeddings = normalize_l2(&embeddings.sum(1)?)?
.reshape(self.hidden_size)?
.to_vec1::<f32>()?;
Ok(embeddings)
}
}

View File

@@ -6,7 +6,7 @@ use tower_lsp::{Client, LanguageServer};
pub struct Backend {
pub client: Client,
pub body: Arc<Mutex<String>>,
pub body: Arc<Mutex<HashMap<Url, String>>>,
pub appstate: crate::State,
}
@@ -60,13 +60,18 @@ impl LanguageServer for Backend {
}
async fn did_open(&self, params: DidOpenTextDocumentParams) {
// TODO: build an index for multiple documents in workdir
*self.body.lock().await = params.text_document.text;
self.body
.lock()
.await
.insert(params.text_document.uri, params.text_document.text);
}
async fn did_change(&self, params: DidChangeTextDocumentParams) {
if let Some(body) = params.content_changes.into_iter().next() {
*self.body.lock().await = body.text;
self.body
.lock()
.await
.insert(params.text_document.uri, body.text);
}
}
@@ -77,14 +82,20 @@ impl LanguageServer for Backend {
let uri = params.text_document.uri;
let Some(lang) = url_extension(&uri) else {
self.client
.log_message(MessageType::ERROR, "unable to determine filetype, file has no extension")
.log_message(
MessageType::ERROR,
"unable to determine filetype, file has no extension",
)
.await;
return Ok(None);
};
let body = self.body.lock().await.to_string();
let body_locked = self.body.lock().await;
let Some(body) = body_locked.get(&uri) else {
return Ok(None);
};
let mut range = params.range;
let selected_text = string_range_index(&body, range);
let selected_text = string_range_index(body, range);
let Some(comment) = ParsedAction::new(selected_text) else {
return Ok(None);
@@ -93,14 +104,15 @@ impl LanguageServer for Backend {
let action_response = match comment.action {
Action::Generate => {
range.start = range.end;
self.appstate.generate(&lang, comment.description, 1)
self.appstate
.generate(&lang, comment.description, 1)
.map(|v| v.into_iter().map(|s| format!("{s}\n")).collect())
.map_err(|e| e.to_string())
}
Action::Refactor => {
self.appstate.refactor(&lang, comment.description, selected_text, 1)
.map_err(|e| e.to_string())
}
Action::Refactor => self
.appstate
.refactor(&lang, comment.description, selected_text, 1)
.map_err(|e| e.to_string()),
};
let closest_matches = match action_response {

View File

@@ -1,9 +1,9 @@
use anyhow::{Context, Error as E, Result, bail};
use anyhow::{Context, Error as E, Result};
use clap::Parser;
use hora::core::{ann_index::ANNIndex, metrics::Metric::Euclidean};
use hora::index::hnsw_idx::HNSWIndex;
use kdl::KdlDocument;
use state::State;
use state::{State, dump_expression};
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::Mutex;
@@ -12,53 +12,69 @@ use tower_lsp::{LspService, Server};
mod args;
mod embed;
mod lsp;
mod state;
mod mutation;
fn path_to_parent_base(p: &std::path::Path) -> Result<String> {
let Some(parent) = p
.parent()
.and_then(|v| v.file_name())
.and_then(|v| v.to_str())
.map(|v| v.to_string())
else {
bail!("failed to parse snippets path, make sure the directory structure is valid");
};
Ok(parent)
}
mod sources;
mod state;
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt::init();
let args = args::Args::parse();
let args = match args::Cli::parse().command {
args::Command::Ast(ast) => {
match ast {
args::Ast::DumpExpression(source_file) => {
println!("{}", dump_expression(&source_file.path)?);
}
args::Ast::ShowCaptures(show_captures) => {
let source = std::fs::read_to_string(&show_captures.path).unwrap();
let source_bytes = source.as_bytes();
let extension = show_captures.path.extension().unwrap().to_str().unwrap();
let langfn = state::Refactor::get_lang(extension).unwrap();
let mut parser = tree_sitter::Parser::new();
parser.set_language(&langfn).unwrap();
let tree = parser.parse(source_bytes, None).unwrap();
let root_node = tree.root_node();
let cooked = mutation::query(
root_node,
&show_captures.expression,
&langfn,
source_bytes,
);
println!("{:#?}", cooked);
}
}
return Ok(());
}
args::Command::Lsp(lsp) => lsp,
};
let (model_id, revision) = args.resolve_model_and_revision();
let embed = embed::Embed::new(args.gpu, &model_id, &revision)?;
let mut dict = HashMap::default();
let dimensions = 384;
let dimensions = embed.hidden_size;
let paths = glob::glob("./snippets/v1/*/*.kdl")?;
for path in paths {
let path = path?;
let parent = path_to_parent_base(&path)?;
for (language, paths) in sources::rule_files(args.snippets.join("generate"))? {
for path in paths {
let current_lang_index = dict
.entry(language.clone())
.or_insert_with(|| HNSWIndex::new(dimensions, &Default::default()));
let current_lang_index = dict
.entry(parent)
.or_insert_with(|| HNSWIndex::new(dimensions, &Default::default()));
let doc_str = std::fs::read_to_string(&path)?;
let doc: KdlDocument = doc_str
.parse()
.context(format!("failed to parse KDL: {}", path.display()))?;
let doc_str = std::fs::read_to_string(&path)?;
let doc: KdlDocument = doc_str
.parse()
.context(format!("failed to parse KDL: {}", path.display()))?;
let Some(desc) = doc.get_arg("desc").and_then(|v| v.as_string()) else {
continue;
};
let Some(body) = doc.get_arg("body").and_then(|v| v.as_string()) else {
continue;
};
current_lang_index
.add(&embed.embed(desc)?, body.to_string())
.map_err(E::msg)?;
let Some(desc) = doc.get_arg("desc").and_then(|v| v.as_string()) else {
continue;
};
let Some(body) = doc.get_arg("body").and_then(|v| v.as_string()) else {
continue;
};
current_lang_index
.add(&embed.embed(desc)?, body.to_string())
.map_err(E::msg)?;
}
}
for index in dict.values_mut() {
@@ -67,45 +83,45 @@ async fn main() -> Result<()> {
.map_err(E::msg)?;
}
// v2
let paths = glob::glob("./snippets/v2/*/*.kdl")?;
let mut v2_dict = HashMap::new();
let mut v2_mutations_collection = vec![];
for (i, path) in paths.enumerate() {
let path = path?;
let parent = path_to_parent_base(&path)?;
let mut refactor_dict = HashMap::new();
let mut mutations_collection = vec![];
for (language, paths) in sources::rule_files(args.snippets.join("refactor"))? {
for path in paths {
let mutations = mutation::from_path(path)?;
let current_lang_index = refactor_dict
.entry(language.clone())
.or_insert_with(|| HNSWIndex::new(dimensions, &Default::default()));
let mutations = mutation::from_path(path)?;
let current_lang_index = v2_dict
.entry(parent)
.or_insert_with(|| HNSWIndex::new(dimensions, &Default::default()));
current_lang_index
.add(&embed.embed(&mutations.description)?, i)
.map_err(E::msg)?;
v2_mutations_collection.push(mutations);
current_lang_index
.add(
&embed.embed(&mutations.description)?,
mutations_collection.len(),
)
.map_err(E::msg)?;
mutations_collection.push(mutations);
}
}
for index in v2_dict.values_mut() {
for index in refactor_dict.values_mut() {
index.build(Euclidean).map_err(E::msg)?;
}
let appstate = State {
let appstate = State::new(
embed,
generate: state::Generate { dict },
refactor: state::Refactor {
dict: v2_dict,
mutations_collection: v2_mutations_collection,
state::Generate { dict },
state::Refactor {
dict: refactor_dict,
mutations_collection,
},
};
);
let stdin = tokio::io::stdin();
let stdout = tokio::io::stdout();
let (service, socket) = LspService::new(|client| lsp::Backend {
client,
body: Arc::new(Mutex::new(String::default())),
appstate
body: Arc::new(Mutex::new(HashMap::default())),
appstate,
});
Server::new(stdin, stdout, socket).serve(service).await;
Ok(())

View File

@@ -72,10 +72,8 @@ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<MutationCollection> {
substitute.push(substitutor);
}
let expression = format!("({expression}) @root");
mutations.push(Mutation {
expression,
expression: expression.to_string(),
substitute,
})
}
@@ -127,7 +125,7 @@ pub fn apply(
}
#[derive(Debug)]
struct QueryCooked {
pub struct QueryCooked {
captures: HashMap<String, String>,
end: usize,
start: usize,
@@ -152,7 +150,7 @@ fn split_at_indices<'a>(c: &'a [u8], idx: &[usize]) -> SplitMap<'a> {
SplitMap { values, indices }
}
fn query<'a>(
pub fn query<'a>(
node: Node<'a>,
expr: &'a str,
lang: &Language,
@@ -164,6 +162,7 @@ fn query<'a>(
let mut query_matches = qc.matches(&query, node, source_bytes);
let capture_names = query.capture_names();
// println!("names: {capture_names:#?}");
let mut cooked = vec![];
@@ -171,19 +170,36 @@ fn query<'a>(
let mut capture_cooked = HashMap::new();
let mut start = 0;
let mut end = 0;
for cap in matcha.captures {
let Some(name) = capture_names.get(cap.index as usize) else {
continue;
};
if *name == "root" {
start = cap.node.start_byte();
end = cap.node.end_byte();
if matcha.captures.is_empty() {
continue;
}
// println!("match {:#?}", matcha.id());
for (ix, name) in capture_names.iter().enumerate() {
let nodes = matcha.nodes_for_capture_index(ix.try_into().unwrap());
let mut start_pos = None;
let mut end_pos = None;
// println!("matches for {name}");
for node in nodes {
if start_pos.is_none() {
start_pos.replace(node.start_byte());
}
end_pos.replace(node.end_byte());
// println!("hit {node:#?}");
}
if start_pos.or(end_pos).is_none() {
continue;
}
capture_cooked.insert(
name.to_string(),
cap.node.utf8_text(source_bytes).unwrap().to_string(),
);
if *name == "root" {
start = start_pos.unwrap();
end = end_pos.unwrap();
}
let range = start_pos.unwrap()..end_pos.unwrap();
// println!("match range for {name}: {:#?}", range);
let text_bytes = &source_bytes[range];
let text = std::str::from_utf8(text_bytes).unwrap();
// println!("text: {text}");
capture_cooked.insert(name.to_string(), text.to_string());
}
cooked.push(QueryCooked {
start,

34
src/sources.rs Normal file
View File

@@ -0,0 +1,34 @@
use std::{
collections::HashMap,
fs, io,
path::{Path, PathBuf},
};
/// Collects the `.kdl` rule files found under each language directory of `path`.
///
/// Every immediate subdirectory of `path` is treated as one language. The map key is
/// that directory's name as returned by [`Path::file_stem`], and the value lists the
/// regular files directly inside it whose extension is exactly `kdl`. A language
/// directory without any such file maps to an empty list.
///
/// The paths of each language are sorted, because `fs::read_dir` yields entries in a
/// platform-dependent order and callers should see the same rule order on every run.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] if `path` or one of the language directories
/// cannot be opened for listing. Individual directory entries that fail to be read, and
/// directories whose name is not valid UTF-8, are skipped.
pub fn rule_files<P: AsRef<Path>>(path: P) -> io::Result<HashMap<String, Vec<PathBuf>>> {
    let language_dirs = fs::read_dir(path)?
        .filter_map(Result::ok)
        .map(|entry| entry.path())
        .filter(|candidate| candidate.is_dir());

    let mut basename_to_paths = HashMap::new();
    for language_dir in language_dirs {
        let Some(dirname) = language_dir
            .file_stem()
            .and_then(|stem| stem.to_str())
            .map(str::to_owned)
        else {
            continue;
        };

        // `DirEntry::path` already hands back an owned `PathBuf`, so the entries can be
        // collected directly without another copy.
        let mut rule_file_paths: Vec<PathBuf> = fs::read_dir(&language_dir)?
            .filter_map(Result::ok)
            .map(|entry| entry.path())
            .filter(|file| file.is_file() && file.extension().is_some_and(|ext| ext == "kdl"))
            .collect();
        rule_file_paths.sort_unstable();

        basename_to_paths.insert(dirname, rule_file_paths);
    }

    Ok(basename_to_paths)
}
// fn prebuilt_index();

View File

@@ -1,10 +1,11 @@
use crate::mutation;
use derive_more::Display;
use derive_more::Error;
use tree_sitter::Parser;
use std::collections::HashMap;
use hora::index::hnsw_idx::HNSWIndex;
use hora::core::ann_index::ANNIndex;
use crate::mutation;
use hora::index::hnsw_idx::HNSWIndex;
use std::collections::HashMap;
use std::path::Path;
use tree_sitter::Parser;
#[derive(Debug, Display, Error)]
pub enum Error {
@@ -22,10 +23,12 @@ pub struct Refactor {
}
impl Refactor {
fn get_lang(s: &str) -> Result<tree_sitter::Language, Error> {
pub fn get_lang(s: &str) -> Result<tree_sitter::Language, Error> {
Ok(match s {
"go" => tree_sitter_go::LANGUAGE,
"c" => tree_sitter_c::LANGUAGE,
"c" | "h" => tree_sitter_c::LANGUAGE,
"cpp" | "hpp" => tree_sitter_cpp::LANGUAGE,
"js" | "ts" => tree_sitter_javascript::LANGUAGE,
"rs" => tree_sitter_rust::LANGUAGE,
_ => return Err(Error::UnknownLang),
}
@@ -68,7 +71,8 @@ impl Refactor {
Err(e) => {
tracing::error!(
collection_index = index,
"failed to apply mutations from collection {}", e
"failed to apply mutations from collection {}",
e
);
None
}
@@ -78,8 +82,29 @@ impl Refactor {
Ok(collected)
}
}
pub fn dump_expression(path: &Path) -> Result<String, Error> {
let Some(lang) = path.extension() else {
return Err(Error::UnknownLang);
};
let lang = lang.to_str().ok_or(Error::UnknownLang)?;
let langfn = Refactor::get_lang(lang)?;
let mut parser = Parser::new();
parser
.set_language(&langfn)
.map_err(|_| Error::UnknownLang)?;
let source_code = std::fs::read_to_string(path).map_err(|_| Error::SnippetParsing)?;
let source_bytes = source_code.as_bytes();
let tree = parser
.parse(source_bytes, None)
.ok_or(Error::SnippetParsing)?;
let root_node = tree.root_node();
Ok(root_node.to_sexp().to_string())
}
pub struct Generate {
pub dict: HashMap<String, HNSWIndex<f32, String>>
pub dict: HashMap<String, HNSWIndex<f32, String>>,
}
impl Generate {
@@ -92,14 +117,19 @@ impl Generate {
}
pub struct State {
// TODO: create new constructor and private these fields
pub embed: crate::embed::Embed,
pub generate: Generate,
pub refactor: Refactor,
embed: crate::embed::Embed,
generate: Generate,
refactor: Refactor,
}
impl State {
pub fn new(embed: crate::embed::Embed, generate: Generate, refactor: Refactor) -> Self {
Self {
embed,
generate,
refactor,
}
}
pub fn generate(&self, lang: &str, prompt: &str, top_k: usize) -> Result<Vec<String>, Error> {
let Ok(target) = self.embed.embed(prompt) else {
return Err(Error::EmbedFailed);
@@ -108,7 +138,13 @@ impl State {
self.generate.search(lang, &target, top_k)
}
pub fn refactor(&self, lang: &str, prompt: &str, body: &str, top_k: usize) -> Result<Vec<String>, Error> {
pub fn refactor(
&self,
lang: &str,
prompt: &str,
body: &str,
top_k: usize,
) -> Result<Vec<String>, Error> {
let Ok(target) = self.embed.embed(prompt) else {
return Err(Error::EmbedFailed);
};