Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
864c394ed7 | ||
|
|
55e915cc32 | ||
|
|
efec8c2220 | ||
|
|
716b9ed3e2 | ||
|
|
4b710e8675 | ||
|
|
645a987cf1 | ||
|
|
cae538f27b | ||
|
|
9bc7614ece | ||
|
|
d66be83d2b | ||
|
|
caaa89fecf | ||
|
|
19d82da5e2 | ||
|
|
3321bd4552 | ||
|
|
ca8d8e6a59 | ||
|
|
10048ffb04 | ||
|
|
7174ed5e7e | ||
|
|
8470255c5f | ||
|
|
e91333a2ca | ||
|
|
816f758e67 | ||
|
|
f07a688708 | ||
|
|
11356413ab | ||
|
|
14318cf2fb | ||
|
|
7e03d5a346 | ||
|
|
90e3983f34 | ||
|
|
7ca19deff6 |
6
.helix/languages.toml
Normal file
6
.helix/languages.toml
Normal file
@@ -0,0 +1,6 @@
|
||||
[language-server.silos]
|
||||
command = "./target/debug/silos"
|
||||
|
||||
[[language]]
|
||||
name = "go"
|
||||
language-servers = [ { name = "silos" } ]
|
||||
329
Cargo.lock
generated
329
Cargo.lock
generated
@@ -289,12 +289,34 @@ dependencies = [
|
||||
"derive_arbitrary",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.88"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-waker"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
|
||||
|
||||
[[package]]
|
||||
name = "auto_impl"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.4.0"
|
||||
@@ -569,7 +591,7 @@ dependencies = [
|
||||
"encode_unicode",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"unicode-width",
|
||||
"unicode-width 0.2.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
@@ -694,6 +716,19 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "5.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"hashbrown 0.14.5",
|
||||
"lock_api",
|
||||
"once_cell",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.4.0"
|
||||
@@ -1377,6 +1412,12 @@ dependencies = [
|
||||
"rand_distr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.14.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.3"
|
||||
@@ -1695,7 +1736,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
|
||||
dependencies = [
|
||||
"equivalent",
|
||||
"hashbrown",
|
||||
"hashbrown 0.15.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1707,7 +1748,7 @@ dependencies = [
|
||||
"console",
|
||||
"number_prefix",
|
||||
"portable-atomic",
|
||||
"unicode-width",
|
||||
"unicode-width 0.2.0",
|
||||
"web-time",
|
||||
]
|
||||
|
||||
@@ -1777,6 +1818,18 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kdl"
|
||||
version = "6.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12661358400b02cbbf1fbd05f0a483335490e8a6bd1867620f2eeb78f304a22f"
|
||||
dependencies = [
|
||||
"miette",
|
||||
"num",
|
||||
"thiserror 1.0.69",
|
||||
"winnow 0.6.24",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "language-tags"
|
||||
version = "0.3.2"
|
||||
@@ -1866,6 +1919,19 @@ version = "0.4.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
|
||||
|
||||
[[package]]
|
||||
name = "lsp-types"
|
||||
version = "0.94.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_repr",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "macro_rules_attribute"
|
||||
version = "0.2.2"
|
||||
@@ -1898,6 +1964,28 @@ dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette"
|
||||
version = "7.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f98efec8807c63c752b5bd61f862c165c115b0a35685bdcfd9238c7aeb592b7"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"miette-derive",
|
||||
"unicode-width 0.1.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miette-derive"
|
||||
version = "7.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db5b29714e950dbb20d5e6f74f9dcec4edbcc1067bb7f8ed198c097b8c1a818b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
@@ -1979,6 +2067,16 @@ dependencies = [
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num"
|
||||
version = "0.4.3"
|
||||
@@ -2190,6 +2288,12 @@ version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.12.4"
|
||||
@@ -2225,6 +2329,26 @@ version = "2.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||
|
||||
[[package]]
|
||||
name = "pin-project"
|
||||
version = "1.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a"
|
||||
dependencies = [
|
||||
"pin-project-internal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-internal"
|
||||
version = "1.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.16"
|
||||
@@ -2546,7 +2670,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
"tower 0.5.2",
|
||||
"tower-http",
|
||||
"tower-service",
|
||||
"url",
|
||||
@@ -2725,6 +2849,7 @@ version = "1.0.140"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"memchr",
|
||||
"ryu",
|
||||
@@ -2740,6 +2865,17 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_repr"
|
||||
version = "0.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_urlencoded"
|
||||
version = "0.7.1"
|
||||
@@ -2763,6 +2899,15 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sharded-slab"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
@@ -2780,7 +2925,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "silos"
|
||||
version = "0.1.0"
|
||||
version = "1.1.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -2792,9 +2937,18 @@ dependencies = [
|
||||
"glob",
|
||||
"hf-hub",
|
||||
"hora",
|
||||
"kdl",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokenizers",
|
||||
"tokio",
|
||||
"tower-lsp",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"tree-sitter",
|
||||
"tree-sitter-c",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2851,6 +3005,12 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.11.1"
|
||||
@@ -2996,6 +3156,15 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.41"
|
||||
@@ -3145,7 +3314,21 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"toml_datetime",
|
||||
"winnow",
|
||||
"winnow 0.7.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project",
|
||||
"pin-project-lite",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3176,7 +3359,7 @@ dependencies = [
|
||||
"http-body",
|
||||
"iri-string",
|
||||
"pin-project-lite",
|
||||
"tower",
|
||||
"tower 0.5.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
@@ -3187,6 +3370,40 @@ version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
|
||||
|
||||
[[package]]
|
||||
name = "tower-lsp"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4ba052b54a6627628d9b3c34c176e7eda8359b7da9acd497b9f20998d118508"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"auto_impl",
|
||||
"bytes",
|
||||
"dashmap",
|
||||
"futures",
|
||||
"httparse",
|
||||
"lsp-types",
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower 0.4.13",
|
||||
"tower-lsp-macros",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-lsp-macros"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "84fd902d4e0b9a4b27f2f440108dc034e1758628a9b702f8ec61ad66355422fa"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower-service"
|
||||
version = "0.3.3"
|
||||
@@ -3223,6 +3440,82 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
|
||||
dependencies = [
|
||||
"nu-ansi-term",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.25.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde_json",
|
||||
"streaming-iterator",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-c"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-go"
|
||||
version = "0.23.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-rust"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b9b18034c684a2420722be8b2a91c9c44f2546b631c039edf575ccba8c61be1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3279,6 +3572,12 @@ version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.2.0"
|
||||
@@ -3332,6 +3631,7 @@ dependencies = [
|
||||
"form_urlencoded",
|
||||
"idna",
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3346,6 +3646,12 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
@@ -3792,6 +4098,15 @@ version = "0.53.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d71a593cc5c42ad7876e2c1fda56f314f3754c084128833e64f1345ff8a03a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.7.10"
|
||||
|
||||
10
Cargo.toml
10
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "silos"
|
||||
version = "1.0.0"
|
||||
version = "2.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
@@ -18,3 +18,11 @@ kdl = "6.3.4"
|
||||
serde = "1.0.219"
|
||||
serde_json = "1.0.140"
|
||||
tokenizers = "0.21.1"
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = "0.3.19"
|
||||
tree-sitter = "0.25.6"
|
||||
tree-sitter-c = "0.24.1"
|
||||
tree-sitter-go = "0.23.4"
|
||||
tree-sitter-rust = "0.24.0"
|
||||
tokio = { version = "1.45.1", features = ["io-std", "macros", "rt", "rt-multi-thread"] }
|
||||
tower-lsp = "0.20.0"
|
||||
|
||||
113
README.md
113
README.md
@@ -4,7 +4,9 @@ Dumb, proomptable modular snippet search.
|
||||
|
||||
## Getting started
|
||||
|
||||
### Installation
|
||||
There are no binary releases yet.
|
||||
|
||||
### From source
|
||||
|
||||
Prerequisites:
|
||||
|
||||
@@ -18,54 +20,107 @@ git clone https://github.com/lavafroth/silos
|
||||
cd silos
|
||||
```
|
||||
|
||||
### Setup
|
||||
|
||||
Add your code snippets as KDL files in the `./snippets/v1/LANGUAGE/` directory, Take a look at the example snippet for golang in `./snippets/v1/go/simple_worker.kdl`.
|
||||
|
||||
The snippets must conform to the following structure:
|
||||
|
||||
``` kdl
|
||||
desc "a well articulated description of the snippet",
|
||||
body #"fn main() { println!("The body of the snippet") }"#
|
||||
```
|
||||
|
||||
KDL supports arbitrary raw strings with as many `#`s before and after the quotes to disambiguate them from the string contents.
|
||||
|
||||
After adding your snippets, run the server
|
||||
|
||||
``` sh
|
||||
cargo r
|
||||
cargo r http
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
>
|
||||
> Embedding defaults to using the CPU. You may use the `--gpu` flag with a GPU number to use a dedicated GPU.
|
||||
|
||||
### Usage
|
||||
|
||||
An HTTP REST API listens on port 8000 and can be queried for code snippets.
|
||||
|
||||
#### Query a snippet
|
||||
### v1 API
|
||||
|
||||
``` sh
|
||||
curl http://localhost:8000/api/v1/get --json '{ "desc": "channeled worker in go" }'
|
||||
V1 snippets are stored in the KDL format inside per-language directories under `./snippets/v1`. They must conform to the following structure:
|
||||
|
||||
``` kdl
|
||||
desc "describes the snippet"
|
||||
body #"the snippet itself"#
|
||||
```
|
||||
|
||||
You must add the "in someLanguage" suffix to your query's description field. This is to keep the API design simple for bothIDE and non-IDE users.
|
||||
KDL supports arbitrary raw strings with as many `#`s before and after the quotes to disambiguate them from the string contents.
|
||||
|
||||
#### Add a snippet
|
||||
See the example snippet `./snippets/v1/go/simple_worker.kdl` in the go programming language.
|
||||
|
||||
#### Querying
|
||||
|
||||
We recommend the `jo` CLI to easily generate JSON payloads for the API.
|
||||
|
||||
``` sh
|
||||
jo desc="channeled worker in go" \
|
||||
| curl http://localhost:8000/api/v1/get --json @-
|
||||
```
|
||||
|
||||
You must add the "in someLanguage" suffix to your query's description field. This was a bad design choice and will be deprecated in a later release.
|
||||
|
||||
#### Adding a snippet
|
||||
|
||||
``` sh
|
||||
curl http://localhost:8000/api/v1/add --json \
|
||||
'{ "desc": "Build an asynchronous shared mutable state", "lang": "rust", "body": "let object = Arc::new(Mutex::new(old));" }'
|
||||
```
|
||||
|
||||
## v2 API
|
||||
### v2 API
|
||||
|
||||
Language grammar parsing with abstract syntax tree manipulation support.
|
||||
The v2 API leverages tree-sitter to parse code into an AST (Abstract Syntax Tree) and perform subsequent mutations on the code.
|
||||
|
||||
Coming soon
|
||||
#### Supported Languages
|
||||
|
||||
## TODOs
|
||||
- C
|
||||
- Rust
|
||||
- Go
|
||||
|
||||
- [ ] Create an LSP to add the suffix based on filetype.
|
||||
#### Defining mutation collections
|
||||
|
||||
``` kdl
|
||||
description "describes the mutation collection"
|
||||
mutation {
|
||||
expression "some ((beautiful) @adjective) AST expression"
|
||||
substitute {
|
||||
literal "hello"
|
||||
capture "adjective"
|
||||
literal "world"
|
||||
}
|
||||
}
|
||||
|
||||
mutation {
|
||||
expression "another"
|
||||
substitute {
|
||||
literal "multiple mutations work"
|
||||
literal "as long as their expression"
|
||||
literal "don't collide"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
- `description`: A textual description of the mutation collection.
|
||||
- `mutation`: Defines individual code changes.
|
||||
- `expression`: Uses tree-sitter to match and capture AST nodes with `@` prefixes. The special `@root` node is reserved for the entire expression.
|
||||
- `substitute`: Constructs the modified code using literals and captured arguments.
|
||||
|
||||
See the example mutation collection in `./snippets/v2/go/mutations.kdl`.
|
||||
|
||||
#### Querying
|
||||
|
||||
``` sh
|
||||
jo body=@examples/example.go \
|
||||
desc='change the current filepath to the parent filepath in go' \
|
||||
| curl http://localhost:8000/api/v2/get --json @-
|
||||
```
|
||||
|
||||
V2 queries have the following fields:
|
||||
|
||||
- `desc`: Description of the query.
|
||||
- `body`: The code to be parsed and modified.
|
||||
|
||||
The API performs a single-pass substitution based on the closest matching mutation. Captured groups are used within the `substitute` block and the mutated code is returned in the response JSON `body` field.
|
||||
|
||||
**Further reading**
|
||||
|
||||
- [tree-sitter query syntax](https://tree-sitter.github.io/tree-sitter/using-parsers/queries/1-syntax.html) to create mutation expressions.
|
||||
- [jo](https://github.com/jpmens/jo) to build the JSON body from a file.
|
||||
|
||||
## Coming soon
|
||||
|
||||
An LSP to provide Silos code actions for a given selection.
|
||||
|
||||
17
examples/example.go
Normal file
17
examples/example.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
func main() {
|
||||
documentsDirectory := "/home/h/Documents/"
|
||||
resumeFilename := "resume.pdf"
|
||||
version := 3
|
||||
whereIsMyResume :=
|
||||
filepath.Base(
|
||||
documentsDirectory + "CV" + "_v" + strconv.Itoa(version) + "/" + resumeFilename)
|
||||
fmt.Println(whereIsMyResume)
|
||||
}
|
||||
@@ -1,3 +1,3 @@
|
||||
desc "display all the path entries"
|
||||
body """printf "%s\n" $PATH"""
|
||||
body #"printf "%s\n" $PATH"#
|
||||
|
||||
|
||||
14
snippets/v2/go/mutations.kdl
Normal file
14
snippets/v2/go/mutations.kdl
Normal file
@@ -0,0 +1,14 @@
|
||||
description "filepath base to parent's base"
|
||||
mutation {
|
||||
expression """
|
||||
(call_expression
|
||||
function: (_) @func (#eq? @func "filepath.Base")
|
||||
arguments: (_) @args
|
||||
)
|
||||
"""
|
||||
substitute {
|
||||
literal "filepath.Base(filepath.Dir(filepath.Clean"
|
||||
capture "args"
|
||||
literal "))"
|
||||
}
|
||||
}
|
||||
56
src/args.rs
Normal file
56
src/args.rs
Normal file
@@ -0,0 +1,56 @@
|
||||
use clap::Parser;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
pub(crate) struct Args {
|
||||
/// The mode to run the server in. Defaults to LSP. The HTTP REST API can be run by specifying `http` or `http:port`. For example: `http:7047`
|
||||
pub(crate) mode: Option<String>,
|
||||
|
||||
/// Run on the Nth GPU device.
|
||||
#[arg(long)]
|
||||
pub(crate) gpu: Option<usize>,
|
||||
|
||||
/// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending
|
||||
#[arg(long)]
|
||||
pub(crate) model_id: Option<String>,
|
||||
|
||||
/// Revision or branch.
|
||||
#[arg(long)]
|
||||
pub(crate) revision: Option<String>,
|
||||
}
|
||||
|
||||
/// How the server should run, as selected by the positional `mode` argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RunMode {
    /// Serve the HTTP REST API on the given TCP port.
    Http(u16),
    /// Run as a language server (the default).
    Lsp,
}
|
||||
|
||||
impl Args {
|
||||
pub(crate) fn resolve_model_and_revision(&self) -> (String, String) {
|
||||
let default_model = "sentence-transformers/all-MiniLM-L6-v2".to_string();
|
||||
let default_revision = "refs/pr/21".to_string();
|
||||
|
||||
match (self.model_id.clone(), self.revision.clone()) {
|
||||
(Some(model_id), Some(revision)) => (model_id, revision),
|
||||
(Some(model_id), None) => (model_id, "main".to_owned()),
|
||||
(None, Some(revision)) => (default_model, revision),
|
||||
(None, None) => (default_model, default_revision),
|
||||
}
|
||||
}
|
||||
pub(crate) fn mode(&self) -> RunMode {
|
||||
let Some(http) = &self.mode else {
|
||||
return RunMode::Lsp;
|
||||
};
|
||||
if http == "http" {
|
||||
return RunMode::Http(8000);
|
||||
}
|
||||
let Some(port) = http.strip_prefix("http:") else {
|
||||
return RunMode::Lsp;
|
||||
};
|
||||
|
||||
let Ok(port) = port.parse() else {
|
||||
return RunMode::Lsp;
|
||||
};
|
||||
|
||||
RunMode::Http(port)
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
use super::Args;
|
||||
use anyhow::{Error as E, Result};
|
||||
use candle_core::Device;
|
||||
use candle_core::Tensor;
|
||||
@@ -15,16 +14,15 @@ pub struct Embed {
|
||||
}
|
||||
|
||||
impl Embed {
|
||||
pub(crate) fn new(args: Args) -> Result<Self> {
|
||||
let device = if let Some(gpu_dev) = args.gpu {
|
||||
pub(crate) fn new(gpu: Option<usize>, model_id: &str, revision: &str) -> Result<Self> {
|
||||
let device = if let Some(gpu_dev) = gpu {
|
||||
Device::new_cuda(gpu_dev)?
|
||||
} else {
|
||||
Device::Cpu
|
||||
};
|
||||
|
||||
let (model_id, revision) = args.resolve_model_and_revision();
|
||||
let (config_path, tokenizer_path, weights_path) =
|
||||
Self::download_model_files(&model_id, &revision)?;
|
||||
Self::download_model_files(model_id, revision)?;
|
||||
|
||||
let config = std::fs::read_to_string(config_path)?;
|
||||
let config: Config = serde_json::from_str(&config)?;
|
||||
|
||||
142
src/lsp.rs
Normal file
142
src/lsp.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
use crate::StateWrapper;
|
||||
use crate::v2;
|
||||
use actix_web::web::Data;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use tower_lsp::lsp_types::*;
|
||||
use tower_lsp::{Client, LanguageServer};
|
||||
use tracing::error;
|
||||
|
||||
pub struct Backend {
|
||||
pub client: Client,
|
||||
pub body: Arc<Mutex<String>>,
|
||||
pub appstate: Data<StateWrapper>,
|
||||
}
|
||||
|
||||
pub fn string_range_index(s: &str, r: Range) -> &str {
|
||||
let mut newline_count = 0;
|
||||
let mut start = None;
|
||||
let mut end = None;
|
||||
for (i, c) in s.chars().enumerate() {
|
||||
if newline_count == r.start.line && start.is_none() {
|
||||
start.replace(i + r.start.character as usize);
|
||||
}
|
||||
|
||||
if newline_count == r.end.line && end.is_none() {
|
||||
end.replace(i + r.end.character as usize);
|
||||
}
|
||||
if c == '\n' {
|
||||
newline_count += 1;
|
||||
}
|
||||
}
|
||||
&s[start.unwrap_or_default()..end.unwrap_or(s.len())]
|
||||
}
|
||||
|
||||
#[tower_lsp::async_trait]
|
||||
impl LanguageServer for Backend {
|
||||
async fn initialize(
|
||||
&self,
|
||||
_: InitializeParams,
|
||||
) -> tower_lsp::jsonrpc::Result<InitializeResult> {
|
||||
Ok(InitializeResult {
|
||||
capabilities: ServerCapabilities {
|
||||
text_document_sync: Some(TextDocumentSyncCapability::Kind(
|
||||
TextDocumentSyncKind::FULL,
|
||||
)),
|
||||
code_action_provider: Some(
|
||||
tower_lsp::lsp_types::CodeActionProviderCapability::Options(
|
||||
CodeActionOptions::default(),
|
||||
),
|
||||
),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
|
||||
async fn initialized(&self, _: InitializedParams) {
|
||||
self.client
|
||||
.log_message(MessageType::INFO, "server initialized!")
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn shutdown(&self) -> tower_lsp::jsonrpc::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn did_open(&self, params: DidOpenTextDocumentParams) {
|
||||
// TODO: build an index for multiple documents in workdir
|
||||
*self.body.lock().await = params.text_document.text;
|
||||
}
|
||||
|
||||
async fn did_change(&self, params: DidChangeTextDocumentParams) {
|
||||
if let Some(body) = params.content_changes.into_iter().next() {
|
||||
*self.body.lock().await = body.text;
|
||||
}
|
||||
}
|
||||
|
||||
async fn code_action(
|
||||
&self,
|
||||
params: CodeActionParams,
|
||||
) -> tower_lsp::jsonrpc::Result<Option<CodeActionResponse>> {
|
||||
let uri = params.text_document.uri;
|
||||
let extension = url_extension(&uri);
|
||||
let body = self.body.lock().await.to_string();
|
||||
|
||||
let range = params.range;
|
||||
let new_text = string_range_index(&body, range);
|
||||
let Some((_before, after)) = new_text.split_once("silos: ") else {
|
||||
return Ok(None);
|
||||
};
|
||||
let Some((desc, _after)) = after.split_once("\n") else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let (prompt, lang) = if let Some(ext) = extension {
|
||||
(desc, ext)
|
||||
} else if let Some((prompt, lang)) = desc.rsplit_once(" in ") {
|
||||
(prompt, lang.to_string())
|
||||
} else {
|
||||
error!("{}", v2::errors::Error::MissingSuffix);
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let closest_matches =
|
||||
match v2::api::closest_mutation(&lang, prompt, &body, 1, &self.appstate) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
error!("{}", e);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let Some(closest) = closest_matches.into_iter().next() else {
|
||||
return Ok(None);
|
||||
};
|
||||
let text_edit = TextEdit {
|
||||
range,
|
||||
new_text: closest,
|
||||
};
|
||||
let changes: HashMap<Url, _> = [(uri, vec![text_edit])].into_iter().collect();
|
||||
let edit = Some(WorkspaceEdit {
|
||||
changes: Some(changes),
|
||||
document_changes: None,
|
||||
change_annotations: None,
|
||||
});
|
||||
let actions = vec![CodeActionOrCommand::CodeAction(CodeAction {
|
||||
title: "ask silos".to_string(),
|
||||
edit,
|
||||
..Default::default()
|
||||
})];
|
||||
Ok(Some(actions))
|
||||
}
|
||||
}
|
||||
|
||||
fn url_extension(u: &Url) -> Option<String> {
|
||||
let file_path = u.to_file_path().ok()?;
|
||||
|
||||
let extension = file_path.extension()?;
|
||||
let extension = extension.to_str()?;
|
||||
Some(extension.to_string())
|
||||
}
|
||||
149
src/main.rs
149
src/main.rs
@@ -1,66 +1,47 @@
|
||||
use actix_web::{App, HttpServer, web};
|
||||
use anyhow::{Context, Error as E, Result};
|
||||
use anyhow::{Context, Error as E, Result, bail};
|
||||
use clap::Parser;
|
||||
use hora::core::ann_index::ANNIndex;
|
||||
use hora::core::{ann_index::ANNIndex, metrics::Metric::Euclidean};
|
||||
use hora::index::hnsw_idx::HNSWIndex;
|
||||
use kdl::KdlDocument;
|
||||
use state::{State, StateWrapper};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use tower_lsp::{LspService, Server};
|
||||
|
||||
mod args;
|
||||
mod embed;
|
||||
mod lsp;
|
||||
mod state;
|
||||
mod v1;
|
||||
// mod v2;
|
||||
mod v2;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
struct Args {
|
||||
/// Run on the Nth GPU device.
|
||||
#[arg(long)]
|
||||
gpu: Option<usize>,
|
||||
|
||||
/// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending
|
||||
#[arg(long)]
|
||||
model_id: Option<String>,
|
||||
|
||||
/// Revision or branch.
|
||||
#[arg(long)]
|
||||
revision: Option<String>,
|
||||
|
||||
/// The port for the API to listen on
|
||||
#[arg(long, default_value = "8000")]
|
||||
port: u16,
|
||||
}
|
||||
|
||||
impl Args {
|
||||
fn resolve_model_and_revision(&self) -> (String, String) {
|
||||
let default_model = "sentence-transformers/all-MiniLM-L6-v2".to_string();
|
||||
let default_revision = "refs/pr/21".to_string();
|
||||
|
||||
match (self.model_id.clone(), self.revision.clone()) {
|
||||
(Some(model_id), Some(revision)) => (model_id, revision),
|
||||
(Some(model_id), None) => (model_id, "main".to_owned()),
|
||||
(None, Some(revision)) => (default_model, revision),
|
||||
(None, None) => (default_model, default_revision),
|
||||
}
|
||||
}
|
||||
fn path_to_parent_base(p: &std::path::Path) -> Result<String> {
|
||||
let Some(parent) = p
|
||||
.parent()
|
||||
.and_then(|v| v.file_name())
|
||||
.and_then(|v| v.to_str())
|
||||
.map(|v| v.to_string())
|
||||
else {
|
||||
bail!("failed to parse snippets path, make sure the directory structure is valid");
|
||||
};
|
||||
Ok(parent)
|
||||
}
|
||||
|
||||
#[actix_web::main]
|
||||
async fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
let port = args.port;
|
||||
let mut embed = embed::Embed::new(args)?;
|
||||
tracing_subscriber::fmt::init();
|
||||
let args = args::Args::parse();
|
||||
let mode = args.mode();
|
||||
let (model_id, revision) = args.resolve_model_and_revision();
|
||||
let mut embed = embed::Embed::new(args.gpu, &model_id, &revision)?;
|
||||
let mut dict = HashMap::default();
|
||||
|
||||
let paths = glob::glob("./snippets/v1/*/*.kdl")?;
|
||||
for path in paths {
|
||||
let path = path?;
|
||||
let parent = path
|
||||
.components()
|
||||
.rev()
|
||||
.nth(1)
|
||||
.unwrap()
|
||||
.as_os_str()
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
let parent = path_to_parent_base(&path)?;
|
||||
|
||||
let current_lang_index = dict.entry(parent).or_insert_with(|| {
|
||||
let dimension = 384;
|
||||
@@ -69,8 +50,10 @@ async fn main() -> Result<()> {
|
||||
HNSWIndex::<f32, String>::new(dimension, ¶ms)
|
||||
});
|
||||
|
||||
let doc_str = std::fs::read_to_string(path)?;
|
||||
let doc: KdlDocument = doc_str.parse().context("failed to parse KDL")?;
|
||||
let doc_str = std::fs::read_to_string(&path)?;
|
||||
let doc: KdlDocument = doc_str
|
||||
.parse()
|
||||
.context(format!("failed to parse KDL: {}", path.display()))?;
|
||||
|
||||
let Some(desc) = doc.get_arg("desc").and_then(|v| v.as_string()) else {
|
||||
continue;
|
||||
@@ -88,18 +71,66 @@ async fn main() -> Result<()> {
|
||||
.build(hora::core::metrics::Metric::Euclidean)
|
||||
.map_err(E::msg)?;
|
||||
}
|
||||
let appstate = v1::api::AppState { dict, embed };
|
||||
|
||||
// v2 stuff
|
||||
let paths = glob::glob("./snippets/v2/*/*.kdl")?;
|
||||
let mut v2_dict = HashMap::new();
|
||||
let mut v2_mutations_collection = vec![];
|
||||
for (i, path) in paths.enumerate() {
|
||||
let path = path?;
|
||||
let parent = path_to_parent_base(&path)?;
|
||||
|
||||
let mutations = v2::mutation::from_path(path)?;
|
||||
let current_lang_index = v2_dict.entry(parent).or_insert_with(|| {
|
||||
let dimension = 384;
|
||||
let params = hora::index::hnsw_params::HNSWParams::<f32>::default();
|
||||
|
||||
HNSWIndex::<f32, usize>::new(dimension, ¶ms)
|
||||
});
|
||||
|
||||
current_lang_index
|
||||
.add(&embed.embed(&mutations.description)?, i)
|
||||
.map_err(E::msg)?;
|
||||
v2_mutations_collection.push(mutations);
|
||||
}
|
||||
|
||||
for index in v2_dict.values_mut() {
|
||||
index.build(Euclidean).map_err(E::msg)?;
|
||||
}
|
||||
|
||||
let appstate = State {
|
||||
embed,
|
||||
v1: v1::api::State { dict },
|
||||
v2: v2::api::State {
|
||||
dict: v2_dict,
|
||||
mutations_collection: v2_mutations_collection,
|
||||
},
|
||||
};
|
||||
|
||||
let appstate_wrapped = web::Data::new(appstate.build());
|
||||
|
||||
HttpServer::new(move || {
|
||||
App::new()
|
||||
.app_data(appstate_wrapped.clone())
|
||||
.service(v1::api::get_snippet)
|
||||
.service(v1::api::add_snippet)
|
||||
})
|
||||
.bind(("127.0.0.1", port))?
|
||||
.run()
|
||||
.await
|
||||
.map_err(E::from)
|
||||
if let args::RunMode::Http(port) = mode {
|
||||
return HttpServer::new(move || {
|
||||
App::new()
|
||||
.app_data(appstate_wrapped.clone())
|
||||
.service(v1::api::get_snippet)
|
||||
.service(v1::api::add_snippet)
|
||||
.service(v2::api::get_snippet)
|
||||
})
|
||||
.bind(("127.0.0.1", port))?
|
||||
.run()
|
||||
.await
|
||||
.map_err(E::from);
|
||||
};
|
||||
|
||||
let stdin = tokio::io::stdin();
|
||||
let stdout = tokio::io::stdout();
|
||||
|
||||
let (service, socket) = LspService::new(|client| lsp::Backend {
|
||||
client,
|
||||
body: Arc::new(Mutex::new(String::default())),
|
||||
appstate: appstate_wrapped.clone(),
|
||||
});
|
||||
Server::new(stdin, stdout, socket).serve(service).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
19
src/state.rs
Normal file
19
src/state.rs
Normal file
@@ -0,0 +1,19 @@
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub struct StateWrapper {
|
||||
pub inner: Mutex<State>,
|
||||
}
|
||||
|
||||
pub struct State {
|
||||
pub embed: crate::embed::Embed,
|
||||
pub v1: crate::v1::api::State,
|
||||
pub v2: crate::v2::api::State,
|
||||
}
|
||||
|
||||
impl State {
|
||||
pub fn build(self) -> StateWrapper {
|
||||
StateWrapper {
|
||||
inner: Mutex::new(self),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,43 +1,20 @@
|
||||
use hora::core::ann_index::ANNIndex;
|
||||
use std::{collections::HashMap, sync::Mutex};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::errors::Error;
|
||||
use actix_web::{Responder, post, web};
|
||||
use anyhow::Result;
|
||||
use hora::index::hnsw_idx::HNSWIndex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::embed;
|
||||
|
||||
use super::errors::GetError;
|
||||
|
||||
use actix_web::{Responder, post, web};
|
||||
|
||||
use anyhow::Result;
|
||||
#[derive(Deserialize)]
|
||||
pub struct SnippetRequest {
|
||||
desc: String,
|
||||
top_k: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct SnippetOnDisk {
|
||||
pub body: String,
|
||||
pub desc: String,
|
||||
}
|
||||
|
||||
pub struct AppStateWrapper {
|
||||
inner: Mutex<AppState>,
|
||||
}
|
||||
|
||||
pub struct AppState {
|
||||
pub struct State {
|
||||
pub dict: HashMap<String, HNSWIndex<f32, String>>,
|
||||
pub embed: embed::Embed,
|
||||
}
|
||||
|
||||
impl AppState {
|
||||
pub fn build(self) -> AppStateWrapper {
|
||||
AppStateWrapper {
|
||||
inner: Mutex::new(self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
@@ -55,45 +32,48 @@ pub struct Snippet {
|
||||
|
||||
#[post("/api/v1/get")]
|
||||
pub(crate) async fn get_snippet(
|
||||
data: web::Data<AppStateWrapper>,
|
||||
data: web::Data<crate::state::StateWrapper>,
|
||||
snippet_request: web::Json<SnippetRequest>,
|
||||
) -> Result<impl Responder, GetError> {
|
||||
) -> Result<impl Responder, Error> {
|
||||
let Some((prompt, lang)) = snippet_request.desc.rsplit_once(" in ") else {
|
||||
return Err(GetError::MissingSuffix);
|
||||
return Err(Error::MissingSuffix);
|
||||
};
|
||||
|
||||
let Ok(mut appstate) = data.inner.lock() else {
|
||||
return Err(GetError::Busy);
|
||||
return Err(Error::Busy);
|
||||
};
|
||||
|
||||
let Ok(target) = appstate.embed.embed(prompt) else {
|
||||
return Err(GetError::EmbedFailed);
|
||||
return Err(Error::EmbedFailed);
|
||||
};
|
||||
|
||||
let Some(snippets_for_lang) = appstate.v1.dict.get(lang) else {
|
||||
return Err(Error::UnknownLang);
|
||||
};
|
||||
// search for k nearest neighbors
|
||||
let closest: Vec<String> =
|
||||
appstate.dict[lang].search(&target, snippet_request.top_k.unwrap_or(1));
|
||||
let closest = snippets_for_lang.search(&target, snippet_request.top_k.unwrap_or(1));
|
||||
Ok(web::Json(closest))
|
||||
}
|
||||
|
||||
#[post("/api/v1/add")]
|
||||
pub(crate) async fn add_snippet(
|
||||
data: web::Data<AppStateWrapper>,
|
||||
data: web::Data<crate::state::StateWrapper>,
|
||||
snippet: web::Json<Snippet>,
|
||||
) -> Result<impl Responder, GetError> {
|
||||
) -> Result<impl Responder, Error> {
|
||||
let Ok(mut appstate) = data.inner.lock() else {
|
||||
return Err(GetError::Busy);
|
||||
return Err(Error::Busy);
|
||||
};
|
||||
let Ok(embedding) = appstate.embed.embed(&snippet.desc) else {
|
||||
return Err(GetError::EmbedFailed);
|
||||
return Err(Error::EmbedFailed);
|
||||
};
|
||||
let index = appstate
|
||||
.v1
|
||||
.dict
|
||||
.entry(snippet.lang.clone())
|
||||
.or_insert_with(|| {
|
||||
let dimension = 384;
|
||||
let params = hora::index::hnsw_params::HNSWParams::<f32>::default();
|
||||
|
||||
|
||||
HNSWIndex::<f32, String>::new(dimension, ¶ms)
|
||||
});
|
||||
index.add(&embedding, snippet.body.clone()).unwrap();
|
||||
|
||||
@@ -6,16 +6,18 @@ use derive_more::derive::{Display, Error};
|
||||
use serde_json::json;
|
||||
|
||||
#[derive(Debug, Display, Error)]
|
||||
pub enum GetError {
|
||||
pub enum Error {
|
||||
#[display("the server is busy. come back later.")]
|
||||
Busy,
|
||||
#[display("end your request with ` in somelang`.")]
|
||||
MissingSuffix,
|
||||
#[display("failed to embed your prompt.")]
|
||||
EmbedFailed,
|
||||
#[display("snippets were requested for an unknown language")]
|
||||
UnknownLang,
|
||||
}
|
||||
|
||||
impl error::ResponseError for GetError {
|
||||
impl error::ResponseError for Error {
|
||||
fn error_response(&self) -> HttpResponse {
|
||||
let message = json!({
|
||||
"message": self.to_string(),
|
||||
@@ -29,7 +31,7 @@ impl error::ResponseError for GetError {
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match *self {
|
||||
Self::EmbedFailed => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Self::MissingSuffix => StatusCode::BAD_REQUEST,
|
||||
Self::MissingSuffix | Self::UnknownLang => StatusCode::BAD_REQUEST,
|
||||
Self::Busy => StatusCode::GATEWAY_TIMEOUT,
|
||||
}
|
||||
}
|
||||
|
||||
118
src/v2/api.rs
Normal file
118
src/v2/api.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
use hora::{core::ann_index::ANNIndex, index::hnsw_idx::HNSWIndex};
|
||||
use std::collections::HashMap;
|
||||
use tracing::{error, info};
|
||||
use tree_sitter::Parser;
|
||||
|
||||
use super::{errors::Error, mutation};
|
||||
use actix_web::{Responder, post, web};
|
||||
use anyhow::Result;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub struct State {
|
||||
pub dict: HashMap<String, HNSWIndex<f32, usize>>,
|
||||
pub mutations_collection: Vec<mutation::MutationCollection>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct SnippetRequest {
|
||||
desc: String,
|
||||
body: String,
|
||||
top_k: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct SnippetResponse {
|
||||
id: usize,
|
||||
snippet: Snippet,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct Snippet {
|
||||
lang: String,
|
||||
desc: String,
|
||||
body: String,
|
||||
}
|
||||
|
||||
pub fn get_lang(s: &str) -> Result<tree_sitter::Language, Error> {
|
||||
Ok(match s {
|
||||
"go" => tree_sitter_go::LANGUAGE,
|
||||
"c" => tree_sitter_c::LANGUAGE,
|
||||
"rust" => tree_sitter_rust::LANGUAGE,
|
||||
_ => return Err(Error::UnknownLang),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
|
||||
#[post("/api/v2/get")]
|
||||
pub(crate) async fn get_snippet(
|
||||
data: web::Data<crate::state::StateWrapper>,
|
||||
snippet_request: web::Json<SnippetRequest>,
|
||||
) -> Result<impl Responder, Error> {
|
||||
let Some((prompt, lang)) = snippet_request.desc.rsplit_once(" in ") else {
|
||||
return Err(Error::MissingSuffix);
|
||||
};
|
||||
|
||||
let closest = closest_mutation(
|
||||
lang,
|
||||
prompt,
|
||||
snippet_request.body.as_str(),
|
||||
snippet_request.top_k.unwrap_or(1),
|
||||
&data,
|
||||
)?;
|
||||
Ok(web::Json(closest))
|
||||
}
|
||||
|
||||
pub fn closest_mutation(
|
||||
lang: &str,
|
||||
prompt: &str,
|
||||
body: &str,
|
||||
top_k: usize,
|
||||
data: &web::Data<crate::state::StateWrapper>,
|
||||
) -> Result<Vec<String>, Error> {
|
||||
let langfn = get_lang(lang)?;
|
||||
|
||||
info!(prompt = prompt, language = lang, "v2 request");
|
||||
|
||||
let mut appstate = data.inner.lock().map_err(|_| Error::Busy)?;
|
||||
let target = appstate
|
||||
.embed
|
||||
.embed(prompt)
|
||||
.map_err(|_| Error::EmbedFailed)?;
|
||||
let mut parser = Parser::new();
|
||||
parser
|
||||
.set_language(&langfn)
|
||||
.map_err(|_| Error::UnknownLang)?;
|
||||
|
||||
let source_code = body;
|
||||
let source_bytes = source_code.as_bytes();
|
||||
let tree = parser
|
||||
.parse(source_code, None)
|
||||
.ok_or(Error::SnippetParsing)?;
|
||||
let root_node = tree.root_node();
|
||||
|
||||
// search for k nearest neighbors
|
||||
let collected = appstate.v2.dict[lang]
|
||||
.search(&target, top_k)
|
||||
.iter()
|
||||
.filter_map(|&index| {
|
||||
let applied = mutation::apply(
|
||||
langfn.clone(),
|
||||
source_bytes,
|
||||
root_node,
|
||||
&appstate.v2.mutations_collection[index],
|
||||
);
|
||||
match applied {
|
||||
Ok(v) => Some(v),
|
||||
Err(e) => {
|
||||
error!(
|
||||
collection_index = index,
|
||||
"failed to apply mutations from collection {}", e
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
// TODO: change the expect to a log
|
||||
})
|
||||
.collect();
|
||||
Ok(collected)
|
||||
}
|
||||
42
src/v2/errors.rs
Normal file
42
src/v2/errors.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use actix_web::{
|
||||
HttpResponse, error,
|
||||
http::{StatusCode, header::ContentType},
|
||||
};
|
||||
use derive_more::derive::{Display, Error};
|
||||
use serde_json::json;
|
||||
|
||||
#[derive(Debug, Display, Error)]
|
||||
pub enum Error {
|
||||
#[display("the server is busy. come back later.")]
|
||||
Busy,
|
||||
#[display("end your request with ` in somelang`.")]
|
||||
MissingSuffix,
|
||||
#[display("failed to embed your prompt.")]
|
||||
EmbedFailed,
|
||||
#[display("snippets were requested for an unknown language")]
|
||||
UnknownLang,
|
||||
#[display("failed to parse corpus of code to apply mutation on")]
|
||||
SnippetParsing,
|
||||
}
|
||||
|
||||
impl error::ResponseError for Error {
|
||||
fn error_response(&self) -> HttpResponse {
|
||||
let message = json!({
|
||||
"message": self.to_string(),
|
||||
})
|
||||
.to_string();
|
||||
HttpResponse::build(self.status_code())
|
||||
.insert_header(ContentType::json())
|
||||
.body(message)
|
||||
}
|
||||
|
||||
fn status_code(&self) -> StatusCode {
|
||||
match *self {
|
||||
Self::EmbedFailed => StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Self::MissingSuffix | Self::UnknownLang | Self::SnippetParsing => {
|
||||
StatusCode::BAD_REQUEST
|
||||
}
|
||||
Self::Busy => StatusCode::GATEWAY_TIMEOUT,
|
||||
}
|
||||
}
|
||||
}
|
||||
3
src/v2/mod.rs
Normal file
3
src/v2/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub(crate) mod api;
|
||||
pub(crate) mod errors;
|
||||
pub(crate) mod mutation;
|
||||
195
src/v2/mutation.rs
Normal file
195
src/v2/mutation.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tracing::debug;
|
||||
use tree_sitter::{Language, Node, Query, QueryCursor, StreamingIterator};
|
||||
|
||||
use anyhow::{Result, bail};
|
||||
use kdl::KdlDocument;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Mutation {
|
||||
pub expression: String,
|
||||
pub substitute: Vec<Substitute>,
|
||||
}
|
||||
|
||||
pub struct MutationCollection {
|
||||
pub description: String,
|
||||
pub mutations: Vec<Mutation>,
|
||||
}
|
||||
|
||||
/// One piece of a replacement text.
#[derive(Debug)]
pub enum Substitute {
    /// Text inserted verbatim.
    Literal(String),
    /// Name of a query capture whose matched source text is inserted.
    Capture(String),
}
|
||||
|
||||
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<MutationCollection> {
|
||||
let contents = std::fs::read_to_string(path)?;
|
||||
let doc: KdlDocument = contents.parse()?;
|
||||
let mut mutations = vec![];
|
||||
|
||||
let mut description = None;
|
||||
|
||||
for node in doc.nodes() {
|
||||
let node_name = node.name().value();
|
||||
|
||||
if node_name != "mutation" && node_name != "description" {
|
||||
bail!(
|
||||
"document root must only contain `mutation` or `description` nodes: got {node_name}"
|
||||
);
|
||||
}
|
||||
|
||||
if node_name == "description" {
|
||||
description.replace(
|
||||
node.entry(0)
|
||||
.unwrap()
|
||||
.value()
|
||||
.as_string()
|
||||
.unwrap()
|
||||
.to_string(),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
let node = node.children().unwrap();
|
||||
let Some(expression) = node.get_arg("expression").and_then(|v| v.as_string()) else {
|
||||
bail!("mutation node must contain an expression");
|
||||
};
|
||||
let Some(substitute) = node.get("substitute") else {
|
||||
bail!("mutation node must contain an substitute");
|
||||
};
|
||||
|
||||
let children = substitute.children().unwrap().nodes();
|
||||
let mut substitute = vec![];
|
||||
for child in children {
|
||||
let attrib = child.entry(0).unwrap().value().as_string().unwrap();
|
||||
let substitutor = match child.name().value() {
|
||||
"literal" => Substitute::Literal(attrib.to_string()),
|
||||
"capture" => Substitute::Capture(attrib.to_string()),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
substitute.push(substitutor);
|
||||
}
|
||||
|
||||
let expression = format!("({expression}) @root");
|
||||
|
||||
mutations.push(Mutation {
|
||||
expression,
|
||||
substitute,
|
||||
})
|
||||
}
|
||||
|
||||
let Some(description) = description else {
|
||||
bail!("mutation collection contains no `description`");
|
||||
};
|
||||
|
||||
Ok(MutationCollection {
|
||||
description,
|
||||
mutations,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn apply(
|
||||
lang: Language,
|
||||
source_bytes: &[u8],
|
||||
root_node: Node<'_>,
|
||||
mutations: &MutationCollection,
|
||||
) -> Result<String, anyhow::Error> {
|
||||
let mut split_ats = vec![];
|
||||
let mut query_result_map = HashMap::new();
|
||||
for mutation in &mutations.mutations {
|
||||
for query_result in query(root_node, mutation.expression.as_str(), &lang, source_bytes) {
|
||||
debug!("mutation query expression matched: {query_result:?}");
|
||||
split_ats.push(query_result.start);
|
||||
split_ats.push(query_result.end);
|
||||
|
||||
let mut ast_rewrite = String::default();
|
||||
for sub in &mutation.substitute {
|
||||
ast_rewrite.push_str(match sub {
|
||||
Substitute::Literal(attrib) => attrib,
|
||||
Substitute::Capture(attrib) => &query_result.captures[attrib],
|
||||
})
|
||||
}
|
||||
debug!("AST rewritten to {ast_rewrite:?}");
|
||||
|
||||
query_result_map.insert(query_result.start, ast_rewrite);
|
||||
}
|
||||
}
|
||||
split_ats.sort();
|
||||
let splits = split_at_indices(source_bytes, &split_ats);
|
||||
let mut output = String::default();
|
||||
for (i, split) in splits.indices.iter().zip(splits.values) {
|
||||
let split = std::str::from_utf8(split)?;
|
||||
output.push_str(query_result_map.get(i).map(|v| v.as_str()).unwrap_or(split));
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
/// One query match reduced to owned data.
#[derive(Debug)]
struct QueryCooked {
    /// Matched source text of every capture other than `root`, by capture name.
    captures: HashMap<String, String>,
    /// End byte of the `root` capture.
    end: usize,
    /// Start byte of the `root` capture.
    start: usize,
}
|
||||
|
||||
/// A byte slice cut into consecutive segments.
///
/// `values[i]` is the segment that begins at byte offset `indices[i]`; both
/// vectors always have the same length.
pub struct SplitMap<'a> {
    values: Vec<&'a [u8]>,
    indices: Vec<usize>,
}

/// Cuts `c` at every offset in `idx`, taken in the given order.
///
/// The result holds `idx.len() + 1` segments: one ending at each offset plus
/// the remainder of `c`. Repeated offsets yield empty segments.
///
/// # Panics
///
/// Panics when an offset is smaller than the one before it or lies beyond the
/// end of `c`.
fn split_at_indices<'a>(c: &'a [u8], idx: &[usize]) -> SplitMap<'a> {
    let mut values = Vec::with_capacity(idx.len() + 1);
    let mut indices = Vec::with_capacity(idx.len() + 1);

    let mut cursor = 0;
    for &boundary in idx {
        indices.push(cursor);
        values.push(&c[cursor..boundary]);
        cursor = boundary;
    }
    // Whatever is left after the last cut forms the final segment.
    indices.push(cursor);
    values.push(&c[cursor..]);

    assert_eq!(values.len(), indices.len());
    SplitMap { values, indices }
}
|
||||
|
||||
fn query<'a>(
|
||||
node: Node<'a>,
|
||||
expr: &'a str,
|
||||
lang: &Language,
|
||||
source_bytes: &[u8],
|
||||
) -> Vec<QueryCooked> {
|
||||
let query = Query::new(lang, expr).unwrap();
|
||||
|
||||
let mut qc = QueryCursor::new();
|
||||
let mut query_matches = qc.matches(&query, node, source_bytes);
|
||||
|
||||
let capture_names = query.capture_names();
|
||||
|
||||
let mut cooked = vec![];
|
||||
|
||||
while let Some(matcha) = query_matches.next() {
|
||||
let mut capture_cooked = HashMap::new();
|
||||
let mut start = 0;
|
||||
let mut end = 0;
|
||||
for cap in matcha.captures {
|
||||
let Some(name) = capture_names.get(cap.index as usize) else {
|
||||
continue;
|
||||
};
|
||||
if *name == "root" {
|
||||
start = cap.node.start_byte();
|
||||
end = cap.node.end_byte();
|
||||
continue;
|
||||
}
|
||||
capture_cooked.insert(
|
||||
name.to_string(),
|
||||
cap.node.utf8_text(source_bytes).unwrap().to_string(),
|
||||
);
|
||||
}
|
||||
cooked.push(QueryCooked {
|
||||
start,
|
||||
end,
|
||||
captures: capture_cooked,
|
||||
})
|
||||
}
|
||||
cooked
|
||||
}
|
||||
Reference in New Issue
Block a user