{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"ScaleLLM","owner":"vectorch-ai","isFork":false,"description":"A high-performance inference system for large language models, designed for production environments.","allTopics":["performance","gpu","model","production","cuda","efficiency","inference","transformer","llama","speculative","serving","llm","llm-inference","llama3"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":5,"issueCount":29,"starsCount":317,"forksCount":24,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-07T06:43:20.473Z"}},{"type":"Public","name":"whl","owner":"vectorch-ai","isFork":false,"description":"repository to host python whl package.","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-04T00:34:11.795Z"}},{"type":"Public","name":"vcpkg","owner":"vectorch-ai","isFork":true,"description":"C++ Library Manager for Windows, Linux, and MacOS","allTopics":[],"primaryLanguage":{"name":"CMake","color":"#DA3434"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":6165,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-22T21:57:38.481Z"}},{"type":"Public","name":"LLMBench","owner":"vectorch-ai","isFork":false,"description":"A library for validating and benchmarking LLMs inference.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":4,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-18T22:57:23.277Z"}},{"type":"Public","name":"discussions","owner":"vectorch-ai","isFork":false,"description":"","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-05T16:15:37.188Z"}},{"type":"Public","name":"flashinfer","owner":"vectorch-ai","isFork":true,"description":"FlashInfer: Kernel Library for LLM Serving","allTopics":[],"primaryLanguage":{"name":"Cuda","color":"#3A4E3A"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":64,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-03T06:36:46.580Z"}},{"type":"Public","name":"chatbot-ui","owner":"vectorch-ai","isFork":true,"description":"An open source ChatGPT UI.","allTopics":[],"primaryLanguage":{"name":"TypeScript","color":"#3178c6"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":7578,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-20T22:35:16.274Z"}},{"type":"Public","name":"flash-attention","owner":"vectorch-ai","isFork":true,"description":"Fast and memory-efficient exact attention","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1054,"license":"BSD 3-Clause \"New\" or \"Revised\" License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-15T07:33:31.386Z"}},{"type":"Public","name":"tokenizers","owner":"vectorch-ai","isFork":true,"description":"💥 Fast State-of-the-Art Tokenizers optimized for Research and Production","allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":746,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-04T00:24:52.905Z"}},{"type":"Public","name":"xformers","owner":"vectorch-ai","isFork":true,"description":"Hackable and optimized Transformers building blocks, supporting a composable construction.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":564,"license":"Other","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-01T05:57:53.048Z"}},{"type":"Public","name":"FasterTransformer","owner":"vectorch-ai","isFork":true,"description":"Transformer related optimization, including BERT, GPT","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":877,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-07-28T00:27:33.290Z"}},{"type":"Public","name":"ByteTransformer","owner":"vectorch-ai","isFork":true,"description":"optimized BERT transformer inference on NVIDIA GPU. <a href=\"https://arxiv.org/abs/2210.03052\" rel=\"nofollow\">https://arxiv.org/abs/2210.03052</a>","allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":33,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-07-24T12:19:25.377Z"}}],"repositoryCount":12,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"vectorch-ai repositories"}