data.js
// Benchmark results: model name (with quantization), hardware, measured
// generation speed, and a link to the GitHub issue containing the proof.
const modelData = [
{"model": "Mistral Instruct 7B Q4", "hardware": "Raspberry Pi5", "speed": "2 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/5"},
{"model": "Mistral Instruct 7B Q4", "hardware": "i7-7700HQ", "speed": "3 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/1"},
{"model": "Mistral Instruct 7B Q4", "hardware": "M1", "speed": "12 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/2"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia RTX 4060 Ti", "speed": "44 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/4"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia Tesla P40", "speed": "45 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/3"},
{"model": "Mistral Instruct 7B Q4", "hardware": "M1 Max", "speed": "58 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/6"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia RTX 3060", "speed": "59 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/4"},
{"model": "Mistral Instruct 7B Q4", "hardware": "M1 Ultra", "speed": "70 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/8"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia RTX 4070", "speed": "70 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/4"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia RTX 3090", "speed": "120 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/4"},
{"model": "Mistral Instruct 7B Q4", "hardware": "Nvidia RTX 4090", "speed": "140 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/4"},
{"model": "Meta Llama 3 Instruct 70B", "hardware": "2xP40", "speed": "3 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/9"},
{"model": "Meta Llama 3 Instruct 70B Q4", "hardware": "M1 Max", "speed": "6 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/7"},
{"model": "Qwen 2.5 32B Q4", "hardware": "i5-10600K", "speed": "1.3 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/14"},
{"model": "Qwen 2.5 32B Q4", "hardware": "Nvidia Tesla P40", "speed": "13 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/15"},
{"model": "Qwen 2.5 32B Q4", "hardware": "Nvidia RTX 3090", "speed": "40 tokens/sec", "proof": "https://github.com/dmatora/LLM-inference-speed-benchmarks/issues/13"},
];
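For context, here is a minimal sketch of how this array could be consumed, for example to rank hardware for a single model by throughput. The parseSpeed helper and the console output are illustrative assumptions, not code from this repository.

// Hypothetical usage sketch (not part of the repo): rank hardware for one model.
// Assumes modelData is defined as above.

// Extract the numeric tokens/sec from strings like "2 tokens/sec" or "1.3 tokens/sec".
function parseSpeed(speed) {
  return parseFloat(speed);
}

// Keep only the Mistral 7B rows and sort fastest-first.
const mistralRows = modelData
  .filter((row) => row.model === "Mistral Instruct 7B Q4")
  .sort((a, b) => parseSpeed(b.speed) - parseSpeed(a.speed));

// Print a simple leaderboard, e.g. "Nvidia RTX 4090: 140 tokens/sec".
for (const row of mistralRows) {
  console.log(`${row.hardware}: ${row.speed}`);
}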