//! Interactive chat using the streaming `chat_streamed` API.
//!
//! Because the full message history is re-sent on every turn, the LLM may
//! refer back to earlier parts of the conversation.
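//!
//! Assumes a local Ollama server on 127.0.0.1:11434 (the default address) with
//! the `llama3.2:1b` model already available, e.g. via `ollama pull llama3.2:1b`.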
use std::io::{BufRead, Write};

use futures::StreamExt;
use ollama_rest::{prelude::*, Ollama};
use serde_json::json;

const MODEL_NAME: &str = "llama3.2:1b";

#[tokio::main]
async fn main() {
    // Make sure Ollama serves at 127.0.0.1:11434
    let ollama = Ollama::default();

    // Full conversation history, re-sent with every request
    let mut messages = Vec::<Message>::new();

    let stdin = std::io::stdin();
    let mut stdout = std::io::stdout();

    // Load the model into memory before starting the chat loop
    print!("Loading model... ");
    stdout.flush().unwrap();

    ollama.load_model(MODEL_NAME).await.unwrap();
    println!("done");

    loop {
        let mut prompt = String::new();

        print!("\n>>> ");
        stdout.flush().unwrap();

        // Read the user's prompt
        stdin.lock().read_line(&mut prompt).unwrap();

        // Exit when the user types "/bye"
        if prompt.starts_with("/bye") {
            break;
        }

        messages.push(Message {
            role: Role::User,
            content: prompt,
            images: None,
            tool_calls: None,
        });

        let mut completion = String::new();

        println!();

        // Send the conversation to the LLM
        let mut stream = ollama.chat_streamed(
            &serde_json::from_value::<ChatRequest>(json!({
                "model": MODEL_NAME,
                "messages": messages,
            })).unwrap()
        ).await.unwrap();

        // Print each streamed chunk as it arrives and accumulate the full reply
        while let Some(Ok(res)) = stream.next().await {
            if !res.done {
                if let Some(msg) = &res.message {
                    print!("{}", msg.content);
                    stdout.flush().unwrap();

                    completion.push_str(msg.content.as_str());
                }
            }
        }

        println!();

        // Remember the assistant's reply so the next turn has full context
        messages.push(Message {
            role: Role::Assistant,
            content: completion,
            images: None,
            tool_calls: None,
        });
    }
}