Example usage
Phi 3.5 accepts the standard set of LLM inference parameters, such as temperature, and supports optional streaming output.
Input
import requests
import os

# Replace the empty string with your model ID below
model_id = ""
baseten_api_key = os.environ["BASETEN_API_KEY"]

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]
data = {
    "messages": messages,
    "stream": True,
    "temperature": 0.5,
}

# Call the model endpoint
res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
    stream=True,
)

# Print the generated tokens as they are streamed back
for content in res.iter_content():
    print(content.decode("utf-8"), end="", flush=True)
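A note on decoding: iter_content() yields raw bytes, and a multi-byte UTF-8 character can in principle be split across two chunks, which would make the per-chunk decode() raise. If you run into that, the sketch below reuses res from the request above and swaps in the standard library's incremental decoder; this is a generic requests/codecs pattern, not a Baseten-specific API.

import codecs

# The incremental decoder buffers partial multi-byte sequences
# until enough bytes have arrived to decode them
decoder = codecs.getincrementaldecoder("utf-8")()
for chunk in res.iter_content(chunk_size=None):
    print(decoder.decode(chunk), end="", flush=True)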
JSON output
[
  "arrrg",
  "me hearty",
  "I",
  "be",
  "doing",
  "..."
]
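To disable streaming, set "stream": False in the payload and read the response in one shot. The sketch below reuses model_id, baseten_api_key, and messages from the example above; the exact shape of the non-streaming response depends on the model's output schema, so printing the parsed JSON is shown only as a starting point.

# Non-streaming variant of the same request
data = {
    "messages": messages,
    "stream": False,
    "temperature": 0.5,
}

res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
)

# Response shape varies by deployment; inspect the parsed JSON first
print(res.json())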