Files
llamactl/test_llm.py
2025-12-21 20:48:22 +01:00

74 lines
1.9 KiB
Python

#!/usr/bin/env python3
"""
Simple Python script to interact with local LLM server's OpenAI-compatible API
"""
import requests
# Local LLM server configuration.
# URL that send_message() POSTs its chat-completion requests to.
LLM_SERVER_URL = "http://localhost:8080/v1/chat/completions"
# Default for send_message()'s `model` argument; change it to match your setup.
MODEL_NAME = "proxy-test"
def send_message(message, model=MODEL_NAME, temperature=0.7, max_tokens=1000):
    """
    Post one user message to the chat-completions endpoint and return the reply.

    The request is non-streaming and contains exactly one "user" message;
    nothing from earlier calls is included.

    Args:
        message (str): Text sent as the content of the single user message
        model (str): Model name placed in the request (defaults to MODEL_NAME)
        temperature (float): Sampling temperature; controls randomness
        max_tokens (int): Maximum tokens in response

    Returns:
        The "content" field of the first choice in the server's JSON reply

    Raises:
        requests.HTTPError: If the server answers with an error status.
        Connection/timeout errors from requests, and KeyError/IndexError for
        a reply without the expected structure, propagate to the caller.
    """
    user_turn = {"role": "user", "content": message}
    payload = {
        "model": model,
        "messages": [user_turn],
        "temperature": temperature,
        "max_tokens": max_tokens,
        "stream": False,
    }
    reply = requests.post(
        LLM_SERVER_URL,
        headers={
            "Content-Type": "application/json",
            "Authorization": "Bearer test-inf",
        },
        json=payload,
        timeout=60,
    )
    # Surface HTTP error statuses as exceptions before touching the body.
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]
def main():
    """Run an interactive prompt loop against the local LLM server.

    Every non-empty line read from stdin is sent on its own through
    send_message() and the reply is printed. No conversation history is kept
    between turns. Ctrl+C or end-of-input prints a goodbye message and ends
    the loop. A failed request or a malformed reply is reported on the
    current line and the loop continues, so one error does not end the
    session.
    """
    print("Local LLM Chat Client")
    print("-" * 40)
    while True:
        try:
            user_input = input("\nYou: ").strip()
            if not user_input:
                continue
            print("AI: ", end="", flush=True)
            try:
                response = send_message(user_input)
            except requests.RequestException as exc:
                # Server unreachable, timeout, or HTTP error status.
                print(f"[request failed: {exc}]")
                continue
            except (KeyError, IndexError, TypeError, ValueError) as exc:
                # Reply was not valid JSON or lacked choices[0].message.content.
                print(f"[unexpected response format: {exc!r}]")
                continue
            print(response)
        except (KeyboardInterrupt, EOFError):
            # Also covers Ctrl+C pressed while a request is in flight.
            print("\nGoodbye!")
            break
# Start the chat loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()