diff --git a/docs/docs.go b/docs/docs.go
index 851dee8..6248bd8 100644
--- a/docs/docs.go
+++ b/docs/docs.go
@@ -295,6 +295,45 @@ const docTemplate = `{
                         }
                     }
                 }
-            }
+            },
+            "post": {
+                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
+                "tags": [
+                    "instances"
+                ],
+                "summary": "Proxy requests to a specific instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Request successfully proxied to instance"
+                    },
+                    "400": {
+                        "description": "Invalid name format",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "503": {
+                        "description": "Instance is not running",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
         },
         "/instances/{name}/restart": {
@@ -479,6 +518,58 @@ const docTemplate = `{
                     }
                 }
             }
-        }
+        },
+        "/v1/": {
+            "post": {
+                "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body",
+                "consumes": [
+                    "application/json"
+                ],
+                "tags": [
+                    "openai"
+                ],
+                "summary": "OpenAI-compatible proxy endpoint",
+                "responses": {
+                    "200": {
+                        "description": "OpenAI response"
+                    },
+                    "400": {
+                        "description": "Invalid request body or model name",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/models": {
+            "get": {
+                "description": "Returns a list of instances in a format compatible with OpenAI API",
+                "tags": [
+                    "openai"
+                ],
+                "summary": "List instances in OpenAI-compatible format",
+                "responses": {
+                    "200": {
+                        "description": "List of OpenAI-compatible instances",
+                        "schema": {
+                            "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        }
     },
     "definitions": {
@@ -999,6 +1090,10 @@ const docTemplate = `{
         "llamactl.Instance": {
             "type": "object",
             "properties": {
+                "created": {
+                    "description": "Creation time",
+                    "type": "integer"
+                },
                 "name": {
                     "type": "string"
                 },
@@ -1007,6 +1102,37 @@ const docTemplate = `{
                     "type": "boolean"
                 }
             }
-        }
+        },
+        "llamactl.OpenAIInstance": {
+            "type": "object",
+            "properties": {
+                "created": {
+                    "type": "integer"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "owned_by": {
+                    "type": "string"
+                }
+            }
+        },
+        "llamactl.OpenAIListInstancesResponse": {
+            "type": "object",
+            "properties": {
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/llamactl.OpenAIInstance"
+                    }
+                },
+                "object": {
+                    "type": "string"
+                }
+            }
+        }
     }
 }`
"description": "Instance is not running", + "schema": { + "type": "string" + } + } + } } }, "/instances/{name}/restart": { @@ -472,6 +511,58 @@ } } } + }, + "/v1/": { + "post": { + "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body", + "consumes": [ + "application/json" + ], + "tags": [ + "openai" + ], + "summary": "OpenAI-compatible proxy endpoint", + "responses": { + "200": { + "description": "OpenAI response" + }, + "400": { + "description": "Invalid request body or model name", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/v1/models": { + "get": { + "description": "Returns a list of instances in a format compatible with OpenAI API", + "tags": [ + "openai" + ], + "summary": "List instances in OpenAI-compatible format", + "responses": { + "200": { + "description": "List of OpenAI-compatible instances", + "schema": { + "$ref": "#/definitions/llamactl.OpenAIListInstancesResponse" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } } }, "definitions": { @@ -992,6 +1083,10 @@ "llamactl.Instance": { "type": "object", "properties": { + "created": { + "description": "Creation time", + "type": "integer" + }, "name": { "type": "string" }, @@ -1000,6 +1095,37 @@ "type": "boolean" } } + }, + "llamactl.OpenAIInstance": { + "type": "object", + "properties": { + "created": { + "type": "integer" + }, + "id": { + "type": "string" + }, + "object": { + "type": "string" + }, + "owned_by": { + "type": "string" + } + } + }, + "llamactl.OpenAIListInstancesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/definitions/llamactl.OpenAIInstance" + } + }, + "object": { + "type": "string" + } + } } } } \ No newline at end of file diff --git a/docs/swagger.yaml b/docs/swagger.yaml index 0d373ff..cf45768 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -347,12 +347,35 @@ definitions: type: object llamactl.Instance: properties: + created: + description: Creation time + type: integer name: type: string running: description: Status type: boolean type: object + llamactl.OpenAIInstance: + properties: + created: + type: integer + id: + type: string + object: + type: string + owned_by: + type: string + type: object + llamactl.OpenAIListInstancesResponse: + properties: + data: + items: + $ref: '#/definitions/llamactl.OpenAIInstance' + type: array + object: + type: string + type: object info: contact: {} description: llamactl is a control server for managing Llama Server instances. 
@@ -548,6 +571,33 @@ paths:
       summary: Proxy requests to a specific instance
       tags:
       - instances
+    post:
+      description: Forwards HTTP requests to the llama-server instance running on
+        a specific port
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      responses:
+        "200":
+          description: Request successfully proxied to instance
+        "400":
+          description: Invalid name format
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+        "503":
+          description: Instance is not running
+          schema:
+            type: string
+      summary: Proxy requests to a specific instance
+      tags:
+      - instances
   /instances/{name}/restart:
     post:
       description: Restarts a specific instance by name
@@ -668,4 +718,40 @@ paths:
       summary: Get version of llama server
       tags:
       - server
+  /v1/:
+    post:
+      consumes:
+      - application/json
+      description: Handles all POST requests to /v1/*, routing to the appropriate
+        instance based on the request body
+      responses:
+        "200":
+          description: OpenAI response
+        "400":
+          description: Invalid request body or model name
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      summary: OpenAI-compatible proxy endpoint
+      tags:
+      - openai
+  /v1/models:
+    get:
+      description: Returns a list of instances in a format compatible with OpenAI
+        API
+      responses:
+        "200":
+          description: List of OpenAI-compatible instances
+          schema:
+            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      summary: List instances in OpenAI-compatible format
+      tags:
+      - openai
 swagger: "2.0"
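
Note: the new GET /v1/models endpoint returns the llamactl.OpenAIListInstancesResponse shape
defined above, populated by the handler added in pkg/handlers.go below. For reference, a
response might look like the following (the instance name and timestamp are illustrative,
not taken from this patch):

    {
      "object": "list",
      "data": [
        {
          "id": "my-instance",
          "object": "model",
          "created": 1700000000,
          "owned_by": "llamactl"
        }
      ]
    }
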
diff --git a/pkg/handlers.go b/pkg/handlers.go
index 39743ce..d19aa1a 100644
--- a/pkg/handlers.go
+++ b/pkg/handlers.go
@@ -402,6 +402,7 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 // @Failure 500 {string} string "Internal Server Error"
 // @Failure 503 {string} string "Instance is not running"
 // @Router /instances/{name}/proxy [get]
+// @Router /instances/{name}/proxy [post]
 func (h *Handler) ProxyToInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		name := chi.URLParam(r, "name")
@@ -455,7 +456,55 @@ func (h *Handler) ProxyToInstance() http.HandlerFunc {
 	}
 }
 
+// OpenAIListInstances godoc
+// @Summary List instances in OpenAI-compatible format
+// @Description Returns a list of instances in a format compatible with OpenAI API
+// @Tags openai
+// @Produce json
+// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /v1/models [get]
+func (h *Handler) OpenAIListInstances() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instances, err := h.InstanceManager.ListInstances()
+		if err != nil {
+			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		openaiInstances := make([]OpenAIInstance, len(instances))
+		for i, instance := range instances {
+			openaiInstances[i] = OpenAIInstance{
+				ID:      instance.Name,
+				Object:  "model",
+				Created: instance.Created,
+				OwnedBy: "llamactl",
+			}
+		}
+
+		openaiResponse := OpenAIListInstancesResponse{
+			Object: "list",
+			Data:   openaiInstances,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
+			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+	}
+}
+
 // OpenAIProxy godoc
+// @Summary OpenAI-compatible proxy endpoint
+// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body
+// @Tags openai
+// @Accept json
+// @Produce json
+// @Success 200 "OpenAI response"
+// @Failure 400 {string} string "Invalid request body or model name"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Read the entire body first
diff --git a/pkg/instance.go b/pkg/instance.go
index 725361e..faedc27 100644
--- a/pkg/instance.go
+++ b/pkg/instance.go
@@ -10,6 +10,7 @@ import (
 	"net/url"
 	"os/exec"
 	"sync"
+	"time"
 )
 
 type CreateInstanceOptions struct {
@@ -60,6 +61,9 @@ type Instance struct {
 	// Status
 	Running bool `json:"running"`
 
+	// Creation time
+	Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
+
 	// Logging file
 	logger *InstanceLogger `json:"-"`
 
@@ -153,6 +157,8 @@ func NewInstance(name string, globalSettings *InstancesConfig, options *CreateIn
 		logger: logger,
 
 		Running: false,
+
+		Created: time.Now().Unix(),
 	}
 }
diff --git a/pkg/openai.go b/pkg/openai.go
new file mode 100644
index 0000000..e4ff36a
--- /dev/null
+++ b/pkg/openai.go
@@ -0,0 +1,13 @@
+package llamactl
+
+type OpenAIListInstancesResponse struct {
+	Object string           `json:"object"`
+	Data   []OpenAIInstance `json:"data"`
+}
+
+type OpenAIInstance struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+}
diff --git a/pkg/routes.go b/pkg/routes.go
index d4ef89c..7f86f1c 100644
--- a/pkg/routes.go
+++ b/pkg/routes.go
@@ -50,6 +50,8 @@ func SetupRouter(handler *Handler) *chi.Mux {
 		})
 	})
 
+	r.Get("/v1/models", handler.OpenAIListInstances()) // List instances in OpenAI-compatible format
+
 	// OpenAI-compatible proxy endpoint
 	// Handles all POST requests to /v1/*, including:
 	// - /v1/completions
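
Note: the OpenAIProxy hunk above is cut off right after the "Read the entire body first"
comment, so the model-routing logic itself is not shown in this patch. As a rough sketch only
(the request struct and variable names below are hypothetical, not taken from this diff), the
handler presumably extracts the "model" field from the JSON body and proxies to the instance
of the same name, along these lines:

    // Hypothetical sketch of the body-parsing step in OpenAIProxy. Only the
    // "Read the entire body first" comment and the documented 400 error
    // ("Invalid request body or model name") are confirmed by this diff.
    body, err := io.ReadAll(r.Body)
    if err != nil {
        http.Error(w, "Failed to read request body", http.StatusBadRequest)
        return
    }

    // The "model" field of the OpenAI-style request selects the target instance.
    var req struct {
        Model string `json:"model"`
    }
    if err := json.Unmarshal(body, &req); err != nil || req.Model == "" {
        http.Error(w, "Invalid request body or model name", http.StatusBadRequest)
        return
    }

    // Restore the body so the reverse proxy can forward it unchanged.
    r.Body = io.NopCloser(bytes.NewReader(body))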