diff --git a/apidocs/docs.go b/docs/docs.go similarity index 56% rename from apidocs/docs.go rename to docs/docs.go index 4b521b1..69fa95e 100644 --- a/apidocs/docs.go +++ b/docs/docs.go @@ -1,5 +1,5 @@ -// Package apidocs Code generated by swaggo/swag. DO NOT EDIT -package apidocs +// Package docs Code generated by swaggo/swag. DO NOT EDIT +package docs import "github.com/swaggo/swag" @@ -108,7 +108,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -193,7 +193,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -241,7 +241,7 @@ const docTemplate = `{ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -274,7 +274,7 @@ const docTemplate = `{ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } } }, @@ -312,7 +312,7 @@ const docTemplate = `{ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -357,7 +357,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -365,7 +365,7 @@ const docTemplate = `{ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -410,7 +410,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -418,7 +418,7 @@ const docTemplate = `{ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -534,7 +534,7 @@ const docTemplate = `{ "tags": [ "instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -578,7 +578,7 @@ const docTemplate = `{ "tags": [ "instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -638,7 +638,7 @@ const docTemplate = `{ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -681,7 +681,7 @@ const docTemplate = `{ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -724,7 +724,7 @@ const docTemplate = `{ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -742,6 +742,647 @@ const docTemplate = `{ } } }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -834,32 +1475,31 @@ const docTemplate = `{ } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", + "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -878,36 +1518,11 @@ const docTemplate = `{ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + "server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, diff --git a/apidocs/swagger.json b/docs/swagger.json similarity index 55% rename from apidocs/swagger.json rename to docs/swagger.json index 71471e6..885800a 100644 --- a/apidocs/swagger.json +++ b/docs/swagger.json @@ -101,7 +101,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -186,7 +186,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -234,7 +234,7 @@ "200": { "description": "Parsed options", "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } }, "400": { @@ -267,7 +267,7 @@ "schema": { "type": "array", "items": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } } }, @@ -305,7 +305,7 @@ "200": { "description": "Instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -350,7 +350,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -358,7 +358,7 @@ "200": { "description": "Updated instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -403,7 +403,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/instance.CreateInstanceOptions" + "$ref": "#/definitions/instance.Options" } } ], @@ -411,7 +411,7 @@ "201": { "description": "Created instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -527,7 +527,7 @@ "tags": [ "instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -571,7 +571,7 @@ "tags": [ "instances" ], - "summary": "Proxy requests to a specific instance", + "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", @@ -631,7 +631,7 @@ "200": { "description": "Restarted instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -674,7 +674,7 @@ "200": { "description": "Started instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -717,7 +717,7 @@ "200": { "description": "Stopped instance details", "schema": { - "$ref": "#/definitions/instance.Process" + "$ref": "#/definitions/instance.Instance" } }, "400": { @@ -735,6 +735,647 @@ } } }, + "/llama-cpp/{name}/": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the llama.cpp UI for the specified instance", + "produces": [ + "text/html" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp UI for the instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied HTML response", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/apply-template": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/completion": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/detokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/embeddings": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/infill": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/metrics": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/props": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/reranking": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/slots": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/llama-cpp/{name}/tokenize": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Proxy requests to llama.cpp server instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Proxied response", + "schema": { + "type": "object", + "additionalProperties": true + } + }, + "400": { + "description": "Invalid instance", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/nodes": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", + "tags": [ + "nodes" + ], + "summary": "List all configured nodes", + "responses": { + "200": { + "description": "Map of nodes", + "schema": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/server.NodeResponse" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/nodes/{name}": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the details of a specific node by name", + "tags": [ + "nodes" + ], + "summary": "Get details of a specific node", + "parameters": [ + { + "type": "string", + "description": "Node Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Node details", + "schema": { + "$ref": "#/definitions/server.NodeResponse" + } + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "404": { + "description": "Node not found", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/": { "post": { "security": [ @@ -827,32 +1468,31 @@ } }, "definitions": { - "backends.BackendType": { - "type": "string", - "enum": [ - "llama_cpp", - "mlx_lm", - "vllm" - ], - "x-enum-varnames": [ - "BackendTypeLlamaCpp", - "BackendTypeMlxLm", - "BackendTypeVllm" - ] + "instance.Instance": { + "type": "object", + "properties": { + "created": { + "description": "Unix timestamp when the instance was created", + "type": "integer" + }, + "name": { + "type": "string" + } + } }, - "instance.CreateInstanceOptions": { + "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "backend_options": { + "environment": { + "description": "Environment variables", "type": "object", - "additionalProperties": {} - }, - "backend_type": { - "$ref": "#/definitions/backends.BackendType" + "additionalProperties": { + "type": "string" + } }, "idle_timeout": { "description": "Idle timeout", @@ -871,36 +1511,11 @@ } } }, - "instance.InstanceStatus": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ], - "x-enum-varnames": [ - "Stopped", - "Running", - "Failed" - ] - }, - "instance.Process": { + "server.NodeResponse": { "type": "object", "properties": { - "created": { - "description": "Creation time", - "type": "integer" - }, - "name": { + "address": { "type": "string" - }, - "status": { - "description": "Status", - "allOf": [ - { - "$ref": "#/definitions/instance.InstanceStatus" - } - ] } } }, diff --git a/apidocs/swagger.yaml b/docs/swagger.yaml similarity index 53% rename from apidocs/swagger.yaml rename to docs/swagger.yaml index a5db184..8caf401 100644 --- a/apidocs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,25 +1,23 @@ basePath: /api/v1 definitions: - backends.BackendType: - enum: - - llama_cpp - - mlx_lm - - vllm - type: string - x-enum-varnames: - - BackendTypeLlamaCpp - - BackendTypeMlxLm - - BackendTypeVllm - instance.CreateInstanceOptions: + instance.Instance: + properties: + created: + description: Unix timestamp when the instance was created + type: integer + name: + type: string + type: object + instance.Options: properties: auto_restart: description: Auto restart type: boolean - backend_options: - additionalProperties: {} + environment: + additionalProperties: + type: string + description: Environment variables type: object - backend_type: - $ref: '#/definitions/backends.BackendType' idle_timeout: description: Idle timeout type: integer @@ -32,27 +30,10 @@ definitions: description: seconds type: integer type: object - instance.InstanceStatus: - enum: - - 0 - - 1 - - 2 - type: integer - x-enum-varnames: - - Stopped - - Running - - Failed - instance.Process: + server.NodeResponse: properties: - created: - description: Creation time - type: integer - name: + address: type: string - status: - allOf: - - $ref: '#/definitions/instance.InstanceStatus' - description: Status type: object server.OpenAIInstance: properties: @@ -140,7 +121,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -193,7 +174,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -223,7 +204,7 @@ paths: "200": description: Parsed options schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' "400": description: Invalid request or command schema: @@ -243,7 +224,7 @@ paths: description: List of instances schema: items: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' type: array "500": description: Internal Server Error @@ -291,7 +272,7 @@ paths: "200": description: Instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -320,12 +301,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "201": description: Created instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid request body schema: @@ -354,12 +335,12 @@ paths: name: options required: true schema: - $ref: '#/definitions/instance.CreateInstanceOptions' + $ref: '#/definitions/instance.Options' responses: "200": description: Updated instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -432,7 +413,8 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - instances post: @@ -461,7 +443,8 @@ paths: type: string security: - ApiKeyAuth: [] - summary: Proxy requests to a specific instance + summary: Proxy requests to a specific instance, does not autostart instance + if stopped tags: - instances /instances/{name}/restart: @@ -477,7 +460,7 @@ paths: "200": description: Restarted instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -504,7 +487,7 @@ paths: "200": description: Started instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -531,7 +514,7 @@ paths: "200": description: Stopped instance details schema: - $ref: '#/definitions/instance.Process' + $ref: '#/definitions/instance.Instance' "400": description: Invalid name format schema: @@ -545,6 +528,426 @@ paths: summary: Stop a running instance tags: - instances + /llama-cpp/{name}/: + get: + description: Proxies requests to the llama.cpp UI for the specified instance + parameters: + - description: Instance Name + in: query + name: name + required: true + type: string + produces: + - text/html + responses: + "200": + description: Proxied HTML response + schema: + type: string + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp UI for the instance + tags: + - backends + /llama-cpp/{name}/apply-template: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/completion: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/detokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/embeddings: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/infill: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/metrics: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/props: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/reranking: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/slots: + get: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /llama-cpp/{name}/tokenize: + post: + description: Proxies requests to the specified llama.cpp server instance, starting + it on-demand if configured + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + produces: + - application/json + responses: + "200": + description: Proxied response + schema: + additionalProperties: true + type: object + "400": + description: Invalid instance + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Proxy requests to llama.cpp server instance + tags: + - backends + /nodes: + get: + description: Returns a map of all nodes configured in the server (node name + -> node config) + responses: + "200": + description: Map of nodes + schema: + additionalProperties: + $ref: '#/definitions/server.NodeResponse' + type: object + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: List all configured nodes + tags: + - nodes + /nodes/{name}: + get: + description: Returns the details of a specific node by name + parameters: + - description: Node Name + in: path + name: name + required: true + type: string + responses: + "200": + description: Node details + schema: + $ref: '#/definitions/server.NodeResponse' + "400": + description: Invalid name format + schema: + type: string + "404": + description: Node not found + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get details of a specific node + tags: + - nodes /v1/: post: consumes: diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go index 47ef02d..4bc97ee 100644 --- a/pkg/server/handlers_backends.go +++ b/pkg/server/handlers_backends.go @@ -86,11 +86,21 @@ func (h *Handler) LlamaCppUIProxy() http.HandlerFunc { // @Tags backends // @Security ApiKeyAuth // @Produce json -// @Param name query string true "Instance Name" +// @Param name path string true "Instance Name" // @Success 200 {object} map[string]any "Proxied response" // @Failure 400 {string} string "Invalid instance" // @Failure 500 {string} string "Internal Server Error" -// @Router /llama-cpp/{name}/* [post] +// @Router /llama-cpp/{name}/props [get] +// @Router /llama-cpp/{name}/slots [get] +// @Router /llama-cpp/{name}/apply-template [post] +// @Router /llama-cpp/{name}/completion [post] +// @Router /llama-cpp/{name}/detokenize [post] +// @Router /llama-cpp/{name}/embeddings [post] +// @Router /llama-cpp/{name}/infill [post] +// @Router /llama-cpp/{name}/metrics [post] +// @Router /llama-cpp/{name}/props [post] +// @Router /llama-cpp/{name}/reranking [post] +// @Router /llama-cpp/{name}/tokenize [post] func (h *Handler) LlamaCppProxy() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go index 24fe3e7..8a2edb9 100644 --- a/pkg/server/handlers_instances.go +++ b/pkg/server/handlers_instances.go @@ -19,7 +19,7 @@ import ( // @Tags instances // @Security ApiKeyAuth // @Produces json -// @Success 200 {array} instance.Process "List of instances" +// @Success 200 {array} instance.Instance "List of instances" // @Failure 500 {string} string "Internal Server Error" // @Router /instances [get] func (h *Handler) ListInstances() http.HandlerFunc { @@ -43,7 +43,7 @@ func (h *Handler) ListInstances() http.HandlerFunc { // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 201 {object} instance.Process "Created instance details" +// @Success 201 {object} instance.Instance "Created instance details" // @Failure 400 {string} string "Invalid request body" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name} [post] @@ -79,7 +79,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc { // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Instance details" +// @Success 200 {object} instance.Instance "Instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name} [get] @@ -111,7 +111,7 @@ func (h *Handler) GetInstance() http.HandlerFunc { // @Produces json // @Param name path string true "Instance Name" // @Param options body instance.Options true "Instance configuration options" -// @Success 200 {object} instance.Process "Updated instance details" +// @Success 200 {object} instance.Instance "Updated instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name} [put] @@ -147,7 +147,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc { // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Started instance details" +// @Success 200 {object} instance.Instance "Started instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name}/start [post] @@ -183,7 +183,7 @@ func (h *Handler) StartInstance() http.HandlerFunc { // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Stopped instance details" +// @Success 200 {object} instance.Instance "Stopped instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name}/stop [post] @@ -213,7 +213,7 @@ func (h *Handler) StopInstance() http.HandlerFunc { // @Security ApiKeyAuth // @Produces json // @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Restarted instance details" +// @Success 200 {object} instance.Instance "Restarted instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" // @Router /instances/{name}/restart [post]