{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"servers":[{"url":"/prod"}],"paths":{"/v1/tcp_warming":{"get":{"summary":"Tcp Warming","operationId":"tcp_warming_v1_tcp_warming_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v1/chat/completions":{"post":{"summary":"Chat","operationId":"chat_v1_chat_completions_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}},{"name":"X-delay-time","in":"header","required":false,"schema":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"X-Delay-Time"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/ChatCompletionRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateChatCompletionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/completions":{"post":{"summary":"Completions","operationId":"completions_v1_completions_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}},{"name":"X-delay-time","in":"header","required":false,"schema":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"X-Delay-Time"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CompletionRequest"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateCompletionResponse"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"summary":"List Models","operationId":"list_models_v1_models_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelMetadataList"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models/{model_id}":{"get":{"summary":"Get Model","operationId":"get_model_v1_models__model_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}},{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ModelMetadata"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/public/v1/models":{"get":{"tags":["Public Models"],"summary":"List all available models","description":"List all publicly available models (modelVisibility='PUBLIC' only). Use the `format` parameter to get responses compatible with OpenRouter or HuggingFace APIs.","operationId":"list_public_models_endpoint_public_v1_models_get","parameters":[{"name":"format","in":"query","required":false,"schema":{"$ref":"#/components/schemas/ModelFormat","description":"Output format: 'default' (Cerebras schema), 'openrouter', or 'huggingface'","default":"default"},"description":"Output format: 'default' (Cerebras schema), 'openrouter', or 'huggingface'"},{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/PublicModelsListResponse"},{"$ref":"#/components/schemas/OpenRouterModelsResponse"},{"$ref":"#/components/schemas/HuggingFaceModelsResponse"}],"title":"Response List Public Models Endpoint Public V1 Models Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/public/v1/models/{model_id}":{"get":{"tags":["Public Models"],"summary":"Retrieve a specific model","description":"Retrieve a specific publicly available model by ID (modelVisibility='PUBLIC' only). Use the `format` parameter to get responses compatible with OpenRouter or HuggingFace APIs.","operationId":"get_public_model_public_v1_models__model_id__get","parameters":[{"name":"model_id","in":"path","required":true,"schema":{"type":"string","title":"Model Id"}},{"name":"format","in":"query","required":false,"schema":{"$ref":"#/components/schemas/ModelFormat","description":"Output format: 'default' (Cerebras schema), 'openrouter', or 'huggingface'","default":"default"},"description":"Output format: 'default' (Cerebras schema), 'openrouter', or 'huggingface'"},{"name":"X-Amz-Cf-Id","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"X-Amz-Cf-Id"}},{"name":"CF-RAY","in":"header","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Cf-Ray"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"$ref":"#/components/schemas/PublicModel"},{"$ref":"#/components/schemas/OpenRouterModel"},{"$ref":"#/components/schemas/HuggingFaceModel"}],"title":"Response Get Public Model Public V1 Models  Model Id  Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}}},"components":{"schemas":{"AssistantMessageRequest":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/TextContent"},"type":"array"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"reasoning":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/TextContent"},"type":"array"},{"type":"null"}],"title":"Reasoning"},"role":{"type":"string","const":"assistant","title":"Role","default":"assistant"},"tool_calls":{"anyOf":[{"items":{"$ref":"#/components/schemas/AssistantToolCall"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"additionalProperties":true,"type":"object","title":"AssistantMessageRequest","description":"A message request from an assistant."},"AssistantToolCall":{"properties":{"id":{"type":"string","title":"Id"},"type":{"type":"string","const":"function","title":"Type"},"function":{"$ref":"#/components/schemas/AssistantToolCallFunction"}},"additionalProperties":true,"type":"object","required":["id","type","function"],"title":"AssistantToolCall","description":"A tool call for an assistant."},"AssistantToolCallFunction":{"properties":{"name":{"type":"string","title":"Name"},"arguments":{"type":"string","title":"Arguments"}},"additionalProperties":true,"type":"object","required":["name","arguments"],"title":"AssistantToolCallFunction","description":"A function call for an assistant tool."},"ChatChunkChoice":{"properties":{"delta":{"anyOf":[{"$ref":"#/components/schemas/ChunkResponseMessage"},{"type":"null"}]},"finish_reason":{"anyOf":[{"type":"string","enum":["stop","length","content_filter","tool_calls"]},{"type":"null"}],"title":"Finish Reason"},"index":{"type":"integer","title":"Index"},"logprobs":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionLogProbs"},{"type":"null"}]},"reasoning_logprobs":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionLogProbs"},{"type":"null"}]},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text"}},"additionalProperties":true,"type":"object","required":["index"],"title":"ChatChunkChoice"},"ChatChunkResponse":{"properties":{"id":{"type":"string","title":"Id"},"choices":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChatChunkChoice"},"type":"array"},{"type":"null"}],"title":"Choices"},"created":{"type":"integer","title":"Created"},"model":{"type":"string","title":"Model"},"service_tier":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Service Tier"},"system_fingerprint":{"type":"string","title":"System Fingerprint"},"object":{"type":"string","enum":["chat.completion.chunk","text_completion"],"title":"Object"},"usage":{"anyOf":[{"$ref":"#/components/schemas/Usage"},{"type":"null"}]},"time_info":{"anyOf":[{"$ref":"#/components/schemas/TimeInfo"},{"type":"null"}]}},"additionalProperties":true,"type":"object","required":["id","created","model","system_fingerprint","object"],"title":"ChatChunkResponse","examples":[{"choices":[{"delta":{"role":"assistant"},"index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719"},{"choices":[{"delta":{"content":"Generative "},"index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719"},{"choices":[{"delta":{},"finish_reason":"stop","index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719","time_info":{"completion_time":0.44,"prompt_time":0.02,"queue_time":0.0,"total_time":0.46},"usage":{"completion_tokens":1020,"prompt_tokens":40,"prompt_tokens_details":{"cached_tokens":1020},"total_tokens":1060}}]},"ChatCompletionChoice":{"properties":{"finish_reason":{"type":"string","enum":["stop","length","content_filter","tool_calls"],"title":"Finish Reason"},"index":{"type":"integer","title":"Index"},"message":{"$ref":"#/components/schemas/ChatCompletionResponseMessage"},"logprobs":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionLogProbs"},{"type":"null"}]},"reasoning_logprobs":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionLogProbs"},{"type":"null"}]}},"additionalProperties":true,"type":"object","required":["finish_reason","index","message"],"title":"ChatCompletionChoice"},"ChatCompletionLogProbs":{"properties":{"content":{"anyOf":[{"items":{"$ref":"#/components/schemas/LogProbsContent"},"type":"array"},{"type":"null"}],"title":"Content"},"refusal":{"anyOf":[{"items":{"$ref":"#/components/schemas/LogProbsContent"},"type":"array"},{"type":"null"}],"title":"Refusal"}},"additionalProperties":true,"type":"object","title":"ChatCompletionLogProbs"},"ChatCompletionRequest":{"properties":{"model":{"type":"string","title":"Model"},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"messages":{"items":{"oneOf":[{"$ref":"#/components/schemas/SystemMessageRequest"},{"$ref":"#/components/schemas/DeveloperMessageRequest"},{"$ref":"#/components/schemas/UserMessageRequest"},{"$ref":"#/components/schemas/AssistantMessageRequest"},{"$ref":"#/components/schemas/ToolMessageRequest"}],"discriminator":{"propertyName":"role","mapping":{"assistant":"#/components/schemas/AssistantMessageRequest","developer":"#/components/schemas/DeveloperMessageRequest","system":"#/components/schemas/SystemMessageRequest","tool":"#/components/schemas/ToolMessageRequest","user":"#/components/schemas/UserMessageRequest"}}},"type":"array","minItems":1,"title":"Messages"},"frequency_penalty":{"anyOf":[{"type":"number","maximum":2.0,"minimum":-2.0},{"type":"null"}],"title":"Frequency Penalty","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.","default":0.0},"logit_bias":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"title":"Logit Bias","description":"Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token."},"logprobs":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Logprobs","description":"Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.","default":false},"top_logprobs":{"anyOf":[{"type":"integer","maximum":20.0,"minimum":0.0},{"type":"null"}],"title":"Top Logprobs","description":"An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used."},"max_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Tokens","description":"The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. This value is now deprecated in favor of max_completion_tokens."},"max_completion_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Completion Tokens","description":"An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens."},"min_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Min Tokens","description":"The minimum number of tokens to generate for a completion. If not specified or set to 0, the model will generate as many tokens as it deems necessary. Setting to -1 sets to max sequence length."},"min_completion_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Min Completion Tokens","description":"The minimum number of tokens to generate for a completion. If not specified or set to 0, the model will generate as many tokens as it deems necessary. Setting to -1 sets to max sequence length."},"n":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N","description":"How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.","default":1},"presence_penalty":{"anyOf":[{"type":"number","maximum":2.0,"minimum":-2.0},{"type":"null"}],"title":"Presence Penalty","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.","default":0.0},"reasoning_effort":{"anyOf":[{"$ref":"#/components/schemas/ReasoningEffort"},{"type":"null"}],"description":"Constrains effort on reasoning for reasoning models. Currently supported values are none, low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. If set to None, the model will use the default reasoning effort for the model. If set to 'none', the model will not reason"},"disable_reasoning":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Disable Reasoning","description":"Disables reasoning for reasoning models. If set to True, the model will not use any reasoning in its response."},"clear_thinking":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Clear Thinking","description":"When True, removes reasoning content from messages that appear before the latest user message."},"reasoning_format":{"$ref":"#/components/schemas/ReasoningFormat","description":"Determines how reasoning is returned in the response. If set to `parsed`, the reasoning will be returned in the `reasoning` field of the response message as a string. If set to `raw`, the reasoning will be returned in the `content` field of the response message with special tokens. If set to `hidden`, the reasoning will not be returned in the response. If set to `none`, the model's default behavior will be used. If set to `text_parsed`, the reasoning will be returned in the `reasoning` field of the response message as a string, similar to `parsed`, but logprobs will not be separated into `reasoning_logprobs` and `logprobs`.","default":"none"},"response_format":{"anyOf":[{"oneOf":[{"$ref":"#/components/schemas/ResponseFormatText"},{"$ref":"#/components/schemas/ResponseFormatJSONObject"},{"$ref":"#/components/schemas/ResponseFormatJSONSchema"}],"discriminator":{"propertyName":"type","mapping":{"json_object":"#/components/schemas/ResponseFormatJSONObject","json_schema":"#/components/schemas/ResponseFormatJSONSchema","text":"#/components/schemas/ResponseFormatText"}}},{"type":"null"}],"title":"Response Format"},"seed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Seed","description":"If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. Determinism is not guaranteed."},"service_tier":{"anyOf":[{"$ref":"#/components/schemas/ServiceTier"},{"type":"null"}]},"stop":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array","maxItems":4},{"type":"null"}],"title":"Stop","description":"Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."},"stream_options":{"anyOf":[{"$ref":"#/components/schemas/StreamOptions"},{"type":"null"}]},"temperature":{"anyOf":[{"type":"number","maximum":2.0,"minimum":0.0},{"type":"null"}],"title":"Temperature","description":"What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.","default":1.0},"top_p":{"anyOf":[{"type":"number","maximum":1.0,"minimum":0.0},{"type":"null"}],"title":"Top P","description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.","default":1.0},"tools":{"anyOf":[{"items":{"$ref":"#/components/schemas/Tool"},"type":"array"},{"type":"null"}],"title":"Tools"},"tool_choice":{"anyOf":[{"$ref":"#/components/schemas/ToolChoice"},{"$ref":"#/components/schemas/ChoiceObject"},{"type":"null"}],"title":"Tool Choice"},"parallel_tool_calls":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Parallel Tool Calls","default":true},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User","description":"A unique identifier representing your end-user, which can help Cerebras to monitor and detect abuse. "},"prediction":{"anyOf":[{"$ref":"#/components/schemas/Prediction"},{"type":"null"}],"description":"Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when regenerating a file with only minor changes to most of the content."},"prompt_cache_key":{"anyOf":[{"type":"string","maxLength":1024},{"type":"null"}],"title":"Prompt Cache Key","description":"An optional opaque string. The requests with the same prompt cache key would highly likely share the same prompt prefixes. Examples would be IDs of chat conversations, IDs of users, the hashes of system prompts, etc."}},"additionalProperties":false,"type":"object","required":["model","messages"],"title":"ChatCompletionRequest","description":"Generic base class for chat completion requests.","examples":[{"max_completion_tokens":-1,"messages":[{"content":"You are a helpful assistant running on a CS-3 hardware at Cerebras Systems","role":"system"},{"content":"What is Generative AI?","role":"user"}],"min_tokens":1000,"model":"llama3.1-8b","seed":0,"stream":false,"temperature":0,"top_p":1}]},"ChatCompletionResponse":{"properties":{"id":{"type":"string","title":"Id"},"choices":{"items":{"$ref":"#/components/schemas/ChatCompletionChoice"},"type":"array","minItems":1,"title":"Choices"},"created":{"type":"integer","title":"Created"},"model":{"type":"string","title":"Model"},"service_tier":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Service Tier"},"system_fingerprint":{"type":"string","title":"System Fingerprint"},"object":{"type":"string","const":"chat.completion","title":"Object"},"usage":{"$ref":"#/components/schemas/Usage"},"time_info":{"$ref":"#/components/schemas/TimeInfo"}},"additionalProperties":true,"type":"object","required":["id","choices","created","model","system_fingerprint","object","usage","time_info"],"title":"ChatCompletionResponse","examples":[{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Generative AI is a subfield of artificial intelligence that focuses on generating new, original content, such as images, music, text, or videos. These models are trained on large datasets and learn to recognize patterns, relationships, and structures within the data, allowing them to generate new content that is similar in style, tone, or characteristics to the training data. \n \n Generative AI models can be categorized into two main types: Generative Adversarial Networks (GANs) and Variational Autoencoders (VAEs). GANs consist of two neural networks: a generator and a discriminator. The generator creates new content, while the discriminator evaluates the generated content and tells the generator whether it's realistic or not. VAEs are neural networks that learn to compress and reconstruct data. \n \n Generative AI has numerous applications across various industries, including art and design, content generation, data augmentation, and recommendation systems. Some examples of Generative AI in action include Deep Dream Generator, Amper Music, and DALL-E. \n \n In summary, Generative AI is a powerful technology that enables the creation of new, original content using machine learning models. It has the potential to revolutionize various industries and create new opportunities for artists, designers, and content creators.","role":"assistant"}}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion","system_fingerprint":"88719","time_info":{"completion_time":0.44,"prompt_time":0.02,"queue_time":0.0,"total_time":0.46},"usage":{"completion_tokens":1020,"prompt_tokens":40,"prompt_tokens_details":{"cached_tokens":1020},"total_tokens":1060}}]},"ChatCompletionResponseMessage":{"properties":{"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"reasoning":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reasoning"},"tool_calls":{"anyOf":[{"items":{"$ref":"#/components/schemas/AssistantToolCall"},"type":"array"},{"type":"null"}],"title":"Tool Calls"},"role":{"type":"string","enum":["assistant","user","system","tool"],"title":"Role"}},"additionalProperties":true,"type":"object","required":["role"],"title":"ChatCompletionResponseMessage"},"ChoiceObject":{"properties":{"type":{"type":"string","title":"Type"},"function":{"$ref":"#/components/schemas/ChoiceObjectFunction"}},"additionalProperties":true,"type":"object","required":["type","function"],"title":"ChoiceObject","description":"A choice object."},"ChoiceObjectFunction":{"properties":{"name":{"type":"string","title":"Name"}},"additionalProperties":true,"type":"object","required":["name"],"title":"ChoiceObjectFunction","description":"A function for a choice object."},"ChunkAssistantToolCall":{"properties":{"function":{"$ref":"#/components/schemas/ChunkAssistantToolCallFunction"},"type":{"type":"string","const":"function","title":"Type"},"id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Id"},"index":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Index"}},"additionalProperties":true,"type":"object","required":["function","type"],"title":"ChunkAssistantToolCall","description":"Streaming only. Represents a function call in an assistant tool call."},"ChunkAssistantToolCallFunction":{"properties":{"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"arguments":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Arguments"}},"additionalProperties":true,"type":"object","title":"ChunkAssistantToolCallFunction","description":"Streaming only. Represents a function in an assistant tool call."},"ChunkResponseMessage":{"properties":{"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"reasoning":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Reasoning"},"tool_calls":{"anyOf":[{"items":{"$ref":"#/components/schemas/ChunkAssistantToolCall"},"type":"array"},{"type":"null"}],"title":"Tool Calls"},"role":{"anyOf":[{"type":"string","enum":["assistant","user","system","tool"]},{"type":"null"}],"title":"Role"},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"}},"additionalProperties":true,"type":"object","title":"ChunkResponseMessage"},"CompletionChoice":{"properties":{"finish_reason":{"anyOf":[{"type":"string","enum":["stop","length","content_filter"]},{"type":"null"}],"title":"Finish Reason"},"index":{"type":"integer","title":"Index"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text"},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"},"logprobs":{"anyOf":[{"$ref":"#/components/schemas/CompletionLogProbs"},{"type":"null"}]}},"additionalProperties":true,"type":"object","required":["index"],"title":"CompletionChoice"},"CompletionChunkChoice":{"properties":{"delta":{"anyOf":[{"$ref":"#/components/schemas/ChunkResponseMessage"},{"type":"null"}]},"finish_reason":{"anyOf":[{"type":"string","enum":["stop","length","content_filter","tool_calls"]},{"type":"null"}],"title":"Finish Reason"},"index":{"type":"integer","title":"Index"},"logprobs":{"anyOf":[{"$ref":"#/components/schemas/CompletionLogProbs"},{"type":"null"}]},"tokens":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Tokens"},"text":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Text"}},"additionalProperties":true,"type":"object","required":["index"],"title":"CompletionChunkChoice"},"CompletionChunkResponse":{"properties":{"id":{"type":"string","title":"Id"},"choices":{"anyOf":[{"items":{"$ref":"#/components/schemas/CompletionChunkChoice"},"type":"array"},{"type":"null"}],"title":"Choices"},"created":{"type":"integer","title":"Created"},"model":{"type":"string","title":"Model"},"service_tier":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Service Tier"},"system_fingerprint":{"type":"string","title":"System Fingerprint"},"object":{"type":"string","enum":["chat.completion.chunk","text_completion"],"title":"Object"},"usage":{"anyOf":[{"$ref":"#/components/schemas/Usage"},{"type":"null"}]},"time_info":{"anyOf":[{"$ref":"#/components/schemas/TimeInfo"},{"type":"null"}]}},"additionalProperties":true,"type":"object","required":["id","created","model","system_fingerprint","object"],"title":"CompletionChunkResponse","examples":[{"choices":[{"delta":{"role":"assistant"},"index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719"},{"choices":[{"delta":{"content":"Generative "},"index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719"},{"choices":[{"delta":{},"finish_reason":"stop","index":0}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"chat.completion.chunk","system_fingerprint":"88719","time_info":{"completion_time":0.44,"prompt_time":0.02,"queue_time":0.0,"total_time":0.46},"usage":{"completion_tokens":1020,"prompt_tokens":40,"prompt_tokens_details":{"cached_tokens":1020},"total_tokens":1060}}]},"CompletionLogProbs":{"properties":{"text_offset":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Text Offset"},"token_logprobs":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Token Logprobs"},"tokens":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Tokens"},"top_logprobs":{"anyOf":[{"items":{"additionalProperties":{"type":"number"},"type":"object"},"type":"array"},{"type":"null"}],"title":"Top Logprobs"}},"additionalProperties":true,"type":"object","title":"CompletionLogProbs"},"CompletionRequest":{"properties":{"model":{"type":"string","title":"Model"},"stream":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Stream","default":false},"prompt":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array"},{"items":{"type":"integer"},"type":"array"},{"items":{"items":{"type":"integer"},"type":"array"},"type":"array"}],"title":"Prompt","description":"The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays."},"best_of":{"anyOf":[{"type":"integer","maximum":20.0,"minimum":0.0},{"type":"null"}],"title":"Best Of","description":"Generates `best_of` completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed. When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`. **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`","default":1},"return_raw_tokens":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Return Raw Tokens","description":"Return raw tokens instead of text","default":false},"echo":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Echo","description":"Echo back the prompt in addition to the completion","default":false},"frequency_penalty":{"anyOf":[{"type":"number","maximum":2.0,"minimum":-2.0},{"type":"null"}],"title":"Frequency Penalty","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.","default":0.0},"logit_bias":{"anyOf":[{"additionalProperties":{"type":"number"},"type":"object"},{"type":"null"}],"title":"Logit Bias","description":"Modify the likelihood of specified tokens appearing in the completion.\n \n Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token."},"logprobs":{"anyOf":[{"type":"integer","maximum":20.0,"minimum":0.0},{"type":"null"}],"title":"Logprobs","description":"Include the log probabilities on the logprobs most likely output tokens, as well the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response."},"max_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Max Tokens","description":"The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. "},"min_tokens":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Min Tokens","description":"The minimum number of tokens to generate for a completion. If not specified or set to 0, the model will generate as many tokens as it deems necessary. Setting to -1 sets to max sequence length."},"grammar_root":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Grammar Root","description":"The grammar root used for structured output generation."},"n":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"N","description":"How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.","default":1},"presence_penalty":{"anyOf":[{"type":"number","maximum":2.0,"minimum":-2.0},{"type":"null"}],"title":"Presence Penalty","description":"Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.","default":0.0},"seed":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Seed","description":"If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result. Determinism is not guaranteed."},"stop":{"anyOf":[{"type":"string"},{"items":{"type":"string"},"type":"array","maxItems":4},{"type":"null"}],"title":"Stop","description":"Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."},"stream_options":{"anyOf":[{"$ref":"#/components/schemas/StreamOptions"},{"type":"null"}]},"suffix":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Suffix","description":"The suffix that comes after a completion of inserted text. (OpenAI feature, not supported)"},"temperature":{"anyOf":[{"type":"number","maximum":2.0,"minimum":0.0},{"type":"null"}],"title":"Temperature","description":"What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.","default":1.0},"top_p":{"anyOf":[{"type":"number","maximum":1.0,"minimum":0.0},{"type":"null"}],"title":"Top P","description":"An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.","default":1.0},"user":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"User","description":"A unique identifier representing your end-user, which can help Cerebras to monitor and detect abuse. "},"prompt_cache_key":{"anyOf":[{"type":"string","maxLength":1024},{"type":"null"}],"title":"Prompt Cache Key","description":"An optional opaque string. The requests with the same prompt cache key would highly likely share the same prompt prefixes. Examples would be IDs of chat conversations, IDs of users, the hashes of system prompts, etc."},"reasoning_format":{"anyOf":[{"$ref":"#/components/schemas/ReasoningFormat"},{"type":"null"}],"description":"Determines how reasoning is returned in the response. If set to `parsed`, the reasoning will be returned in the `reasoning` field of the response message as a string. If set to `raw`, the reasoning will be returned in the `content` field of the response message with special tokens. If set to `hidden`, the reasoning will not be returned in the response.","deprecated":true}},"additionalProperties":false,"type":"object","required":["model","prompt"],"title":"CompletionRequest","description":"Request for completions endpoint","examples":[{"model":"llama3.1-70b","prompt":"Micheael Jordan is born in ","stream":true}]},"CompletionResponse":{"properties":{"id":{"type":"string","title":"Id"},"choices":{"items":{"$ref":"#/components/schemas/CompletionChoice"},"type":"array","minItems":1,"title":"Choices"},"created":{"type":"integer","title":"Created"},"model":{"type":"string","title":"Model"},"system_fingerprint":{"type":"string","title":"System Fingerprint"},"usage":{"anyOf":[{"$ref":"#/components/schemas/Usage"},{"type":"null"}]},"time_info":{"anyOf":[{"$ref":"#/components/schemas/TimeInfo"},{"type":"null"}]},"object":{"type":"string","const":"text_completion","title":"Object"}},"additionalProperties":true,"type":"object","required":["id","choices","created","model","system_fingerprint","object"],"title":"CompletionResponse","examples":[{"choices":[{"finish_reason":"stop","index":0,"text":"Generative AI is a subfield of artificial intelligence that focuses on generating new, original content, such as images, music, text, or videos. These models are trained on large datasets and learn to recognize patterns, relationships, and structures within the data, allowing them to generate new content that is similar in style, tone, or characteristics to the training data. \n \n Generative AI models can be categorized into two main types: Generative Adversarial Networks (GANs) and Variational Autoencoders (VAEs). GANs consist of two neural networks: a generator and a discriminator. The generator creates new content, while the discriminator evaluates the generated content and tells the generator whether it's realistic or not. VAEs are neural networks that learn to compress and reconstruct data. \n \n Generative AI has numerous applications across various industries, including art and design, content generation, data augmentation, and recommendation systems. Some examples of Generative AI in action include Deep Dream Generator, Amper Music, and DALL-E. \n \n In summary, Generative AI is a powerful technology that enables the creation of new, original content using machine learning models. It has the potential to revolutionize various industries and create new opportunities for artists, designers, and content creators."}],"created":1721088719,"id":"response_1721","model":"llama3.1-8b","object":"text_completion","system_fingerprint":"88719","time_info":{"completion_time":0.44,"prompt_time":0.02,"queue_time":0.0,"total_time":0.46},"usage":{"completion_tokens":1020,"prompt_tokens":40,"total_tokens":1060}}]},"CreateChatCompletionResponse":{"anyOf":[{"$ref":"#/components/schemas/ChatCompletionResponse"},{"$ref":"#/components/schemas/ChatChunkResponse"},{"$ref":"#/components/schemas/ErrorChunkResponse"}],"title":"CreateChatCompletionResponse"},"CreateCompletionResponse":{"anyOf":[{"$ref":"#/components/schemas/CompletionResponse"},{"$ref":"#/components/schemas/CompletionChunkResponse"},{"$ref":"#/components/schemas/ErrorChunkResponse"}],"title":"CreateCompletionResponse"},"DatacenterLocation":{"properties":{"country_code":{"type":"string","title":"Country Code","description":"ISO 3166 Alpha-2 country code"}},"type":"object","required":["country_code"],"title":"DatacenterLocation","description":"Datacenter location information."},"DeveloperMessageRequest":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"oneOf":[{"$ref":"#/components/schemas/TextContent"},{"$ref":"#/components/schemas/ImageUrlContent"},{"$ref":"#/components/schemas/ImageContent"}],"discriminator":{"propertyName":"type","mapping":{"image":"#/components/schemas/ImageContent","image_url":"#/components/schemas/ImageUrlContent","text":"#/components/schemas/TextContent"}}},"type":"array"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"role":{"type":"string","const":"developer","title":"Role","default":"developer"}},"additionalProperties":true,"type":"object","required":["content"],"title":"DeveloperMessageRequest","description":"A message request from the developer:\nCurrently only for openai models, where they are functionally the same as system"},"ErrorChunkContent":{"properties":{"message":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Message","default":""},"type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Type","default":""},"param":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Param","default":""},"code":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Code","default":""},"id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Id","default":""}},"additionalProperties":true,"type":"object","title":"ErrorChunkContent"},"ErrorChunkResponse":{"properties":{"status_code":{"type":"integer","title":"Status Code"},"error":{"$ref":"#/components/schemas/ErrorChunkContent"}},"additionalProperties":true,"type":"object","required":["status_code","error"],"title":"ErrorChunkResponse"},"FunctionObject":{"properties":{"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"name":{"type":"string","title":"Name"},"parameters":{"anyOf":[{"$ref":"#/components/schemas/Parameters"},{"type":"null"}]},"strict":{"type":"boolean","title":"Strict","default":false}},"additionalProperties":true,"type":"object","required":["name"],"title":"FunctionObject","description":"A function object."},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HuggingFaceCapabilities":{"properties":{"streaming":{"type":"boolean","title":"Streaming","default":true},"function_calling":{"type":"boolean","title":"Function Calling","default":false},"structured_outputs":{"type":"boolean","title":"Structured Outputs","default":false},"vision":{"type":"boolean","title":"Vision","default":false}},"type":"object","title":"HuggingFaceCapabilities","description":"HuggingFace capabilities format."},"HuggingFaceModel":{"properties":{"id":{"type":"string","title":"Id"},"hugging_face_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hugging Face Id","description":"The corresponding HuggingFace Hub model ID"},"object":{"type":"string","const":"model","title":"Object","default":"model"},"created":{"type":"integer","title":"Created"},"owned_by":{"type":"string","title":"Owned By"},"context_length":{"type":"integer","title":"Context Length","description":"Supported context length in tokens"},"pricing":{"$ref":"#/components/schemas/HuggingFacePricing"},"capabilities":{"$ref":"#/components/schemas/HuggingFaceCapabilities"}},"type":"object","required":["id","created","owned_by","context_length","pricing"],"title":"HuggingFaceModel","description":"Model in HuggingFace-compatible format for inference providers.\n\nThis format is used by HuggingFace to power their provider comparison table\nand provider selection features."},"HuggingFaceModelsResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/HuggingFaceModel"},"type":"array","title":"Data"}},"type":"object","required":["data"],"title":"HuggingFaceModelsResponse","description":"HuggingFace-compatible list of models."},"HuggingFacePricing":{"properties":{"input":{"type":"number","title":"Input","description":"Price in USD per million input tokens"},"output":{"type":"number","title":"Output","description":"Price in USD per million output tokens"}},"type":"object","required":["input","output"],"title":"HuggingFacePricing","description":"HuggingFace pricing format - price in USD per million tokens."},"ImageContent":{"properties":{"type":{"type":"string","const":"image","title":"Type"},"image":{"type":"string","title":"Image"}},"additionalProperties":true,"type":"object","required":["type","image"],"title":"ImageContent","description":"Image URL content for a message."},"ImageUrl":{"properties":{"url":{"type":"string","title":"Url"},"detail":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Detail","default":"auto"}},"additionalProperties":true,"type":"object","required":["url"],"title":"ImageUrl","description":"Image URL"},"ImageUrlContent":{"properties":{"type":{"type":"string","const":"image_url","title":"Type"},"image_url":{"$ref":"#/components/schemas/ImageUrl"}},"additionalProperties":true,"type":"object","required":["type","image_url"],"title":"ImageUrlContent","description":"Image URL content for a message."},"JSONSchema":{"properties":{"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"schema":{"anyOf":[{"additionalProperties":true,"type":"object"},{"type":"null"}],"title":"Schema"},"strict":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Strict"}},"additionalProperties":true,"type":"object","required":["name"],"title":"JSONSchema","description":"A JSON Schema object."},"LogProbsContent":{"properties":{"token":{"type":"string","title":"Token"},"logprob":{"type":"number","title":"Logprob"},"bytes":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Bytes"},"top_logprobs":{"items":{"$ref":"#/components/schemas/TopLogProbs"},"type":"array","title":"Top Logprobs"}},"additionalProperties":true,"type":"object","required":["token","logprob","top_logprobs"],"title":"LogProbsContent"},"ModelArchitecture":{"properties":{"modality":{"type":"string","enum":["text","text+vision","multimodal"],"title":"Modality","description":"The modality of the model (e.g., 'text', 'text+vision', 'multimodal')."},"tokenizer":{"type":"string","title":"Tokenizer","description":"The tokenizer used by the model (e.g., 'Llama3', 'GPT4')."},"instruct_type":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Instruct Type","description":"The instruction format type used for fine-tuning (e.g., 'llama3', 'chatml')."}},"type":"object","required":["modality","tokenizer"],"title":"ModelArchitecture","description":"Architecture details of the model."},"ModelCapabilities":{"properties":{"streaming":{"type":"boolean","title":"Streaming","description":"Indicates if the model supports streaming responses via Server-Sent Events (SSE).","default":true},"function_calling":{"type":"boolean","title":"Function Calling","description":"Indicates if the model supports function calling (tool use).","default":false},"structured_outputs":{"type":"boolean","title":"Structured Outputs","description":"Indicates if the model supports structured outputs (e.g. JSON schema enforcement).","default":false},"vision":{"type":"boolean","title":"Vision","description":"Indicates if the model accepts image inputs (vision capabilities).","default":false},"json_mode":{"type":"boolean","title":"Json Mode","description":"Indicates if the model supports JSON mode (guaranteed JSON output).","default":false},"tools":{"type":"boolean","title":"Tools","description":"Indicates if the model supports the tools parameter.","default":false},"tool_choice":{"type":"boolean","title":"Tool Choice","description":"Indicates if the model supports the tool_choice parameter.","default":false},"parallel_tool_calls":{"type":"boolean","title":"Parallel Tool Calls","description":"Indicates if the model supports parallel tool calls.","default":false},"response_format":{"type":"boolean","title":"Response Format","description":"Indicates if the model supports the response_format parameter.","default":false},"reasoning":{"type":"boolean","title":"Reasoning","description":"Indicates if the model supports reasoning/chain-of-thought outputs.","default":false}},"type":"object","title":"ModelCapabilities","description":"Capabilities and features supported by the model."},"ModelFormat":{"type":"string","enum":["default","openrouter","huggingface"],"title":"ModelFormat","description":"Output format for public models endpoint."},"ModelLimits":{"properties":{"max_context_length":{"type":"integer","exclusiveMinimum":0.0,"title":"Max Context Length","description":"The maximum context window size in tokens."},"max_completion_tokens":{"type":"integer","exclusiveMinimum":0.0,"title":"Max Completion Tokens","description":"The maximum number of tokens that can be generated in a single completion."},"requests_per_minute":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Requests Per Minute","description":"The default rate limit for requests per minute (RPM)."},"tokens_per_minute":{"anyOf":[{"type":"integer","minimum":0.0},{"type":"null"}],"title":"Tokens Per Minute","description":"The default rate limit for tokens per minute (TPM)."}},"type":"object","required":["max_context_length","max_completion_tokens"],"title":"ModelLimits","description":"Rate limits and constraints for the model."},"ModelMetadata":{"properties":{"id":{"type":"string","title":"Id"},"object":{"type":"string","const":"model","title":"Object","default":"model"},"created":{"type":"integer","title":"Created","default":0},"owned_by":{"type":"string","title":"Owned By","default":""}},"additionalProperties":true,"type":"object","required":["id"],"title":"ModelMetadata"},"ModelMetadataList":{"properties":{"object":{"type":"string","const":"list","title":"Object","default":"list"},"data":{"items":{"$ref":"#/components/schemas/ModelMetadata"},"type":"array","title":"Data"}},"additionalProperties":true,"type":"object","required":["data"],"title":"ModelMetadataList"},"ModelPricing":{"properties":{"prompt":{"type":"string","pattern":"^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$","title":"Prompt","description":"Cost per token for prompt (input) tokens in USD."},"completion":{"type":"string","pattern":"^(?!^[-+.]*$)[+-]?0*\\d*\\.?\\d*$","title":"Completion","description":"Cost per token for completion (output) tokens in USD."}},"type":"object","required":["prompt","completion"],"title":"ModelPricing","description":"Pricing information for a model.","example":{"completion":"0.0000001","prompt":"0.0000001"}},"OpenRouterInfo":{"properties":{"slug":{"type":"string","title":"Slug","description":"OpenRouter slug for the model"}},"type":"object","required":["slug"],"title":"OpenRouterInfo","description":"OpenRouter metadata."},"OpenRouterModel":{"properties":{"id":{"type":"string","title":"Id","description":"Model ID with provider prefix, e.g., 'cerebras/llama3.1-8b'"},"hugging_face_id":{"type":"string","title":"Hugging Face Id","description":"The corresponding HuggingFace Hub model ID, if available","default":""},"name":{"type":"string","title":"Name"},"created":{"type":"integer","title":"Created","description":"Unix timestamp when model was created"},"input_modalities":{"items":{"type":"string"},"type":"array","title":"Input Modalities","description":"Supported input modalities (text, image, file)"},"output_modalities":{"items":{"type":"string"},"type":"array","title":"Output Modalities","description":"Supported output modalities (text, image, file)"},"quantization":{"type":"string","title":"Quantization","description":"Model quantization (fp16 only for Cerebras)","default":"fp16"},"context_length":{"type":"integer","title":"Context Length"},"max_output_length":{"type":"integer","title":"Max Output Length","description":"Maximum number of output tokens"},"pricing":{"$ref":"#/components/schemas/OpenRouterPricing"},"supported_sampling_parameters":{"items":{"type":"string"},"type":"array","title":"Supported Sampling Parameters","description":"List of supported sampling parameters"},"supported_features":{"items":{"type":"string"},"type":"array","title":"Supported Features","description":"List of supported features"},"description":{"type":"string","title":"Description","description":"Model description","default":""},"openrouter":{"anyOf":[{"$ref":"#/components/schemas/OpenRouterInfo"},{"type":"null"}],"description":"OpenRouter metadata"},"datacenters":{"items":{"$ref":"#/components/schemas/DatacenterLocation"},"type":"array","title":"Datacenters","description":"Datacenter locations"}},"type":"object","required":["id","name","created","context_length","max_output_length","pricing"],"title":"OpenRouterModel","description":"Model in OpenRouter-compatible format."},"OpenRouterModelsResponse":{"properties":{"data":{"items":{"$ref":"#/components/schemas/OpenRouterModel"},"type":"array","title":"Data"}},"type":"object","required":["data"],"title":"OpenRouterModelsResponse","description":"OpenRouter-compatible list of models."},"OpenRouterPricing":{"properties":{"prompt":{"type":"string","title":"Prompt","description":"Cost per input token as string"},"completion":{"type":"string","title":"Completion","description":"Cost per output token as string"},"request":{"type":"string","title":"Request","description":"Cost per request as string","default":"0"},"image":{"type":"string","title":"Image","description":"Cost per image as string","default":"0"},"input_cache_read":{"type":"string","title":"Input Cache Read","description":"Cost per cached input token read as string","default":"0"},"input_cache_write":{"type":"string","title":"Input Cache Write","description":"Cost per cached input token write as string","default":"0"}},"type":"object","required":["prompt","completion"],"title":"OpenRouterPricing","description":"OpenRouter pricing format."},"Parameters":{"additionalProperties":true,"type":"object","title":"Parameters","description":"Represents the parameters a function accepts.\nThis model is designed to be flexible to accommodate any JSON Schema.\nThe key-value pairs you provide will define the parameters."},"Prediction":{"properties":{"type":{"type":"string","const":"content","title":"Type"},"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/TextContent"},"type":"array"}],"title":"Content"}},"additionalProperties":true,"type":"object","required":["type","content"],"title":"Prediction"},"PublicModel":{"properties":{"id":{"type":"string","maxLength":200,"minLength":1,"title":"Id","description":"The unique identifier for the model (e.g., 'llama3.1-8b')."},"object":{"type":"string","const":"model","title":"Object","description":"The object type, which is always 'model'.","default":"model"},"created":{"type":"integer","minimum":0.0,"title":"Created","description":"The Unix timestamp (in seconds) when the model was created."},"owned_by":{"type":"string","minLength":1,"title":"Owned By","description":"The organization that owns or created the model."},"name":{"type":"string","minLength":1,"title":"Name","description":"The human-readable name of the model."},"description":{"type":"string","maxLength":1000,"minLength":1,"title":"Description","description":"A brief description of the model."},"hugging_face_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hugging Face Id","description":"The corresponding HuggingFace Hub model ID, if available (e.g., 'meta-llama/Llama-3.1-8B-Instruct')."},"pricing":{"$ref":"#/components/schemas/ModelPricing","description":"Pricing details for the model."},"capabilities":{"$ref":"#/components/schemas/ModelCapabilities","description":"The capabilities supported by the model."},"supported_parameters":{"$ref":"#/components/schemas/SupportedParameters"},"architecture":{"$ref":"#/components/schemas/ModelArchitecture","description":"Technical architecture details of the model."},"limits":{"$ref":"#/components/schemas/ModelLimits","description":"Usage limits and constraints for the model."},"datacenter_locations":{"items":{"type":"string"},"type":"array","title":"Datacenter Locations","description":"List of datacenter locations where this model is deployed (e.g., ['us-east-1', 'eu-west-1'])."},"deprecated":{"type":"boolean","title":"Deprecated","description":"Indicates if the model is deprecated and should not be used for new applications.","default":false},"preview":{"type":"boolean","title":"Preview","description":"Indicates if the model is in preview or beta status.","default":false},"quantization":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Quantization","description":"Quantization precision (e.g., 'FP16', 'FP16/FP8 (weights only)')."}},"type":"object","required":["id","created","owned_by","name","description","pricing","capabilities","supported_parameters","architecture","limits"],"title":"PublicModel","description":"Complete model specification following OpenAI-compatible schema\nwith extensions for OpenRouter/HuggingFace compatibility."},"PublicModelsListResponse":{"properties":{"object":{"type":"string","const":"list","title":"Object","description":"Object type (always 'list')","default":"list"},"data":{"items":{"$ref":"#/components/schemas/PublicModel"},"type":"array","title":"Data","description":"Array of model objects"}},"type":"object","required":["data"],"title":"PublicModelsListResponse","description":"OpenAI-compatible list of models."},"ReasoningEffort":{"type":"string","enum":["none","low","medium","high"],"title":"ReasoningEffort"},"ReasoningFormat":{"type":"string","enum":["none","parsed","text_parsed","raw","hidden"],"title":"ReasoningFormat"},"ResponseFormatJSONObject":{"properties":{"type":{"type":"string","const":"json_object","title":"Type"}},"additionalProperties":true,"type":"object","required":["type"],"title":"ResponseFormatJSONObject","description":"A response format for a JSON object."},"ResponseFormatJSONSchema":{"properties":{"json_schema":{"$ref":"#/components/schemas/JSONSchema"},"type":{"type":"string","const":"json_schema","title":"Type"}},"additionalProperties":true,"type":"object","required":["json_schema","type"],"title":"ResponseFormatJSONSchema","description":"A response format for a JSON schema."},"ResponseFormatText":{"properties":{"type":{"type":"string","const":"text","title":"Type"}},"additionalProperties":true,"type":"object","required":["type"],"title":"ResponseFormatText","description":"A response format for text."},"ServiceTier":{"type":"string","enum":["auto","default","flex","priority"],"title":"ServiceTier"},"StreamOptions":{"properties":{"include_usage":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Include Usage"}},"additionalProperties":true,"type":"object","title":"StreamOptions","description":"Options for streaming."},"SupportedParameters":{"properties":{"temperature":{"type":"boolean","title":"Temperature","description":"Supports temperature sampling parameter.","default":true},"top_p":{"type":"boolean","title":"Top P","description":"Supports top_p (nucleus) sampling parameter.","default":true},"seed":{"type":"boolean","title":"Seed","description":"Supports seed for reproducible outputs.","default":true},"stop":{"type":"boolean","title":"Stop","description":"Supports stop sequences parameter.","default":true},"max_completion_tokens":{"type":"boolean","title":"Max Completion Tokens","description":"Supports max_completion_tokens parameter.","default":true},"logprobs":{"type":"boolean","title":"Logprobs","description":"Supports logprobs output.","default":false},"top_logprobs":{"type":"boolean","title":"Top Logprobs","description":"Supports top_logprobs parameter.","default":false},"frequency_penalty":{"type":"boolean","title":"Frequency Penalty","description":"Supports frequency_penalty parameter.","default":false},"presence_penalty":{"type":"boolean","title":"Presence Penalty","description":"Supports presence_penalty parameter.","default":false},"logit_bias":{"type":"boolean","title":"Logit Bias","description":"Supports logit_bias parameter.","default":false},"repetition_penalty":{"type":"boolean","title":"Repetition Penalty","description":"Supports repetition_penalty parameter.","default":false}},"type":"object","title":"SupportedParameters","description":"Sampling parameters supported by the model."},"SystemMessageRequest":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"oneOf":[{"$ref":"#/components/schemas/TextContent"},{"$ref":"#/components/schemas/ImageUrlContent"},{"$ref":"#/components/schemas/ImageContent"}],"discriminator":{"propertyName":"type","mapping":{"image":"#/components/schemas/ImageContent","image_url":"#/components/schemas/ImageUrlContent","text":"#/components/schemas/TextContent"}}},"type":"array"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"role":{"type":"string","const":"system","title":"Role","default":"system"}},"additionalProperties":true,"type":"object","required":["content"],"title":"SystemMessageRequest","description":"A message request from the system."},"TextContent":{"properties":{"type":{"type":"string","const":"text","title":"Type"},"text":{"type":"string","title":"Text"}},"additionalProperties":true,"type":"object","required":["type","text"],"title":"TextContent","description":"Text content for a message."},"TimeInfo":{"properties":{"queue_time":{"anyOf":[{"type":"number","minimum":0.0},{"type":"null"}],"title":"Queue Time"},"prompt_time":{"anyOf":[{"type":"number","minimum":0.0},{"type":"null"}],"title":"Prompt Time"},"completion_time":{"anyOf":[{"type":"number","minimum":0.0},{"type":"null"}],"title":"Completion Time"},"total_time":{"anyOf":[{"type":"number","minimum":0.0},{"type":"null"}],"title":"Total Time"}},"additionalProperties":true,"type":"object","title":"TimeInfo","description":"Time information for different phases of request processing.\n\nAll times are measured in seconds.","examples":[{"completion_time":0.44,"prompt_time":0.02,"queue_time":0.0,"total_time":0.46}]},"Tool":{"properties":{"type":{"type":"string","title":"Type"},"function":{"$ref":"#/components/schemas/FunctionObject"}},"additionalProperties":true,"type":"object","required":["type","function"],"title":"Tool","description":"A tool object"},"ToolChoice":{"type":"string","enum":["none","auto","required"],"title":"ToolChoice","description":"A tool choice object."},"ToolMessageRequest":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"$ref":"#/components/schemas/TextContent"},"type":"array"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"role":{"type":"string","const":"tool","title":"Role","default":"tool"},"tool_call_id":{"type":"string","title":"Tool Call Id"}},"additionalProperties":true,"type":"object","required":["content","tool_call_id"],"title":"ToolMessageRequest","description":"A message request from a tool."},"TopLogProbs":{"properties":{"token":{"type":"string","title":"Token"},"logprob":{"type":"number","title":"Logprob"},"bytes":{"anyOf":[{"items":{"type":"integer"},"type":"array"},{"type":"null"}],"title":"Bytes"}},"additionalProperties":true,"type":"object","required":["token","logprob"],"title":"TopLogProbs"},"Usage":{"additionalProperties":true,"type":"object"},"UserMessageRequest":{"properties":{"content":{"anyOf":[{"type":"string"},{"items":{"oneOf":[{"$ref":"#/components/schemas/TextContent"},{"$ref":"#/components/schemas/ImageUrlContent"},{"$ref":"#/components/schemas/ImageContent"}],"discriminator":{"propertyName":"type","mapping":{"image":"#/components/schemas/ImageContent","image_url":"#/components/schemas/ImageUrlContent","text":"#/components/schemas/TextContent"}}},"type":"array"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"role":{"type":"string","const":"user","title":"Role","default":"user"}},"additionalProperties":true,"type":"object","required":["content"],"title":"UserMessageRequest","description":"A message request from the user."},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"}}}}