diff --git a/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json b/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json new file mode 100644 index 00000000000000..19bb8d167044df --- /dev/null +++ b/src/content/workers-ai-models/llama-3.1-8b-instruct-fast.json @@ -0,0 +1,745 @@ +{ + + "id": "872d9af1-5ff8-4e84-aed1-ab3caf909436", + + "source": 1, + + "name": "@cf/meta/llama-3.1-8b-instruct-fast", + + "description": "[Fast version] The Meta Llama 3.1 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction tuned generative models. The Llama 3.1 instruction tuned text only models are optimized for multilingual dialogue use cases and outperform many of the available open source and closed chat models on common industry benchmarks.", + + "task": { + + "id": "c329a1f9-323d-4e91-b2aa-582dd4188d34", + + "name": "Text Generation", + + "description": "Family of generative text models, such as large language models (LLM), that can be adapted for a variety of natural language tasks." + + }, + + "tags": [], + + "properties": [ + + { "property_id": "beta", "value": "true" }, + + { + + "property_id": "terms", + + "value": "https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE" + + } + + ], + + "schema": { + + "input": { + + "type": "object", + + "oneOf": [ + + { + + "title": "Prompt", + + "properties": { + + "prompt": { + + "type": "string", + + "minLength": 1, + + "maxLength": 131072, + + "description": "The input text prompt for the model to generate a response." + + }, + + "image": { + + "oneOf": [ + + { + + "type": "array", + + "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + + "items": { + + "type": "number", + + "description": "A value between 0 and 255" + + } + + }, + + { + + "type": "string", + + "format": "binary", + + "description": "Binary string representing the image contents." + + } + + ] + + }, + + "raw": { + + "type": "boolean", + + "default": false, + + "description": "If true, a chat template is not applied and you must adhere to the specific model's expected formatting." + + }, + + "stream": { + + "type": "boolean", + + "default": false, + + "description": "If true, the response will be streamed back incrementally using SSE, Server Sent Events." + + }, + + "max_tokens": { + + "type": "integer", + + "default": 256, + + "description": "The maximum number of tokens to generate in the response." + + }, + + "temperature": { + + "type": "number", + + "default": 0.6, + + "minimum": 0, + + "maximum": 5, + + "description": "Controls the randomness of the output; higher values produce more random results." + + }, + + "top_p": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Adjusts the creativity of the AI's responses by controlling how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + + }, + + "top_k": { + + "type": "integer", + + "minimum": 1, + + "maximum": 50, + + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + + }, + + "seed": { + + "type": "integer", + + "minimum": 1, + + "maximum": 9999999999, + + "description": "Random seed for reproducibility of the generation." + + }, + + "repetition_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Penalty for repeated tokens; higher values discourage repetition." + + }, + + "frequency_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + + }, + + "presence_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Increases the likelihood of the model introducing new topics." + + }, + + "lora": { + + "type": "string", + + "description": "Name of the LoRA (Low-Rank Adaptation) model to fine-tune the base model." + + } + + }, + + "required": ["prompt"] + + }, + + { + + "title": "Messages", + + "properties": { + + "messages": { + + "type": "array", + + "description": "An array of message objects representing the conversation history.", + + "items": { + + "type": "object", + + "properties": { + + "role": { + + "type": "string", + + "description": "The role of the message sender (e.g., 'user', 'assistant', 'system', 'tool')." + + }, + + "content": { + + "type": "string", + + "maxLength": 131072, + + "description": "The content of the message as a string." + + } + + }, + + "required": ["role", "content"] + + } + + }, + + "image": { + + "oneOf": [ + + { + + "type": "array", + + "description": "An array of integers that represent the image data constrained to 8-bit unsigned integer values", + + "items": { + + "type": "number", + + "description": "A value between 0 and 255" + + } + + }, + + { + + "type": "string", + + "format": "binary", + + "description": "Binary string representing the image contents." + + } + + ] + + }, + + "functions": { + + "type": "array", + + "items": { + + "type": "object", + + "properties": { + + "name": { "type": "string" }, + + "code": { "type": "string" } + + }, + + "required": ["name", "code"] + + } + + }, + + "tools": { + + "type": "array", + + "description": "A list of tools available for the assistant to use.", + + "items": { + + "type": "object", + + "oneOf": [ + + { + + "properties": { + + "name": { + + "type": "string", + + "description": "The name of the tool. More descriptive the better." + + }, + + "description": { + + "type": "string", + + "description": "A brief description of what the tool does." + + }, + + "parameters": { + + "type": "object", + + "description": "Schema defining the parameters accepted by the tool.", + + "properties": { + + "type": { + + "type": "string", + + "description": "The type of the parameters object (usually 'object')." + + }, + + "required": { + + "type": "array", + + "description": "List of required parameter names.", + + "items": { "type": "string" } + + }, + + "properties": { + + "type": "object", + + "description": "Definitions of each parameter.", + + "additionalProperties": { + + "type": "object", + + "properties": { + + "type": { + + "type": "string", + + "description": "The data type of the parameter." + + }, + + "description": { + + "type": "string", + + "description": "A description of the expected parameter." + + } + + }, + + "required": ["type", "description"] + + } + + } + + }, + + "required": ["type", "properties"] + + } + + }, + + "required": ["name", "description", "parameters"] + + }, + + { + + "properties": { + + "type": { + + "type": "string", + + "description": "Specifies the type of tool (e.g., 'function')." + + }, + + "function": { + + "type": "object", + + "description": "Details of the function tool.", + + "properties": { + + "name": { + + "type": "string", + + "description": "The name of the function." + + }, + + "description": { + + "type": "string", + + "description": "A brief description of what the function does." + + }, + + "parameters": { + + "type": "object", + + "description": "Schema defining the parameters accepted by the function.", + + "properties": { + + "type": { + + "type": "string", + + "description": "The type of the parameters object (usually 'object')." + + }, + + "required": { + + "type": "array", + + "description": "List of required parameter names.", + + "items": { "type": "string" } + + }, + + "properties": { + + "type": "object", + + "description": "Definitions of each parameter.", + + "additionalProperties": { + + "type": "object", + + "properties": { + + "type": { + + "type": "string", + + "description": "The data type of the parameter." + + }, + + "description": { + + "type": "string", + + "description": "A description of the expected parameter." + + } + + }, + + "required": ["type", "description"] + + } + + } + + }, + + "required": ["type", "properties"] + + } + + }, + + "required": ["name", "description", "parameters"] + + } + + }, + + "required": ["type", "function"] + + } + + ] + + } + + }, + + "stream": { + + "type": "boolean", + + "default": false, + + "description": "If true, the response will be streamed back incrementally." + + }, + + "max_tokens": { + + "type": "integer", + + "default": 256, + + "description": "The maximum number of tokens to generate in the response." + + }, + + "temperature": { + + "type": "number", + + "default": 0.6, + + "minimum": 0, + + "maximum": 5, + + "description": "Controls the randomness of the output; higher values produce more random results." + + }, + + "top_p": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Controls the creativity of the AI's responses by adjusting how many possible words it considers. Lower values make outputs more predictable; higher values allow for more varied and creative responses." + + }, + + "top_k": { + + "type": "integer", + + "minimum": 1, + + "maximum": 50, + + "description": "Limits the AI to choose from the top 'k' most probable words. Lower values make responses more focused; higher values introduce more variety and potential surprises." + + }, + + "seed": { + + "type": "integer", + + "minimum": 1, + + "maximum": 9999999999, + + "description": "Random seed for reproducibility of the generation." + + }, + + "repetition_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Penalty for repeated tokens; higher values discourage repetition." + + }, + + "frequency_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Decreases the likelihood of the model repeating the same lines verbatim." + + }, + + "presence_penalty": { + + "type": "number", + + "minimum": 0, + + "maximum": 2, + + "description": "Increases the likelihood of the model introducing new topics." + + } + + }, + + "required": ["messages"] + + } + + ] + + }, + + "output": { + + "oneOf": [ + + { + + "type": "object", + + "contentType": "application/json", + + "properties": { + + "response": { + + "type": "string", + + "description": "The generated text response from the model" + + }, + + "tool_calls": { + + "type": "array", + + "description": "An array of tool calls requests made during the response generation", + + "items": { + + "type": "object", + + "properties": { + + "arguments": { + + "type": "object", + + "description": "The arguments passed to be passed to the tool call request" + + }, + + "name": { + + "type": "string", + + "description": "The name of the tool to be called" + + } + + } + + } + + } + + } + + }, + + { + + "type": "string", + + "contentType": "text/event-stream", + + "format": "binary" + + } + + ] + + } + + } + +} \ No newline at end of file