Text-to-speech

# Text-to-speech: POST the synthesis request and save the binary audio reply.
# The Authorization value has the form "Bearer inf_***".
# --output keeps the audio bytes out of the terminal; if the request fails,
# the file will contain the JSON error body instead of audio.
curl --request POST \
  --url https://api.infery.ai/v1/audio/speech \
  --header 'Authorization: <api-key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "model": "<string>",
  "input": "<string>",
  "voice": "<string>",
  "response_format": "mp3",
  "speed": 1
}
'

import requests

# Text-to-speech: POST the synthesis request and save the returned audio.
url = "https://api.infery.ai/v1/audio/speech"

payload = {
    "model": "<string>",
    "input": "<string>",
    "voice": "<string>",
    "response_format": "mp3",
    "speed": 1
}
headers = {
    # The value has the form "Bearer inf_***".
    "Authorization": "<api-key>",
    "Content-Type": "application/json"
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Failures come back as a JSON error body; report them instead of saving
# them as if they were audio.
if not response.ok:
    raise SystemExit(
        f"TTS request failed ({response.status_code}): {response.text}"
    )

# The success body is binary audio, so write the raw bytes rather than
# decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Saved {len(response.content)} bytes to speech.mp3")

// Text-to-speech: POST the synthesis request and read the binary audio reply.
// The Authorization value has the form "Bearer inf_***".
const options = {
  method: 'POST',
  headers: {Authorization: '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    model: '<string>',
    input: '<string>',
    voice: '<string>',
    response_format: 'mp3',
    speed: 1
  })
};

fetch('https://api.infery.ai/v1/audio/speech', options)
  .then(async res => {
    // Failures carry a JSON error body; surface it as an Error.
    if (!res.ok) {
      throw new Error(`TTS request failed (${res.status}): ${await res.text()}`);
    }
    // The success body is binary audio, so it must not be parsed as JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Text-to-speech: POST the synthesis request and save the returned audio.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.infery.ai/v1/audio/speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'model' => '<string>',
    'input' => '<string>',
    'voice' => '<string>',
    'response_format' => 'mp3',
    'speed' => 1
  ]),
  CURLOPT_HTTPHEADER => [
    // The Authorization value has the form "Bearer inf_***".
    "Authorization: <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// Read the status before closing the handle so API errors can be told apart
// from a successful audio payload.
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // Failures are returned as a JSON error body, not audio.
  echo "HTTP " . $status . ": " . $response;
} else {
  // The success body is binary audio; store the raw bytes instead of echoing them.
  file_put_contents("speech.mp3", $response);
  echo "Saved " . strlen($response) . " bytes to speech.mp3";
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a text-to-speech request and reports the outcome.
//
// Every error is checked: when Do fails the response is nil, so closing its
// body unconditionally would panic. A non-2xx reply carries a JSON error
// body, which is printed as text. A successful reply is binary audio, so only
// its size and Content-Type are printed rather than dumping raw bytes.
func main() {
	url := "https://api.infery.ai/v1/audio/speech"

	payload := strings.NewReader("{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// The Authorization value has the form "Bearer inf_***".
	req.Header.Add("Authorization", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		fmt.Printf("request failed (%s): %s\n", res.Status, body)
		return
	}

	// body now holds the audio bytes; persist or stream them as needed.
	fmt.Printf("received %d bytes of audio (%s)\n", len(body), res.Header.Get("Content-Type"))
}

// Text-to-speech: POST the synthesis request with Unirest and read the reply as a String.
// The Authorization value has the form "Bearer inf_***".
// NOTE(review): a successful reply from this endpoint is binary audio, so decoding it
// with asString() will likely corrupt the data — consider a byte-oriented reader;
// confirm which one the Unirest version in use provides.
HttpResponse<String> response = Unirest.post("https://api.infery.ai/v1/audio/speech")
  .header("Authorization", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}")
  .asString();

require 'uri'
require 'net/http'

# Text-to-speech: POST the synthesis request and save the returned audio.
url = URI("https://api.infery.ai/v1/audio/speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
# The Authorization value has the form "Bearer inf_***".
request["Authorization"] = '<api-key>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}"

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The success body is binary audio; binwrite stores the bytes untouched,
  # whereas puts would append a newline and dump them to the terminal.
  File.binwrite("speech.mp3", response.read_body)
  puts "Saved speech.mp3"
else
  # Failures are returned as a JSON error body.
  warn "TTS request failed (#{response.code}): #{response.read_body}"
end

"/samples/tts.wav"

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

POST

audio

speech

Text-to-speech

# Text-to-speech: POST the synthesis request and save the binary audio reply.
# The Authorization value has the form "Bearer inf_***".
# --output keeps the audio bytes out of the terminal; if the request fails,
# the file will contain the JSON error body instead of audio.
curl --request POST \
  --url https://api.infery.ai/v1/audio/speech \
  --header 'Authorization: <api-key>' \
  --header 'Content-Type: application/json' \
  --output speech.mp3 \
  --data '
{
  "model": "<string>",
  "input": "<string>",
  "voice": "<string>",
  "response_format": "mp3",
  "speed": 1
}
'

import requests

# Text-to-speech: POST the synthesis request and save the returned audio.
url = "https://api.infery.ai/v1/audio/speech"

payload = {
    "model": "<string>",
    "input": "<string>",
    "voice": "<string>",
    "response_format": "mp3",
    "speed": 1
}
headers = {
    # The value has the form "Bearer inf_***".
    "Authorization": "<api-key>",
    "Content-Type": "application/json"
}

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Failures come back as a JSON error body; report them instead of saving
# them as if they were audio.
if not response.ok:
    raise SystemExit(
        f"TTS request failed ({response.status_code}): {response.text}"
    )

# The success body is binary audio, so write the raw bytes rather than
# decoding them as text.
with open("speech.mp3", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Saved {len(response.content)} bytes to speech.mp3")

// Text-to-speech: POST the synthesis request and read the binary audio reply.
// The Authorization value has the form "Bearer inf_***".
const options = {
  method: 'POST',
  headers: {Authorization: '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    model: '<string>',
    input: '<string>',
    voice: '<string>',
    response_format: 'mp3',
    speed: 1
  })
};

fetch('https://api.infery.ai/v1/audio/speech', options)
  .then(async res => {
    // Failures carry a JSON error body; surface it as an Error.
    if (!res.ok) {
      throw new Error(`TTS request failed (${res.status}): ${await res.text()}`);
    }
    // The success body is binary audio, so it must not be parsed as JSON.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Text-to-speech: POST the synthesis request and save the returned audio.
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.infery.ai/v1/audio/speech",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'model' => '<string>',
    'input' => '<string>',
    'voice' => '<string>',
    'response_format' => 'mp3',
    'speed' => 1
  ]),
  CURLOPT_HTTPHEADER => [
    // The Authorization value has the form "Bearer inf_***".
    "Authorization: <api-key>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);
// Read the status before closing the handle so API errors can be told apart
// from a successful audio payload.
$status = curl_getinfo($curl, CURLINFO_RESPONSE_CODE);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} elseif ($status >= 400) {
  // Failures are returned as a JSON error body, not audio.
  echo "HTTP " . $status . ": " . $response;
} else {
  // The success body is binary audio; store the raw bytes instead of echoing them.
  file_put_contents("speech.mp3", $response);
  echo "Saved " . strlen($response) . " bytes to speech.mp3";
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a text-to-speech request and reports the outcome.
//
// Every error is checked: when Do fails the response is nil, so closing its
// body unconditionally would panic. A non-2xx reply carries a JSON error
// body, which is printed as text. A successful reply is binary audio, so only
// its size and Content-Type are printed rather than dumping raw bytes.
func main() {
	url := "https://api.infery.ai/v1/audio/speech"

	payload := strings.NewReader("{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	// The Authorization value has the form "Bearer inf_***".
	req.Header.Add("Authorization", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	if res.StatusCode < 200 || res.StatusCode >= 300 {
		fmt.Printf("request failed (%s): %s\n", res.Status, body)
		return
	}

	// body now holds the audio bytes; persist or stream them as needed.
	fmt.Printf("received %d bytes of audio (%s)\n", len(body), res.Header.Get("Content-Type"))
}

// Text-to-speech: POST the synthesis request with Unirest and read the reply as a String.
// The Authorization value has the form "Bearer inf_***".
// NOTE(review): a successful reply from this endpoint is binary audio, so decoding it
// with asString() will likely corrupt the data — consider a byte-oriented reader;
// confirm which one the Unirest version in use provides.
HttpResponse<String> response = Unirest.post("https://api.infery.ai/v1/audio/speech")
  .header("Authorization", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}")
  .asString();

require 'uri'
require 'net/http'

# Text-to-speech: POST the synthesis request and save the returned audio.
url = URI("https://api.infery.ai/v1/audio/speech")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
# The Authorization value has the form "Bearer inf_***".
request["Authorization"] = '<api-key>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"model\": \"<string>\",\n  \"input\": \"<string>\",\n  \"voice\": \"<string>\",\n  \"response_format\": \"mp3\",\n  \"speed\": 1\n}"

response = http.request(request)

if response.is_a?(Net::HTTPSuccess)
  # The success body is binary audio; binwrite stores the bytes untouched,
  # whereas puts would append a newline and dump them to the terminal.
  File.binwrite("speech.mp3", response.read_body)
  puts "Saved speech.mp3"
else
  # Failures are returned as a JSON error body.
  warn "TTS request failed (#{response.code}): #{response.read_body}"
end

"/samples/tts.wav"

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

{
  "error": {
    "message": "Model not found",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

curl https://api.infery.ai/v1/audio/speech \
  -H "Authorization: Bearer $INFERY_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "tts-1",
    "input": "Hello from Infery",
    "voice": "alloy"
  }' --output out.mp3

The response body is the binary audio itself; its Content-Type (for example audio/mpeg or audio/wav) depends on response_format.

Sample output

Generated with gemini-2.5-flash-preview-tts, voice Kore. Download tts.wav.

Parameters

voice — model-dependent (alloy, echo, onyx, nova, shimmer, etc.)
response_format — mp3 (default), wav, opus, aac, flac, pcm
speed — 0.25–4.0

Authorizations

Authorization

string

header

required

API key in format: Bearer inf_***

Body

application/json

model

string

required

Model ID to use for TTS

input

string

required

Text to synthesize into speech

voice

string

required

Voice to use for synthesis

response_format

enum<string>

default:mp3

Available options:

mp3,

opus,

aac,

flac

speed

number

default:1

Speed of the generated audio (0.25 to 4.0)

Response

Binary audio stream. Content-Type reflects the requested response_format: audio/mpeg (mp3, default), audio/wav, audio/ogg (opus), audio/flac, audio/aac, or audio/pcm. Credits deducted are returned in the x-credits-used response header.

The response is of type file.

Edit an image Speech-to-text

Overview

Chat Completions

Embeddings

Images

Audio

Video

Music

Files

Models

Text-to-speech

Sample output

Parameters

Authorizations

Body

Response

Sample output

Parameters

Authorizations

Body

Response

Sample output

Parameters