# OpenAPI 3.1.1 description of the Agentset HTTP API.
#
# Conventions visible in this document:
#   - Every operation is secured with the `token` scheme.
#   - Success payloads are wrapped as `{ success: true, data: ... }`; list
#     endpoints add a `pagination` object with cursor fields.
#   - Every operation references the same shared error responses
#     (400, 401, 403, 404, 409, 410, 422, 429, 500) from `components/responses`.
#   - `x-speakeasy-*` and `x-codeSamples` are vendor extensions
#     (presumably consumed by the SDK/docs generator; verify against its docs).
openapi: "3.1.1"
info:
  title: "AgentsetAPI"
  description: "Agentset is agentic rag-as-a-service"
  version: "0.0.1"
  contact:
    name: "Agentset Support"
    email: "support@agentset.ai"
    url: "https://api.agentset.ai/"
  license:
    name: "MIT License"
    url: "https://github.com/agentset-ai/agentset/blob/main/LICENSE.md"
servers:
  - url: "https://api.agentset.ai"
    description: "Production API"
x-speakeasy-globals:
  parameters:
    - $ref: "#/components/parameters/NamespaceIdRef"
    - $ref: "#/components/parameters/TenantIdRef"
paths:
  # ------------------------------------------------------------------------
  # Namespaces: collection
  # ------------------------------------------------------------------------
  /v1/namespace:
    get:
      operationId: "listNamespaces"
      x-speakeasy-name-override: "list"
      summary: "Retrieve a list of namespaces"
      description: "Retrieve a list of namespaces for the authenticated organization."
      tags:
        - "Namespaces"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\n\nconst namespaces = await agentset.namespaces.list();\nconsole.log(namespaces);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.namespaces.list()

                # Handle response
                print(res)
      responses:
        "200":
          description: "The retrieved namespaces"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "array"
                    items:
                      $ref: "#/components/schemas/namespace"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    post:
      operationId: "createNamespace"
      x-speakeasy-name-override: "create"
      x-speakeasy-usage-example: true
      summary: "Create a namespace."
      description: "Create a namespace for the authenticated organization."
      tags:
        - "Namespaces"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\n\nconst namespace = await agentset.namespaces.create({\n name: \"My Knowledge Base\",\n slug: \"my-knowledge-base\",\n // embeddingConfig: {...},\n // vectorStoreConfig: {...},\n});\nconsole.log(namespace);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.namespaces.create(name="", slug="", embedding_config={
                    "provider": "GOOGLE",
                    "model": "text-embedding-004",
                    "api_key": "",
                }, vector_store_config={
                    "provider": "PINECONE",
                    "api_key": "",
                    "index_host": "https://example.svc.aped-1234-a56b.pinecone.io",
                })

                # Handle response
                print(res)
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: "object"
              properties:
                name:
                  type: "string"
                  minLength: 1
                  maxLength: 64
                slug:
                  type: "string"
                  minLength: 2
                  maxLength: 48
                # Both config objects are optional and carry a declared default.
                embeddingConfig:
                  default:
                    provider: "MANAGED_OPENAI"
                    model: "text-embedding-3-large"
                  $ref: "#/components/schemas/embedding-model-config"
                vectorStoreConfig:
                  default:
                    provider: "MANAGED_TURBOPUFFER"
                  $ref: "#/components/schemas/create-vector-store-config"
              required:
                - "name"
                - "slug"
      responses:
        "201":
          description: "The created namespace"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/namespace"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Namespaces: single resource
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}:
    get:
      operationId: "getNamespace"
      x-speakeasy-name-override: "get"
      summary: "Retrieve a namespace"
      description: "Retrieve the info for a namespace."
      tags:
        - "Namespaces"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\n\nconst namespace = await agentset.namespaces.get(\"ns_xxx\");\nconsole.log(namespace);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.namespaces.get()

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
      responses:
        "200":
          description: "The retrieved namespace"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/namespace"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    patch:
      operationId: "updateNamespace"
      x-speakeasy-name-override: "update"
      x-speakeasy-max-method-params: 2
      summary: "Update a namespace."
      description: "Update a namespace for the authenticated organization. If there is no change, return it as it is."
      tags:
        - "Namespaces"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\n\nconst updatedNamespace = await agentset.namespaces.update(\"ns_xxx\", {\n name: \"Updated Knowledge Base\",\n});\nconsole.log(updatedNamespace);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.namespaces.update()

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # No `required` list: both fields are optional on update.
              type: "object"
              properties:
                name:
                  type: "string"
                  minLength: 1
                  maxLength: 64
                slug:
                  type: "string"
                  minLength: 2
                  maxLength: 48
      responses:
        "200":
          description: "The updated namespace"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/namespace"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    delete:
      operationId: "deleteNamespace"
      x-speakeasy-name-override: "delete"
      x-speakeasy-max-method-params: 1
      summary: "Delete a namespace."
      description: "Delete a namespace for the authenticated organization. This will delete all the data associated with the namespace."
      tags:
        - "Namespaces"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\n\nawait agentset.namespaces.delete(\"ns_xxx\");\nconsole.log(\"Namespace queued for deletion\");\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.namespaces.delete()

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
      responses:
        # NOTE(review): a 204 (No Content) response cannot carry a body, yet a
        # JSON schema is declared here. Confirm the status the server really
        # returns and either drop `content` or switch to 200/202.
        "204":
          description: "The deleted namespace"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/namespace"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Ingest jobs: collection
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/ingest-jobs:
    get:
      operationId: "listIngestJobs"
      x-speakeasy-name-override: "list"
      x-speakeasy-group: "ingestJobs"
      x-speakeasy-pagination:
        type: "cursor"
        inputs:
          - name: "cursor"
            in: "parameters"
            type: "cursor"
        outputs:
          nextCursor: "$.pagination.nextCursor"
      summary: "Retrieve a list of ingest jobs"
      description: "Retrieve a paginated list of ingest jobs for the authenticated organization."
      tags:
        - "Ingest Jobs"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst jobs = await ns.ingestion.all();\nconsole.log(jobs);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.ingest_jobs.list(order_by="createdAt", order="desc", cursor_direction="forward", per_page=30)

                while res is not None:
                    # Handle items

                    res = res.next()
      parameters:
        # `style`/`explode` are Parameter Object fields; they have no effect
        # inside `schema`. Declared here so the array is sent comma-separated
        # (`statuses=A,B`), matching the description.
        - in: "query"
          name: "statuses"
          style: "form"
          explode: false
          schema:
            description: "Comma separated list of statuses to filter by."
            type: "array"
            items:
              $ref: "#/components/schemas/ingest-job-status"
        - in: "query"
          name: "orderBy"
          schema:
            default: "createdAt"
            type: "string"
            enum:
              - "createdAt"
            description: "The field to order by. Default is `createdAt`."
        - in: "query"
          name: "order"
          schema:
            default: "desc"
            type: "string"
            enum:
              - "asc"
              - "desc"
            description: "The sort order. Default is `desc`."
        - in: "query"
          name: "cursor"
          schema:
            $ref: "#/components/schemas/pagination-cursor"
        - in: "query"
          name: "cursorDirection"
          schema:
            default: "forward"
            $ref: "#/components/schemas/pagination-cursor-direction"
        - in: "query"
          name: "perPage"
          schema:
            default: 30
            type: "number"
            minimum: 1
            maximum: 100
            description: "The number of records to return per page."
            $ref: "#/components/schemas/pagination-per-page"
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        "200":
          description: "The retrieved ingest jobs"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "array"
                    items:
                      $ref: "#/components/schemas/ingest-job"
                  pagination:
                    type: "object"
                    properties:
                      nextCursor:
                        anyOf:
                          - type: "string"
                          - type: "null"
                      prevCursor:
                        anyOf:
                          - type: "string"
                          - type: "null"
                      hasMore:
                        type: "boolean"
                    required:
                      - "nextCursor"
                      - "prevCursor"
                      - "hasMore"
                    additionalProperties: false
                required:
                  - "success"
                  - "data"
                  - "pagination"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    post:
      operationId: "createIngestJob"
      x-speakeasy-name-override: "create"
      x-speakeasy-group: "ingestJobs"
      summary: "Create an ingest job"
      description: "Create an ingest job for the authenticated organization. You can control how documents are parsed and chunked using the optional `config` object (for example, chunk size, overlap, language, and advanced OCR/LLM options)."
      tags:
        - "Ingest Jobs"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst job = await ns.ingestion.create({\n payload: {\n type: \"TEXT\",\n text: \"This is some content to ingest into the knowledge base.\",\n },\n config: {\n metadata: {\n foo: \"bar\",\n },\n chunkSize: 2048,\n },\n});\nconsole.log(job);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.ingest_jobs.create(payload={
                    "type": "TEXT",
                    "text": "",
                })

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: "object"
              properties:
                name:
                  description: "The name of the ingest job."
                  anyOf:
                    - type: "string"
                    - type: "null"
                payload:
                  $ref: "#/components/schemas/ingest-job-payload-input"
                config:
                  $ref: "#/components/schemas/ingest-job-config"
                externalId:
                  default: null
                  description: "A unique external ID of the ingest job. You can use this to identify the ingest job in your system."
                  anyOf:
                    - type: "string"
                    - type: "null"
              required:
                - "payload"
      responses:
        "201":
          description: "The created ingest job"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/ingest-job"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Ingest jobs: single resource
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/ingest-jobs/{jobId}:
    get:
      operationId: "getIngestJobInfo"
      x-speakeasy-name-override: "get"
      x-speakeasy-group: "ingestJobs"
      summary: "Retrieve an ingest job"
      description: "Retrieve the info for an ingest job."
      tags:
        - "Ingest Jobs"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst job = await ns.ingestion.get(\"job_123\");\nconsole.log(job);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.ingest_jobs.get(job_id="job_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/JobIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        "200":
          description: "The retrieved ingest job"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/ingest-job"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    delete:
      operationId: "deleteIngestJob"
      x-speakeasy-name-override: "delete"
      x-speakeasy-group: "ingestJobs"
      x-speakeasy-max-method-params: 1
      summary: "Delete an ingest job"
      description: "Delete an ingest job for the authenticated organization."
      tags:
        - "Ingest Jobs"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nawait ns.ingestion.delete(\"job_123\");\nconsole.log(\"Ingest job queued for deletion\");\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.ingest_jobs.delete(job_id="job_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/JobIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        # NOTE(review): a 204 (No Content) response cannot carry a body, yet a
        # JSON schema is declared here. Confirm the status the server really
        # returns and either drop `content` or switch to 200/202.
        "204":
          description: "The deleted ingest job"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/ingest-job"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Ingest jobs: re-ingest action
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/ingest-jobs/{jobId}/re-ingest:
    post:
      operationId: "reIngestJob"
      x-speakeasy-name-override: "reIngest"
      x-speakeasy-group: "ingestJobs"
      x-speakeasy-max-method-params: 1
      summary: "Re-ingest a job"
      description: "Re-ingest a job for the authenticated organization."
      tags:
        - "Ingest Jobs"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst result = await ns.ingestion.reIngest(\"job_123\");\nconsole.log(\"Job queued for re-ingestion: \", result);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.ingest_jobs.re_ingest(job_id="job_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/JobIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        "200":
          description: "The re-ingested job"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  # Only the job ID is returned, not the full ingest-job object.
                  data:
                    type: "object"
                    properties:
                      id:
                        type: "string"
                        description: "The unique ID of the ingest job."
                    required:
                      - "id"
                    additionalProperties: false
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Documents: collection
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/documents:
    get:
      operationId: "listDocuments"
      x-speakeasy-name-override: "list"
      x-speakeasy-pagination:
        type: "cursor"
        inputs:
          - name: "cursor"
            in: "parameters"
            type: "cursor"
        outputs:
          nextCursor: "$.pagination.nextCursor"
      summary: "Retrieve a list of documents"
      description: "Retrieve a paginated list of documents for the authenticated organization."
      tags:
        - "Documents"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst docs = await ns.documents.all();\nconsole.log(docs);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.documents.list(order_by="createdAt", order="desc", cursor_direction="forward", per_page=30)

                while res is not None:
                    # Handle items

                    res = res.next()
      parameters:
        # `style`/`explode` are Parameter Object fields; they have no effect
        # inside `schema`. Declared here so the array is sent comma-separated
        # (`statuses=A,B`), matching the description.
        - in: "query"
          name: "statuses"
          style: "form"
          explode: false
          schema:
            description: "Comma separated list of statuses to filter by."
            type: "array"
            items:
              $ref: "#/components/schemas/document-status"
        - in: "query"
          name: "orderBy"
          schema:
            default: "createdAt"
            type: "string"
            enum:
              - "createdAt"
            description: "The field to order by. Default is `createdAt`."
        - in: "query"
          name: "order"
          schema:
            default: "desc"
            type: "string"
            enum:
              - "asc"
              - "desc"
            description: "The order to sort by. Default is `desc`."
        - in: "query"
          name: "ingestJobId"
          schema:
            type: "string"
            description: "The ingest job ID to filter documents by."
        - in: "query"
          name: "cursor"
          schema:
            $ref: "#/components/schemas/pagination-cursor"
        - in: "query"
          name: "cursorDirection"
          schema:
            default: "forward"
            $ref: "#/components/schemas/pagination-cursor-direction"
        - in: "query"
          name: "perPage"
          schema:
            default: 30
            type: "number"
            minimum: 1
            maximum: 100
            description: "The number of records to return per page."
            $ref: "#/components/schemas/pagination-per-page"
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        "200":
          description: "The retrieved documents"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "array"
                    items:
                      $ref: "#/components/schemas/document"
                  pagination:
                    type: "object"
                    properties:
                      nextCursor:
                        anyOf:
                          - type: "string"
                          - type: "null"
                      prevCursor:
                        anyOf:
                          - type: "string"
                          - type: "null"
                      hasMore:
                        type: "boolean"
                    required:
                      - "nextCursor"
                      - "prevCursor"
                      - "hasMore"
                    additionalProperties: false
                required:
                  - "success"
                  - "data"
                  - "pagination"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Documents: single resource
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/documents/{documentId}:
    get:
      operationId: "getDocument"
      x-speakeasy-name-override: "get"
      summary: "Retrieve a document"
      description: "Retrieve the info for a document."
      tags:
        - "Documents"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst document = await ns.documents.get(\"doc_123\");\nconsole.log(document);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.documents.get(document_id="doc_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/DocumentIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        "200":
          description: "The retrieved document"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/document"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
    delete:
      operationId: "deleteDocument"
      x-speakeasy-name-override: "delete"
      x-speakeasy-max-method-params: 1
      summary: "Delete a document"
      description: "Delete a document for the authenticated organization."
      tags:
        - "Documents"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nawait ns.documents.delete(\"doc_123\");\nconsole.log(\"Document queued for deletion\");\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.documents.delete(document_id="doc_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/DocumentIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      responses:
        # NOTE(review): a 204 (No Content) response cannot carry a body, yet a
        # JSON schema is declared here. Confirm the status the server really
        # returns and either drop `content` or switch to 200/202.
        "204":
          description: "The deleted document"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/document"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Documents: presigned download URLs
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/documents/{documentId}/chunks-download-url:
    post:
      operationId: "getChunksDownloadUrl"
      x-speakeasy-name-override: "getChunksDownloadUrl"
      summary: "Get chunks download URL"
      description: "Get a presigned download URL for a document's chunks. Only available for completed documents."
      tags:
        - "Documents"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst { url } = await ns.documents.getChunksDownloadUrl(\"doc_123\");\nconst data = await (await fetch(url)).json();\nconsole.log(data);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.documents.get_chunks_download_url(document_id="doc_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/DocumentIdRef"
      responses:
        "200":
          description: "The presigned download URL for the chunks"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "object"
                    properties:
                      url:
                        type: "string"
                    required:
                      - "url"
                    additionalProperties: false
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  /v1/namespace/{namespaceId}/documents/{documentId}/file-download-url:
    post:
      operationId: "getFileDownloadUrl"
      x-speakeasy-name-override: "getFileDownloadUrl"
      summary: "Get file download URL"
      description: "Get a presigned download URL for a document's source file. Only available for documents with source type MANAGED_FILE."
      tags:
        - "Documents"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\nimport fs from 'fs';\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst { url } = await ns.documents.getFileDownloadUrl(\"doc_123\");\nconst file = await fetch(url);\nfs.writeFileSync(\"file.pdf\", Buffer.from(await file.arrayBuffer()));\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.documents.get_file_download_url(document_id="doc_123")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/DocumentIdRef"
      responses:
        "200":
          description: "The presigned download URL for the file"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "object"
                    properties:
                      url:
                        type: "string"
                    required:
                      - "url"
                    additionalProperties: false
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Search
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/search:
    post:
      operationId: "search"
      x-speakeasy-name-override: "execute"
      x-speakeasy-group: "search"
      summary: "Search a namespace"
      description: "Search a namespace for a query."
      tags:
        - "Search"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst results = await ns.search(\"What is machine learning?\", {\n topK: 20,\n rerank: true,\n rerankLimit: 10,\n});\nconsole.log(results);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                x_tenant_id="",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.search.execute(query="", top_k=10, rerank=True, rerank_model="zeroentropy:zerank-2", include_relationships=False, include_metadata=True, mode="semantic")

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
        - $ref: "#/components/parameters/TenantIdRef"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: "object"
              properties:
                query:
                  type: "string"
                  description: "The query to search for."
                topK:
                  default: 10
                  description: "The number of results to fetch from the vector store. Defaults to `10`."
                  type: "number"
                  minimum: 1
                  maximum: 100
                rerank:
                  default: true
                  description: "Whether to rerank the results. Defaults to `true`."
                  type: "boolean"
                rerankLimit:
                  description: "The number of results to return after reranking. Defaults to `topK`."
                  type: "number"
                  minimum: 1
                  maximum: 100
                rerankModel:
                  default: "zeroentropy:zerank-2"
                  description: "The reranking model to use."
                  type: "string"
                  enum:
                    - "cohere:rerank-v4.0-pro"
                    - "cohere:rerank-v4.0-fast"
                    - "cohere:rerank-v3.5"
                    - "cohere:rerank-english-v3.0"
                    - "cohere:rerank-multilingual-v3.0"
                    - "zeroentropy:zerank-2"
                    - "zeroentropy:zerank-1"
                    - "zeroentropy:zerank-1-small"
                filter:
                  description: "A filter to apply to the results."
                  type: "object"
                  propertyNames:
                    type: "string"
                  additionalProperties: {}
                minScore:
                  description: "The minimum score to return."
                  type: "number"
                  minimum: 0
                  maximum: 1
                includeRelationships:
                  default: false
                  description: "Whether to include relationships in the results. Defaults to `false`."
                  type: "boolean"
                includeMetadata:
                  default: true
                  description: "Whether to include metadata in the results. Defaults to `true`."
                  type: "boolean"
                keywordFilter:
                  type: "string"
                mode:
                  default: "semantic"
                  type: "string"
                  enum:
                    - "semantic"
                    - "keyword"
              required:
                - "query"
      responses:
        "200":
          description: "The retrieved search results"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    type: "array"
                    items:
                      # Only `id` and `score` are guaranteed on each result.
                      type: "object"
                      properties:
                        id:
                          type: "string"
                        score:
                          type: "number"
                          minimum: 0
                          maximum: 1
                        text:
                          type: "string"
                        relationships:
                          type: "object"
                          propertyNames:
                            type: "string"
                          additionalProperties: {}
                        metadata:
                          type: "object"
                          propertyNames:
                            type: "string"
                          additionalProperties: {}
                      required:
                        - "id"
                        - "score"
                      additionalProperties: false
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
  # ------------------------------------------------------------------------
  # Uploads: single file
  # ------------------------------------------------------------------------
  /v1/namespace/{namespaceId}/uploads:
    post:
      operationId: "createUpload"
      x-speakeasy-name-override: "create"
      summary: "Create presigned URL for file upload"
      description: "Generate a presigned URL for uploading a single file to the specified namespace."
      tags:
        - "Uploads"
      security:
        - token: []
      x-codeSamples:
        - lang: "TypeScript"
          source: "\nimport { Agentset } from \"agentset\";\nimport fs from 'fs';\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst result = await ns.uploads.upload({\n file: fs.createReadStream(\"./example.md\"),\n contentType: \"text/markdown\",\n});\nconsole.log(\"Uploaded successfully: \", result.key);\n\n// OR get the pre-signed URL manually\nconst file = fs.readFileSync(\"./example.md\");\nconst result = await ns.uploads.create({\n fileName: \"example.md\",\n fileSize: file.length,\n contentType: \"text/markdown\",\n});\n\nawait fetch(result.url, {\n method: \"PUT\",\n body: file,\n headers: {\n \"Content-Type\": \"text/markdown\",\n },\n});\nconsole.log(\"Uploaded successfully: \", result.key);\n"
        - lang: python
          label: Python (SDK)
          source: |-
            from agentset import Agentset

            with Agentset(
                namespace_id="ns_123",
                token="AGENTSET_API_KEY",
            ) as a_client:
                res = a_client.uploads.create(file_name="document.pdf", content_type="application/pdf", file_size=1024)

                # Handle response
                print(res)
      parameters:
        - $ref: "#/components/parameters/NamespaceIdRef"
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/upload-file-schema"
      responses:
        "201":
          description: "Presigned URL generated successfully"
          content:
            application/json:
              schema:
                type: "object"
                properties:
                  success:
                    type: "boolean"
                    const: true
                  data:
                    $ref: "#/components/schemas/upload-result-schema"
                required:
                  - "success"
                  - "data"
                additionalProperties: false
        "400":
          $ref: "#/components/responses/400"
        "401":
          $ref: "#/components/responses/401"
        "403":
          $ref: "#/components/responses/403"
        "404":
          $ref: "#/components/responses/404"
        "409":
          $ref: "#/components/responses/409"
        "410":
          $ref: "#/components/responses/410"
        "422":
          $ref: "#/components/responses/422"
        "429":
          $ref: "#/components/responses/429"
        "500":
          $ref: "#/components/responses/500"
/v1/namespace/{namespaceId}/uploads/batch: post: operationId: "createBatchUpload" x-speakeasy-name-override: "createBatch" summary: "Create presigned URLs for batch file upload" description: "Generate presigned URLs for uploading multiple files to the specified namespace." tags: - "Uploads" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\nimport fs from 'fs';\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst results = await ns.uploads.uploadBatch([\n {\n file: fs.createReadStream(\"./example-1.md\"),\n contentType: \"text/markdown\",\n },\n {\n file: fs.createReadStream(\"./example-2.md\"),\n contentType: \"text/markdown\",\n },\n]);\nconsole.log(\"Uploaded successfully: \", results.map((result) => result.key));\n\n// OR get the pre-signed URLs manually\nconst file1 = fs.readFileSync(\"./example-1.md\");\nconst file2 = fs.readFileSync(\"./example-2.md\");\n\nconst results = await ns.uploads.createBatch({\n files: [\n {\n fileName: \"example-1.md\",\n fileSize: file1.length,\n contentType: \"text/markdown\",\n },\n {\n fileName: \"example-2.md\",\n fileSize: file2.length,\n contentType: \"text/markdown\",\n },\n ],\n});\n\nawait Promise.all([file1, file2].map(async (file, i) => {\n await fetch(results[i]!.url, {\n method: \"PUT\",\n body: file,\n headers: {\n \"Content-Type\": \"text/markdown\",\n },\n });\n}));\n\nconsole.log(\"Upload URLs:\", results.map((result) => result.key));\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", token="AGENTSET_API_KEY", ) as a_client: res = a_client.uploads.create_batch(files=[]) # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" requestBody: required: true content: application/json: schema: type: "object" properties: files: minItems: 1 maxItems: 100 type: "array" items: $ref: 
"#/components/schemas/upload-file-schema" required: - "files" responses: "201": description: "Presigned URLs generated successfully" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: type: "array" items: $ref: "#/components/schemas/upload-result-schema" required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" "410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" /v1/namespace/{namespaceId}/hosting: get: operationId: "getHosting" x-speakeasy-name-override: "get" summary: "Retrieve hosting configuration" description: "Retrieve the hosting configuration for a namespace." tags: - "Hosting" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst hosting = await ns.hosting.get();\nconsole.log(hosting);\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", token="AGENTSET_API_KEY", ) as a_client: res = a_client.hosting.get() # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" responses: "200": description: "The hosting configuration" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: $ref: "#/components/schemas/hosting" required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" 
"410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" post: operationId: "enableHosting" x-speakeasy-name-override: "enable" summary: "Enable hosting" description: "Enable hosting for a namespace." tags: - "Hosting" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst hosting = await ns.hosting.enable();\nconsole.log(hosting);\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", token="AGENTSET_API_KEY", ) as a_client: res = a_client.hosting.enable() # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" responses: "201": description: "The created hosting configuration" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: $ref: "#/components/schemas/hosting" required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" "410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" patch: operationId: "updateHosting" x-speakeasy-name-override: "update" x-speakeasy-max-method-params: 1 summary: "Update hosting configuration" description: "Update the hosting configuration for a namespace. If there is no change, return it as it is." 
tags: - "Hosting" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nconst updatedHosting = await ns.hosting.update({\n title: \"My Knowledge Base\",\n welcomeMessage: \"Welcome to my knowledge base!\",\n searchEnabled: true,\n});\nconsole.log(updatedHosting);\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", token="AGENTSET_API_KEY", ) as a_client: res = a_client.hosting.update() # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" requestBody: required: true content: application/json: schema: type: "object" properties: title: type: "string" minLength: 1 slug: type: "string" minLength: 2 maxLength: 48 logo: anyOf: - anyOf: - type: "string" pattern: "^data:image\\/(png|jpeg|jpg|gif|webp);base64," - type: "string" format: "uri" - type: "null" ogTitle: type: "string" maxLength: 70 ogDescription: type: "string" maxLength: 200 ogImage: anyOf: - anyOf: - type: "string" pattern: "^data:image\\/(png|jpeg|jpg|gif|webp);base64," - type: "string" format: "uri" - type: "null" protected: type: "boolean" allowedEmails: type: "array" items: type: "string" format: "email" pattern: "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$" allowedEmailDomains: type: "array" items: type: "string" systemPrompt: type: "string" exampleQuestions: maxItems: 4 type: "array" items: type: "string" exampleSearchQueries: maxItems: 4 type: "array" items: type: "string" welcomeMessage: type: "string" citationMetadataPath: type: "string" searchEnabled: type: "boolean" rerankModel: type: "string" enum: - "cohere:rerank-v4.0-pro" - "cohere:rerank-v4.0-fast" - "cohere:rerank-v3.5" - "cohere:rerank-english-v3.0" - "cohere:rerank-multilingual-v3.0" - "zeroentropy:zerank-2" - 
"zeroentropy:zerank-1" - "zeroentropy:zerank-1-small" llmModel: type: "string" enum: - "openai:gpt-4.1" - "openai:gpt-5.2" - "openai:gpt-5.1" - "openai:gpt-5" - "openai:gpt-5-mini" - "openai:gpt-5-nano" topK: type: "integer" minimum: 1 maximum: 100 rerankLimit: type: "integer" minimum: 1 maximum: 100 responses: "200": description: "The updated hosting configuration" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: $ref: "#/components/schemas/hosting" required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" "410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" delete: operationId: "deleteHosting" x-speakeasy-name-override: "delete" summary: "Delete hosting configuration" description: "Delete the hosting configuration for a namespace." 
tags: - "Hosting" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nawait ns.hosting.delete();\nconsole.log(\"Hosting deleted\");\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", token="AGENTSET_API_KEY", ) as a_client: res = a_client.hosting.delete() # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" responses: "204": description: "The deleted hosting configuration" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: $ref: "#/components/schemas/hosting" required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" "410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" /v1/namespace/{namespaceId}/warm-up: post: operationId: "warmUp" x-speakeasy-name-override: "warmUp" x-speakeasy-group: "namespace" summary: "Warm cache for a namespace" description: "Pre-loads the namespace into the vector store's cache for faster query performance. Not all vector stores support this operation. Currently only Turbopuffer supports this operation." 
tags: - "Namespaces" security: - token: [] x-codeSamples: - lang: "TypeScript" source: "\nimport { Agentset } from \"agentset\";\n\nconst agentset = new Agentset({ apiKey: 'agentset_xxx' });\nconst ns = agentset.namespace('ns_xxx');\n\nawait ns.warmUp();\nconsole.log(\"Cache warmed successfully\");\n" - lang: python label: Python (SDK) source: |- from agentset import Agentset with Agentset( namespace_id="ns_123", x_tenant_id="", token="AGENTSET_API_KEY", ) as a_client: res = a_client.namespace.warm_up() # Handle response print(res) parameters: - $ref: "#/components/parameters/NamespaceIdRef" - $ref: "#/components/parameters/TenantIdRef" responses: "200": description: "Cache warming started" content: application/json: schema: type: "object" properties: success: type: "boolean" const: true data: type: "object" properties: status: type: "boolean" required: - "status" additionalProperties: false required: - "success" - "data" additionalProperties: false "400": $ref: "#/components/responses/400" "401": $ref: "#/components/responses/401" "403": $ref: "#/components/responses/403" "404": $ref: "#/components/responses/404" "409": $ref: "#/components/responses/409" "410": $ref: "#/components/responses/410" "422": $ref: "#/components/responses/422" "429": $ref: "#/components/responses/429" "500": $ref: "#/components/responses/500" components: schemas: embedding-model-config: oneOf: - $ref: "#/components/schemas/openai-embedding-config" - $ref: "#/components/schemas/azure-embedding-config" - $ref: "#/components/schemas/voyage-embedding-config" - $ref: "#/components/schemas/google-embedding-config" - type: "object" properties: provider: type: "string" const: "MANAGED_OPENAI" model: type: "string" enum: - "text-embedding-3-large" required: - "provider" - "model" description: "The embedding model config. If not provided, our managed embedding model will be used. Note: You can't change the embedding model config after the namespace is created." 
type: "object" openai-embedding-config: type: "object" properties: provider: type: "string" const: "OPENAI" model: $ref: "#/components/schemas/openai-embedding-model-enum" apiKey: type: "string" required: - "provider" - "model" - "apiKey" title: "OpenAI Embedding Config" openai-embedding-model-enum: type: "string" enum: - "text-embedding-3-small" - "text-embedding-3-large" description: "The OpenAI embedding model enum." azure-embedding-config: type: "object" properties: provider: type: "string" const: "AZURE_OPENAI" model: $ref: "#/components/schemas/openai-embedding-model-enum" resourceName: type: "string" format: "uri" description: "The resource name of the Azure OpenAI API. https://{resourceName}.openai.azure.com/v1" examples: - "my-resource-name" deployment: type: "string" description: "The deployment name of the Azure OpenAI API." apiKey: type: "string" description: "The API key for the Azure OpenAI API." apiVersion: default: "preview" type: "string" description: "The API version for the Azure OpenAI API. Defaults to `preview`." required: - "provider" - "model" - "resourceName" - "deployment" - "apiKey" title: "Azure Embedding Config" voyage-embedding-config: type: "object" properties: provider: type: "string" const: "VOYAGE" model: $ref: "#/components/schemas/voyage-embedding-model-enum" apiKey: type: "string" required: - "provider" - "model" - "apiKey" title: "Voyage Embedding Config" voyage-embedding-model-enum: type: "string" enum: - "voyage-3-large" - "voyage-3" - "voyage-3-lite" - "voyage-code-3" - "voyage-finance-2" - "voyage-law-2" description: "The Voyage embedding model enum." 
# Embedding settings for the GOOGLE provider; provider, model and apiKey are all required.
google-embedding-config:
  type: "object"
  properties:
    provider:
      type: "string"
      const: "GOOGLE"
    model:
      $ref: "#/components/schemas/google-embedding-model-enum"
    apiKey:
      type: "string"
  required:
    - "provider"
    - "model"
    - "apiKey"
  title: "Google Embedding Config"

# Model names accepted by google-embedding-config.
google-embedding-model-enum:
  type: "string"
  enum:
    - "text-embedding-004"

# Vector store selection: one of two managed providers (provider only) or a
# bring-your-own Pinecone / Turbopuffer config.
# The description names MANAGED_TURBOPUFFER because that is the default declared
# on the createNamespace request body (`vectorStoreConfig.default.provider`).
create-vector-store-config:
  oneOf:
    - type: "object"
      properties:
        provider:
          type: "string"
          const: "MANAGED_PINECONE"
      required:
        - "provider"
    - type: "object"
      properties:
        provider:
          type: "string"
          const: "MANAGED_TURBOPUFFER"
      required:
        - "provider"
    - $ref: "#/components/schemas/pinecone-config"
    - $ref: "#/components/schemas/turbopuffer-config"
  description: "The vector store config. If not provided, our MANAGED_TURBOPUFFER vector store will be used. Note: You can't change the vector store config after the namespace is created."
  type: "object"

# Bring-your-own Pinecone index: API key plus the index host URI.
pinecone-config:
  type: "object"
  properties:
    provider:
      type: "string"
      const: "PINECONE"
    apiKey:
      type: "string"
      description: "The API key for the Pinecone index."
    indexHost:
      type: "string"
      format: "uri"
      description: "The host of the Pinecone index."
      example: "https://example.svc.aped-1234-a56b.pinecone.io"
  required:
    - "provider"
    - "apiKey"
    - "indexHost"
  title: "Pinecone Config"

# Bring-your-own Turbopuffer index: API key plus a region from turbopuffer-region-enum.
turbopuffer-config:
  type: "object"
  properties:
    provider:
      type: "string"
      const: "TURBOPUFFER"
    apiKey:
      type: "string"
      description: "The API key for the Turbopuffer index."
    region:
      $ref: "#/components/schemas/turbopuffer-region-enum"
  required:
    - "provider"
    - "apiKey"
    - "region"
  title: "Turbopuffer Config"

# Regions accepted by turbopuffer-config.
turbopuffer-region-enum:
  type: "string"
  enum:
    - "gcp-us-central1"
    - "gcp-us-west1"
    - "gcp-us-east4"
    - "gcp-northamerica-northeast2"
    - "gcp-europe-west3"
    - "gcp-asia-southeast1"
    # NOTE(review): previously "gcp-gcp-asia-northeast3" (doubled "gcp-" prefix).
    # Confirm the server-side validator accepts the corrected spelling.
    - "gcp-asia-northeast3"
    - "aws-eu-central-1"
    - "aws-eu-west-1"
    - "aws-us-east-1"
    - "aws-us-west-2"
    - "aws-ap-southeast-2"
    - "aws-us-east-2"
    - "aws-ap-south-1"
  description: "The region for the Turbopuffer index. Check https://turbopuffer.com/docs/regions"

# Lifecycle states of an ingest job (same value set as document-status).
ingest-job-status:
  type: "string"
  enum:
    - "BACKLOG"
    - "QUEUED"
    - "QUEUED_FOR_RESYNC"
    - "QUEUED_FOR_DELETE"
    - "PRE_PROCESSING"
    - "PROCESSING"
    - "DELETING"
    - "CANCELLING"
    - "COMPLETED"
    - "FAILED"
    - "CANCELLED"
  description: "The status of the ingest job."

# Shared pagination primitives.
pagination-cursor:
  type: "string"
  description: "The cursor to paginate by."
pagination-cursor-direction:
  type: "string"
  enum:
    - "forward"
    - "backward"
  description: "The direction to paginate by."
pagination-per-page:
  type: "number"
  description: "The number of records to return per page."

# Payload accepted when creating an ingest job; the `type` property selects the variant.
ingest-job-payload-input:
  oneOf:
    - $ref: "#/components/schemas/text-payload-input"
    - $ref: "#/components/schemas/file-payload"
    - $ref: "#/components/schemas/managed-file-payload"
    - $ref: "#/components/schemas/crawl-payload"
    - $ref: "#/components/schemas/youtube-payload"
    - $ref: "#/components/schemas/batch-payload-input"
  description: "The ingest job payload for creation."
  type: "object"
  discriminator:
    propertyName: "type"
    mapping:
      TEXT: "#/components/schemas/text-payload-input"
      FILE: "#/components/schemas/file-payload"
      MANAGED_FILE: "#/components/schemas/managed-file-payload"
      CRAWL: "#/components/schemas/crawl-payload"
      YOUTUBE: "#/components/schemas/youtube-payload"
      BATCH: "#/components/schemas/batch-payload-input"

# TEXT variant: inline text (at least one character) with an optional, nullable file name.
text-payload-input:
  type: "object"
  properties:
    type:
      type: "string"
      const: "TEXT"
    fileName:
      anyOf:
        - type: "string"
          description: "The name of the file."
        - type: "null"
    text:
      type: "string"
      minLength: 1
      description: "The text to ingest."
  required:
    - "type"
    - "text"
  title: "Text Payload"

# FILE variant: a file referenced by URL.
file-payload:
  type: "object"
  properties:
    type:
      type: "string"
      const: "FILE"
    fileUrl:
      type: "string"
      format: "uri"
      description: "The URL of the file to ingest."
    fileName:
      anyOf:
        - type: "string"
          description: "The name of the file."
        - type: "null"
  required:
    - "type"
    - "fileUrl"
  title: "URL Payload"

# MANAGED_FILE variant: a file referenced by its managed-storage key.
managed-file-payload:
  type: "object"
  properties:
    type:
      type: "string"
      const: "MANAGED_FILE"
    key:
      type: "string"
      description: "The key of the managed file to ingest."
    fileName:
      anyOf:
        - type: "string"
          description: "The name of the file."
        - type: "null"
  required:
    - "type"
    - "key"
  title: "Managed File Payload"

# CRAWL variant: a starting URL plus optional depth, page-count, path and header controls.
# The integer upper bounds equal 2^53 - 1 (9007199254740991).
crawl-payload:
  type: "object"
  properties:
    type:
      type: "string"
      const: "CRAWL"
    url:
      type: "string"
      format: "uri"
      description: "The starting URL to crawl."
    maxDepth:
      type: "integer"
      minimum: 1
      maximum: 9.007199254740991e+15
      description: "Maximum depth to follow links from the starting URL. Depth 1 means only the initial page. Defaults to `5`."
    limit:
      type: "integer"
      minimum: 1
      maximum: 9.007199254740991e+15
      description: "Maximum number of pages to crawl before stopping. Helps bound large sites. Defaults to `50`."
    includePaths:
      type: "array"
      items:
        type: "string"
      description: "Only crawl URLs whose path matches at least one of these prefixes."
    excludePaths:
      type: "array"
      items:
        type: "string"
      description: "Never crawl URLs whose path matches these prefixes."
    headers:
      type: "object"
      propertyNames:
        type: "string"
      additionalProperties:
        type: "string"
      description: "Custom HTTP headers to send with crawl requests (for example, auth headers)."
  required:
    - "type"
    - "url"
  title: "Crawl Payload"

# YOUTUBE variant: at least one URL, with optional transcript languages and metadata flag.
youtube-payload:
  type: "object"
  properties:
    type:
      type: "string"
      const: "YOUTUBE"
    urls:
      minItems: 1
      type: "array"
      items:
        type: "string"
        format: "uri"
      description: "The URLs of videos, channels, or playlists (hostname must be www.youtube.com or youtu.be)."
    transcriptLanguages:
      type: "array"
      items:
        $ref: "#/components/schemas/language-code"
      description: "We will try to fetch the first available transcript in the given languages. Default is `en`."
    includeMetadata:
      type: "boolean"
      description: "Whether to include metadata in the ingestion (like video description, tags, category, duration, etc...). Defaults to `false`."
  required:
    - "type"
    - "urls"
  title: "Youtube Payload"

# Two-letter language codes, referenced by youtube-payload and the config schemas.
# NOTE(review): "jp" and "kr" are not the ISO 639-1 codes for Japanese/Korean
# ("ja"/"ko"); confirm whether the upstream service expects these spellings.
language-code:
  type: "string"
  enum:
    - "af"
    - "am"
    - "ar"
    - "bg"
    - "bn"
    - "ca"
    - "cs"
    - "cy"
    - "da"
    - "de"
    - "en"
    - "es"
    - "et"
    - "fa"
    - "fi"
    - "fr"
    - "ga"
    - "gl"
    - "he"
    - "hi"
    - "hr"
    - "hu"
    - "id"
    - "is"
    - "it"
    - "jp"
    - "kr"
    - "lt"
    - "lv"
    - "mk"
    - "ms"
    - "mt"
    - "ne"
    - "nl"
    - "no"
    - "pl"
    - "pt"
    - "ro"
    - "ru"
    - "sk"
    - "sl"
    - "sr"
    - "sv"
    - "sw"
    - "ta"
    - "te"
    - "th"
    - "tl"
    - "tr"
    - "uk"
    - "ur"
    - "vi"
    - "zh"
    - "zu"

# BATCH variant: one or more TEXT / FILE / MANAGED_FILE items, each with an
# optional per-item document-config.
batch-payload-input:
  type: "object"
  properties:
    type:
      type: "string"
      const: "BATCH"
    items:
      minItems: 1
      type: "array"
      items:
        oneOf:
          - type: "object"
            properties:
              type:
                type: "string"
                const: "TEXT"
              fileName:
                anyOf:
                  - type: "string"
                    description: "The name of the file."
                  - type: "null"
              text:
                type: "string"
                minLength: 1
                description: "The text to ingest."
              config:
                $ref: "#/components/schemas/document-config"
            required:
              - "type"
              - "text"
          - type: "object"
            properties:
              type:
                type: "string"
                const: "FILE"
              fileUrl:
                type: "string"
                format: "uri"
                description: "The URL of the file to ingest."
              fileName:
                anyOf:
                  - type: "string"
                    description: "The name of the file."
                  - type: "null"
              config:
                $ref: "#/components/schemas/document-config"
            required:
              - "type"
              - "fileUrl"
          - type: "object"
            properties:
              type:
                type: "string"
                const: "MANAGED_FILE"
              key:
                type: "string"
                description: "The key of the managed file to ingest."
              fileName:
                anyOf:
                  - type: "string"
                    description: "The name of the file."
                  - type: "null"
              config:
                $ref: "#/components/schemas/document-config"
            required:
              - "type"
              - "key"
        type: "object"
      description: "The items to ingest."
  required:
    - "type"
    - "items"
  title: "Batch Payload Input"

# Per-document processing options; no property is required.
# Properties marked `deprecated: true` are kept only for backwards compatibility.
document-config:
  type: "object"
  properties:
    chunkSize:
      type: "integer"
      minimum: 32
      maximum: 9.007199254740991e+15
      description: "Chunk size (in characters). Controls approximately how much text is included in each chunk. Defaults to `2048`."
    delimiter:
      type: "string"
      description: "Delimiter to use for separating text before chunking."
    metadata:
      type: "object"
      propertyNames:
        type: "string"
      additionalProperties:
        anyOf:
          - type: "string"
          - type: "number"
          - type: "boolean"
          - type: "array"
            items:
              type: "string"
      description: "Custom metadata to be added to the ingested documents. It cannot contain nested objects; only string, number, boolean, and array of strings are allowed."
    languageCode:
      description: "Language code to use for text processing (for example, `en`, `ar`, or `fr`). When omitted, the partition API will attempt to detect the language automatically."
      $ref: "#/components/schemas/language-code"
    mode:
      $ref: "#/components/schemas/mode"
    disableImageExtraction:
      type: "boolean"
      description: "Disable image extraction from the document. When combined with `useLlm`, images may still be automatically captioned by the partition API. Defaults to `false`."
    disableImageCaptions:
      type: "boolean"
      description: "Disable synthetic image captions/descriptions in output. Images will be rendered as plain img tags without alt text. Defaults to `false`."
    chartUnderstanding:
      type: "boolean"
      description: "Enable chart understanding. This will extract the data from the charts in the document. Defaults to `false`."
    keepPageheaderInOutput:
      type: "boolean"
      description: "Keep the page header in the output. Defaults to `false`."
    keepPagefooterInOutput:
      type: "boolean"
      description: "Keep the page footer in the output. Defaults to `false`."
    forceOcr:
      type: "boolean"
      description: "Force OCR on the document even if selectable text exists. Useful for scanned documents with unreliable embedded text. Defaults to `false`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    disableOcrMath:
      type: "boolean"
      description: "Disable inline math recognition in OCR. This can be useful if the document contains content that is frequently misclassified as math. Defaults to `false`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    useLlm:
      type: "boolean"
      description: "Enable LLM-assisted parsing to improve tables, forms, inline math, and layout detection. May increase latency and token usage. Defaults to `true`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead."
    chunkOverlap:
      type: "number"
      description: "[Deprecated] Custom chunk overlap (in characters) between consecutive chunks. Helps preserve context across chunk boundaries."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead."
    maxChunkSize:
      type: "number"
      description: "[Deprecated] Hard chunk size. This option is ignored by the current partition pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead."
    chunkingStrategy:
      type: "string"
      enum:
        - "basic"
        - "by_title"
      description: "[Deprecated] The legacy chunking strategy. This option is ignored by the current partition pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    strategy:
      type: "string"
      enum:
        - "auto"
        - "fast"
        - "hi_res"
        - "ocr_only"
      description: "[Deprecated] Legacy processing strategy used by the previous partition API. This option is ignored by the current pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead."
  description: "The document config."

# Parser processing mode referenced by document-config and ingest-job-config.
mode:
  type: "string"
  enum:
    - "fast"
    - "balanced"
    - "accurate"
  description: "Processing mode for the parser. `fast` favors speed, `accurate` (pro subscription only) favors quality and layout fidelity, and `balanced` offers a compromise between the two. Defaults to `balanced`."

# Job-level processing options; declares the same properties as document-config.
ingest-job-config:
  type: "object"
  properties:
    chunkSize:
      type: "integer"
      minimum: 32
      maximum: 9.007199254740991e+15
      description: "Chunk size (in characters). Controls approximately how much text is included in each chunk. Defaults to `2048`."
    delimiter:
      type: "string"
      description: "Delimiter to use for separating text before chunking."
    metadata:
      type: "object"
      propertyNames:
        type: "string"
      additionalProperties:
        anyOf:
          - type: "string"
          - type: "number"
          - type: "boolean"
          - type: "array"
            items:
              type: "string"
      description: "Custom metadata to be added to the ingested documents. It cannot contain nested objects; only string, number, boolean, and array of strings are allowed."
    languageCode:
      description: "Language code to use for text processing (for example, `en`, `ar`, or `fr`). When omitted, the partition API will attempt to detect the language automatically."
      $ref: "#/components/schemas/language-code"
    mode:
      $ref: "#/components/schemas/mode"
    disableImageExtraction:
      type: "boolean"
      description: "Disable image extraction from the document. When combined with `useLlm`, images may still be automatically captioned by the partition API. Defaults to `false`."
    disableImageCaptions:
      type: "boolean"
      description: "Disable synthetic image captions/descriptions in output. Images will be rendered as plain img tags without alt text. Defaults to `false`."
    chartUnderstanding:
      type: "boolean"
      description: "Enable chart understanding. This will extract the data from the charts in the document. Defaults to `false`."
    keepPageheaderInOutput:
      type: "boolean"
      description: "Keep the page header in the output. Defaults to `false`."
    keepPagefooterInOutput:
      type: "boolean"
      description: "Keep the page footer in the output. Defaults to `false`."
    forceOcr:
      type: "boolean"
      description: "Force OCR on the document even if selectable text exists. Useful for scanned documents with unreliable embedded text. Defaults to `false`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    disableOcrMath:
      type: "boolean"
      description: "Disable inline math recognition in OCR. This can be useful if the document contains content that is frequently misclassified as math. Defaults to `false`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    useLlm:
      type: "boolean"
      description: "Enable LLM-assisted parsing to improve tables, forms, inline math, and layout detection. May increase latency and token usage. Defaults to `true`."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead."
    chunkOverlap:
      type: "number"
      description: "[Deprecated] Custom chunk overlap (in characters) between consecutive chunks. Helps preserve context across chunk boundaries."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead."
    maxChunkSize:
      type: "number"
      description: "[Deprecated] Hard chunk size. This option is ignored by the current partition pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead."
    chunkingStrategy:
      type: "string"
      enum:
        - "basic"
        - "by_title"
      description: "[Deprecated] The legacy chunking strategy. This option is ignored by the current partition pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option."
    strategy:
      type: "string"
      enum:
        - "auto"
        - "fast"
        - "hi_res"
        - "ocr_only"
      description: "[Deprecated] Legacy processing strategy used by the previous partition API. This option is ignored by the current pipeline and kept only for backwards compatibility."
      deprecated: true
      x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead."
  description: "The ingest job config."
# Lifecycle states a document can report.
document-status:
  type: "string"
  enum:
    - "BACKLOG"
    - "QUEUED"
    - "QUEUED_FOR_RESYNC"
    - "QUEUED_FOR_DELETE"
    - "PRE_PROCESSING"
    - "PROCESSING"
    - "DELETING"
    - "CANCELLING"
    - "COMPLETED"
    - "FAILED"
    - "CANCELLED"
  description: "The status of the document."

# Describes one file for which a presigned upload URL is requested.
# All three properties are required.
upload-file-schema:
  type: "object"
  properties:
    fileName:
      type: "string"
      minLength: 1
      description: "File name"
      examples:
        - "document.pdf"
    contentType:
      type: "string"
      description: "Content type"
      examples:
        - "application/pdf"
    fileSize:
      type: "number"
      minimum: 1
      # 2.097152e+08 = 209715200 bytes (200 * 1024 * 1024).
      maximum: 2.097152e+08
      description: "File size in bytes"
      examples:
        - 1024
  required:
    - "fileName"
    - "contentType"
    - "fileSize"

# Webhook envelope for document events: id, event name, timestamp and a
# `data` snapshot of the document.
DocumentWebhookEvent:
  type: "object"
  properties:
    id:
      type: "string"
    # One of seven fixed event names.
    event:
      anyOf:
        - type: "string"
          const: "document.queued"
        - type: "string"
          const: "document.queued_for_resync"
        - type: "string"
          const: "document.queued_for_deletion"
        - type: "string"
          const: "document.processing"
        - type: "string"
          const: "document.error"
        - type: "string"
          const: "document.ready"
        - type: "string"
          const: "document.deleted"
    createdAt:
      type: "string"
    data:
      type: "object"
      properties:
        id:
          type: "string"
          description: "Unique identifier for the document."
        name:
          anyOf:
            - type: "string"
            - type: "null"
          description: "Name of the document."
        namespaceId:
          type: "string"
          description: "ID of the namespace."
        organizationId:
          type: "string"
          description: "ID of the organization."
        status:
          description: "Current status of the document."
          $ref: "#/components/schemas/document-status"
        # Where the document came from; each variant is tagged by a `type` const.
        source:
          oneOf:
            - type: "object"
              properties:
                type:
                  type: "string"
                  const: "TEXT"
                text:
                  type: "string"
                  description: "The text to ingest."
              required:
                - "type"
                - "text"
            - type: "object"
              properties:
                type:
                  type: "string"
                  const: "FILE"
                fileUrl:
                  type: "string"
                  format: "uri"
                  description: "The URL of the file to ingest."
              required:
                - "type"
                - "fileUrl"
            - type: "object"
              properties:
                type:
                  type: "string"
                  const: "MANAGED_FILE"
                key:
                  type: "string"
                  description: "The key of the managed file to ingest."
              required:
                - "type"
                - "key"
            - type: "object"
              properties:
                type:
                  type: "string"
                  const: "CRAWLED_PAGE"
                title:
                  description: "The title of the crawled page."
                  type: "string"
                description:
                  description: "The description of the crawled page."
                  type: "string"
                language:
                  description: "The language of the crawled page."
                  type: "string"
              required:
                - "type"
            - type: "object"
              properties:
                type:
                  type: "string"
                  const: "YOUTUBE_VIDEO"
                videoId:
                  type: "string"
                  description: "The ID of the youtube video."
                duration:
                  description: "The duration of the youtube video in seconds."
                  type: "number"
              required:
                - "type"
                - "videoId"
          description: "Source configuration of the document."
          type: "object"
        # The three totals and `error` are nullable and not in `required`.
        totalCharacters:
          description: "Total characters in the document."
          anyOf:
            - type: "number"
            - type: "null"
        totalChunks:
          description: "Total chunks created from the document."
          anyOf:
            - type: "number"
            - type: "null"
        totalPages:
          description: "Total pages in the document."
          anyOf:
            - type: "number"
            - type: "null"
        error:
          description: "Error message if document processing failed."
          anyOf:
            - type: "string"
            - type: "null"
        createdAt:
          type: "string"
          description: "When the document was created."
        updatedAt:
          type: "string"
          description: "When the document was last updated."
      required:
        - "id"
        - "name"
        - "namespaceId"
        - "organizationId"
        - "status"
        - "source"
        - "createdAt"
        - "updatedAt"
  required:
    - "id"
    - "event"
    - "createdAt"
    - "data"
  description: "Triggered when a document is queued, processed, ready, or deleted."
IngestJobWebhookEvent: type: "object" properties: id: type: "string" event: anyOf: - type: "string" const: "ingest_job.queued" - type: "string" const: "ingest_job.queued_for_resync" - type: "string" const: "ingest_job.queued_for_deletion" - type: "string" const: "ingest_job.processing" - type: "string" const: "ingest_job.error" - type: "string" const: "ingest_job.ready" - type: "string" const: "ingest_job.deleted" createdAt: type: "string" data: type: "object" properties: id: type: "string" description: "Unique identifier for the ingest job." name: anyOf: - type: "string" - type: "null" description: "Name of the ingest job." namespaceId: type: "string" description: "ID of the namespace." organizationId: type: "string" description: "ID of the organization." status: description: "Current status of the ingest job." $ref: "#/components/schemas/ingest-job-status" error: description: "Error message if ingest job failed." anyOf: - type: "string" - type: "null" createdAt: type: "string" description: "When the ingest job was created." updatedAt: type: "string" description: "When the ingest job was last updated." required: - "id" - "name" - "namespaceId" - "organizationId" - "status" - "createdAt" - "updatedAt" required: - "id" - "event" - "createdAt" - "data" description: "Triggered when an ingest job is queued, processed, ready, or deleted." namespace: type: "object" properties: id: type: "string" description: "The unique ID of the namespace." name: type: "string" description: "The name of the namespace." slug: type: "string" description: "The slug of the namespace." organizationId: type: "string" description: "The ID of the organization that owns the namespace." createdAt: description: "The date and time the namespace was created." 
type: "string" embeddingConfig: default: null anyOf: - $ref: "#/components/schemas/embedding-model-configOutput" - type: "null" vectorStoreConfig: default: null anyOf: - $ref: "#/components/schemas/vector-store-config" - type: "null" required: - "id" - "name" - "slug" - "organizationId" - "createdAt" - "embeddingConfig" - "vectorStoreConfig" additionalProperties: false title: "Namespace" vector-store-config: oneOf: - type: "object" properties: provider: type: "string" const: "MANAGED_PINECONE_OLD" required: - "provider" additionalProperties: false - type: "object" properties: provider: type: "string" const: "MANAGED_PINECONE" required: - "provider" additionalProperties: false - type: "object" properties: provider: type: "string" const: "MANAGED_TURBOPUFFER" required: - "provider" additionalProperties: false - $ref: "#/components/schemas/pinecone-configOutput" - $ref: "#/components/schemas/turbopuffer-configOutput" description: "The vector store config." type: "object" ingest-job: type: "object" properties: id: type: "string" description: "The unique ID of the ingest job." name: description: "The name of the ingest job." anyOf: - type: "string" - type: "null" namespaceId: type: "string" description: "The namespace ID of the ingest job." tenantId: default: null description: "The tenant ID of the ingest job." anyOf: - type: "string" - type: "null" externalId: default: null description: "A unique external ID of the ingest job. You can use this to identify the ingest job in your system." anyOf: - type: "string" - type: "null" status: $ref: "#/components/schemas/ingest-job-status" error: default: null description: "The error message of the ingest job. Only exists when the status is failed." anyOf: - type: "string" - type: "null" payload: $ref: "#/components/schemas/ingest-job-payload" config: default: null anyOf: - $ref: "#/components/schemas/ingest-job-configOutput" - type: "null" createdAt: description: "The date and time the ingest job was created."
type: "string" queuedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the ingest job was queued." preProcessingAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the ingest job was pre-processed." processingAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the ingest job was processed." completedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the ingest job was completed." failedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the ingest job failed." required: - "id" - "namespaceId" - "tenantId" - "externalId" - "status" - "error" - "payload" - "config" - "createdAt" - "queuedAt" - "preProcessingAt" - "processingAt" - "completedAt" - "failedAt" additionalProperties: false title: "Ingest Job" ingest-job-payload: oneOf: - $ref: "#/components/schemas/text-payload" - $ref: "#/components/schemas/file-payloadOutput" - $ref: "#/components/schemas/managed-file-payloadOutput" - $ref: "#/components/schemas/crawl-payloadOutput" - $ref: "#/components/schemas/youtube-payloadOutput" - $ref: "#/components/schemas/batch-payload" description: "The ingest job payload." type: "object" discriminator: propertyName: "type" mapping: TEXT: "#/components/schemas/text-payload" FILE: "#/components/schemas/file-payloadOutput" MANAGED_FILE: "#/components/schemas/managed-file-payloadOutput" CRAWL: "#/components/schemas/crawl-payloadOutput" YOUTUBE: "#/components/schemas/youtube-payloadOutput" BATCH: "#/components/schemas/batch-payload" text-payload: type: "object" properties: type: type: "string" const: "TEXT" fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" text: type: "string" description: "The text to ingest." 
required: - "type" - "text" additionalProperties: false title: "Text Payload" batch-payload: type: "object" properties: type: type: "string" const: "BATCH" items: minItems: 1 type: "array" items: oneOf: - type: "object" properties: type: type: "string" const: "TEXT" fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" text: type: "string" description: "The text to ingest." config: $ref: "#/components/schemas/document-configOutput" required: - "type" - "text" additionalProperties: false - type: "object" properties: type: type: "string" const: "FILE" fileUrl: type: "string" format: "uri" description: "The URL of the file to ingest." fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" config: $ref: "#/components/schemas/document-configOutput" required: - "type" - "fileUrl" additionalProperties: false - type: "object" properties: type: type: "string" const: "MANAGED_FILE" key: type: "string" description: "The key of the managed file to ingest." fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" config: $ref: "#/components/schemas/document-configOutput" required: - "type" - "key" additionalProperties: false type: "object" description: "The items to ingest." required: - "type" - "items" additionalProperties: false title: "Batch Payload" document: type: "object" properties: id: type: "string" description: "The unique ID of the document." ingestJobId: type: "string" description: "The ingest job ID of the document." name: default: null description: "The name of the document." anyOf: - type: "string" - type: "null" tenantId: default: null description: "The tenant ID of the document." anyOf: - type: "string" - type: "null" status: $ref: "#/components/schemas/document-status" error: default: null description: "The error message of the document. Only exists when the status is failed."
anyOf: - type: "string" - type: "null" source: oneOf: - type: "object" properties: type: type: "string" const: "TEXT" text: type: "string" description: "The text to ingest." required: - "type" - "text" additionalProperties: false - type: "object" properties: type: type: "string" const: "FILE" fileUrl: type: "string" format: "uri" description: "The URL of the file to ingest." required: - "type" - "fileUrl" additionalProperties: false - type: "object" properties: type: type: "string" const: "MANAGED_FILE" key: type: "string" description: "The key of the managed file to ingest." required: - "type" - "key" additionalProperties: false - type: "object" properties: type: type: "string" const: "CRAWLED_PAGE" title: description: "The title of the crawled page." type: "string" description: description: "The description of the crawled page." type: "string" language: description: "The language of the crawled page." type: "string" required: - "type" additionalProperties: false - type: "object" properties: type: type: "string" const: "YOUTUBE_VIDEO" videoId: type: "string" description: "The ID of the youtube video." duration: description: "The duration of the youtube video in seconds." type: "number" required: - "type" - "videoId" additionalProperties: false description: "The source of the document." type: "object" properties: default: null anyOf: - type: "object" properties: fileSize: type: "number" description: "The size of the file in bytes." mimeType: default: null anyOf: - type: "string" description: "The MIME type of the file." - type: "null" required: - "fileSize" - "mimeType" additionalProperties: false description: "The properties of the document." - type: "null" config: default: null anyOf: - $ref: "#/components/schemas/document-configOutput" - type: "null" totalChunks: type: "number" description: "The total number of chunks." totalTokens: type: "number" description: "The total number of tokens." 
totalCharacters: type: "number" description: "The total number of characters." totalPages: type: "number" description: "The total number of pages. Will be 0 if the document has no pages (i.e. it is not a paged format such as PDF)." createdAt: description: "The date and time the document was created." type: "string" queuedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the document was queued." preProcessingAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the document was pre-processed." processingAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the document was processed." completedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the document was completed." failedAt: default: null anyOf: - type: "string" - type: "null" description: "The date and time the document failed." required: - "id" - "ingestJobId" - "name" - "tenantId" - "status" - "error" - "source" - "properties" - "config" - "totalChunks" - "totalTokens" - "totalCharacters" - "totalPages" - "createdAt" - "queuedAt" - "preProcessingAt" - "processingAt" - "completedAt" - "failedAt" additionalProperties: false title: "Document" upload-result-schema: type: "object" properties: url: type: "string" format: "uri" description: "Presigned URL for file upload. Make a `PUT` request to this URL with the file content and the `Content-Type` header." key: type: "string" description: "Key of the file in the storage. You'll send this in the `MANAGED_FILE` payload when creating an ingest job." required: - "url" - "key" additionalProperties: false title: "Upload Result" hosting: type: "object" properties: namespaceId: type: "string" description: "The ID of the namespace this hosting belongs to." title: default: null description: "The title displayed on the hosted interface."
anyOf: - type: "string" - type: "null" slug: default: null description: "The unique slug for accessing the hosted interface." anyOf: - type: "string" - type: "null" logo: default: null description: "The URL or base64 encoded image of the logo." anyOf: - type: "string" - type: "null" ogTitle: default: null description: "Custom Open Graph title for social media sharing." anyOf: - type: "string" - type: "null" ogDescription: default: null description: "Custom Open Graph description for social media sharing." anyOf: - type: "string" - type: "null" ogImage: default: null description: "Custom Open Graph image URL for social media sharing." anyOf: - type: "string" - type: "null" systemPrompt: default: null description: "The system prompt used for the chat interface." anyOf: - type: "string" - type: "null" exampleQuestions: default: [] description: "Example questions to display to users in the chat interface." type: "array" items: type: "string" exampleSearchQueries: default: [] description: "Example search queries to display to users in the search interface." type: "array" items: type: "string" welcomeMessage: default: null description: "Welcome message displayed to users." anyOf: - type: "string" - type: "null" citationMetadataPath: default: null description: "Path to metadata field used for citations." anyOf: - type: "string" - type: "null" searchEnabled: default: true description: "Whether search functionality is enabled." type: "boolean" rerankConfig: description: "Configuration for the reranking model." llmConfig: description: "Configuration for the LLM model." topK: default: 50 description: "Number of documents to retrieve from vector store." type: "integer" minimum: 1 maximum: 100 protected: default: true description: "Whether the hosted interface is protected by authentication." type: "boolean" allowedEmails: default: [] description: "List of allowed email addresses (when protected is true)." 
type: "array" items: type: "string" allowedEmailDomains: default: [] description: "List of allowed email domains (when protected is true)." type: "array" items: type: "string" createdAt: description: "The date and time the hosting was created." type: "string" updatedAt: description: "The date and time the hosting was last updated." type: "string" required: - "namespaceId" - "title" - "slug" - "logo" - "ogTitle" - "ogDescription" - "ogImage" - "systemPrompt" - "exampleQuestions" - "exampleSearchQueries" - "welcomeMessage" - "citationMetadataPath" - "searchEnabled" - "rerankConfig" - "llmConfig" - "topK" - "protected" - "allowedEmails" - "allowedEmailDomains" - "createdAt" - "updatedAt" additionalProperties: false title: "Hosting" embedding-model-configOutput: oneOf: - $ref: "#/components/schemas/openai-embedding-configOutput" - $ref: "#/components/schemas/azure-embedding-configOutput" - $ref: "#/components/schemas/voyage-embedding-configOutput" - $ref: "#/components/schemas/google-embedding-configOutput" - type: "object" properties: provider: type: "string" const: "MANAGED_OPENAI" model: type: "string" enum: - "text-embedding-3-large" required: - "provider" - "model" additionalProperties: false description: "The embedding model config. If not provided, our managed embedding model will be used. Note: You can't change the embedding model config after the namespace is created." type: "object" openai-embedding-configOutput: type: "object" properties: provider: type: "string" const: "OPENAI" model: $ref: "#/components/schemas/openai-embedding-model-enum" apiKey: type: "string" required: - "provider" - "model" - "apiKey" additionalProperties: false title: "OpenAI Embedding Config" azure-embedding-configOutput: type: "object" properties: provider: type: "string" const: "AZURE_OPENAI" model: $ref: "#/components/schemas/openai-embedding-model-enum" resourceName: type: "string" format: "uri" description: "The resource name of the Azure OpenAI API. 
https://{resourceName}.openai.azure.com/v1" examples: - "my-resource-name" deployment: type: "string" description: "The deployment name of the Azure OpenAI API." apiKey: type: "string" description: "The API key for the Azure OpenAI API." apiVersion: default: "preview" type: "string" description: "The API version for the Azure OpenAI API. Defaults to `preview`." required: - "provider" - "model" - "resourceName" - "deployment" - "apiKey" additionalProperties: false title: "Azure Embedding Config" voyage-embedding-configOutput: type: "object" properties: provider: type: "string" const: "VOYAGE" model: $ref: "#/components/schemas/voyage-embedding-model-enum" apiKey: type: "string" required: - "provider" - "model" - "apiKey" additionalProperties: false title: "Voyage Embedding Config" google-embedding-configOutput: type: "object" properties: provider: type: "string" const: "GOOGLE" model: $ref: "#/components/schemas/google-embedding-model-enum" apiKey: type: "string" required: - "provider" - "model" - "apiKey" additionalProperties: false title: "Google Embedding Config" pinecone-configOutput: type: "object" properties: provider: type: "string" const: "PINECONE" apiKey: type: "string" description: "The API key for the Pinecone index." indexHost: type: "string" format: "uri" description: "The host of the Pinecone index." example: "https://example.svc.aped-1234-a56b.pinecone.io" required: - "provider" - "apiKey" - "indexHost" additionalProperties: false title: "Pinecone Config" turbopuffer-configOutput: type: "object" properties: provider: type: "string" const: "TURBOPUFFER" apiKey: type: "string" description: "The API key for the Turbopuffer index." region: $ref: "#/components/schemas/turbopuffer-region-enum" required: - "provider" - "apiKey" - "region" additionalProperties: false title: "Turbopuffer Config" file-payloadOutput: type: "object" properties: type: type: "string" const: "FILE" fileUrl: type: "string" format: "uri" description: "The URL of the file to ingest." 
fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" required: - "type" - "fileUrl" additionalProperties: false title: "URL Payload" managed-file-payloadOutput: type: "object" properties: type: type: "string" const: "MANAGED_FILE" key: type: "string" description: "The key of the managed file to ingest." fileName: anyOf: - type: "string" description: "The name of the file." - type: "null" required: - "type" - "key" additionalProperties: false title: "Managed File Payload" crawl-payloadOutput: type: "object" properties: type: type: "string" const: "CRAWL" url: type: "string" format: "uri" description: "The starting URL to crawl." maxDepth: type: "integer" minimum: 1 maximum: 9.007199254740991e+15 description: "Maximum depth to follow links from the starting URL. Depth 1 means only the initial page. Defaults to `5`." limit: type: "integer" minimum: 1 maximum: 9.007199254740991e+15 description: "Maximum number of pages to crawl before stopping. Helps bound large sites. Defaults to `50`." includePaths: type: "array" items: type: "string" description: "Only crawl URLs whose path matches at least one of these prefixes." excludePaths: type: "array" items: type: "string" description: "Never crawl URLs whose path matches these prefixes." headers: type: "object" propertyNames: type: "string" additionalProperties: type: "string" description: "Custom HTTP headers to send with crawl requests (for example, auth headers)." required: - "type" - "url" additionalProperties: false title: "Crawl Payload" youtube-payloadOutput: type: "object" properties: type: type: "string" const: "YOUTUBE" urls: minItems: 1 type: "array" items: type: "string" format: "uri" description: "The URLs of videos, channels, or playlists (hostname must be www.youtube.com or youtu.be)." transcriptLanguages: type: "array" items: $ref: "#/components/schemas/language-code" description: "We will try to fetch the first available transcript in the given languages. Default is `en`." 
includeMetadata: type: "boolean" description: "Whether to include metadata in the ingestion (like video description, tags, category, duration, etc...). Defaults to `false`." required: - "type" - "urls" additionalProperties: false title: "Youtube Payload" document-configOutput: type: "object" properties: chunkSize: type: "integer" minimum: 32 maximum: 9.007199254740991e+15 description: "Chunk size (in characters). Controls approximately how much text is included in each chunk. Defaults to `2048`." delimiter: type: "string" description: "Delimiter to use for separating text before chunking." metadata: type: "object" propertyNames: type: "string" additionalProperties: anyOf: - type: "string" - type: "number" - type: "boolean" - type: "array" items: type: "string" description: "Custom metadata to be added to the ingested documents. It cannot contain nested objects; only string, number, boolean, and array of strings are allowed." languageCode: description: "Language code to use for text processing (for example, `en`, `ar`, or `fr`). When omitted, the partition API will attempt to detect the language automatically." $ref: "#/components/schemas/language-code" mode: $ref: "#/components/schemas/mode" disableImageExtraction: type: "boolean" description: "Disable image extraction from the document. When combined with `useLlm`, images may still be automatically captioned by the partition API. Defaults to `false`." disableImageCaptions: type: "boolean" description: "Disable synthetic image captions/descriptions in output. Images will be rendered as plain img tags without alt text. Defaults to `false`." chartUnderstanding: type: "boolean" description: "Enable chart understanding. This will extract the data from the charts in the document. Defaults to `false`." keepPageheaderInOutput: type: "boolean" description: "Keep the page header in the output. Defaults to `false`." keepPagefooterInOutput: type: "boolean" description: "Keep the page footer in the output. 
Defaults to `false`." forceOcr: type: "boolean" description: "Force OCR on the document even if selectable text exists. Useful for scanned documents with unreliable embedded text. Defaults to `false`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." disableOcrMath: type: "boolean" description: "Disable inline math recognition in OCR. This can be useful if the document contains content that is frequently misclassified as math. Defaults to `false`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." useLlm: type: "boolean" description: "Enable LLM-assisted parsing to improve tables, forms, inline math, and layout detection. May increase latency and token usage. Defaults to `true`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead." chunkOverlap: type: "number" description: "[Deprecated] Custom chunk overlap (in characters) between consecutive chunks. Helps preserve context across chunk boundaries." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead." maxChunkSize: type: "number" description: "[Deprecated] Hard chunk size. This option is ignored by the current partition pipeline and kept only for backwards compatibility." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead." chunkingStrategy: type: "string" enum: - "basic" - "by_title" description: "[Deprecated] The legacy chunking strategy. This option is ignored by the current partition pipeline and kept only for backwards compatibility." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." strategy: type: "string" enum: - "auto" - "fast" - "hi_res" - "ocr_only" description: "[Deprecated] Legacy processing strategy used by the previous partition API. This option is ignored by the current pipeline and kept only for backwards compatibility." 
deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead." additionalProperties: false description: "The document config." ingest-job-configOutput: type: "object" properties: chunkSize: type: "integer" minimum: 32 maximum: 9.007199254740991e+15 description: "Chunk size (in characters). Controls approximately how much text is included in each chunk. Defaults to `2048`." delimiter: type: "string" description: "Delimiter to use for separating text before chunking." metadata: type: "object" propertyNames: type: "string" additionalProperties: anyOf: - type: "string" - type: "number" - type: "boolean" - type: "array" items: type: "string" description: "Custom metadata to be added to the ingested documents. It cannot contain nested objects; only string, number, boolean, and array of strings are allowed." languageCode: description: "Language code to use for text processing (for example, `en`, `ar`, or `fr`). When omitted, the partition API will attempt to detect the language automatically." $ref: "#/components/schemas/language-code" mode: $ref: "#/components/schemas/mode" disableImageExtraction: type: "boolean" description: "Disable image extraction from the document. When combined with `useLlm`, images may still be automatically captioned by the partition API. Defaults to `false`." disableImageCaptions: type: "boolean" description: "Disable synthetic image captions/descriptions in output. Images will be rendered as plain img tags without alt text. Defaults to `false`." chartUnderstanding: type: "boolean" description: "Enable chart understanding. This will extract the data from the charts in the document. Defaults to `false`." keepPageheaderInOutput: type: "boolean" description: "Keep the page header in the output. Defaults to `false`." keepPagefooterInOutput: type: "boolean" description: "Keep the page footer in the output. Defaults to `false`." 
forceOcr: type: "boolean" description: "Force OCR on the document even if selectable text exists. Useful for scanned documents with unreliable embedded text. Defaults to `false`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." disableOcrMath: type: "boolean" description: "Disable inline math recognition in OCR. This can be useful if the document contains content that is frequently misclassified as math. Defaults to `false`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." useLlm: type: "boolean" description: "Enable LLM-assisted parsing to improve tables, forms, inline math, and layout detection. May increase latency and token usage. Defaults to `true`." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead." chunkOverlap: type: "number" description: "[Deprecated] Custom chunk overlap (in characters) between consecutive chunks. Helps preserve context across chunk boundaries." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead." maxChunkSize: type: "number" description: "[Deprecated] Hard chunk size. This option is ignored by the current partition pipeline and kept only for backwards compatibility." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `chunkSize` instead." chunkingStrategy: type: "string" enum: - "basic" - "by_title" description: "[Deprecated] The legacy chunking strategy. This option is ignored by the current partition pipeline and kept only for backwards compatibility." deprecated: true x-speakeasy-deprecation-message: "We no longer support this option." strategy: type: "string" enum: - "auto" - "fast" - "hi_res" - "ocr_only" description: "[Deprecated] Legacy processing strategy used by the previous partition API. This option is ignored by the current pipeline and kept only for backwards compatibility." 
deprecated: true x-speakeasy-deprecation-message: "We no longer support this option. Use `mode` instead." additionalProperties: false description: "The ingest job config." WebhookEvent: anyOf: - $ref: "#/components/schemas/DocumentWebhookEvent" - $ref: "#/components/schemas/IngestJobWebhookEvent" description: "Webhook event schema" x-speakeasy-include: true responses: "400": description: "The server cannot or will not process the request due to something that is perceived to be a client error (e.g., malformed request syntax, invalid request message framing, or deceptive request routing)." content: application/json: schema: x-speakeasy-name-override: "BadRequest" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "bad_request" description: "A short code indicating the error code returned." example: "bad_request" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The request is malformed or contains invalid parameters." doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#bad-request" required: - "code" - "message" required: - "success" - "error" "401": description: "Although the HTTP standard specifies \"unauthorized\", semantically this response means \"unauthenticated\". That is, the client must authenticate itself to get the requested response." content: application/json: schema: x-speakeasy-name-override: "Unauthorized" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "unauthorized" description: "A short code indicating the error code returned." example: "unauthorized" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The API key is missing or invalid."
doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#unauthorized" required: - "code" - "message" required: - "success" - "error" "403": description: "The client does not have access rights to the content; that is, it is unauthorized, so the server is refusing to give the requested resource. Unlike 401 Unauthorized, the client's identity is known to the server." content: application/json: schema: x-speakeasy-name-override: "Forbidden" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "forbidden" description: "A short code indicating the error code returned." example: "forbidden" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "You do not have permission to access this resource." doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#forbidden" required: - "code" - "message" required: - "success" - "error" "404": description: "The server cannot find the requested resource." content: application/json: schema: x-speakeasy-name-override: "NotFound" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "not_found" description: "A short code indicating the error code returned." example: "not_found" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found."
doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#not-found" required: - "code" - "message" required: - "success" - "error" "409": description: "This response is sent when a request conflicts with the current state of the server." content: application/json: schema: x-speakeasy-name-override: "Conflict" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "conflict" description: "A short code indicating the error code returned." example: "conflict" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found." doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#conflict" required: - "code" - "message" required: - "success" - "error" "410": description: "This response is sent when the requested content has been permanently deleted from server, with no forwarding address." content: application/json: schema: x-speakeasy-name-override: "InviteExpired" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "invite_expired" description: "A short code indicating the error code returned." example: "invite_expired" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found." 
doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#invite-expired" required: - "code" - "message" required: - "success" - "error" "422": description: "The request was well-formed but was unable to be followed due to semantic errors." content: application/json: schema: x-speakeasy-name-override: "UnprocessableEntity" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "unprocessable_entity" description: "A short code indicating the error code returned." example: "unprocessable_entity" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found." doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#unprocessable-entity" required: - "code" - "message" required: - "success" - "error" "429": description: "The user has sent too many requests in a given amount of time (\"rate limiting\")" content: application/json: schema: x-speakeasy-name-override: "RateLimitExceeded" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "rate_limit_exceeded" description: "A short code indicating the error code returned." example: "rate_limit_exceeded" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found." 
doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#rate-limit_exceeded" required: - "code" - "message" required: - "success" - "error" "500": description: "The server has encountered a situation it does not know how to handle." content: application/json: schema: x-speakeasy-name-override: "InternalServerError" type: "object" properties: success: type: "boolean" example: false error: type: "object" properties: code: type: "string" enum: - "internal_server_error" description: "A short code indicating the error code returned." example: "internal_server_error" message: x-speakeasy-error-message: true type: "string" description: "A human readable explanation of what went wrong." example: "The requested resource was not found." doc_url: type: "string" description: "A link to our documentation with more details about this error code" example: "https://docs.agentset.ai/api-reference/errors#internal-server_error" required: - "code" - "message" required: - "success" - "error" parameters: NamespaceIdRef: in: "path" name: "namespaceId" schema: type: "string" examples: - "ns_123" description: "The id of the namespace (prefixed with ns_)" x-speakeasy-globals-hidden: true required: true description: "The id of the namespace (prefixed with ns_)" TenantIdRef: in: "header" name: "x-tenant-id" schema: description: "Optional tenant id to use for the request. If not provided, the namespace will be used directly. Must be alphanumeric and up to 64 characters." type: "string" pattern: "^[A-Za-z0-9]{1,64}$" description: "Optional tenant id to use for the request. If not provided, the namespace will be used directly. Must be alphanumeric and up to 64 characters." 
# components.parameters (continued) and components.securitySchemes,
# re-indented from the whitespace-collapsed source.
    # Required path parameter carrying a job id.
    JobIdRef:
      name: jobId
      in: path
      required: true
      description: 'The id of the job (prefixed with job_)'
      schema:
        type: string
        description: 'The id of the job (prefixed with job_)'
        examples:
          - job_123
    # Required path parameter carrying a document id.
    DocumentIdRef:
      name: documentId
      in: path
      required: true
      description: 'The id of the document (prefixed with doc_)'
      schema:
        type: string
        description: 'The id of the document (prefixed with doc_)'
        examples:
          - doc_123
  securitySchemes:
    # HTTP bearer authentication; operations reference it as
    # `security: - token: []`.
    token:
      type: http
      scheme: bearer
      description: 'Default authentication mechanism'
      x-speakeasy-example: AGENTSET_API_KEY