> ## Documentation Index
> Fetch the complete documentation index at: https://docs.dataframer.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Create evaluation

> Start a new evaluation for a completed run

<Note>
  **Async operation**: This endpoint returns immediately with an evaluation ID. Poll `GET /api/dataframer/evaluations/{evaluation_id}/` until status is `SUCCEEDED` or `FAILED`.
</Note>

The run must be in `SUCCEEDED` status before an evaluation can be created.


## OpenAPI

````yaml POST /api/dataframer/evaluations/
openapi: 3.0.0
info:
  title: DataFramer API
  version: 0.1.0
  description: ''
  termsOfService: https://www.aimon.ai/docs/privacy-policy.pdf
  contact:
    name: DataFramer Support
    email: info@dataframer.ai
  license:
    name: Proprietary
  x-logo:
    url: https://dataframer.ai/logo.png
    altText: DataFramer AI
  x-stainless:
    package-name: aimon-dataframer
    namespace:
      - aimon
      - dataframer
servers:
  - url: https://df-api.dataframer.ai
    description: Production server
security:
  - BearerAuth: []
tags:
  - name: Seed Datasets
    description: Manage seed datasets for generation
  - name: Specs
    description: Data specifications for sample generation
  - name: Runs
    description: Generation runs and results
  - name: Evaluations
    description: Evaluate generated sample quality
  - name: Red Teaming
    description: Security testing and adversarial prompts
  - name: Spec Creation
    description: Create specs from datasets or from scratch (seedless)
  - name: Generation
    description: Synthetic data generation
  - name: API Keys
    description: API key management and rotation
  - name: Health
    description: Health check endpoints
  - name: Models
    description: Available AI models
externalDocs:
  description: Complete API Guide
  url: https://docs.dataframer.ai/dataframer
paths:
  /api/dataframer/evaluations/:
    post:
      tags:
        - Evaluations
      summary: Create evaluation
      description: >-
        Start a new evaluation for a completed run.


        **Async operation**: This endpoint returns immediately with an
        evaluation ID. Poll `GET /api/dataframer/evaluations/{evaluation_id}/`
        until status is `SUCCEEDED` or `FAILED`.


        The run must be in `SUCCEEDED` status before an evaluation can be
        created.
      operationId: api_dataframer_evaluations_create
      # Request payload for creating an evaluation. The body is validated
      # against EvaluationCreate: `run_id` is required, `evaluation_model` is
      # optional and restricted to that schema's enum.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/EvaluationCreate'
            examples:
              default_model:
                summary: Use default model
                value:
                  run_id: a98715da-921d-4326-bbf8-208f8bcc2956
              custom_model:
                summary: Specify evaluation model
                value:
                  run_id: a98715da-921d-4326-bbf8-208f8bcc2956
                  # Must be a member of the EvaluationCreate.evaluation_model
                  # enum, otherwise the example itself fails schema validation.
                  evaluation_model: anthropic/claude-sonnet-4-6
      responses:
        '202':
          description: Evaluation started successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Evaluation'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                type: object
                properties:
                  run_id:
                    type: array
                    items:
                      type: string
                    example:
                      - This field is required.
                  evaluation_model:
                    type: array
                    items:
                      type: string
                    example:
                      - 'Unsupported evaluation model: invalid-model'
                  error:
                    type: string
                    example: Can only evaluate completed runs
        '401':
          description: Authentication credentials were not provided
        '402':
          description: Payment required - subscription inactive
        '404':
          description: Run not found
        '500':
          description: Internal server error
      x-codeSamples:
        - lang: JavaScript
          source: |-
            import Dataframer from 'dataframer';

            const client = new Dataframer({
              apiKey: process.env['DATAFRAMER_API_KEY'], // This is the default and can be omitted
            });

            const evaluation = await client.dataframer.evaluations.create({
              run_id: 'a98715da-921d-4326-bbf8-208f8bcc2956',
            });

            console.log(evaluation.id);
        - lang: Python
          source: |-
            import os
            from dataframer import Dataframer

            client = Dataframer(
                api_key=os.environ.get("DATAFRAMER_API_KEY"),  # This is the default and can be omitted
            )
            evaluation = client.dataframer.evaluations.create(
                run_id="a98715da-921d-4326-bbf8-208f8bcc2956",
            )
            print(evaluation.id)
components:
  schemas:
    # Request schema referenced by the requestBody of
    # POST /api/dataframer/evaluations/.
    EvaluationCreate:
      type: object
      description: Request body for creating an evaluation
      required:
        - run_id
      properties:
        run_id:
          type: string
          format: uuid
          description: >-
            ID of the completed run to evaluate. Run must be in SUCCEEDED
            status.
        # Optional. The model named in the description must match `default`
        # below and be one of the `enum` values.
        evaluation_model:
          type: string
          description: >-
            AI model to use for evaluation. Defaults to
            anthropic/claude-sonnet-4-6-thinking.
          enum:
            - anthropic/claude-opus-4-6
            - anthropic/claude-opus-4-6-thinking
            - anthropic/claude-sonnet-4-6
            - anthropic/claude-sonnet-4-6-thinking
            - anthropic/claude-haiku-4-5
            - anthropic/claude-haiku-4-5-thinking
            - openai/gpt-5.4
            - openai/gpt-5.4-thinking
          default: anthropic/claude-sonnet-4-6-thinking
    # Evaluation resource. This is the schema of the 202 response body of the
    # create-evaluation operation.
    Evaluation:
      type: object
      description: >-
        Full evaluation details including distribution analysis and sample
        classifications
      properties:
        id:
          type: string
          format: uuid
          readOnly: true
          description: Unique identifier for the evaluation
        run_id:
          type: string
          format: uuid
          readOnly: true
          description: ID of the run being evaluated
        # Lifecycle state. The create operation's description tells clients to
        # poll until this is SUCCEEDED or FAILED.
        status:
          type: string
          enum:
            - PENDING
            - PROCESSING
            - SUCCEEDED
            - FAILED
          description: Current status of the evaluation
        # Result fields below are nullable; their descriptions state they stay
        # null until the evaluation completes.
        conformance_score:
          type: number
          nullable: true
          description: >-
            Overall conformance score (0-100) measuring how well generated
            samples match the spec's expected distributions. Null until
            evaluation completes.
        conformance_explanation:
          type: string
          nullable: true
          description: >-
            Human-readable explanation of the conformance score and any notable
            deviations
        distribution_analysis:
          type: array
          nullable: true
          description: >-
            Per-property comparison of expected vs observed distributions. Null
            until evaluation completes.
          items:
            $ref: '#/components/schemas/DistributionAnalysisItem'
        # Not nullable, unlike distribution_analysis: described as an empty
        # array (rather than null) before completion.
        sample_classifications:
          type: array
          readOnly: true
          description: >-
            Classification results for each generated sample. Empty until
            evaluation completes.
          items:
            $ref: '#/components/schemas/SampleClassification'
        started_at:
          type: string
          format: date-time
          nullable: true
          readOnly: true
          description: When evaluation processing started
        completed_at:
          type: string
          format: date-time
          nullable: true
          readOnly: true
          description: When evaluation completed
        error_message:
          type: string
          nullable: true
          readOnly: true
          description: Error message if evaluation failed
        created_by_email:
          type: string
          readOnly: true
          description: Email of the user who created the evaluation
        created_at:
          type: string
          format: date-time
          readOnly: true
          description: When the evaluation was created
        duration_seconds:
          type: number
          nullable: true
          readOnly: true
          description: Time taken to complete the evaluation in seconds
        company_id:
          type: string
          format: uuid
          readOnly: true
          description: ID of the company that owns this evaluation
        status_display:
          type: string
          readOnly: true
          description: Human-readable status display
        conformant_areas:
          type: string
          nullable: true
          readOnly: true
          description: Description of areas where samples conform well to the spec
        non_conformant_areas:
          type: string
          nullable: true
          readOnly: true
          description: Description of areas where samples deviate from the spec
        # Free-form object: any keys are allowed (additionalProperties: true).
        trace:
          type: object
          nullable: true
          readOnly: true
          description: >-
            Internal trace information including task_id and evaluation model
            used
          additionalProperties: true
        # Integer user ID, unlike the UUID identifiers used elsewhere in this
        # schema.
        created_by:
          type: integer
          readOnly: true
          description: ID of the user who created this evaluation
        updated_at:
          type: string
          format: date-time
          readOnly: true
          description: When the evaluation was last updated
    # Item schema of Evaluation.distribution_analysis: one entry per analyzed
    # property. Each *_distributions field is a map from property value to a
    # numeric percentage.
    DistributionAnalysisItem:
      type: object
      description: Distribution comparison for a single property
      properties:
        property_name:
          type: string
          description: Name of the property being analyzed
        total_samples:
          type: integer
          description: Total number of samples in the run
        requested_distributions:
          type: object
          description: Target percentage for each property value (from the spec)
          additionalProperties:
            type: number
          example:
            positive: 40
            negative: 30
            neutral: 30
        expected_distributions:
          type: object
          description: >-
            Percentage for each property value actually achievable given the
            sample count (from generation-time sampling)
          additionalProperties:
            type: number
          example:
            positive: 42
            negative: 30
            neutral: 28
        evaluated_distributions:
          type: object
          description: >-
            Percentage for each property value as classified by an LLM evaluator
            post-generation
          additionalProperties:
            type: number
          example:
            positive: 45
            negative: 28
            neutral: 27
        # Flagged with the OpenAPI `deprecated` keyword so generated SDKs and
        # rendered docs surface the deprecation, not only the prose description.
        observed_distributions:
          type: object
          deprecated: true
          description: Deprecated alias for evaluated_distributions
          additionalProperties:
            type: number
        total_samples_analyzed:
          type: integer
          description: >-
            Number of samples that were successfully classified for this
            property
    # Item schema of Evaluation.sample_classifications: one entry per
    # generated sample.
    SampleClassification:
      type: object
      description: Classification results for a single generated sample
      properties:
        id:
          type: string
          format: uuid
          readOnly: true
          description: Unique identifier for this classification
        evaluation_id:
          type: string
          format: uuid
          readOnly: true
          description: ID of the parent evaluation
        sample_identifier:
          type: string
          description: >-
            Identifier for the sample (matches the generated sample name from
            the run)
        # String-to-string map: keys are property names, values are the
        # classified value for that property (see the example).
        classifications:
          type: object
          description: Map of property names to their classified values for this sample
          additionalProperties:
            type: string
          example:
            sentiment: positive
            topic: technology
            length: medium
        created_at:
          type: string
          format: date-time
          readOnly: true
          description: When this classification was created
  # HTTP bearer scheme referenced by the document-level `security` requirement
  # (BearerAuth), so it applies to every operation that does not override it.
  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      # bearerFormat is a free-text hint; here the token is an API key.
      bearerFormat: API Key
      description: 'API Key authentication. Format: "Bearer YOUR_API_KEY"'

````