Skip to main content

Prerequisites

pip install pydataframer tenacity

Full example

This script creates generation rules from a text description, generates 5 samples, and prints a presigned URL from which the results can be downloaded as a ZIP.
from dataframer import Dataframer
from tenacity import retry, retry_if_result, stop_never, wait_fixed

# Single API client shared by every call in this script.
client = Dataframer()  # uses DATAFRAMER_API_KEY env var

def not_finished(result):
    """Return True while *result* has not reached a terminal status.

    Used as the tenacity retry predicate: polling continues until
    ``result.status`` is either "SUCCEEDED" or "FAILED".
    """
    reached_terminal_state = result.status in ("SUCCEEDED", "FAILED")
    return not reached_terminal_state

# 1. Describe the desired dataset in plain text. From this
#    description the platform builds a spec: what properties
#    to vary and in what proportions.
objectives = (
    "Generate realistic customer support tickets "
    "about billing issues, login problems, and "
    "feature requests. Each ticket should include "
    "a subject line and a message body."
)
spec = client.dataframer.specs.create(
    generation_objectives=objectives,
    name="Customer Support Tickets",
)
print(f"Created spec: {spec.id}")

# 2. Wait for spec generation to complete (1-2 min)
@retry(
    retry=retry_if_result(not_finished),
    wait=wait_fixed(5),
    stop=stop_never,
)
def poll_spec(spec_id):
    """Fetch the current state of the spec identified by *spec_id*.

    The retry decorator re-invokes this every 5 seconds for as long as
    not_finished() is true for the returned object. There is no retry
    limit (stop_never), so the call blocks until a terminal status.
    """
    latest = client.dataframer.specs.retrieve(spec_id)
    return latest

# Block until the spec reaches a terminal status.
spec_result = poll_spec(spec.id)
# Raise explicitly instead of using assert: assert statements are
# removed when Python runs with -O, which would let a failed spec
# slip through to the following steps unnoticed.
if spec_result.status != "SUCCEEDED":
    raise RuntimeError(f"Spec failed: {spec_result.error}")

# 3. Start a run that generates samples from the spec
sample_count = 5
run = client.dataframer.runs.create(
    number_of_samples=sample_count,
    spec_id=spec.id,
)
print(f"Started run: {run.id}")

# 4. Wait for generation to complete
@retry(
    retry=retry_if_result(not_finished),
    wait=wait_fixed(10),
    stop=stop_never,
)
def poll_run(run_id):
    """Fetch the current state of the run identified by *run_id*.

    The retry decorator re-invokes this every 10 seconds for as long as
    not_finished() is true for the returned object. There is no retry
    limit (stop_never), so the call blocks until a terminal status.
    """
    latest = client.dataframer.runs.retrieve(run_id)
    return latest

# Block until the run reaches a terminal status.
run_result = poll_run(run.id)
# Raise explicitly instead of using assert: assert statements are
# removed when Python runs with -O, which would let a failed run
# be reported as if it had generated samples.
if run_result.status != "SUCCEEDED":
    raise RuntimeError("Run failed")
print(f"Generated {run_result.samples_completed} samples")

# 5. Download results as ZIP. First call triggers ZIP
#    generation; poll until the presigned URL is ready.
def download_not_ready(result):
    """Return True while *result* carries no usable download URL.

    A result counts as "not ready" when it has no ``download_url``
    attribute at all, or when that attribute is None.
    """
    return getattr(result, "download_url", None) is None

@retry(
    retry=retry_if_result(download_not_ready),
    wait=wait_fixed(2),
    stop=stop_never,
)
def poll_download(run_id):
    """Request the download-all result for the run *run_id*.

    The retry decorator re-invokes this every 2 seconds for as long as
    download_not_ready() is true for the returned object, with no
    retry limit (stop_never).
    """
    run_files = client.dataframer.runs.files
    return run_files.download_all(run_id)

# Blocks until poll_download() returns an object whose download_url
# is set, then prints that URL (the file itself is not fetched here).
result = poll_download(run.id)
print(f"Download URL: {result.download_url}")

Next steps