Prerequisites
- Python 3.9+
- A Dataframer API key (see Getting access)
Copy
pip install pydataframer tenacity
Full example
This script creates generation rules from a text description, generates 5 samples, and retrieves a download URL for the results as a ZIP.
from dataframer import Dataframer
from tenacity import retry, retry_if_result, stop_never, wait_fixed
# Shared SDK client used by every step below; the API key is taken from
# the DATAFRAMER_API_KEY environment variable.
client = Dataframer()
def not_finished(result):
    """Return True while *result* has not reached a terminal status.

    Used as the tenacity retry predicate: polling continues until the
    object's ``status`` attribute is either "SUCCEEDED" or "FAILED".
    """
    return result.status not in ("SUCCEEDED", "FAILED")
# 1. Create generation rules from a text description.
#    From this description the platform builds a spec: what properties
#    to vary and in what proportions.
ticket_objectives = (
    "Generate realistic customer support tickets "
    "about billing issues, login problems, and "
    "feature requests. Each ticket should include "
    "a subject line and a message body."
)
spec = client.dataframer.specs.create(
    name="Customer Support Tickets",
    generation_objectives=ticket_objectives,
)
print(f"Created spec: {spec.id}")
# 2. Wait for spec generation to complete (1-2 min)
@retry(
    wait=wait_fixed(5),
    retry=retry_if_result(not_finished),
    stop=stop_never,
)
def poll_spec(spec_id):
    """Fetch the current state of the spec identified by *spec_id*.

    The ``@retry`` decorator re-invokes this function every 5 seconds for
    as long as ``not_finished`` returns True for the fetched object, with
    no upper bound on the number of attempts (``stop_never``).
    """
    return client.dataframer.specs.retrieve(spec_id)
# Block until the spec reaches a terminal status, then stop if it did not
# succeed. An explicit raise is used rather than `assert` so the check is
# not stripped when Python runs with -O.
spec_result = poll_spec(spec.id)
if spec_result.status != "SUCCEEDED":
    raise RuntimeError(f"Spec failed: {spec_result.error}")
# 3. Generate samples according to the spec.
#    Starts a run that produces 5 samples from the spec created above.
run = client.dataframer.runs.create(spec_id=spec.id, number_of_samples=5)
print(f"Started run: {run.id}")
# 4. Wait for generation to complete
@retry(
    wait=wait_fixed(10),
    retry=retry_if_result(not_finished),
    stop=stop_never,
)
def poll_run(run_id):
    """Fetch the current state of the run identified by *run_id*.

    The ``@retry`` decorator re-invokes this function every 10 seconds
    for as long as ``not_finished`` returns True for the fetched object,
    with no upper bound on the number of attempts (``stop_never``).
    """
    return client.dataframer.runs.retrieve(run_id)
# Block until the run reaches a terminal status, then stop if it did not
# succeed. An explicit raise is used rather than `assert` so the check is
# not stripped when Python runs with -O.
run_result = poll_run(run.id)
if run_result.status != "SUCCEEDED":
    raise RuntimeError("Run failed")
print(f"Generated {run_result.samples_completed} samples")
# 5. Download results as ZIP. First call triggers ZIP
#    generation; poll until the presigned URL is ready.
def download_not_ready(result):
    """Return True while *result* carries no usable download URL.

    The URL counts as missing both when the ``download_url`` attribute is
    absent and when it is present but set to None.
    """
    return getattr(result, "download_url", None) is None
@retry(
    wait=wait_fixed(2),
    retry=retry_if_result(download_not_ready),
    stop=stop_never,
)
def poll_download(run_id):
    """Request the ZIP download for the run identified by *run_id*.

    The ``@retry`` decorator repeats the request every 2 seconds for as
    long as ``download_not_ready`` returns True for the response, with no
    upper bound on the number of attempts (``stop_never``).
    """
    return client.dataframer.runs.files.download_all(run_id)


# Poll until the response carries a download URL, then print it.
result = poll_download(run.id)
print(f"Download URL: {result.download_url}")

