
Python Complete Workflow

import requests
import time
from pathlib import Path

class DataframerClient:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://df-api.dataframer.ai/api/dataframer"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
    
    def create_dataset(self, name, file_path, dataset_type="SINGLE_FILE"):
        """Create a new dataset"""
        with open(file_path, "rb") as f:
            response = requests.post(
                f"{self.base_url}/datasets/create/",
                headers={"Authorization": f"Bearer {self.api_key}"},
                data={
                    "name": name,
                    "dataset_type": dataset_type
                },
                files={"file": f}
            )
        response.raise_for_status()
        return response.json()
    
    def create_spec(self, dataset_id, name):
        """Generate specification from dataset"""
        response = requests.post(
            f"{self.base_url}/analyze/",
            headers=self.headers,
            json={
                "dataset_id": dataset_id,
                "name": name
            }
        )
        response.raise_for_status()
        return response.json()["task_id"]
    
    def wait_for_analysis(self, task_id, timeout=600):
        """Wait for specification analysis to complete"""
        start = time.time()
        while time.time() - start < timeout:
            response = requests.get(
                f"{self.base_url}/analyze/status/{task_id}",
                headers=self.headers
            )
            response.raise_for_status()
            result = response.json()
            
            if result["status"] == "COMPLETED":
                return result["spec_id"]
            elif result["status"] == "FAILED":
                raise Exception(f"Analysis failed: {result.get('error')}")
            
            print(f"Progress: {result.get('progress', 0)}%")
            time.sleep(30)
        
        raise TimeoutError("Analysis timeout")
    
    def generate_samples(self, spec_id, num_samples, sample_type="short"):
        """Generate synthetic samples"""
        response = requests.post(
            f"{self.base_url}/generate/",
            headers=self.headers,
            json={
                "spec_id": spec_id,
                "number_of_samples": num_samples,
                "sample_type": sample_type
            }
        )
        response.raise_for_status()
        return response.json()["task_id"]
    
    def wait_for_generation(self, task_id, timeout=3600):
        """Wait for sample generation to complete"""
        start = time.time()
        while time.time() - start < timeout:
            response = requests.get(
                f"{self.base_url}/generate/status/{task_id}",
                headers=self.headers
            )
            response.raise_for_status()
            result = response.json()
            
            if result["status"] == "SUCCEEDED":
                return True
            elif result["status"] == "FAILED":
                raise Exception(f"Generation failed: {result.get('error')}")
            
            progress = result.get('progress', 0)
            completed = result.get('completed_samples', 0)
            total = result.get('total_samples', 0)
            print(f"Progress: {progress}% ({completed}/{total})")
            time.sleep(10)
        
        raise TimeoutError("Generation timeout")
    
    def download_samples(self, task_id, output_file):
        """Download generated samples as ZIP"""
        response = requests.get(
            f"{self.base_url}/generate/retrieve/{task_id}",
            headers=self.headers
        )
        response.raise_for_status()
        
        with open(output_file, "wb") as f:
            f.write(response.content)
        
        return output_file

# Usage
if __name__ == "__main__":
    client = DataframerClient("your_api_key")
    
    # 1. Create dataset
    print("Creating dataset...")
    dataset = client.create_dataset(
        name="Customer Reviews",
        file_path="reviews.csv"
    )
    print(f"Created dataset: {dataset['id']}")
    
    # 2. Generate specification
    print("\nGenerating specification...")
    task_id = client.create_spec(dataset['id'], "Review Spec")
    spec_id = client.wait_for_analysis(task_id)
    print(f"Specification ready: {spec_id}")
    
    # 3. Generate samples
    print("\nGenerating samples...")
    gen_task_id = client.generate_samples(spec_id, num_samples=20)
    client.wait_for_generation(gen_task_id)
    print("Generation completed!")
    
    # 4. Download results
    print("\nDownloading samples...")
    output = client.download_samples(gen_task_id, "generated_samples.zip")
    print(f"Samples saved to: {output}")

Node.js Complete Workflow

const axios = require('axios');
const fs = require('fs');
const FormData = require('form-data');

class DataframerClient {
  constructor(apiKey) {
    this.apiKey = apiKey;
    this.baseURL = 'https://df-api.dataframer.ai/api/dataframer';
    this.headers = {
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json'
    };
  }

  async createDataset(name, filePath, datasetType = 'SINGLE_FILE') {
    const formData = new FormData();
    formData.append('name', name);
    formData.append('dataset_type', datasetType);
    formData.append('file', fs.createReadStream(filePath));

    const response = await axios.post(
      `${this.baseURL}/datasets/create/`,
      formData,
      {
        headers: {
          'Authorization': `Bearer ${this.apiKey}`,
          ...formData.getHeaders()
        }
      }
    );
    return response.data;
  }

  async createSpec(datasetId, name) {
    const response = await axios.post(
      `${this.baseURL}/analyze/`,
      { dataset_id: datasetId, name },
      { headers: this.headers }
    );
    return response.data.task_id;
  }

  async waitForAnalysis(taskId, timeout = 600000) {
    const start = Date.now();
    while (Date.now() - start < timeout) {
      const response = await axios.get(
        `${this.baseURL}/analyze/status/${taskId}`,
        { headers: this.headers }
      );
      const result = response.data;

      if (result.status === 'COMPLETED') {
        return result.spec_id;
      } else if (result.status === 'FAILED') {
        throw new Error(`Analysis failed: ${result.error}`);
      }

      console.log(`Progress: ${result.progress || 0}%`);
      await new Promise(resolve => setTimeout(resolve, 30000));
    }
    throw new Error('Analysis timeout');
  }

  async generateSamples(specId, numSamples, sampleType = 'short') {
    const response = await axios.post(
      `${this.baseURL}/generate/`,
      {
        spec_id: specId,
        number_of_samples: numSamples,
        sample_type: sampleType
      },
      { headers: this.headers }
    );
    return response.data.task_id;
  }

  async waitForGeneration(taskId, timeout = 3600000) {
    const start = Date.now();
    while (Date.now() - start < timeout) {
      const response = await axios.get(
        `${this.baseURL}/generate/status/${taskId}`,
        { headers: this.headers }
      );
      const result = response.data;

      if (result.status === 'SUCCEEDED') {
        return true;
      } else if (result.status === 'FAILED') {
        throw new Error(`Generation failed: ${result.error}`);
      }

      const { progress = 0, completed_samples = 0, total_samples = 0 } = result;
      console.log(`Progress: ${progress}% (${completed_samples}/${total_samples})`);
      await new Promise(resolve => setTimeout(resolve, 10000));
    }
    throw new Error('Generation timeout');
  }

  async downloadSamples(taskId, outputFile) {
    const response = await axios.get(
      `${this.baseURL}/generate/retrieve/${taskId}`,
      {
        headers: this.headers,
        responseType: 'arraybuffer'
      }
    );

    fs.writeFileSync(outputFile, response.data);
    return outputFile;
  }
}

// Usage
async function main() {
  const client = new DataframerClient('your_api_key');

  try {
    // 1. Create dataset
    console.log('Creating dataset...');
    const dataset = await client.createDataset(
      'Customer Reviews',
      'reviews.csv'
    );
    console.log(`Created dataset: ${dataset.id}`);

    // 2. Generate specification
    console.log('\nGenerating specification...');
    const taskId = await client.createSpec(dataset.id, 'Review Spec');
    const specId = await client.waitForAnalysis(taskId);
    console.log(`Specification ready: ${specId}`);

    // 3. Generate samples
    console.log('\nGenerating samples...');
    const genTaskId = await client.generateSamples(specId, 20);
    await client.waitForGeneration(genTaskId);
    console.log('Generation completed!');

    // 4. Download results
    console.log('\nDownloading samples...');
    const output = await client.downloadSamples(genTaskId, 'generated_samples.zip');
    console.log(`Samples saved to: ${output}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

main();

cURL Workflow Script

#!/bin/bash

set -e  # Exit on error

API_KEY="your_api_key"
BASE_URL="https://df-api.dataframer.ai/api/dataframer"

# 1. Create dataset
echo "Creating dataset..."
DATASET_RESPONSE=$(curl -s -X POST "${BASE_URL}/datasets/create/" \
  -H "Authorization: Bearer ${API_KEY}" \
  -F "name=Customer Reviews" \
  -F "dataset_type=SINGLE_FILE" \
  -F "[email protected]")

DATASET_ID=$(echo "$DATASET_RESPONSE" | jq -r '.id')
echo "Dataset created: $DATASET_ID"

# 2. Generate specification
echo -e "\nGenerating specification..."
ANALYZE_RESPONSE=$(curl -s -X POST "${BASE_URL}/analyze/" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d "{\"dataset_id\": \"$DATASET_ID\", \"name\": \"Review Spec\"}")

TASK_ID=$(echo "$ANALYZE_RESPONSE" | jq -r '.task_id')
echo "Analysis task: $TASK_ID"

# 3. Wait for analysis
echo "Waiting for analysis..."
while true; do
  STATUS_RESPONSE=$(curl -s -X GET "${BASE_URL}/analyze/status/${TASK_ID}" \
    -H "Authorization: Bearer ${API_KEY}")
  
  STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status')
  PROGRESS=$(echo "$STATUS_RESPONSE" | jq -r '.progress // 0')
  
  echo "Status: $STATUS, Progress: ${PROGRESS}%"
  
  if [ "$STATUS" = "COMPLETED" ]; then
    SPEC_ID=$(echo "$STATUS_RESPONSE" | jq -r '.spec_id')
    echo "Specification ready: $SPEC_ID"
    break
  elif [ "$STATUS" = "FAILED" ]; then
    echo "Analysis failed!"
    exit 1
  fi
  
  sleep 30
done

# 4. Generate samples
echo -e "\nGenerating samples..."
GEN_RESPONSE=$(curl -s -X POST "${BASE_URL}/generate/" \
  -H "Authorization: Bearer ${API_KEY}" \
  -H "Content-Type: application/json" \
  -d "{\"spec_id\": \"$SPEC_ID\", \"number_of_samples\": 20, \"sample_type\": \"short\"}")

GEN_TASK_ID=$(echo "$GEN_RESPONSE" | jq -r '.task_id')
echo "Generation task: $GEN_TASK_ID"

# 5. Wait for generation
echo "Waiting for generation..."
while true; do
  GEN_STATUS_RESPONSE=$(curl -s -X GET "${BASE_URL}/generate/status/${GEN_TASK_ID}" \
    -H "Authorization: Bearer ${API_KEY}")
  
  GEN_STATUS=$(echo "$GEN_STATUS_RESPONSE" | jq -r '.status')
  GEN_PROGRESS=$(echo "$GEN_STATUS_RESPONSE" | jq -r '.progress // 0')
  COMPLETED=$(echo "$GEN_STATUS_RESPONSE" | jq -r '.completed_samples // 0')
  TOTAL=$(echo "$GEN_STATUS_RESPONSE" | jq -r '.total_samples // 0')
  
  echo "Status: $GEN_STATUS, Progress: ${GEN_PROGRESS}% (${COMPLETED}/${TOTAL})"
  
  if [ "$GEN_STATUS" = "SUCCEEDED" ]; then
    echo "Generation completed!"
    break
  elif [ "$GEN_STATUS" = "FAILED" ]; then
    echo "Generation failed!"
    exit 1
  fi
  
  sleep 10
done

# 6. Download samples
echo -e "\nDownloading samples..."
curl -X GET "${BASE_URL}/generate/retrieve/${GEN_TASK_ID}" \
  -H "Authorization: Bearer ${API_KEY}" \
  --fail --output generated_samples.zip

echo "Samples saved to: generated_samples.zip"

Error Handling Examples

Python with Retry Logic

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry  # urllib3 ships as a dependency of requests

def create_session_with_retries():
    """Create session with automatic retry logic"""
    session = requests.Session()
    
    retry_strategy = Retry(
        total=3,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],  # renamed from method_whitelist in urllib3 1.26+
        backoff_factor=1
    )
    
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    
    return session

# Usage
session = create_session_with_retries()
response = session.post(url, headers=headers, json=data)
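
As a sketch of how this combines with the workflow above, the same /analyze/ request from create_spec() can be sent through the retry-enabled session so that 429 and 5xx responses are retried automatically (the API key and dataset ID below are placeholders):

session = create_session_with_retries()
response = session.post(
    "https://df-api.dataframer.ai/api/dataframer/analyze/",
    headers={
        "Authorization": "Bearer your_api_key",
        "Content-Type": "application/json",
    },
    json={"dataset_id": "your_dataset_id", "name": "Review Spec"},
)
response.raise_for_status()
task_id = response.json()["task_id"]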

Node.js with Error Handling

const axios = require('axios');

async function apiCallWithRetry(url, options, maxRetries = 3) {
  for (let i = 0; i < maxRetries; i++) {
    try {
      const response = await axios(url, options);
      return response.data;
    } catch (error) {
      if (error.response) {
        // Server responded with error
        console.log(`Error ${error.response.status}: ${JSON.stringify(error.response.data)}`);
        
        if (error.response.status === 429) {
          // Rate limited - wait and retry
          const waitTime = Math.pow(2, i) * 1000;
          console.log(`Rate limited. Waiting ${waitTime}ms...`);
          await new Promise(resolve => setTimeout(resolve, waitTime));
          continue;
        }
        
        // Don't retry on 4xx errors (except 429)
        if (error.response.status < 500) {
          throw error;
        }
      }
      
      // Retry on network errors or 5xx
      if (i < maxRetries - 1) {
        const waitTime = Math.pow(2, i) * 1000;
        console.log(`Retrying in ${waitTime}ms...`);
        await new Promise(resolve => setTimeout(resolve, waitTime));
      } else {
        throw error;
      }
    }
  }
  // All retries exhausted (e.g. repeated 429s) without a successful response
  throw new Error(`Request failed after ${maxRetries} attempts`);
}

Next Steps