Examples

This section provides practical examples showcasing how to use Tukuy for various common tasks and scenarios. These examples demonstrate the power and flexibility of Tukuy’s transformer system.

Text Transformations

Basic Text Processing

from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Clean and normalize user input
user_input = "  Héllò   Wórld!  "
clean_text = TUKUY.transform(user_input, [
    "strip",          # Remove leading/trailing whitespace
    "normalize",      # Remove diacritics
    "lowercase"       # Convert to lowercase
])
print(clean_text)  # "hello world!"

# Truncate long text for display
long_text = "This is a very long text that needs to be truncated for display purposes."
truncated = TUKUY.transform(long_text, [
    {"function": "truncate", "length": 20, "suffix": "..."}
])
print(truncated)  # "This is a very long..."

Text Search and Replace

# Replace specific words
text = "The quick brown fox jumps over the lazy dog."
replaced = TUKUY.transform(text, [
    {"function": "replace", "search": "fox", "replacement": "cat"}
])
print(replaced)  # "The quick brown cat jumps over the lazy dog."

# Replace using regex for more complex patterns
text = "Contact us at info@example.com or support@example.com"
anonymized = TUKUY.transform(text, [
    {"function": "regex_replace",
     # Inside a character class "|" is a literal pipe, not alternation, so the
     # top-level-domain part is written as [A-Za-z] rather than [A-Z|a-z].
     "pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
     "replacement": "[EMAIL REDACTED]"}
])
print(anonymized)  # "Contact us at [EMAIL REDACTED] or [EMAIL REDACTED]"

Text Splitting and Joining

# Split a comma-separated list
tags = "python,data,transformation,library"
tag_list = TUKUY.transform(tags, [
    {"function": "split", "separator": ","}
])
print(tag_list)  # ["python", "data", "transformation", "library"]

# Join array into string
words = ["Tukuy", "is", "awesome"]
sentence = TUKUY.transform(words, [
    {"function": "join", "separator": " "}
])
print(sentence)  # "Tukuy is awesome"

HTML Transformations

Extracting Text from HTML

html = """
<article>
    <h1>Understanding Tukuy</h1>
    <div class="content">
        <p>Tukuy is a <strong>powerful</strong> transformation library.</p>
        <p>It makes data processing <em>easy</em> and intuitive.</p>
    </div>
    <div class="author">
        <span>Written by: John Doe</span>
    </div>
</article>
"""

# Extract plain text from HTML
text = TUKUY.transform(html, ["strip_html_tags"])
print(text)  # "Understanding Tukuy Tukuy is a powerful transformation library. It makes data processing easy and intuitive. Written by: John Doe"

# Extract specific elements
title = TUKUY.transform(html, [
    {"function": "select", "selector": "h1"}
])
print(title)  # "Understanding Tukuy"

paragraphs = TUKUY.transform(html, [
    {"function": "select", "selector": "p", "extract": "text_array"}
])
print(paragraphs)  # ["Tukuy is a powerful transformation library.", "It makes data processing easy and intuitive."]

Scraping Product Information

product_html = """
<div class="product">
    <h2 class="title">Wireless Headphones</h2>
    <div class="price">$99.99</div>
    <div class="description">High-quality wireless headphones with noise cancellation.</div>
    <ul class="features">
        <li>Bluetooth 5.0</li>
        <li>40h Battery Life</li>
        <li>Active Noise Cancellation</li>
    </ul>
    <div class="rating">4.5/5 (230 reviews)</div>
</div>
"""

# Extract structured product data
pattern = {
    "properties": [
        {
            "name": "title",
            "selector": ".title",
            "transform": ["strip"]
        },
        {
            "name": "price",
            "selector": ".price",
            "transform": [
                "strip",
                {"function": "regex_replace", "pattern": r"^\$", "replacement": ""}
            ]
        },
        {
            "name": "description",
            "selector": ".description",
            "transform": ["strip"]
        },
        {
            "name": "features",
            "selector": ".features li",
            "type": "array"
        },
        {
            "name": "rating",
            "selector": ".rating",
            "transform": [
                {"function": "regex_extract", "pattern": r"(\d+\.\d+)\/5"}
            ]
        }
    ]
}

product = TUKUY.extract_html_with_pattern(product_html, pattern)
print(product)
# {
#     "title": "Wireless Headphones",
#     "price": "99.99",
#     "description": "High-quality wireless headphones with noise cancellation.",
#     "features": ["Bluetooth 5.0", "40h Battery Life", "Active Noise Cancellation"],
#     "rating": "4.5"
# }

Extracting Tables from HTML

table_html = """
<table>
    <thead>
        <tr>
            <th>Product</th>
            <th>Price</th>
            <th>Stock</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Laptop</td>
            <td>$1,299.99</td>
            <td>10</td>
        </tr>
        <tr>
            <td>Smartphone</td>
            <td>$799.99</td>
            <td>25</td>
        </tr>
        <tr>
            <td>Headphones</td>
            <td>$99.99</td>
            <td>50</td>
        </tr>
    </tbody>
</table>
"""

# Extract table as structured data
table_data = TUKUY.transform(table_html, [
    {"function": "extract_tables"}
])

print(table_data)
# [
#     {
#         "headers": ["Product", "Price", "Stock"],
#         "rows": [
#             ["Laptop", "$1,299.99", "10"],
#             ["Smartphone", "$799.99", "25"],
#             ["Headphones", "$99.99", "50"]
#         ]
#     }
# ]

# Process table data: the stock count is the third column of every row
total_stock = sum(int(cells[2]) for cells in table_data[0]["rows"])

print(f"Total stock: {total_stock}")  # "Total stock: 85"

JSON Transformations

Extracting Data from JSON

json_data = """
{
    "user": {
        "profile": {
            "name": "John Doe",
            "email": "john@example.com",
            "age": 30
        },
        "preferences": {
            "theme": "dark",
            "notifications": true
        },
        "stats": {
            "posts": 45,
            "followers": 1024,
            "following": 256
        }
    }
}
"""

# Extract specific values
name = TUKUY.transform(json_data, [
    {"function": "extract", "selector": "user.profile.name"}
])
print(name)  # "John Doe"

# Extract and validate
email = TUKUY.transform(json_data, [
    {"function": "extract", "selector": "user.profile.email"},
    "email_validator"
])
print(email)  # "john@example.com" (or None if invalid)

# Extract multiple values
stats = TUKUY.transform(json_data, [
    {"function": "extract", "selector": "user.stats"}
])
print(stats)  # {"posts": 45, "followers": 1024, "following": 256}

Processing API Responses

api_response = """
{
    "data": {
        "results": [
            {"id": 1, "name": "Product A", "price": 19.99, "inStock": true},
            {"id": 2, "name": "Product B", "price": 29.99, "inStock": false},
            {"id": 3, "name": "Product C", "price": 39.99, "inStock": true},
            {"id": 4, "name": "Product D", "price": 49.99, "inStock": true}
        ],
        "pagination": {
            "total": 15,
            "page": 1,
            "perPage": 4
        }
    },
    "meta": {
        "requestId": "abc-123",
        "timestamp": "2024-01-15T14:30:00Z"
    }
}
"""

# Extract all in-stock products
# NOTE: this is a Python dict, not JSON text, so booleans are spelled
# True/False (the JSON literal `true` would raise NameError here).
pattern = {
    "properties": [
        {
            "name": "inStockProducts",
            "selector": "data.results[*]",
            "filter": {"field": "inStock", "value": True},
            "properties": [
                {"name": "id", "selector": "id"},
                {"name": "name", "selector": "name"},
                {"name": "price", "selector": "price"}
            ]
        },
        {
            "name": "totalProducts",
            "selector": "data.pagination.total"
        },
        {
            "name": "requestInfo",
            "properties": [
                {"name": "id", "selector": "meta.requestId"},
                {"name": "time", "selector": "meta.timestamp"}
            ]
        }
    ]
}

result = TUKUY.extract_json_with_pattern(api_response, pattern)
print(result)
# {
#     "inStockProducts": [
#         {"id": 1, "name": "Product A", "price": 19.99},
#         {"id": 3, "name": "Product C", "price": 39.99},
#         {"id": 4, "name": "Product D", "price": 49.99}
#     ],
#     "totalProducts": 15,
#     "requestInfo": {
#         "id": "abc-123",
#         "time": "2024-01-15T14:30:00Z"
#     }
# }

Date Transformations

Working with Dates

from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Parse date string
date_str = "2023-05-15"
date_obj = TUKUY.transform(date_str, [
    {"function": "parse_date", "format": "%Y-%m-%d"}
])

# Calculate age from birthdate
birthdate = "1990-08-25"
age = TUKUY.transform(birthdate, [
    {"function": "age_calc"}
])
print(f"Age: {age} years")

# Calculate days between dates
start_date = "2023-01-01"
end_date = "2023-12-31"
days = TUKUY.transform(start_date, [
    {"function": "duration_calc", "unit": "days", "end": end_date}
])
print(f"Days between: {days}")

# Format date
date_obj = TUKUY.transform("2023-05-15", [
    {"function": "parse_date"},
    {"function": "format_date", "format": "%B %d, %Y"}
])
print(date_obj)  # "May 15, 2023"

Handling Date Ranges

# Check if date falls within a range
target_date = "2023-07-15"
start_range = "2023-06-01"
end_range = "2023-08-31"

is_in_range = TUKUY.transform(target_date, [
    {"function": "parse_date"},
    {"function": "in_date_range", "start": start_range, "end": end_range}
])
print(f"Date is in range: {is_in_range}")  # True

# Calculate business days in a period
business_days = TUKUY.transform("2023-01-01", [
    {"function": "business_days", "end": "2023-01-31"}
])
print(f"Business days: {business_days}")  # ~22 (depending on holidays)

# Check if date is a weekend
is_weekend = TUKUY.transform("2023-07-15", [  # July 15, 2023 is a Saturday
    {"function": "is_weekend"}
])
print(f"Is weekend: {is_weekend}")  # True

Numerical Transformations

Basic Numerical Operations

from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Round numbers
value = 123.456789
rounded = TUKUY.transform(value, [
    {"function": "round", "decimals": 2}
])
print(rounded)  # 123.46

# Format with thousand separators
large_number = 1234567.89
formatted = TUKUY.transform(large_number, [
    {"function": "format_number"}
])
print(formatted)  # "1,234,567.89"

# Format as currency
price = 49.99
currency = TUKUY.transform(price, [
    {"function": "to_currency", "currency": "USD"}
])
print(currency)  # "$49.99"

# Calculate percentage
ratio = 0.7523
percentage = TUKUY.transform(ratio, [
    {"function": "percentage", "decimals": 1}
])
print(percentage)  # "75.2%"

Statistical Calculations

# Calculate statistics from a list of numbers
data = [12, 15, 23, 45, 67, 32, 18, 24]

# Mean
mean = TUKUY.transform(data, [
    {"function": "mean"}
])
print(f"Mean: {mean}")  # Mean: 29.5

# Median
median = TUKUY.transform(data, [
    {"function": "median"}
])
print(f"Median: {median}")  # Median: 23.5

# Min and Max
min_max = TUKUY.transform(data, [
    {"function": "range"}
])
print(f"Range: {min_max}")  # Range: [12, 67]

# Sum
total = TUKUY.transform(data, [
    {"function": "sum"}
])
print(f"Total: {total}")  # Total: 236

Financial Calculations

# Calculate compound interest
principal = 1000
interest_rate = 0.05  # 5%
years = 5

future_value = TUKUY.transform(principal, [
    {"function": "compound_interest", "rate": interest_rate, "years": years}
])
print(f"Future value: {future_value}")  # ~1276.28

# Calculate mortgage payment
loan_amount = 300000
interest_rate = 0.04  # 4%
loan_term_years = 30

monthly_payment = TUKUY.transform(loan_amount, [
    {"function": "mortgage_payment",
     "rate": interest_rate,
     "term_years": loan_term_years}
])
print(f"Monthly payment: {monthly_payment}")  # ~$1,432.25

# Calculate discount
original_price = 100
discount_percent = 25

sale_price = TUKUY.transform(original_price, [
    {"function": "apply_discount", "discount": discount_percent}
])
print(f"Sale price: {sale_price}")  # 75.0

Validation Transformations

Input Validation

from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Email validation
valid_email = "user@example.com"
invalid_email = "not-an-email"

result1 = TUKUY.transform(valid_email, ["email_validator"])
result2 = TUKUY.transform(invalid_email, ["email_validator"])

print(f"Valid email: {result1}")  # "user@example.com"
print(f"Invalid email: {result2}")  # None

# URL validation
valid_url = "https://tukuy.example.com/docs"
invalid_url = "not a url"

result1 = TUKUY.transform(valid_url, ["url_validator"])
result2 = TUKUY.transform(invalid_url, ["url_validator"])

print(f"Valid URL: {result1}")  # "https://tukuy.example.com/docs"
print(f"Invalid URL: {result2}")  # None

# Phone number validation
valid_phone = "+1-555-123-4567"
invalid_phone = "123"

result1 = TUKUY.transform(valid_phone, ["phone_validator"])
result2 = TUKUY.transform(invalid_phone, ["phone_validator"])

print(f"Valid phone: {result1}")  # "+1-555-123-4567"
print(f"Invalid phone: {result2}")  # None

Range and Length Validation

# String length validation
username = "user123"

is_valid = TUKUY.transform(username, [
    {"function": "length_validator", "min": 3, "max": 20}
])
print(f"Username valid: {is_valid is not None}")  # True

# Number range validation
age = 25

is_valid = TUKUY.transform(age, [
    {"function": "range_validator", "min": 18, "max": 65}
])
print(f"Age valid: {is_valid is not None}")  # True

# Date range validation
event_date = "2023-06-15"

is_valid = TUKUY.transform(event_date, [
    {"function": "date_range_validator",
     "min": "2023-01-01",
     "max": "2023-12-31"}
])
print(f"Date valid: {is_valid is not None}")  # True

Custom Pattern Validation

# Regex pattern validation
password = "P@ssw0rd123"

# Check if password meets complexity requirements
is_valid = TUKUY.transform(password, [
    {"function": "regex_validator",
     "pattern": r"^(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[@$!%*#?&])[A-Za-z\d@$!%*#?&]{8,}$"}
])
print(f"Password valid: {is_valid is not None}")  # True

# Custom validation function
def validate_isbn(isbn):
    """Return True when *isbn* has the shape of an ISBN-10 or ISBN-13.

    Hyphens are ignored. After removing them the value must be 10 or 13
    characters long and consist of the ASCII digits 0-9; the final character
    of an ISBN-10 may also be "X" (the check digit for the value 10).

    This is a format check only: the check digit itself is not verified.
    """
    compact = isbn.replace('-', '')
    if len(compact) not in (10, 13):
        return False
    # Only the ISBN-10 check digit may be "X"; set it aside so the remaining
    # characters can be tested uniformly.
    if len(compact) == 10 and compact[-1] in 'Xx':
        compact = compact[:-1]
    # Explicit ASCII set: str.isdigit() would also accept non-ASCII digits.
    return all(ch in '0123456789' for ch in compact)

# Define the custom validator. Before "isbn_validator" can be used in a
# transform chain it must be exposed by a TransformerPlugin and registered
# with TUKUY.register_plugin(), as in the custom transformer examples.
from typing import Optional

from tukuy.base import ChainableTransformer


class ISBNValidator(ChainableTransformer[str, str]):
    """Pass an ISBN-shaped string through unchanged; yield None otherwise."""

    def validate(self, value: str) -> bool:
        # Only strings are accepted as input.
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> Optional[str]:
        # None marks the value as invalid, which is what the usage example
        # tests for with `is_valid is not None`.
        return value if validate_isbn(value) else None

# Usage
isbn = "978-3-16-148410-0"

is_valid = TUKUY.transform(isbn, [
    {"function": "isbn_validator"}  # Assuming registered
])
print(f"ISBN valid: {is_valid is not None}")  # True

Creating Custom Transformers

Custom Text Transformer

from tukuy.base import ChainableTransformer
from tukuy.plugins import TransformerPlugin
from tukuy import TukuyTransformer

# Create a custom transformer for title case conversion
class TitleCaseTransformer(ChainableTransformer[str, str]):
    """Capitalize the first letter of every word, leaving all-caps words alone."""

    def validate(self, value: str) -> bool:
        # Only strings are accepted as input.
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> str:
        """Return *value* with each whitespace-separated word capitalized.

        Unlike str.title(), words that are already fully uppercase (acronyms)
        are kept as they are, and only the first character of any other word
        is changed. Words are re-joined with single spaces.
        """
        def capitalize(token):
            if token.isupper():
                # Uppercase acronym: keep untouched.
                return token
            return token[0].upper() + token[1:]

        return ' '.join(capitalize(token) for token in value.split())

# Create a plugin to register the transformer
class TextExtensionsPlugin(TransformerPlugin):
    """Plugin named "text_extensions" that exposes the 'title_case' transformer."""

    def __init__(self):
        super().__init__("text_extensions")

    @property
    def transformers(self):
        """Map each transformer name to a one-argument factory (argument unused)."""
        def make_title_case(_):
            return TitleCaseTransformer('title_case')

        return {'title_case': make_title_case}

# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(TextExtensionsPlugin())

text = "the QUICK brown fox jumps over the lazy dog"
result = TUKUY.transform(text, ["title_case"])
print(result)  # "The QUICK Brown Fox Jumps Over The Lazy Dog"

Custom Data Processor

# Create a transformer for processing CSV data
class CSVParserTransformer(ChainableTransformer[str, list]):
    """Parse simple delimiter-separated text into rows.

    Each line is split on ``delimiter`` and every cell is stripped of
    surrounding whitespace. Quoting is not interpreted; reach for the standard
    ``csv`` module when fields may contain quoted delimiters or newlines.

    Args:
        name: Transformer name passed to the base class.
        delimiter: Separator between cells (default ",").
        has_header: When True, the first non-blank line supplies the keys and
            every following line becomes a dict; otherwise every line becomes
            a list of cells.
    """

    def __init__(self, name: str, delimiter: str = ',', has_header: bool = True):
        super().__init__(name)
        self.delimiter = delimiter
        self.has_header = has_header

    def validate(self, value: str) -> bool:
        # Only strings are accepted as input.
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> list:
        """Return the parsed rows; empty or whitespace-only input gives []."""
        # Filter blank lines first. str.split() never returns an empty list,
        # so testing the split result alone cannot detect empty input, and
        # blank lines would otherwise turn into bogus one-cell rows.
        lines = [line for line in value.splitlines() if line.strip()]
        if not lines:
            return []

        rows = [
            [cell.strip() for cell in line.split(self.delimiter)]
            for line in lines
        ]
        if not self.has_header:
            return rows

        headers, *records = rows
        # zip() stops at the shorter side, so cells beyond the last header are
        # dropped and short rows simply yield fewer keys.
        return [dict(zip(headers, record)) for record in records]

# Create a plugin
class DataProcessingPlugin(TransformerPlugin):
    """Plugin named "data_processing" that exposes the 'parse_csv' transformer."""

    def __init__(self):
        super().__init__("data_processing")

    @property
    def transformers(self):
        """Map each transformer name to a one-argument factory (argument unused)."""
        def make_csv_parser(_):
            # Uses the parser defaults: comma delimiter, first line is a header.
            return CSVParserTransformer('parse_csv')

        return {'parse_csv': make_csv_parser}

# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(DataProcessingPlugin())

csv_data = """Name,Age,Email
John Doe,30,john@example.com
Jane Smith,25,jane@example.com
Bob Johnson,45,bob@example.com"""

result = TUKUY.transform(csv_data, [
    {"function": "parse_csv"}
])

print(result)
# [
#     {'Name': 'John Doe', 'Age': '30', 'Email': 'john@example.com'},
#     {'Name': 'Jane Smith', 'Age': '25', 'Email': 'jane@example.com'},
#     {'Name': 'Bob Johnson', 'Age': '45', 'Email': 'bob@example.com'}
# ]

Using Plugins

Registering Custom Plugins

from tukuy.base import ChainableTransformer
from tukuy.plugins import TransformerPlugin
from tukuy import TukuyTransformer

# Create a geo transformation plugin
class GeoTransformer(ChainableTransformer[dict, dict]):
    """Add degrees/minutes/seconds strings to a {'lat': ..., 'lon': ...} dict."""

    def __init__(self, name: str):
        super().__init__(name)

    def validate(self, value: dict) -> bool:
        # Input must be a dict carrying both coordinate keys.
        return (isinstance(value, dict) and
               'lat' in value and 'lon' in value)

    def _transform(self, value: dict, context=None) -> dict:
        """Return lat/lon plus 'lat_dms' and 'lon_dms' strings.

        The DMS strings look like ``40° 42' 46.1"N``; the hemisphere letter is
        N/S for latitude and E/W for longitude, chosen from the sign.
        """
        lat = value['lat']
        lon = value['lon']

        def decimal_to_dms(coord):
            # Round once, in whole tenths of an arc-second, and only then split
            # into degrees/minutes/seconds. Splitting first and rounding the
            # seconds afterwards can print an impossible 60.0 seconds.
            total_tenths = round(abs(coord) * 36000)
            degrees, rest = divmod(total_tenths, 36000)
            minutes, rest = divmod(rest, 600)
            seconds, tenth = divmod(rest, 10)
            return f"{degrees}° {minutes}' {seconds}.{tenth}\""

        return {
            'lat': lat,
            'lon': lon,
            'lat_dms': decimal_to_dms(abs(lat)) + ('N' if lat >= 0 else 'S'),
            'lon_dms': decimal_to_dms(abs(lon)) + ('E' if lon >= 0 else 'W')
        }

class GeoPlugin(TransformerPlugin):
    """Plugin named "geo" that exposes 'to_dms' and logs its lifecycle hooks."""

    def __init__(self):
        super().__init__("geo")

    @property
    def transformers(self):
        """Map each transformer name to a one-argument factory (argument unused)."""
        def make_to_dms(_):
            return GeoTransformer('to_dms')

        return {'to_dms': make_to_dms}

    def initialize(self):
        # Run the base hook first, then announce that the plugin is ready.
        super().initialize()
        print("Geo plugin initialized")

    def cleanup(self):
        # Run the base hook first, then announce the teardown.
        super().cleanup()
        print("Geo plugin cleaned up")

# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(GeoPlugin())

coords = {'lat': 40.7128, 'lon': -74.0060}  # New York
result = TUKUY.transform(coords, [
    {"function": "to_dms"}
])

print(result)
# {
#     'lat': 40.7128,
#     'lon': -74.0060,
#     'lat_dms': "40° 42' 46.1\"N",
#     'lon_dms': "74° 0' 21.6\"W"
# }

Creating Plugin Collections

# Create a collection of related transformers
class AnalyticsTransformerA(ChainableTransformer[list, float]):
    """Compute the arithmetic mean of a list of numbers."""

    def validate(self, value: list) -> bool:
        # Input must be a list whose items are all int or float.
        if not isinstance(value, list):
            return False
        return all(isinstance(item, (int, float)) for item in value)

    def _transform(self, value: list, context=None) -> float:
        # An empty list has no mean; 0 is returned instead of dividing by zero.
        if not value:
            return 0
        return sum(value) / len(value)

class AnalyticsTransformerB(ChainableTransformer[list, dict]):
    """Summarize a list of numbers as count, sum, mean, min and max."""

    def validate(self, value: list) -> bool:
        # Input must be a list whose items are all int or float.
        if not isinstance(value, list):
            return False
        return all(isinstance(item, (int, float)) for item in value)

    def _transform(self, value: list, context=None) -> dict:
        count = len(value)
        if count == 0:
            # Nothing to summarize: zero totals and no extremes.
            return {"count": 0, "sum": 0, "mean": 0, "min": None, "max": None}

        total = sum(value)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": min(value),
            "max": max(value)
        }

# Group them in a plugin
class AnalyticsPlugin(TransformerPlugin):
    """Plugin named "analytics" that exposes the 'average' and 'stats' transformers."""

    def __init__(self):
        super().__init__("analytics")

    @property
    def transformers(self):
        """Map each transformer name to a one-argument factory (argument unused)."""
        def make_average(_):
            return AnalyticsTransformerA('average')

        def make_stats(_):
            return AnalyticsTransformerB('stats')

        return {'average': make_average, 'stats': make_stats}

# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(AnalyticsPlugin())

data = [12, 15, 23, 45, 67, 32, 18, 24]

avg = TUKUY.transform(data, ["average"])
print(f"Average: {avg}")  # Average: 29.5

stats = TUKUY.transform(data, ["stats"])
print(f"Statistics: {stats}")
# Statistics: {
#   'count': 8,
#   'sum': 236,
#   'mean': 29.5,
#   'min': 12,
#   'max': 67
# }

Real-world Use Cases

Web Scraping and Data Extraction

import requests
from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Fetch a web page
url = "https://example.com/products"
# Always pass a timeout: without one, requests can wait on a stalled server
# indefinitely.
response = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses instead of scraping an error page.
response.raise_for_status()
html = response.text

# Define extraction pattern for products
pattern = {
    "properties": [
        {
            "name": "products",
            "selector": ".product-item",
            "type": "array",
            "properties": [
                {
                    "name": "title",
                    "selector": ".product-title",
                    "transform": ["strip"]
                },
                {
                    "name": "price",
                    "selector": ".product-price",
                    "transform": [
                        "strip",
                        {"function": "regex_extract", "pattern": r"\$(\d+\.\d+)"}
                    ]
                },
                {
                    "name": "rating",
                    "selector": ".product-rating",
                    "transform": [
                        {"function": "regex_extract", "pattern": r"(\d\.\d)\/5"}
                    ]
                },
                {
                    "name": "inStock",
                    "selector": ".stock-status",
                    "transform": [
                        {"function": "equals", "value": "In Stock"}
                    ]
                }
            ]
        },
        {
            "name": "pagination",
            "properties": [
                {
                    "name": "currentPage",
                    "selector": ".pagination .current",
                    "transform": ["strip"]
                },
                {
                    "name": "totalPages",
                    "selector": ".pagination .total",
                    "transform": ["strip"]
                }
            ]
        }
    ]
}

# Extract structured data
result = TUKUY.extract_html_with_pattern(html, pattern)

# Process the extracted data
in_stock_products = [p for p in result["products"] if p["inStock"]]
print(f"Found {len(in_stock_products)} in-stock products")

# Sort by price
sorted_products = sorted(result["products"],
                        key=lambda p: float(p["price"]) if p["price"] else 0)

# Display top 5 cheapest in-stock products
for product in [p for p in sorted_products if p["inStock"]][:5]:
    print(f"{product['title']} - ${product['price']} - Rating: {product['rating']}/5")

Data Cleaning and Normalization

import pandas as pd
from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Load raw data
df = pd.read_csv("customer_data.csv")

# Clean and normalize data
cleaned_data = []

for _, row in df.iterrows():
    # Clean and validate email
    email = TUKUY.transform(row["email"], [
        "strip",
        "lowercase",
        "email_validator"
    ])

    # Format phone number
    phone = TUKUY.transform(row["phone"], [
        "strip",
        {"function": "regex_replace", "pattern": r"[^\d+]", "replacement": ""},
        "phone_validator"
    ])

    # Normalize name
    name = TUKUY.transform(row["name"], [
        "strip",
        {"function": "title_case"}  # Custom transformer from earlier
    ])

    # Parse and validate date
    birth_date = TUKUY.transform(row["birth_date"], [
        {"function": "parse_date", "format": "%m/%d/%Y"},
        {"function": "format_date", "format": "%Y-%m-%d"}
    ])

    # Calculate age
    age = TUKUY.transform(birth_date, [
        {"function": "age_calc"}
    ]) if birth_date else None

    # Add to cleaned data if critical fields are valid
    if email and name:
        cleaned_data.append({
            "name": name,
            "email": email,
            "phone": phone,
            "birth_date": birth_date,
            "age": age
        })

# Create cleaned DataFrame
cleaned_df = pd.DataFrame(cleaned_data)

# Save cleaned data
cleaned_df.to_csv("cleaned_customer_data.csv", index=False)

print(f"Processed {len(df)} records, kept {len(cleaned_data)} valid records")
print(f"Removed {len(df) - len(cleaned_data)} invalid records")

API Data Processing

import requests
import json
from tukuy import TukuyTransformer

TUKUY = TukuyTransformer()

# Fetch data from an API
# Always pass a timeout: without one, requests can wait on a stalled server
# indefinitely.
response = requests.get("https://api.example.com/data", timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error body as data.
response.raise_for_status()
api_data = response.json()

# Extract and transform specific data
pattern = {
    "properties": [
        {
            "name": "items",
            "selector": "data.items[*]",
            "type": "array",
            "properties": [
                {"name": "id", "selector": "id"},
                {"name": "title", "selector": "title", "transform": ["strip"]},
                {"name": "category", "selector": "category.name"},
                {"name": "price", "selector": "price.amount"},
                {"name": "currency", "selector": "price.currency"}
            ]
        },
        {
            "name": "metadata",
            "properties": [
                {"name": "totalCount", "selector": "meta.total"},
                {"name": "page", "selector": "meta.page"},
                {"name": "timestamp", "selector": "meta.timestamp"}
            ]
        }
    ]
}

# Extract structured data
result = TUKUY.extract_json_with_pattern(json.dumps(api_data), pattern)

# Process items
for item in result["items"]:
    # Format price with currency symbol
    formatted_price = TUKUY.transform(float(item["price"]), [
        {"function": "to_currency", "currency": item["currency"]}
    ])

    item["formatted_price"] = formatted_price

    # Categorize by price range
    price = float(item["price"])
    if price < 10:
        item["price_category"] = "budget"
    elif price < 50:
        item["price_category"] = "standard"
    else:
        item["price_category"] = "premium"

# Group items by category: each category name maps to the list of its items
categories = {}
for item in result["items"]:
    categories.setdefault(item["category"], []).append(item)

# Calculate statistics for each category
for category, items in categories.items():
    prices = [float(item["price"]) for item in items]
    stats = TUKUY.transform(prices, ["stats"])  # From the analytics plugin

    print(f"Category: {category}")
    print(f"  Items: {len(items)}")
    print(f"  Average price: {stats['mean']:.2f}")
    print(f"  Price range: {stats['min']} - {stats['max']}")