Examples
This section provides practical examples showcasing how to use Tukuy for various common tasks and scenarios. These examples demonstrate the power and flexibility of Tukuy’s transformer system.
Text Transformations
Basic Text Processing
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# Clean and normalize user input
user_input = " Héllò Wórld! "
clean_text = TUKUY.transform(user_input, [
"strip", # Remove leading/trailing whitespace
"normalize", # Remove diacritics
"lowercase" # Convert to lowercase
])
print(clean_text) # "hello world!"
# Truncate long text for display
long_text = "This is a very long text that needs to be truncated for display purposes."
truncated = TUKUY.transform(long_text, [
{"function": "truncate", "length": 20, "suffix": "..."}
])
print(truncated) # "This is a very long..."
Text Search and Replace
# Replace specific words
text = "The quick brown fox jumps over the lazy dog."
replaced = TUKUY.transform(text, [
{"function": "replace", "search": "fox", "replacement": "cat"}
])
print(replaced) # "The quick brown cat jumps over the lazy dog."
# Replace using regex for more complex patterns
text = "Contact us at info@example.com or support@example.com"
anonymized = TUKUY.transform(text, [
    {"function": "regex_replace",
     # Inside a character class "|" is a literal pipe, not alternation,
     # so the top-level domain is matched with [A-Za-z] only.
     "pattern": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
     "replacement": "[EMAIL REDACTED]"}
])
print(anonymized)  # "Contact us at [EMAIL REDACTED] or [EMAIL REDACTED]"
Text Splitting and Joining
# Split a comma-separated list
tags = "python,data,transformation,library"
tag_list = TUKUY.transform(tags, [
{"function": "split", "separator": ","}
])
print(tag_list) # ["python", "data", "transformation", "library"]
# Join array into string
words = ["Tukuy", "is", "awesome"]
sentence = TUKUY.transform(words, [
{"function": "join", "separator": " "}
])
print(sentence) # "Tukuy is awesome"
HTML Transformations
Extracting Text from HTML
html = """
<article>
<h1>Understanding Tukuy</h1>
<div class="content">
<p>Tukuy is a <strong>powerful</strong> transformation library.</p>
<p>It makes data processing <em>easy</em> and intuitive.</p>
</div>
<div class="author">
<span>Written by: John Doe</span>
</div>
</article>
"""
# Extract plain text from HTML
text = TUKUY.transform(html, ["strip_html_tags"])
print(text) # "Understanding Tukuy Tukuy is a powerful transformation library. It makes data processing easy and intuitive. Written by: John Doe"
# Extract specific elements
title = TUKUY.transform(html, [
{"function": "select", "selector": "h1"}
])
print(title) # "Understanding Tukuy"
paragraphs = TUKUY.transform(html, [
{"function": "select", "selector": "p", "extract": "text_array"}
])
print(paragraphs) # ["Tukuy is a powerful transformation library.", "It makes data processing easy and intuitive."]
Scraping Product Information
product_html = """
<div class="product">
<h2 class="title">Wireless Headphones</h2>
<div class="price">$99.99</div>
<div class="description">High-quality wireless headphones with noise cancellation.</div>
<ul class="features">
<li>Bluetooth 5.0</li>
<li>40h Battery Life</li>
<li>Active Noise Cancellation</li>
</ul>
<div class="rating">4.5/5 (230 reviews)</div>
</div>
"""
# Extract structured product data
pattern = {
"properties": [
{
"name": "title",
"selector": ".title",
"transform": ["strip"]
},
{
"name": "price",
"selector": ".price",
"transform": [
"strip",
{"function": "regex_replace", "pattern": r"^\$", "replacement": ""}
]
},
{
"name": "description",
"selector": ".description",
"transform": ["strip"]
},
{
"name": "features",
"selector": ".features li",
"type": "array"
},
{
"name": "rating",
"selector": ".rating",
"transform": [
{"function": "regex_extract", "pattern": r"(\d+\.\d+)\/5"}
]
}
]
}
product = TUKUY.extract_html_with_pattern(product_html, pattern)
print(product)
# {
# "title": "Wireless Headphones",
# "price": "99.99",
# "description": "High-quality wireless headphones with noise cancellation.",
# "features": ["Bluetooth 5.0", "40h Battery Life", "Active Noise Cancellation"],
# "rating": "4.5"
# }
Extracting Tables from HTML
table_html = """
<table>
<thead>
<tr>
<th>Product</th>
<th>Price</th>
<th>Stock</th>
</tr>
</thead>
<tbody>
<tr>
<td>Laptop</td>
<td>$1,299.99</td>
<td>10</td>
</tr>
<tr>
<td>Smartphone</td>
<td>$799.99</td>
<td>25</td>
</tr>
<tr>
<td>Headphones</td>
<td>$99.99</td>
<td>50</td>
</tr>
</tbody>
</table>
"""
# Extract table as structured data
table_data = TUKUY.transform(table_html, [
{"function": "extract_tables"}
])
print(table_data)
# [
# {
# "headers": ["Product", "Price", "Stock"],
# "rows": [
# ["Laptop", "$1,299.99", "10"],
# ["Smartphone", "$799.99", "25"],
# ["Headphones", "$99.99", "50"]
# ]
# }
# ]
# Process table data: add up the third column ("Stock") of the first table
total_stock = 0
stock_column = [row[2] for row in table_data[0]["rows"]]
for cell in stock_column:
    total_stock = total_stock + int(cell)
print(f"Total stock: {total_stock}")  # "Total stock: 85"
JSON Transformations
Extracting Data from JSON
json_data = """
{
"user": {
"profile": {
"name": "John Doe",
"email": "john@example.com",
"age": 30
},
"preferences": {
"theme": "dark",
"notifications": true
},
"stats": {
"posts": 45,
"followers": 1024,
"following": 256
}
}
}
"""
# Extract specific values
name = TUKUY.transform(json_data, [
{"function": "extract", "selector": "user.profile.name"}
])
print(name) # "John Doe"
# Extract and validate
email = TUKUY.transform(json_data, [
{"function": "extract", "selector": "user.profile.email"},
"email_validator"
])
print(email) # "john@example.com" (or None if invalid)
# Extract multiple values
stats = TUKUY.transform(json_data, [
{"function": "extract", "selector": "user.stats"}
])
print(stats) # {"posts": 45, "followers": 1024, "following": 256}
Processing API Responses
api_response = """
{
"data": {
"results": [
{"id": 1, "name": "Product A", "price": 19.99, "inStock": true},
{"id": 2, "name": "Product B", "price": 29.99, "inStock": false},
{"id": 3, "name": "Product C", "price": 39.99, "inStock": true},
{"id": 4, "name": "Product D", "price": 49.99, "inStock": true}
],
"pagination": {
"total": 15,
"page": 1,
"perPage": 4
}
},
"meta": {
"requestId": "abc-123",
"timestamp": "2024-01-15T14:30:00Z"
}
}
"""
# Extract all in-stock products
pattern = {
    "properties": [
        {
            "name": "inStockProducts",
            "selector": "data.results[*]",
            # Python boolean: the JSON literal `true` is not a Python name
            "filter": {"field": "inStock", "value": True},
            "properties": [
                {"name": "id", "selector": "id"},
                {"name": "name", "selector": "name"},
                {"name": "price", "selector": "price"}
            ]
        },
        {
            "name": "totalProducts",
            "selector": "data.pagination.total"
        },
        {
            "name": "requestInfo",
            "properties": [
                {"name": "id", "selector": "meta.requestId"},
                {"name": "time", "selector": "meta.timestamp"}
            ]
        }
    ]
}
result = TUKUY.extract_json_with_pattern(api_response, pattern)
print(result)
# {
# "inStockProducts": [
# {"id": 1, "name": "Product A", "price": 19.99},
# {"id": 3, "name": "Product C", "price": 39.99},
# {"id": 4, "name": "Product D", "price": 49.99}
# ],
# "totalProducts": 15,
# "requestInfo": {
# "id": "abc-123",
# "time": "2024-01-15T14:30:00Z"
# }
# }
Date Transformations
Working with Dates
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# Parse date string
date_str = "2023-05-15"
date_obj = TUKUY.transform(date_str, [
{"function": "parse_date", "format": "%Y-%m-%d"}
])
# Calculate age from birthdate
birthdate = "1990-08-25"
age = TUKUY.transform(birthdate, [
{"function": "age_calc"}
])
print(f"Age: {age} years")
# Calculate days between dates
start_date = "2023-01-01"
end_date = "2023-12-31"
days = TUKUY.transform(start_date, [
{"function": "duration_calc", "unit": "days", "end": end_date}
])
print(f"Days between: {days}")
# Format date
date_obj = TUKUY.transform("2023-05-15", [
{"function": "parse_date"},
{"function": "format_date", "format": "%B %d, %Y"}
])
print(date_obj) # "May 15, 2023"
Handling Date Ranges
# Check if date falls within a range
target_date = "2023-07-15"
start_range = "2023-06-01"
end_range = "2023-08-31"
is_in_range = TUKUY.transform(target_date, [
{"function": "parse_date"},
{"function": "in_date_range", "start": start_range, "end": end_range}
])
print(f"Date is in range: {is_in_range}") # True
# Calculate business days in a period
business_days = TUKUY.transform("2023-01-01", [
{"function": "business_days", "end": "2023-01-31"}
])
print(f"Business days: {business_days}") # ~22 (depending on holidays)
# Check if date is a weekend
is_weekend = TUKUY.transform("2023-07-15", [ # July 15, 2023 is a Saturday
{"function": "is_weekend"}
])
print(f"Is weekend: {is_weekend}") # True
Numerical Transformations
Basic Numerical Operations
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# Round numbers
value = 123.456789
rounded = TUKUY.transform(value, [
{"function": "round", "decimals": 2}
])
print(rounded) # 123.46
# Format with thousand separators
large_number = 1234567.89
formatted = TUKUY.transform(large_number, [
{"function": "format_number"}
])
print(formatted) # "1,234,567.89"
# Format as currency
price = 49.99
currency = TUKUY.transform(price, [
{"function": "to_currency", "currency": "USD"}
])
print(currency) # "$49.99"
# Calculate percentage
ratio = 0.7523
percentage = TUKUY.transform(ratio, [
{"function": "percentage", "decimals": 1}
])
print(percentage) # "75.2%"
Statistical Calculations
# Calculate statistics from a list of numbers
data = [12, 15, 23, 45, 67, 32, 18, 24]
# Mean
mean = TUKUY.transform(data, [
{"function": "mean"}
])
print(f"Mean: {mean}") # Mean: 29.5
# Median
median = TUKUY.transform(data, [
{"function": "median"}
])
print(f"Median: {median}") # Median: 23.5
# Min and Max
min_max = TUKUY.transform(data, [
{"function": "range"}
])
print(f"Range: {min_max}") # Range: [12, 67]
# Sum
total = TUKUY.transform(data, [
{"function": "sum"}
])
print(f"Total: {total}") # Total: 236
Financial Calculations
# Calculate compound interest
principal = 1000
interest_rate = 0.05 # 5%
years = 5
future_value = TUKUY.transform(principal, [
{"function": "compound_interest", "rate": interest_rate, "years": years}
])
print(f"Future value: {future_value}") # ~1276.28
# Calculate mortgage payment
loan_amount = 300000
interest_rate = 0.04 # 4%
loan_term_years = 30
monthly_payment = TUKUY.transform(loan_amount, [
{"function": "mortgage_payment",
"rate": interest_rate,
"term_years": loan_term_years}
])
print(f"Monthly payment: {monthly_payment}") # ~$1,432.25
# Calculate discount
original_price = 100
discount_percent = 25
sale_price = TUKUY.transform(original_price, [
{"function": "apply_discount", "discount": discount_percent}
])
print(f"Sale price: {sale_price}") # 75.0
Validation Transformations
Input Validation
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# Email validation
valid_email = "user@example.com"
invalid_email = "not-an-email"
result1 = TUKUY.transform(valid_email, ["email_validator"])
result2 = TUKUY.transform(invalid_email, ["email_validator"])
print(f"Valid email: {result1}") # "user@example.com"
print(f"Invalid email: {result2}") # None
# URL validation
valid_url = "https://tukuy.example.com/docs"
invalid_url = "not a url"
result1 = TUKUY.transform(valid_url, ["url_validator"])
result2 = TUKUY.transform(invalid_url, ["url_validator"])
print(f"Valid URL: {result1}") # "https://tukuy.example.com/docs"
print(f"Invalid URL: {result2}") # None
# Phone number validation
valid_phone = "+1-555-123-4567"
invalid_phone = "123"
result1 = TUKUY.transform(valid_phone, ["phone_validator"])
result2 = TUKUY.transform(invalid_phone, ["phone_validator"])
print(f"Valid phone: {result1}") # "+1-555-123-4567"
print(f"Invalid phone: {result2}") # None
Range and Length Validation
# String length validation
username = "user123"
is_valid = TUKUY.transform(username, [
{"function": "length_validator", "min": 3, "max": 20}
])
print(f"Username valid: {is_valid is not None}") # True
# Number range validation
age = 25
is_valid = TUKUY.transform(age, [
{"function": "range_validator", "min": 18, "max": 65}
])
print(f"Age valid: {is_valid is not None}") # True
# Date range validation
event_date = "2023-06-15"
is_valid = TUKUY.transform(event_date, [
{"function": "date_range_validator",
"min": "2023-01-01",
"max": "2023-12-31"}
])
print(f"Date valid: {is_valid is not None}") # True
Custom Pattern Validation
# Regex pattern validation
password = "P@ssw0rd123"
# Check if password meets complexity requirements
is_valid = TUKUY.transform(password, [
{"function": "regex_validator",
"pattern": r"^(?=.*[A-Z])(?=.*[a-z])(?=.*\d)(?=.*[@$!%*#?&])[A-Za-z\d@$!%*#?&]{8,}$"}
])
print(f"Password valid: {is_valid is not None}") # True
# Custom validation function
def validate_isbn(isbn):
    """Return True when *isbn* has the shape of an ISBN-10 or ISBN-13.

    Hyphens are ignored. Only the format is checked (length and allowed
    characters); the check digit is not verified arithmetically.
    """
    compact = isbn.replace('-', '')
    if len(compact) == 13:
        return compact.isdigit()
    if len(compact) == 10:
        # The final ISBN-10 character is a check digit that may be "X"
        # (standing for the value 10), so it is not required to be a digit.
        body, check = compact[:9], compact[9]
        return body.isdigit() and (check.isdigit() or check.upper() == 'X')
    return False
# Register custom validator
from typing import Optional

from tukuy.base import ChainableTransformer
from tukuy.plugins import TransformerPlugin


class ISBNValidator(ChainableTransformer[str, str]):
    """Pass a well-formed ISBN string through unchanged, otherwise yield None."""

    def validate(self, value: str) -> bool:
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> Optional[str]:
        # None marks a failed validation; the usage below tests `is not None`
        return value if validate_isbn(value) else None


class ISBNPlugin(TransformerPlugin):
    """Plugin that exposes ISBNValidator under the name "isbn_validator"."""

    def __init__(self):
        super().__init__("isbn")

    @property
    def transformers(self):
        return {
            'isbn_validator': lambda _: ISBNValidator('isbn_validator')
        }


# Usage
TUKUY.register_plugin(ISBNPlugin())
isbn = "978-3-16-148410-0"
is_valid = TUKUY.transform(isbn, [
    {"function": "isbn_validator"}
])
print(f"ISBN valid: {is_valid is not None}")  # True
Creating Custom Transformers
Custom Text Transformer
from tukuy.base import ChainableTransformer
from tukuy.plugins import TransformerPlugin
from tukuy import TukuyTransformer
# Create a custom transformer for title case conversion
class TitleCaseTransformer(ChainableTransformer[str, str]):
    """Upper-case the first letter of every word while keeping acronyms intact.

    Unlike ``str.title()``, the remainder of each word is left untouched and
    words that are already fully upper-case are skipped. Words are split on
    whitespace and re-joined with single spaces.
    """

    def validate(self, value: str) -> bool:
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> str:
        def capitalize_first(token: str) -> str:
            # Upper-case acronyms are passed through as they are
            if token.isupper():
                return token
            return token[0].upper() + token[1:]

        return ' '.join(capitalize_first(token) for token in value.split())
# Create a plugin to register the transformer
class TextExtensionsPlugin(TransformerPlugin):
    """Plugin named "text_extensions" that provides the title_case transformer."""

    def __init__(self):
        super().__init__("text_extensions")

    @property
    def transformers(self):
        # Maps a transformer name to a factory; the factory's argument is unused
        def build_title_case(_):
            return TitleCaseTransformer('title_case')

        return {'title_case': build_title_case}
# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(TextExtensionsPlugin())
text = "the QUICK brown fox jumps over the lazy dog"
result = TUKUY.transform(text, ["title_case"])
print(result) # "The QUICK Brown Fox Jumps Over The Lazy Dog"
Custom Data Processor
# Create a transformer for processing CSV data
class CSVParserTransformer(ChainableTransformer[str, list]):
    """Split delimiter-separated text into rows.

    With ``has_header=True`` the first line supplies the keys and every
    following line becomes a dict; otherwise each line becomes a list of
    cells. Cells are whitespace-stripped. This is a plain ``split`` on the
    delimiter: quoted fields containing the delimiter are not supported.
    """

    def __init__(self, name: str, delimiter: str = ',', has_header: bool = True):
        super().__init__(name)
        self.delimiter = delimiter
        self.has_header = has_header

    def validate(self, value: str) -> bool:
        return isinstance(value, str)

    def _transform(self, value: str, context=None) -> list:
        text = value.strip()
        # str.split never returns an empty list, so emptiness has to be
        # tested on the text itself; otherwise "" would yield [['']].
        if not text:
            return []

        rows = [
            [cell.strip() for cell in line.split(self.delimiter)]
            for line in text.split('\n')
        ]
        if not self.has_header:
            return rows

        headers, *records = rows
        # zip drops cells beyond the last header, as the indexed form did
        return [dict(zip(headers, record)) for record in records]
# Create a plugin
class DataProcessingPlugin(TransformerPlugin):
    """Plugin named "data_processing" that provides the parse_csv transformer."""

    def __init__(self):
        super().__init__("data_processing")

    @property
    def transformers(self):
        # The factory ignores its argument and uses the parser's defaults
        def build_csv_parser(_):
            return CSVParserTransformer('parse_csv')

        return {'parse_csv': build_csv_parser}
# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(DataProcessingPlugin())
csv_data = """Name,Age,Email
John Doe,30,john@example.com
Jane Smith,25,jane@example.com
Bob Johnson,45,bob@example.com"""
result = TUKUY.transform(csv_data, [
{"function": "parse_csv"}
])
print(result)
# [
# {'Name': 'John Doe', 'Age': '30', 'Email': 'john@example.com'},
# {'Name': 'Jane Smith', 'Age': '25', 'Email': 'jane@example.com'},
# {'Name': 'Bob Johnson', 'Age': '45', 'Email': 'bob@example.com'}
# ]
Using Plugins
Registering Custom Plugins
from tukuy.base import ChainableTransformer
from tukuy.plugins import TransformerPlugin
from tukuy import TukuyTransformer
# Create a geo transformation plugin
class GeoTransformer(ChainableTransformer[dict, dict]):
    """Add degrees/minutes/seconds strings to a ``{'lat', 'lon'}`` dict.

    The result keeps the decimal ``lat``/``lon`` values and adds ``lat_dms``
    and ``lon_dms`` with an N/S or E/W hemisphere suffix.
    """

    def __init__(self, name: str):
        super().__init__(name)

    def validate(self, value: dict) -> bool:
        return isinstance(value, dict) and 'lat' in value and 'lon' in value

    def _transform(self, value: dict, context=None) -> dict:
        lat = value['lat']
        lon = value['lon']

        def decimal_to_dms(coord):
            # Work in whole tenths of an arc-second so that rounding carries
            # into minutes/degrees instead of ever printing 60.0 seconds.
            total_tenths = round(coord * 36000)
            degrees, remainder = divmod(total_tenths, 36000)
            minutes, tenths = divmod(remainder, 600)
            return f"{degrees}° {minutes}' {tenths / 10:.1f}\""

        return {
            'lat': lat,
            'lon': lon,
            # The sign is expressed by the hemisphere letter, so convert abs()
            'lat_dms': decimal_to_dms(abs(lat)) + ('N' if lat >= 0 else 'S'),
            'lon_dms': decimal_to_dms(abs(lon)) + ('E' if lon >= 0 else 'W')
        }
class GeoPlugin(TransformerPlugin):
    """Plugin named "geo" that provides to_dms and prints on initialize/cleanup."""

    def __init__(self):
        super().__init__("geo")

    @property
    def transformers(self):
        # The factory's argument is unused
        def build_to_dms(_):
            return GeoTransformer('to_dms')

        return {'to_dms': build_to_dms}

    def initialize(self):
        # Run the base-class hook first, then report
        super().initialize()
        print("Geo plugin initialized")

    def cleanup(self):
        # Run the base-class hook first, then report
        super().cleanup()
        print("Geo plugin cleaned up")
# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(GeoPlugin())
coords = {'lat': 40.7128, 'lon': -74.0060} # New York
result = TUKUY.transform(coords, [
{"function": "to_dms"}
])
print(result)
# {
# 'lat': 40.7128,
# 'lon': -74.0060,
# 'lat_dms': "40° 42' 46.1\"N",
# 'lon_dms': "74° 0' 21.6\"W"
# }
Creating Plugin Collections
# Create a collection of related transformers
class AnalyticsTransformerA(ChainableTransformer[list, float]):
    """Arithmetic mean of a list of numbers; 0 for an empty list."""

    def validate(self, value: list) -> bool:
        if not isinstance(value, list):
            return False
        return all(isinstance(item, (int, float)) for item in value)

    def _transform(self, value: list, context=None) -> float:
        # Guard against division by zero on an empty list
        if not value:
            return 0
        return sum(value) / len(value)
class AnalyticsTransformerB(ChainableTransformer[list, dict]):
    """Summarize a list of numbers as count, sum, mean, min and max."""

    def validate(self, value: list) -> bool:
        if not isinstance(value, list):
            return False
        return all(isinstance(item, (int, float)) for item in value)

    def _transform(self, value: list, context=None) -> dict:
        count = len(value)
        if count == 0:
            # No data: zeros for the totals, None for the extremes
            return {"count": 0, "sum": 0, "mean": 0, "min": None, "max": None}

        total = sum(value)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": min(value),
            "max": max(value)
        }
# Group them in a plugin
class AnalyticsPlugin(TransformerPlugin):
    """Plugin named "analytics" that provides the average and stats transformers."""

    def __init__(self):
        super().__init__("analytics")

    @property
    def transformers(self):
        # Each factory ignores its argument
        def build_average(_):
            return AnalyticsTransformerA('average')

        def build_stats(_):
            return AnalyticsTransformerB('stats')

        return {'average': build_average, 'stats': build_stats}
# Usage
TUKUY = TukuyTransformer()
TUKUY.register_plugin(AnalyticsPlugin())
data = [12, 15, 23, 45, 67, 32, 18, 24]
avg = TUKUY.transform(data, ["average"])
print(f"Average: {avg}") # Average: 29.5
stats = TUKUY.transform(data, ["stats"])
print(f"Statistics: {stats}")
# Statistics: {
# 'count': 8,
# 'sum': 236,
# 'mean': 29.5,
# 'min': 12,
# 'max': 67
# }
Real-world Use Cases
Web Scraping and Data Extraction
import requests
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# Fetch a web page
url = "https://example.com/products"
# Always pass a timeout: without one, requests can wait forever on a stalled server
response = requests.get(url, timeout=10)
# Stop on 4xx/5xx responses rather than scraping an error page
response.raise_for_status()
html = response.text
# Define extraction pattern for products
pattern = {
"properties": [
{
"name": "products",
"selector": ".product-item",
"type": "array",
"properties": [
{
"name": "title",
"selector": ".product-title",
"transform": ["strip"]
},
{
"name": "price",
"selector": ".product-price",
"transform": [
"strip",
{"function": "regex_extract", "pattern": r"\$(\d+\.\d+)"}
]
},
{
"name": "rating",
"selector": ".product-rating",
"transform": [
{"function": "regex_extract", "pattern": r"(\d\.\d)\/5"}
]
},
{
"name": "inStock",
"selector": ".stock-status",
"transform": [
{"function": "equals", "value": "In Stock"}
]
}
]
},
{
"name": "pagination",
"properties": [
{
"name": "currentPage",
"selector": ".pagination .current",
"transform": ["strip"]
},
{
"name": "totalPages",
"selector": ".pagination .total",
"transform": ["strip"]
}
]
}
]
}
# Extract structured data
result = TUKUY.extract_html_with_pattern(html, pattern)
# Process the extracted data
in_stock_products = [p for p in result["products"] if p["inStock"]]
print(f"Found {len(in_stock_products)} in-stock products")
# Sort by price
sorted_products = sorted(result["products"],
key=lambda p: float(p["price"]) if p["price"] else 0)
# Display top 5 cheapest in-stock products
for product in [p for p in sorted_products if p["inStock"]][:5]:
print(f"{product['title']} - ${product['price']} - Rating: {product['rating']}/5")
Data Cleaning and Normalization
import pandas as pd
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# "title_case" (used below) is a custom transformer, so the plugin that
# provides it (TextExtensionsPlugin, defined earlier) must be registered here.
TUKUY.register_plugin(TextExtensionsPlugin())
# Load raw data
df = pd.read_csv("customer_data.csv")
# Clean and normalize data
cleaned_data = []
for _, row in df.iterrows():
# Clean and validate email
email = TUKUY.transform(row["email"], [
"strip",
"lowercase",
"email_validator"
])
# Format phone number
phone = TUKUY.transform(row["phone"], [
"strip",
{"function": "regex_replace", "pattern": r"[^\d+]", "replacement": ""},
"phone_validator"
])
# Normalize name
name = TUKUY.transform(row["name"], [
"strip",
{"function": "title_case"} # Custom transformer from earlier
])
# Parse and validate date
birth_date = TUKUY.transform(row["birth_date"], [
{"function": "parse_date", "format": "%m/%d/%Y"},
{"function": "format_date", "format": "%Y-%m-%d"}
])
# Calculate age
age = TUKUY.transform(birth_date, [
{"function": "age_calc"}
]) if birth_date else None
# Add to cleaned data if critical fields are valid
if email and name:
cleaned_data.append({
"name": name,
"email": email,
"phone": phone,
"birth_date": birth_date,
"age": age
})
# Create cleaned DataFrame
cleaned_df = pd.DataFrame(cleaned_data)
# Save cleaned data
cleaned_df.to_csv("cleaned_customer_data.csv", index=False)
print(f"Processed {len(df)} records, kept {len(cleaned_data)} valid records")
print(f"Removed {len(df) - len(cleaned_data)} invalid records")
API Data Processing
import requests
import json
from tukuy import TukuyTransformer
TUKUY = TukuyTransformer()
# "stats" (used below) is a custom transformer, so the plugin that
# provides it (AnalyticsPlugin, defined earlier) must be registered here.
TUKUY.register_plugin(AnalyticsPlugin())
# Fetch data from an API
# Always pass a timeout: without one, requests can wait forever on a stalled server
response = requests.get("https://api.example.com/data", timeout=10)
# Stop on 4xx/5xx responses rather than decoding an error body
response.raise_for_status()
api_data = response.json()
# Extract and transform specific data
pattern = {
"properties": [
{
"name": "items",
"selector": "data.items[*]",
"type": "array",
"properties": [
{"name": "id", "selector": "id"},
{"name": "title", "selector": "title", "transform": ["strip"]},
{"name": "category", "selector": "category.name"},
{"name": "price", "selector": "price.amount"},
{"name": "currency", "selector": "price.currency"}
]
},
{
"name": "metadata",
"properties": [
{"name": "totalCount", "selector": "meta.total"},
{"name": "page", "selector": "meta.page"},
{"name": "timestamp", "selector": "meta.timestamp"}
]
}
]
}
# Extract structured data
result = TUKUY.extract_json_with_pattern(json.dumps(api_data), pattern)
# Process items
for item in result["items"]:
# Format price with currency symbol
formatted_price = TUKUY.transform(float(item["price"]), [
{"function": "to_currency", "currency": item["currency"]}
])
item["formatted_price"] = formatted_price
# Categorize by price range
price = float(item["price"])
if price < 10:
item["price_category"] = "budget"
elif price < 50:
item["price_category"] = "standard"
else:
item["price_category"] = "premium"
# Group items by category
categories = {}
for item in result["items"]:
    # setdefault creates the bucket the first time a category is seen
    categories.setdefault(item["category"], []).append(item)
# Calculate statistics for each category
for category, items in categories.items():
prices = [float(item["price"]) for item in items]
stats = TUKUY.transform(prices, ["stats"]) # From the analytics plugin
print(f"Category: {category}")
print(f" Items: {len(items)}")
print(f" Average price: {stats['mean']:.2f}")
print(f" Price range: {stats['min']} - {stats['max']}")