ParQL Commands Reference
Complete reference for all ParQL commands with examples and options.
Basic Operations
parql head
Display the first N rows of a Parquet file.
# Basic usage
parql head data/sales.parquet
parql head data/sales.parquet -n 20
# With column selection
parql head data/sales.parquet -c "user_id,revenue,country" -n 10
# With filtering
parql head data/sales.parquet -w "revenue > 1000" -n 5
# With ordering
parql head data/sales.parquet -o "revenue DESC" -n 10
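These options can be combined in a single call. A sketch, assuming the flags compose as documented above:
# Columns, filter, and ordering together (assumed flag composition)
parql head data/sales.parquet -c "user_id,revenue,country" -w "revenue > 1000" -o "revenue DESC" -n 10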
parql tail
Display the last N rows of a Parquet file.
parql tail data/sales.parquet -n 20
parql tail data/sales.parquet -c "timestamp,revenue" -n 5
parql schema
Display schema and column information.
parql schema data/sales.parquet
parql count
Count total rows or rows matching a condition.
# Total row count
parql count data/sales.parquet
# Conditional count
parql count data/sales.parquet -w "country = 'US'"
parql count data/sales.parquet -w "revenue > 1000 AND is_premium = true"
parql select
Select columns and filter rows with advanced options.
# Select specific columns
parql select data/sales.parquet -c "user_id,revenue,country"
# Filter rows
parql select data/sales.parquet -w "revenue > 500 AND country IN ('US', 'UK')"
# Sort results
parql select data/sales.parquet -o "revenue DESC, timestamp ASC" -l 50
# Get distinct rows
parql select data/sales.parquet -c "country,device" --distinct
parql sample
Sample data for quick analysis.
# Sample by number of rows
parql sample data/sales.parquet --rows 1000
# Sample by fraction
parql sample data/sales.parquet --fraction 0.01
# Reproducible sampling
parql sample data/sales.parquet --rows 500 --seed 42
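The sampling flags should also compose. A sketch, assuming --seed applies to fractional sampling as well:
# Reproducible 1% sample (assumed flag composition)
parql sample data/sales.parquet --fraction 0.01 --seed 42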
Analytics & Aggregation
parql agg
Group rows by one or more columns and compute aggregations.
# Basic grouping
parql agg data/sales.parquet -g "country" -a "sum(revenue):total,count():orders"
# Multiple group columns
parql agg data/sales.parquet -g "country,device" -a "avg(revenue):avg_rev,count():cnt"
# With ordering and limits
parql agg data/sales.parquet -g "country" -a "sum(revenue):total" -o "total DESC" -l 10
# With HAVING clause
parql agg data/sales.parquet -g "user_id" -a "sum(revenue):total" -h "total > 1000"
parql window
Apply window functions for advanced analytics.
# Ranking within groups
parql window data/sales.parquet --partition "country" --order "revenue DESC" --expr "row_number() as rank"
# Running totals
parql window data/sales.parquet --partition "user_id" --order "timestamp" --expr "sum(revenue) over (rows unbounded preceding) as running_total"
# Moving averages
parql window data/sales.parquet --partition "user_id" --order "timestamp" --expr "avg(revenue) over (rows between 2 preceding and current row) as moving_avg"
parql pivot
Transform data from long to wide format.
parql pivot data/sales.parquet -i "country" -c "device" -v "revenue" -f "sum"
parql pivot data/sales.parquet -i "user_id,country" -c "product" -v "quantity" -f "avg"
parql corr
Compute pairwise correlations between numeric columns as a correlation matrix.
# All numeric columns
parql corr data/sales.parquet
# Specific columns
parql corr data/sales.parquet -c "quantity,price,revenue,discount"
# Different correlation methods
parql corr data/sales.parquet --method pearson
parql corr data/sales.parquet --method spearman
parql percentiles
Calculate detailed percentile statistics.
# Standard percentiles (25, 50, 75, 90, 95, 99)
parql percentiles data/sales.parquet -c "revenue,quantity"
# Custom percentiles
parql percentiles data/sales.parquet --percentiles "10,25,50,75,90,95,99"
# All numeric columns
parql percentiles data/sales.parquet
Data Processing
parql join
Join two Parquet files.
# Inner join
parql join data/users.parquet data/sales.parquet --on "user_id" --how inner
# Left join with column selection
parql join data/users.parquet data/sales.parquet --on "user_id" --how left -c "users.first_name,sales.revenue"
# Complex join conditions
parql join data/users.parquet data/sales.parquet --on "users.user_id = sales.user_id AND users.country = sales.country"
parql sql
Execute custom SQL, binding Parquet files to table names with -p parameters.
# Simple query
parql sql "SELECT country, SUM(revenue) FROM t GROUP BY country ORDER BY 2 DESC" -p t=data/sales.parquet
# Multi-table queries
parql sql "
SELECT u.first_name, s.total_spent
FROM users u
JOIN (SELECT user_id, SUM(revenue) as total_spent FROM sales GROUP BY user_id) s
ON u.user_id = s.user_id
WHERE s.total_spent > 1000
" -p users=data/users.parquet -p sales=data/sales.parquet
# CTEs and window functions
parql sql "
WITH ranked_sales AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY country ORDER BY revenue DESC) as rank
FROM t
)
SELECT * FROM ranked_sales WHERE rank <= 5
" -p t=data/sales.parquet
parql str
Perform string operations on text columns.
# Case conversion
parql str data/users.parquet --column first_name --operation upper
parql str data/users.parquet --column email --operation lower
parql str data/users.parquet --column name --operation title
# Trimming
parql str data/users.parquet --column name --operation strip
parql str data/users.parquet --column code --operation lstrip
# String length
parql str data/users.parquet --column email --operation length
# Pattern replacement
parql str data/users.parquet --column email --operation replace --pattern "@gmail.com" --replacement "@company.com"
# Regex extraction
parql str data/users.parquet --column email --operation extract --pattern "@([a-z]+\\.com)"
parql pattern
Search text columns for patterns using SQL LIKE or regular expressions.
# SQL LIKE patterns
parql pattern data/users.parquet --pattern "%gmail%"
parql pattern data/users.parquet --pattern "John_" --case-sensitive
# Regex patterns
parql pattern data/users.parquet --pattern "john.*@gmail" --regex
parql pattern data/users.parquet --pattern "\\d{3}-\\d{3}-\\d{4}" --regex
# Search specific columns
parql pattern data/users.parquet --pattern "%@company%" -c "email,plan"
# Count matches only
parql pattern data/users.parquet --pattern "premium" -c "plan" --count-only
Visualization & Analysis
parql plot
Create ASCII charts in the terminal.
# Histogram
parql plot data/sales.parquet -c revenue --chart-type hist --bins 20 --width 60
# Bar chart for categories
parql plot data/sales.parquet -c country --chart-type bar --width 50 --limit 10
# Scatter plot
parql plot data/sales.parquet -c revenue --chart-type scatter -x quantity --limit 100
# Line chart for trends
parql plot data/timeseries.parquet -c metric_a --chart-type line --limit 50
parql profile
Generate comprehensive data quality reports.
# Basic profiling
parql profile data/sales.parquet
# Detailed profiling with outliers
parql profile data/sales.parquet --include-all
# Profile specific columns
parql profile data/users.parquet -c "age,country,plan"
parql outliers
Detect statistical outliers in numeric data.
# Z-score method (default)
parql outliers data/sales.parquet -c revenue --method zscore --threshold 3
# IQR method
parql outliers data/sales.parquet -c revenue --method iqr --threshold 1.5
parql nulls
Analyze missing values across columns.
# All columns
parql nulls data/sales.parquet
# Specific column
parql nulls data/sales.parquet -c "discount"
parql hist
Generate histograms for numeric columns.
parql hist data/sales.parquet -c revenue --bins 20
parql hist data/sales.parquet -c quantity --bins 10
Data Quality
parql assert
Assert data quality rules and constraints.
# Basic assertions
parql assert data/sales.parquet --rule "row_count > 1000"
parql assert data/sales.parquet --rule "no_nulls(user_id)"
parql assert data/sales.parquet --rule "unique(order_id)"
# Custom SQL conditions
parql assert data/sales.parquet --rule "min(revenue) >= 0"
parql assert data/sales.parquet --rule "max(discount) <= 1.0"
# Multiple rules with fail-fast
parql assert data/sales.parquet --rule "row_count > 5000" --rule "no_nulls(order_id)" --fail-fast
parql compare-schema
Compare schemas between two Parquet files.
# Basic comparison
parql compare-schema data/old.parquet data/new.parquet
# Fail on differences (useful for CI/CD)
parql compare-schema data/expected.parquet data/actual.parquet --fail-on-change
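Both commands are natural CI gates. A minimal sketch, assuming parql exits non-zero when an assertion fails or --fail-on-change detects a difference:
#!/usr/bin/env bash
set -euo pipefail  # stop the job on the first failed check (assumes non-zero exit codes on failure)
parql assert data/sales.parquet --rule "no_nulls(order_id)" --rule "unique(order_id)" --fail-fast
parql compare-schema data/expected.parquet data/actual.parquet --fail-on-change
echo "data quality checks passed"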
parql infer-types
Analyze column contents and suggest optimal data types.
# Basic type inference
parql infer-types data/sales.parquet
# Get optimization suggestions with SQL
parql infer-types data/sales.parquet --suggest-types --sample-size 50000
System Commands
parql shell
Start an interactive REPL for exploratory analysis.
# Start shell
parql shell
# Start with profile
parql shell --profile myprofile
parql config
Manage settings and profiles.
# Set configuration
parql config set --profile production --threads 8 --memory-limit 4GB --output-format csv
# Show current config
parql config show --profile production
# Remove setting
parql config unset --profile production memory-limit
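Profiles defined here can be reused by other commands. A sketch, assuming the profile name is shared with parql shell (the analytics profile is hypothetical):
# Define a profile once, then start a shell that uses it
parql config set --profile analytics --threads 8 --memory-limit 8GB
parql shell --profile analytics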
parql cache
Manage query result caching.
# View cache statistics
parql cache info
# Clear all cached results
parql cache clear
parql write
Export query results to various formats.
# Export to CSV
parql write data/sales.parquet output.csv --format csv -c "country,revenue" -w "revenue > 1000"
# Export to Parquet with compression
parql write data/sales.parquet output.parquet --format parquet --compression zstd
# Export to JSON
parql write data/sales.parquet output.json --format json -w "country = 'US'"
# Dry run to see what would be written
parql write data/sales.parquet output.csv --format csv --dry-run
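Write flags should also compose. A sketch, assuming column selection, filtering, and compression can be combined in a single export (the output path is illustrative):
# Filtered, column-pruned, compressed export in one call (assumed flag composition)
parql write data/sales.parquet exports/us_sales.parquet --format parquet --compression zstd -c "user_id,revenue,timestamp" -w "country = 'US'"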