Multi-File Processing¶
Process multiple files efficiently with glob patterns and file arrays.
File Arrays¶
Basic Array Usage¶
Process specific files or multiple patterns in a single query:
-- Explicit list of files: one result row per (file, language) with its node count
SELECT
    file_path,
    language,
    COUNT(*) AS nodes
FROM read_ast(
    ['src/main.py', 'src/utils.py', 'lib/helpers.js'],
    ignore_errors := true
)
GROUP BY
    file_path,
    language;
-- List of glob patterns: one result row per matched file with its node count
SELECT
    file_path,
    COUNT(*) AS nodes
FROM read_ast(
    ['src/**/*.py', 'lib/**/*.js', 'include/**/*.hpp'],
    ignore_errors := true
)
GROUP BY file_path;
Mixed Patterns and Files¶
-- Named files and glob patterns can be mixed in the same array.
-- DISTINCT collapses the per-node rows to one row per (file, language).
SELECT DISTINCT
    file_path,
    language
FROM read_ast(
    [
        'main.py',       -- a single named file
        'src/**/*.py',   -- every Python file under src/
        'config.json'    -- a second named file
    ],
    ignore_errors := true
);
Cross-Language Analysis¶
Analyze Multiple Languages¶
-- How many function definitions each language contributes, largest first
SELECT
    language,
    COUNT(*) AS function_count
FROM read_ast(
    ['**/*.py', '**/*.js', '**/*.java', '**/*.go'],
    ignore_errors := true
)
WHERE semantic_type = 'DEFINITION_FUNCTION'
GROUP BY language
ORDER BY function_count DESC;
Compare Patterns Across Languages¶
-- Which node types carry imports in each language, most frequent first
SELECT
    language,
    type,
    COUNT(*) AS count
FROM read_ast(
    ['**/*.py', '**/*.js', '**/*.java'],
    ignore_errors := true
)
WHERE semantic_type = 'EXTERNAL_IMPORT'
GROUP BY
    language,
    type
ORDER BY
    language ASC,
    count DESC;
Deduplication¶
File arrays are deduplicated automatically: a file matched by more than one entry is processed only once.
-- Overlapping entries: all three array elements below match src/main.py
-- NOTE(review): COUNT(DISTINCT file_path) reports each path once regardless;
-- to observe deduplication, compare COUNT(*) here with a single-pattern run.
SELECT
    COUNT(DISTINCT file_path) AS unique_files
FROM read_ast(
    [
        'src/**/*.py',   -- matches src/main.py
        'src/main.py',   -- the same file, named directly
        '**/*.py'        -- matches src/main.py again
    ],
    ignore_errors := true
);
Error Handling¶
Ignore Missing Files¶
-- With ignore_errors enabled, a missing file does not abort the query;
-- the files that do exist are still processed.
-- The count is aliased as `nodes`, matching the other per-file examples,
-- so the result column has a stable, readable name.
SELECT
    file_path,
    COUNT(*) AS nodes
FROM read_ast(
    [
        'existing_file.py',
        'missing_file.py',      -- does not exist; processing continues
        'another_existing.py'
    ],
    ignore_errors := true
)
GROUP BY file_path;
Validate Inputs¶
-- Both statements below are expected to fail; the comment after each one
-- shows the error message that is reported.

-- Empty array produces an error
SELECT * FROM read_ast([]);
-- Error: File pattern list cannot be empty

-- NULL values produce an error, even alongside a valid entry
SELECT * FROM read_ast(['file.py', NULL]);
-- Error: File pattern list cannot contain NULL values
Performance Optimization¶
Batch Processing¶
-- For large file sets, pass batch_size alongside the pattern list
SELECT
    file_path,
    COUNT(*) AS nodes
FROM read_ast(
    ['src/**/*.py', 'lib/**/*.py'],
    batch_size := 10,
    ignore_errors := true
)
GROUP BY file_path;
Efficient Patterns¶
-- Preferred: patterns scoped to the directories and extensions you need
SELECT *
FROM read_ast(
    ['src/**/*.py', 'lib/**/*.js'],
    ignore_errors := true
);

-- Avoid: a catch-all pattern that matches every file with an extension
SELECT *
FROM read_ast('**/*.*', ignore_errors := true);
File Statistics¶
Per-File Node Counts¶
-- Per-file totals plus counts of function and class definitions.
-- Definitions are identified by semantic_type, as in the other examples on
-- this page. A substring match on the raw `type` column would also count any
-- node whose type name merely contains "function" or "class", and raw type
-- names differ between language grammars.
-- NOTE(review): 'DEFINITION_CLASS' is assumed to be the class counterpart of
-- 'DEFINITION_FUNCTION' - confirm against the Semantic Types reference.
SELECT
    file_path,
    language,
    COUNT(*) AS total_nodes,
    COUNT(*) FILTER (WHERE semantic_type = 'DEFINITION_FUNCTION') AS functions,
    COUNT(*) FILTER (WHERE semantic_type = 'DEFINITION_CLASS') AS classes
FROM read_ast(['**/*.py', '**/*.js'], ignore_errors := true)
GROUP BY
    file_path,
    language
ORDER BY total_nodes DESC;
Language Distribution¶
-- Share of the codebase per language.
-- The window SUM runs over the grouped rows, so the denominator is the
-- total node count across all languages.
SELECT
    language,
    COUNT(DISTINCT file_path) AS files,
    COUNT(*) AS total_nodes,
    ROUND(
        COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (),
        2
    ) AS percentage
FROM read_ast(
    ['**/*.py', '**/*.js', '**/*.java'],
    ignore_errors := true
)
GROUP BY language
ORDER BY total_nodes DESC;
Combining with Aggregations¶
Find Complex Files¶
-- Files containing at least one node with more than 100 descendants,
-- ordered by the largest descendant count found in each file
SELECT
    file_path,
    language,
    COUNT(*) AS nodes,
    MAX(depth) AS max_depth,
    MAX(descendant_count) AS max_complexity
FROM read_ast(
    ['src/**/*.py', 'src/**/*.js'],
    ignore_errors := true
)
GROUP BY
    file_path,
    language
HAVING MAX(descendant_count) > 100
ORDER BY max_complexity DESC;
Cross-File Function Analysis¶
-- Top 20 named function definitions across all matched files, ranked by
-- descendant count (used here as a complexity proxy).
-- The extra sort keys break ties, so the rows returned by LIMIT are the
-- same on every run when several functions share a descendant count.
SELECT
    name,
    file_path,
    descendant_count AS complexity,
    language
FROM read_ast(['**/*.py', '**/*.js'], ignore_errors := true)
WHERE semantic_type = 'DEFINITION_FUNCTION'
    AND name IS NOT NULL
ORDER BY
    complexity DESC,
    file_path,
    start_line,
    name
LIMIT 20;
Working with Results¶
Save to Table¶
-- Materialize the multi-file result as a regular table
CREATE TABLE codebase_ast AS
SELECT *
FROM read_ast(
    ['src/**/*.py', 'lib/**/*.js', 'include/**/*.hpp'],
    ignore_errors := true
);

-- Later queries read from the stored table instead of calling read_ast again
SELECT
    language,
    COUNT(*)
FROM codebase_ast
GROUP BY language;
Export to Parquet¶
-- Write a narrow projection of the AST to a Parquet file for later analysis.
-- The format is stated explicitly; it matches what the .parquet extension implies.
COPY (
    SELECT
        file_path,
        type,
        name,
        start_line,
        semantic_type
    FROM read_ast(['**/*.py', '**/*.js'], ignore_errors := true)
) TO 'codebase_analysis.parquet' (FORMAT parquet);
Next Steps¶
- Semantic Types - Cross-language type system
- Context Extraction - Detailed semantic analysis
- Cross-Language Analysis - Comparing across languages