#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# ]
# ///
"""
Script to collect contents of multiple source files into a single output file.
Supports include/exclude patterns for files and directories.
"""

import os
import sys
import argparse
import fnmatch
from pathlib import Path
from typing import List, Set, Tuple

# File name suffixes that are never collected, regardless of the patterns.
_ALWAYS_EXCLUDED_SUFFIXES = ('-tests.factor', '-docs.factor')

# Lines starting with one of these prefixes (after stripping surrounding
# whitespace) are dropped from the collected contents.
_LICENSE_LINE_PREFIXES = ('! Copyright', '! See https:')


class FileCollector:
    """Gather files selected by patterns and concatenate them into one file.

    Typical use: register include directories and file-name patterns, call
    ``collect_all_files()``, then ``write_output()``.

    Include and exclude patterns are matched against the file *name* only
    (``fnmatch`` syntax) and apply to every include directory alike.
    """

    def __init__(self, output_file: str = "out"):
        self.output_file = output_file
        self.include_patterns: List[str] = []
        self.exclude_patterns: List[str] = []
        self.include_dirs: List[str] = []
        self.exclude_dirs: List[str] = []
        self.collected_files: List[Path] = []

    def add_include_pattern(self, pattern: str):
        """Add a file pattern to include (e.g., '*.cpp', '*.hpp')"""
        self.include_patterns.append(pattern)

    def add_exclude_pattern(self, pattern: str):
        """Add a file pattern to exclude (e.g., 'zstd.*')"""
        self.exclude_patterns.append(pattern)

    def add_include_dir(self, directory: str):
        """Add a directory to include files from"""
        self.include_dirs.append(directory)

    def add_exclude_dir(self, directory: str):
        """Add a directory to exclude"""
        self.exclude_dirs.append(directory)

    @staticmethod
    def _is_inside_dir(file_path: Path, directory: str) -> bool:
        """Return True if `directory` occurs as whole components of the
        directory part of `file_path`.

        Components are compared rather than raw substrings so that excluding
        'test' does not also drop 'latest/a.cpp' or 'vm/test.cpp'.
        """
        wanted = Path(directory).parts
        if not wanted:
            # An empty directory name matches nothing.
            return False
        dir_parts = file_path.parent.parts
        width = len(wanted)
        return any(
            dir_parts[start:start + width] == wanted
            for start in range(len(dir_parts) - width + 1)
        )

    def should_include_file(self, file_path: Path) -> bool:
        """Check if a file should be included based on patterns.

        Exclusions win over inclusions. When no include pattern has been
        registered, every file that is not excluded is accepted.
        """
        filename = file_path.name

        # Always exclude test and docs files
        if filename.endswith(_ALWAYS_EXCLUDED_SUFFIXES):
            return False

        # Check exclude patterns first
        if any(fnmatch.fnmatch(filename, pattern) for pattern in self.exclude_patterns):
            return False

        # Check exclude directories
        if any(self._is_inside_dir(file_path, excluded) for excluded in self.exclude_dirs):
            return False

        # If no include patterns specified, include all (except excluded)
        if not self.include_patterns:
            return True

        # Check include patterns
        return any(fnmatch.fnmatch(filename, pattern) for pattern in self.include_patterns)

    def collect_files_from_dir(self, directory: str):
        """Recursively collect files from a directory.

        A missing directory only produces a warning on stdout.
        """
        dir_path = Path(directory)
        if not dir_path.exists():
            print(f"Warning: Directory '{directory}' does not exist")
            return

        self.collected_files.extend(
            file_path
            for file_path in dir_path.rglob('*')
            if file_path.is_file() and self.should_include_file(file_path)
        )

    def collect_all_files(self):
        """Collect files from all included directories.

        The result is de-duplicated and sorted for consistent output. The
        output file and its companion file list are left out so that a
        previous run's output is never read back while it is being rewritten.
        """
        for directory in self.include_dirs:
            self.collect_files_from_dir(directory)

        generated = {
            Path(self.output_file).resolve(),
            Path(f"{self.output_file}.files").resolve(),
        }
        self.collected_files = sorted(
            file_path
            for file_path in set(self.collected_files)
            if file_path.resolve() not in generated
        )

    def write_output(self):
        """Write collected file contents to the output file.

        Each file is preceded by a ``//<path>`` header line and a blank line;
        copyright/license comment lines are stripped. A companion
        ``<output>.files`` file lists the collected paths, one per line.
        A file that cannot be read gets an ``ERROR reading file`` line in the
        output instead of aborting the whole run.
        """
        # Write main output file with contents
        with open(self.output_file, 'w', encoding='utf-8') as out:
            for index, file_path in enumerate(self.collected_files):
                # Write file header with C++ style comment
                if index > 0:
                    out.write("\n")
                out.write(f"//{file_path}\n\n")

                # Read the whole file before writing anything so that a
                # failure part-way through never leaves partial contents.
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        # Filter out copyright and license lines
                        kept_lines = [
                            line for line in f
                            if not line.strip().startswith(_LICENSE_LINE_PREFIXES)
                        ]
                except (OSError, UnicodeError) as e:
                    out.write(f"ERROR reading file: {e}\n")
                    continue

                content = ''.join(kept_lines).lstrip('\n')
                out.write(content)
                if content and not content.endswith('\n'):
                    out.write('\n')

        # Write companion file with just file names
        filelist_name = f"{self.output_file}.files"
        with open(filelist_name, 'w', encoding='utf-8') as out:
            out.writelines(f"{file_path}\n" for file_path in self.collected_files)

        print(f"Output written to: {self.output_file}")
        print(f"File list written to: {filelist_name}")
        print(f"Total files collected: {len(self.collected_files)}")


def main():
    """Parse the command line, configure a FileCollector and run it.

    When none of the selection flags is given, all of them are enabled
    (the default configuration).
    """
    parser = argparse.ArgumentParser(description="Collect source files into a single output file")
    parser.add_argument('-o', '--output', default='out', help='Output file name (default: out)')
    parser.add_argument('--vm', action='store_true', help='Include vm/ directory C++ files (excluding zstd.*)')
    parser.add_argument('--arm64', action='store_true', help='Include basis/cpu/arm/64/ files')
    parser.add_argument('--bootstrap', action='store_true', help='Include basis/bootstrap/ files')
    parser.add_argument('--cpu', action='store_true', help='Include basis/cpu/cpu.factor')
    parser.add_argument('--compiler-constants', action='store_true',
                        help='Include basis/compiler/constants/ files')

    args = parser.parse_args()

    # If no specific flags, use default configuration: everything enabled.
    if not (args.vm or args.arm64 or args.bootstrap or args.cpu or args.compiler_constants):
        print("Using default configuration...")
        args.vm = True
        args.arm64 = True
        args.bootstrap = True
        args.cpu = True
        args.compiler_constants = True

    collector = FileCollector(args.output)

    # Configure for vm/ directory
    if args.vm:
        collector.add_include_dir('vm')
        collector.add_include_pattern('*.hpp')
        collector.add_include_pattern('*.cpp')
        collector.add_exclude_pattern('zstd.*')

    # Configure for basis/cpu/arm/64/
    if args.arm64:
        collector.add_include_dir('basis/cpu/arm/64')
        collector.add_include_pattern('*.factor')

    # Configure for basis/bootstrap/
    if args.bootstrap:
        collector.add_include_dir('basis/bootstrap')
        collector.add_include_pattern('*.factor')

    # Configure for basis/cpu/cpu.factor
    if args.cpu:
        # Add single file directly; it bypasses the include/exclude patterns.
        cpu_file = Path('basis/cpu/cpu.factor')
        if cpu_file.exists():
            collector.collected_files.append(cpu_file)

    # Configure for basis/compiler/constants/
    if args.compiler_constants:
        collector.add_include_dir('basis/compiler/constants')
        collector.add_include_pattern('*.factor')

    # Collect and write files
    collector.collect_all_files()
    collector.write_output()


if __name__ == "__main__":
    main()