Paste: collect_files.py

Author: erg
Mode: factor
Date: Fri, 25 Jul 2025 18:18:39
Plain Text |
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# ]
# ///
"""
Script to collect contents of multiple source files into a single output file.
Supports include/exclude patterns for files and directories.
"""

import os
import sys
import argparse
import fnmatch
from pathlib import Path
from typing import List, Set, Tuple


class FileCollector:
    """Collect source files from directories and concatenate them into one file.

    Usage: register directories and filename patterns with the ``add_*``
    methods, call :meth:`collect_all_files` to populate ``collected_files``,
    then call :meth:`write_output` to produce the combined file plus a
    companion ``<output_file>.files`` listing.

    Filename patterns use :mod:`fnmatch` syntax and are matched against the
    file's base name only. Include/exclude patterns apply to every included
    directory of this collector.
    """

    def __init__(self, output_file: str = "out"):
        """Create an empty collector that will write to *output_file*."""
        self.output_file = output_file
        self.include_patterns: list[str] = []
        self.exclude_patterns: list[str] = []
        self.include_dirs: list[str] = []
        self.exclude_dirs: list[str] = []
        self.collected_files: list[Path] = []

    def add_include_pattern(self, pattern: str):
        """Add a file pattern to include (e.g., '*.cpp', '*.hpp')"""
        self.include_patterns.append(pattern)

    def add_exclude_pattern(self, pattern: str):
        """Add a file pattern to exclude (e.g., 'zstd.*')"""
        self.exclude_patterns.append(pattern)

    def add_include_dir(self, directory: str):
        """Add a directory to include files from"""
        self.include_dirs.append(directory)

    def add_exclude_dir(self, directory: str):
        """Add a directory to exclude.

        *directory* may be a single name ('build') or a multi-component
        path ('basis/cpu'); it is matched against whole path components.
        """
        self.exclude_dirs.append(directory)

    @staticmethod
    def _is_under_dir(file_path: Path, directory: str) -> bool:
        """Return True if *directory* occurs as consecutive directory
        components of *file_path* (the file name itself is not considered)."""
        wanted = Path(directory).parts
        if not wanted:
            # '' and '.' carry no components; treat them as "excludes nothing"
            return False
        dirs = file_path.parent.parts
        span = len(wanted)
        return any(
            dirs[start:start + span] == wanted
            for start in range(len(dirs) - span + 1)
        )

    def should_include_file(self, file_path: Path) -> bool:
        """Check if a file should be included based on patterns.

        Exclusions win over inclusions. With no include patterns registered,
        every file that is not excluded is accepted.
        """
        filename = file_path.name

        # Always exclude test and docs files
        if filename.endswith(('-tests.factor', '-docs.factor')):
            return False

        # Check exclude patterns first
        if any(fnmatch.fnmatch(filename, pattern) for pattern in self.exclude_patterns):
            return False

        # Check exclude directories. Whole path components are compared so
        # that excluding 'test' does not also drop 'latest.cpp' or 'contest/'.
        if any(self._is_under_dir(file_path, excluded) for excluded in self.exclude_dirs):
            return False

        # If no include patterns specified, include all (except excluded)
        if not self.include_patterns:
            return True

        # Check include patterns
        return any(fnmatch.fnmatch(filename, pattern) for pattern in self.include_patterns)

    def collect_files_from_dir(self, directory: str):
        """Recursively collect files from a directory.

        Prints a warning and collects nothing if *directory* does not exist.
        """
        dir_path = Path(directory)
        if not dir_path.exists():
            print(f"Warning: Directory '{directory}' does not exist")
            return

        self.collected_files.extend(
            file_path
            for file_path in dir_path.rglob('*')
            if file_path.is_file() and self.should_include_file(file_path)
        )

    def collect_all_files(self):
        """Collect files from all included directories.

        Afterwards ``collected_files`` is de-duplicated and sorted. Paths
        already present in ``collected_files`` before the call are kept.
        """
        for directory in self.include_dirs:
            self.collect_files_from_dir(directory)

        # The output file is truncated before inputs are read, so it (and its
        # companion list) must never be treated as an input, e.g. when a
        # previous run wrote them inside one of the included directories.
        own_outputs = {
            Path(self.output_file).resolve(),
            Path(f"{self.output_file}.files").resolve(),
        }

        # Remove duplicates and sort files for consistent output
        self.collected_files = sorted(
            {path for path in self.collected_files if path.resolve() not in own_outputs}
        )

    @staticmethod
    def _read_filtered(file_path: Path) -> str:
        """Return the text of *file_path* without copyright/license comment
        lines and without leading blank lines."""
        skipped_prefixes = ('! Copyright', '! See https:')
        with open(file_path, 'r', encoding='utf-8') as f:
            kept = [line for line in f if not line.strip().startswith(skipped_prefixes)]
        return ''.join(kept).lstrip('\n')

    def write_output(self):
        """Write collected file contents to output file.

        Each file is preceded by a ``//<path>`` header line and a blank line;
        files are separated by one blank line. A file that cannot be read or
        decoded as UTF-8 is replaced by an ``ERROR reading file`` line rather
        than aborting the run. A second file, ``<output_file>.files``, lists
        the collected paths one per line.
        """
        # Write main output file with contents
        with open(self.output_file, 'w', encoding='utf-8') as out:
            for i, file_path in enumerate(self.collected_files):
                # Write file header with C++ style comment
                if i > 0:
                    out.write("\n")
                out.write(f"//{file_path}\n\n")

                # Read the whole file before writing so a failure never
                # leaves a partially copied body in the output.
                try:
                    content = self._read_filtered(file_path)
                except (OSError, UnicodeError) as e:
                    out.write(f"ERROR reading file: {e}\n")
                    continue

                out.write(content)
                if content and not content.endswith('\n'):
                    out.write('\n')

        # Write companion file with just file names
        filelist_name = f"{self.output_file}.files"
        with open(filelist_name, 'w', encoding='utf-8') as out:
            out.writelines(f"{file_path}\n" for file_path in self.collected_files)

        print(f"Output written to: {self.output_file}")
        print(f"File list written to: {filelist_name}")
        print(f"Total files collected: {len(self.collected_files)}")


def main():
    """Command-line entry point.

    Each flag selects one source (a directory with its own filename
    patterns, or the single file basis/cpu/cpu.factor). With no flag given,
    every source is selected. All selected files are written to the output
    file given by -o/--output.
    """
    parser = argparse.ArgumentParser(description="Collect source files into a single output file")
    parser.add_argument('-o', '--output', default='out', help='Output file name (default: out)')
    parser.add_argument('--vm', action='store_true', help='Include vm/ directory C++ files (excluding zstd.*)')
    parser.add_argument('--arm64', action='store_true', help='Include basis/cpu/arm/64/ files')
    parser.add_argument('--bootstrap', action='store_true', help='Include basis/bootstrap/ files')
    parser.add_argument('--cpu', action='store_true', help='Include basis/cpu/cpu.factor')
    parser.add_argument('--compiler-constants', action='store_true', help='Include basis/compiler/constants/ files')

    args = parser.parse_args()

    cpp_patterns = ('*.hpp', '*.cpp')
    factor_patterns = ('*.factor',)
    # (argparse attribute, directory, include patterns, exclude patterns)
    directory_sources = [
        ('vm', 'vm', cpp_patterns, ('zstd.*',)),
        ('arm64', 'basis/cpu/arm/64', factor_patterns, ()),
        ('bootstrap', 'basis/bootstrap', factor_patterns, ()),
        ('compiler_constants', 'basis/compiler/constants', factor_patterns, ()),
    ]

    # If no specific flags, use default configuration (= every source)
    all_flags = [flag for flag, *_ in directory_sources] + ['cpu']
    use_defaults = not any(getattr(args, flag) for flag in all_flags)
    if use_defaults:
        print("Using default configuration...")

    collector = FileCollector(args.output)

    # Scan each directory with its own collector so its patterns stay scoped
    # to that directory. Sharing one collector would let '*.factor' (meant
    # for basis/) also pull .factor files out of vm/.
    for flag, directory, includes, excludes in directory_sources:
        if not (use_defaults or getattr(args, flag)):
            continue
        scoped = FileCollector(args.output)
        scoped.add_include_dir(directory)
        for pattern in includes:
            scoped.add_include_pattern(pattern)
        for pattern in excludes:
            scoped.add_exclude_pattern(pattern)
        scoped.collect_all_files()
        collector.collected_files.extend(scoped.collected_files)

    # basis/cpu/cpu.factor is a single file, added directly
    if use_defaults or args.cpu:
        cpu_file = Path('basis/cpu/cpu.factor')
        if cpu_file.exists():
            collector.collected_files.append(cpu_file)
        else:
            print(f"Warning: File '{cpu_file}' does not exist")

    # No include dirs are registered on the merged collector, so this call
    # only de-duplicates and sorts what the scoped collectors found.
    collector.collect_all_files()
    collector.write_output()

# Run the collector only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Annotation: output

Author: erg
Mode: factor
Date: Fri, 25 Jul 2025 18:22:19
Plain Text |
~/factor on master [$!]
% cat out.files
basis/bootstrap/assembler/arm.32.factor
basis/bootstrap/assembler/arm.32.unix.factor
basis/bootstrap/assembler/arm.32.windows.factor
basis/bootstrap/assembler/arm.64.factor
basis/bootstrap/assembler/arm.64.unix.factor
basis/bootstrap/assembler/arm.64.windows.factor
basis/bootstrap/assembler/arm.factor
basis/bootstrap/assembler/arm.unix.factor
basis/bootstrap/assembler/arm.windows.factor
basis/bootstrap/assembler/ppc.32.linux.factor
basis/bootstrap/assembler/ppc.64.linux.factor
basis/bootstrap/assembler/ppc.factor
basis/bootstrap/assembler/x86.32.factor
basis/bootstrap/assembler/x86.32.unix.factor
basis/bootstrap/assembler/x86.32.windows.factor
basis/bootstrap/assembler/x86.64.factor
basis/bootstrap/assembler/x86.64.unix.factor
basis/bootstrap/assembler/x86.64.windows.factor
basis/bootstrap/assembler/x86.factor
basis/bootstrap/assembler/x86.unix.factor
basis/bootstrap/assembler/x86.windows.factor
basis/bootstrap/bootstrap-error.factor
basis/bootstrap/compiler/compiler.factor
basis/bootstrap/finish-bootstrap.factor
basis/bootstrap/finish-staging.factor
basis/bootstrap/handbook/handbook.factor
basis/bootstrap/help/help.factor
basis/bootstrap/image/download/download.factor
basis/bootstrap/image/image.factor
basis/bootstrap/image/primitives/primitives.factor
basis/bootstrap/image/upload/upload.factor
basis/bootstrap/io/io.factor
basis/bootstrap/layouts.factor
basis/bootstrap/math/math.factor
basis/bootstrap/primitives.factor
basis/bootstrap/stage1.factor
basis/bootstrap/stage2.factor
basis/bootstrap/syntax.factor
basis/bootstrap/threads/threads.factor
basis/bootstrap/tools/tools.factor
basis/bootstrap/ui/tools/tools.factor
basis/bootstrap/ui/ui.factor
basis/bootstrap/unicode/unicode.factor
basis/compiler/constants/constants.factor
basis/cpu/arm/64/64.factor
basis/cpu/arm/64/assembler/assembler.factor
vm/aging_collector.cpp
vm/aging_space.hpp
vm/alien.cpp
vm/allot.hpp
vm/arrays.cpp
vm/arrays.hpp
vm/assert.hpp
vm/atomic-cl-32.hpp
vm/atomic-cl-64.hpp
vm/atomic-gcc.hpp
vm/atomic.hpp
vm/bignum.cpp
vm/bignum.hpp
vm/bignumint.hpp
vm/bitwise_hacks.hpp
vm/booleans.hpp
vm/bump_allocator.hpp
vm/byte_arrays.cpp
vm/byte_arrays.hpp
vm/callbacks.cpp
vm/callbacks.hpp
vm/callstack.cpp
vm/callstack.hpp
vm/code_blocks.cpp
vm/code_blocks.hpp
vm/code_heap.cpp
vm/code_heap.hpp
vm/code_roots.hpp
vm/compaction.cpp
vm/contexts.cpp
vm/contexts.hpp
vm/cpu-arm.32.hpp
vm/cpu-arm.64.cpp
vm/cpu-arm.64.hpp
vm/cpu-ppc.hpp
vm/cpu-x86.32.hpp
vm/cpu-x86.64.hpp
vm/cpu-x86.cpp
vm/cpu-x86.hpp
vm/data_heap.cpp
vm/data_heap.hpp
vm/data_heap_checker.cpp
vm/data_roots.hpp
vm/debug.cpp
vm/debug.hpp
vm/dispatch.cpp
vm/dispatch.hpp
vm/entry_points.cpp
vm/errors.cpp
vm/errors.hpp
vm/factor.cpp
vm/factor.hpp
vm/fixup.hpp
vm/float_bits.hpp
vm/free_list.hpp
vm/full_collector.cpp
vm/gc.cpp
vm/gc.hpp
vm/gc_info.hpp
vm/gc_stress_test.factor
vm/generic_arrays.hpp
vm/image.cpp
vm/image.hpp
vm/inline_cache.cpp
vm/inline_cache.hpp
vm/instruction_operands.cpp
vm/instruction_operands.hpp
vm/io.cpp
vm/io.hpp
vm/jit.cpp
vm/jit.hpp
vm/layouts.hpp
vm/mach_signal.cpp
vm/mach_signal.hpp
vm/main-unix.cpp
vm/main-windows.cpp
vm/mark_bits.hpp
vm/master.hpp
vm/math.cpp
vm/math.hpp
vm/mvm-none.cpp
vm/mvm-unix.cpp
vm/mvm-windows.cpp
vm/mvm.cpp
vm/mvm.hpp
vm/nursery_collector.cpp
vm/object_start_map.cpp
vm/object_start_map.hpp
vm/objects.cpp
vm/objects.hpp
vm/os-freebsd-x86.32.hpp
vm/os-freebsd-x86.64.hpp
vm/os-freebsd.cpp
vm/os-freebsd.hpp
vm/os-genunix.cpp
vm/os-genunix.hpp
vm/os-linux-arm.32.cpp
vm/os-linux-arm.32.hpp
vm/os-linux-arm.64.hpp
vm/os-linux-ppc.32.hpp
vm/os-linux-ppc.64.hpp
vm/os-linux-x86.32.hpp
vm/os-linux-x86.64.hpp
vm/os-linux.cpp
vm/os-linux.hpp
vm/os-macos-arm.64.hpp
vm/os-macos-x86.32.hpp
vm/os-macos-x86.64.hpp
vm/os-macos.hpp
vm/os-unix.cpp
vm/os-unix.hpp
vm/os-windows-x86.32.cpp
vm/os-windows-x86.64.cpp
vm/os-windows.32.hpp
vm/os-windows.64.hpp
vm/os-windows.cpp
vm/os-windows.hpp
vm/platform.hpp
vm/primitives.cpp
vm/primitives.hpp
vm/quotations.cpp
vm/quotations.hpp
vm/run.cpp
vm/run.hpp
vm/safepoints.cpp
vm/sampling_profiler.cpp
vm/sampling_profiler.hpp
vm/segments.hpp
vm/slot_visitor.hpp
vm/strings.cpp
vm/tagged.hpp
vm/tenured_space.hpp
vm/to_tenured_collector.cpp
vm/to_tenured_collector.hpp
vm/tuples.cpp
vm/utilities.cpp
vm/utilities.hpp
vm/vm.cpp
vm/vm.hpp
vm/words.cpp
vm/write_barrier.hpp

New Annotation

Summary:
Author:
Mode:
Body: