Paste: collect_files.py
Author: | erg |
Mode: | factor |
Date: | Fri, 25 Jul 2025 18:18:39 |
Plain Text |
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# ]
# ///
"""
Script to collect contents of multiple source files into a single output file.
Supports include/exclude patterns for files and directories.
"""
import os
import sys
import argparse
import fnmatch
from pathlib import Path
from typing import List, Set, Tuple
class FileCollector:
    """Gather source files and concatenate them into a single output file.

    Candidate files come from the registered include directories (searched
    recursively).  A file is kept when its bare name passes the
    ``fnmatch``-style include/exclude patterns and it does not live inside
    an excluded directory.  Patterns are shared by every registered
    directory.
    """

    # File-name suffixes that are never collected.
    _ALWAYS_SKIPPED_SUFFIXES = ('-tests.factor', '-docs.factor')
    # Lines starting with one of these (after stripping whitespace) are
    # dropped when a file's contents are copied to the output.
    _BOILERPLATE_PREFIXES = ('! Copyright', '! See https:')

    def __init__(self, output_file: str = "out"):
        """Create an empty collector that will write to *output_file*."""
        self.output_file = output_file
        self.include_patterns: list[str] = []
        self.exclude_patterns: list[str] = []
        self.include_dirs: list[str] = []
        self.exclude_dirs: list[str] = []
        self.collected_files: list[Path] = []

    def add_include_pattern(self, pattern: str) -> None:
        """Add a file pattern to include (e.g., '*.cpp', '*.hpp')"""
        self.include_patterns.append(pattern)

    def add_exclude_pattern(self, pattern: str) -> None:
        """Add a file pattern to exclude (e.g., 'zstd.*')"""
        self.exclude_patterns.append(pattern)

    def add_include_dir(self, directory: str) -> None:
        """Add a directory to include files from"""
        self.include_dirs.append(directory)

    def add_exclude_dir(self, directory: str) -> None:
        """Add a directory to exclude"""
        self.exclude_dirs.append(directory)

    def _in_excluded_dir(self, file_path: Path) -> bool:
        """Return True if *file_path* lives inside one of the excluded dirs.

        Matching is done on whole path components of the containing
        directory, so excluding ``test`` rejects ``a/test/b.cpp`` but not
        ``a/latest/b.cpp`` or ``a/test.cpp``.  A multi-component entry such
        as ``vm/old`` must appear as consecutive components.
        """
        dir_parts = file_path.parent.parts
        for exclude_dir in self.exclude_dirs:
            wanted = Path(exclude_dir).parts
            width = len(wanted)
            if width == 0:
                # '' and '.' name no directory component; ignore them
                # instead of excluding everything.
                continue
            if any(
                dir_parts[start:start + width] == wanted
                for start in range(len(dir_parts) - width + 1)
            ):
                return True
        return False

    def should_include_file(self, file_path: Path) -> bool:
        """Check if a file should be included based on patterns.

        Exclusions win over inclusions.  With no include patterns
        registered, every non-excluded file is accepted.
        """
        filename = file_path.name
        # Always exclude test and docs files
        if filename.endswith(self._ALWAYS_SKIPPED_SUFFIXES):
            return False
        if any(fnmatch.fnmatch(filename, pattern) for pattern in self.exclude_patterns):
            return False
        if self._in_excluded_dir(file_path):
            return False
        # If no include patterns specified, include all (except excluded)
        if not self.include_patterns:
            return True
        return any(fnmatch.fnmatch(filename, pattern) for pattern in self.include_patterns)

    def collect_files_from_dir(self, directory: str) -> None:
        """Recursively collect matching files from *directory*.

        Prints a warning and collects nothing if the directory is missing.
        """
        dir_path = Path(directory)
        if not dir_path.exists():
            print(f"Warning: Directory '{directory}' does not exist")
            return
        self.collected_files.extend(
            file_path
            for file_path in dir_path.rglob('*')
            if file_path.is_file() and self.should_include_file(file_path)
        )

    def collect_all_files(self) -> None:
        """Collect files from all included directories.

        Afterwards ``collected_files`` is de-duplicated and sorted so the
        output is stable from run to run.
        """
        for directory in self.include_dirs:
            self.collect_files_from_dir(directory)
        self.collected_files = sorted(set(self.collected_files))

    def _read_filtered(self, file_path: Path) -> str:
        """Return the text of *file_path* without boilerplate lines.

        Copyright/license lines are removed and leading blank lines are
        stripped.  Raises ``OSError`` or ``UnicodeError`` if the file cannot
        be read as UTF-8.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            kept = [
                line for line in f
                if not line.strip().startswith(self._BOILERPLATE_PREFIXES)
            ]
        return ''.join(kept).lstrip('\n')

    def write_output(self) -> None:
        """Write collected file contents to the output file.

        Each file is preceded by a ``//<path>`` header line.  A companion
        ``<output_file>.files`` listing the collected paths is written as
        well, and a short summary is printed.
        """
        with open(self.output_file, 'w', encoding='utf-8') as out:
            for i, file_path in enumerate(self.collected_files):
                # Blank line between consecutive files, then the header.
                if i > 0:
                    out.write("\n")
                out.write(f"//{file_path}\n\n")
                try:
                    content = self._read_filtered(file_path)
                except (OSError, UnicodeError) as e:
                    # Best effort: record the failure in the output and
                    # carry on with the remaining files.
                    out.write(f"ERROR reading file: {e}\n")
                    continue
                out.write(content)
                # Keep every file's section newline-terminated.
                if content and not content.endswith('\n'):
                    out.write('\n')

        # Write companion file with just file names
        filelist_name = f"{self.output_file}.files"
        with open(filelist_name, 'w', encoding='utf-8') as out:
            out.writelines(f"{file_path}\n" for file_path in self.collected_files)

        print(f"Output written to: {self.output_file}")
        print(f"File list written to: {filelist_name}")
        print(f"Total files collected: {len(self.collected_files)}")
def main():
    """Parse command-line flags, collect the selected sources and write them.

    Each flag selects one source.  When no flag is given, every source is
    selected.  Include/exclude patterns are applied per directory, so the
    '*.factor' pattern used for the basis/ directories does not pull Factor
    files out of vm/, which is meant to contribute C++ files only.
    """
    parser = argparse.ArgumentParser(description="Collect source files into a single output file")
    parser.add_argument('-o', '--output', default='out', help='Output file name (default: out)')
    parser.add_argument('--vm', action='store_true', help='Include vm/ directory C++ files (excluding zstd.*)')
    parser.add_argument('--arm64', action='store_true', help='Include basis/cpu/arm/64/ files')
    parser.add_argument('--bootstrap', action='store_true', help='Include basis/bootstrap/ files')
    parser.add_argument('--cpu', action='store_true', help='Include basis/cpu/cpu.factor')
    parser.add_argument('--compiler-constants', action='store_true', help='Include basis/compiler/constants/ files')
    args = parser.parse_args()

    # If no specific flags, use default configuration (everything).
    use_defaults = not (args.vm or args.arm64 or args.bootstrap or args.cpu or args.compiler_constants)
    if use_defaults:
        print("Using default configuration...")

    # (selected, directory, include patterns, exclude patterns)
    directory_sources = [
        (args.vm, 'vm', ['*.hpp', '*.cpp'], ['zstd.*']),
        (args.arm64, 'basis/cpu/arm/64', ['*.factor'], []),
        (args.bootstrap, 'basis/bootstrap', ['*.factor'], []),
        (args.compiler_constants, 'basis/compiler/constants', ['*.factor'], []),
    ]

    gathered = []
    for selected, directory, includes, excludes in directory_sources:
        if not (selected or use_defaults):
            continue
        # A collector's patterns apply to all of its directories, so use a
        # dedicated collector per directory to keep the patterns scoped.
        scoped = FileCollector(args.output)
        scoped.add_include_dir(directory)
        for pattern in includes:
            scoped.add_include_pattern(pattern)
        for pattern in excludes:
            scoped.add_exclude_pattern(pattern)
        scoped.collect_all_files()
        gathered.extend(scoped.collected_files)

    if args.cpu or use_defaults:
        # Add single file directly
        cpu_file = Path('basis/cpu/cpu.factor')
        if cpu_file.exists():
            gathered.append(cpu_file)
        else:
            print(f"Warning: File '{cpu_file}' does not exist")

    # Collect and write files: de-duplicate and sort for consistent output.
    collector = FileCollector(args.output)
    collector.collected_files = sorted(set(gathered))
    collector.write_output()
# Run the collector only when this file is executed as a script; importing
# it as a module has no side effects.
if __name__ == "__main__":
    main()
Author: | erg |
Mode: | factor |
Date: | Fri, 25 Jul 2025 18:22:19 |
Plain Text |
~/factor on master [$]
% cat out.files
basis/bootstrap/assembler/arm.32.factor
basis/bootstrap/assembler/arm.32.unix.factor
basis/bootstrap/assembler/arm.32.windows.factor
basis/bootstrap/assembler/arm.64.factor
basis/bootstrap/assembler/arm.64.unix.factor
basis/bootstrap/assembler/arm.64.windows.factor
basis/bootstrap/assembler/arm.factor
basis/bootstrap/assembler/arm.unix.factor
basis/bootstrap/assembler/arm.windows.factor
basis/bootstrap/assembler/ppc.32.linux.factor
basis/bootstrap/assembler/ppc.64.linux.factor
basis/bootstrap/assembler/ppc.factor
basis/bootstrap/assembler/x86.32.factor
basis/bootstrap/assembler/x86.32.unix.factor
basis/bootstrap/assembler/x86.32.windows.factor
basis/bootstrap/assembler/x86.64.factor
basis/bootstrap/assembler/x86.64.unix.factor
basis/bootstrap/assembler/x86.64.windows.factor
basis/bootstrap/assembler/x86.factor
basis/bootstrap/assembler/x86.unix.factor
basis/bootstrap/assembler/x86.windows.factor
basis/bootstrap/bootstrap-error.factor
basis/bootstrap/compiler/compiler.factor
basis/bootstrap/finish-bootstrap.factor
basis/bootstrap/finish-staging.factor
basis/bootstrap/handbook/handbook.factor
basis/bootstrap/help/help.factor
basis/bootstrap/image/download/download.factor
basis/bootstrap/image/image.factor
basis/bootstrap/image/primitives/primitives.factor
basis/bootstrap/image/upload/upload.factor
basis/bootstrap/io/io.factor
basis/bootstrap/layouts.factor
basis/bootstrap/math/math.factor
basis/bootstrap/primitives.factor
basis/bootstrap/stage1.factor
basis/bootstrap/stage2.factor
basis/bootstrap/syntax.factor
basis/bootstrap/threads/threads.factor
basis/bootstrap/tools/tools.factor
basis/bootstrap/ui/tools/tools.factor
basis/bootstrap/ui/ui.factor
basis/bootstrap/unicode/unicode.factor
basis/compiler/constants/constants.factor
basis/cpu/arm/64/64.factor
basis/cpu/arm/64/assembler/assembler.factor
vm/aging_collector.cpp
vm/aging_space.hpp
vm/alien.cpp
vm/allot.hpp
vm/arrays.cpp
vm/arrays.hpp
vm/assert.hpp
vm/atomic-cl-32.hpp
vm/atomic-cl-64.hpp
vm/atomic-gcc.hpp
vm/atomic.hpp
vm/bignum.cpp
vm/bignum.hpp
vm/bignumint.hpp
vm/bitwise_hacks.hpp
vm/booleans.hpp
vm/bump_allocator.hpp
vm/byte_arrays.cpp
vm/byte_arrays.hpp
vm/callbacks.cpp
vm/callbacks.hpp
vm/callstack.cpp
vm/callstack.hpp
vm/code_blocks.cpp
vm/code_blocks.hpp
vm/code_heap.cpp
vm/code_heap.hpp
vm/code_roots.hpp
vm/compaction.cpp
vm/contexts.cpp
vm/contexts.hpp
vm/cpu-arm.32.hpp
vm/cpu-arm.64.cpp
vm/cpu-arm.64.hpp
vm/cpu-ppc.hpp
vm/cpu-x86.32.hpp
vm/cpu-x86.64.hpp
vm/cpu-x86.cpp
vm/cpu-x86.hpp
vm/data_heap.cpp
vm/data_heap.hpp
vm/data_heap_checker.cpp
vm/data_roots.hpp
vm/debug.cpp
vm/debug.hpp
vm/dispatch.cpp
vm/dispatch.hpp
vm/entry_points.cpp
vm/errors.cpp
vm/errors.hpp
vm/factor.cpp
vm/factor.hpp
vm/fixup.hpp
vm/float_bits.hpp
vm/free_list.hpp
vm/full_collector.cpp
vm/gc.cpp
vm/gc.hpp
vm/gc_info.hpp
vm/gc_stress_test.factor
vm/generic_arrays.hpp
vm/image.cpp
vm/image.hpp
vm/inline_cache.cpp
vm/inline_cache.hpp
vm/instruction_operands.cpp
vm/instruction_operands.hpp
vm/io.cpp
vm/io.hpp
vm/jit.cpp
vm/jit.hpp
vm/layouts.hpp
vm/mach_signal.cpp
vm/mach_signal.hpp
vm/main-unix.cpp
vm/main-windows.cpp
vm/mark_bits.hpp
vm/master.hpp
vm/math.cpp
vm/math.hpp
vm/mvm-none.cpp
vm/mvm-unix.cpp
vm/mvm-windows.cpp
vm/mvm.cpp
vm/mvm.hpp
vm/nursery_collector.cpp
vm/object_start_map.cpp
vm/object_start_map.hpp
vm/objects.cpp
vm/objects.hpp
vm/os-freebsd-x86.32.hpp
vm/os-freebsd-x86.64.hpp
vm/os-freebsd.cpp
vm/os-freebsd.hpp
vm/os-genunix.cpp
vm/os-genunix.hpp
vm/os-linux-arm.32.cpp
vm/os-linux-arm.32.hpp
vm/os-linux-arm.64.hpp
vm/os-linux-ppc.32.hpp
vm/os-linux-ppc.64.hpp
vm/os-linux-x86.32.hpp
vm/os-linux-x86.64.hpp
vm/os-linux.cpp
vm/os-linux.hpp
vm/os-macos-arm.64.hpp
vm/os-macos-x86.32.hpp
vm/os-macos-x86.64.hpp
vm/os-macos.hpp
vm/os-unix.cpp
vm/os-unix.hpp
vm/os-windows-x86.32.cpp
vm/os-windows-x86.64.cpp
vm/os-windows.32.hpp
vm/os-windows.64.hpp
vm/os-windows.cpp
vm/os-windows.hpp
vm/platform.hpp
vm/primitives.cpp
vm/primitives.hpp
vm/quotations.cpp
vm/quotations.hpp
vm/run.cpp
vm/run.hpp
vm/safepoints.cpp
vm/sampling_profiler.cpp
vm/sampling_profiler.hpp
vm/segments.hpp
vm/slot_visitor.hpp
vm/strings.cpp
vm/tagged.hpp
vm/tenured_space.hpp
vm/to_tenured_collector.cpp
vm/to_tenured_collector.hpp
vm/tuples.cpp
vm/utilities.cpp
vm/utilities.hpp
vm/vm.cpp
vm/vm.hpp
vm/words.cpp
vm/write_barrier.hpp
New Annotation