async def optimize_batch_processing(file_paths: list, batch_size: int = 10):
    """Parse files in fixed-size batches and collect all resulting documents.

    Args:
        file_paths: Paths of the files to parse.
        batch_size: Number of files sent to the service per request.
            Defaults to 10; tune based on typical file sizes.

    Returns:
        A list of all parsed documents, in input order.
    """
    async with AsyncLexa(api_key="your-api-key") as client:
        all_documents = []
        for i in range(0, len(file_paths), batch_size):
            batch = file_paths[i:i + batch_size]
            # Parse one batch; DEFAULT mode is fast processing for most use cases.
            documents = await client.parse(
                batch,
                mode=ProcessingMode.DEFAULT,
            )
            all_documents.extend(documents)
            # Brief pause between batches to avoid rate limits — skipped after
            # the final batch so callers don't pay an unnecessary 100 ms delay.
            if i + batch_size < len(file_paths):
                await asyncio.sleep(0.1)
        return all_documents
async def memory_efficient_processing(file_paths, batch_size: int = 5,
                                      target_size: int = 512):
    """Parse files batch-by-batch, persisting results immediately to cap memory.

    Each batch is parsed, chunked, and stored in the vector DB before the next
    batch starts, so only one batch's documents are ever held in memory.

    Args:
        file_paths: Paths of the files to parse.
        batch_size: Files per parse request. Defaults to 5.
        target_size: Target text-chunk size passed to ``get_all_text_chunks``.
            Defaults to 512.
    """
    async with AsyncLexa(api_key="your-api-key") as client:
        for batch in chunk_files(file_paths, batch_size):
            documents = await client.parse(batch)
            # Chunk and store immediately so this batch's data can be freed
            # before the next batch is parsed.
            chunks = documents.get_all_text_chunks(target_size=target_size)
            await store_in_vector_db(chunks)
            # Drop references explicitly so the large objects are collectable
            # right away, keeping peak memory to roughly one batch.
            del documents, chunks


def chunk_files(files, batch_size):
    """Yield successive slices of *files*, each at most *batch_size* long."""
    for i in range(0, len(files), batch_size):
        yield files[i:i + batch_size]