Revolutionize legal research and document review with Lexa’s enterprise-grade document parsing. Extract precise information from contracts, case files, and legal documents with unmatched accuracy.
Build a legal research system with keyword search over parsed document chunks:
Copy
import asyncio
from typing import Dict, List

from cerevox import AsyncLexa


class LegalRAGSystem:
    """In-memory, keyword-searchable corpus of chunks from legal documents.

    Documents are parsed through the Lexa client, split into text chunks, and
    appended to ``legal_corpus``. Searching is a case-insensitive substring
    match of the query's terms against each chunk.
    """

    def __init__(self, api_key: str):
        """Create the Lexa client and start with an empty corpus."""
        self.client = AsyncLexa(api_key=api_key)
        # One dict per chunk with keys: id, content, document, document_type.
        self.legal_corpus: List[Dict] = []

    async def ingest_legal_documents(self, document_paths: List[str]) -> str:
        """Parse the given documents and append their chunks to the corpus.

        Args:
            document_paths: Paths of the documents to parse.

        Returns:
            A status message reporting the total number of chunks now held
            in the corpus (cumulative across calls, not just this call).
        """
        async with self.client:
            documents = await self.client.parse(document_paths)

            # Create searchable chunks optimized for legal content
            for doc in documents:
                chunks = doc.get_text_chunks(
                    target_size=800,  # Good for legal context
                    tolerance=0.2,
                )

                for i, chunk in enumerate(chunks):
                    self.legal_corpus.append({
                        'id': f"{doc.filename}_{i}",
                        'content': chunk,
                        'document': doc.filename,
                        'document_type': self._classify_document(doc.filename),
                    })

        return f"Ingested {len(self.legal_corpus)} legal document chunks"

    def search_legal_precedents(self, query: str, limit: int = 5) -> List[Dict]:
        """Return the chunks that best match the query's terms.

        The query is lower-cased and split on whitespace. A chunk's relevance
        score is the number of query terms that occur in it as substrings;
        chunks scoring zero are dropped.

        Args:
            query: Free-text search query.
            limit: Maximum number of results; zero or negative yields none.

        Returns:
            Up to ``limit`` dicts with keys ``content``, ``document``,
            ``relevance_score`` and ``document_type``, highest score first.
        """
        results = []
        query_terms = query.lower().split()

        for chunk in self.legal_corpus:
            content_lower = chunk['content'].lower()

            # Count how many query terms appear in the chunk. This is a
            # presence count per term, not a true term frequency.
            score = sum(1 for term in query_terms if term in content_lower)

            if score > 0:
                results.append({
                    'content': chunk['content'],
                    'document': chunk['document'],
                    'relevance_score': score,
                    'document_type': chunk['document_type'],
                })

        # Stable sort: equally scored chunks keep their ingestion order.
        results.sort(key=lambda x: x['relevance_score'], reverse=True)
        # Clamp the limit: a negative value would otherwise slice from the
        # end and return almost every hit instead of none.
        return results[:max(limit, 0)]

    def _classify_document(self, filename: str) -> str:
        """Classify a legal document type from keywords in its filename."""
        filename_lower = filename.lower()

        if any(term in filename_lower for term in ['contract', 'agreement']):
            return 'contract'
        elif any(term in filename_lower for term in ['case', 'opinion']):
            return 'case_law'
        elif any(term in filename_lower for term in ['brief', 'motion']):
            return 'court_filing'
        else:
            return 'legal_document'


# Usage
async def main() -> List[Dict]:
    """Ingest two sample documents and search them."""
    legal_rag = LegalRAGSystem("your-api-key")
    await legal_rag.ingest_legal_documents(["contracts.pdf", "case_law.pdf"])
    return legal_rag.search_legal_precedents("intellectual property")


if __name__ == "__main__":
    # `await` is only valid inside a coroutine, so drive the example through
    # asyncio.run() instead of awaiting at module top level.
    results = asyncio.run(main())