diff --git a/auto-insurance-claims-agentic-RAG/.env.example b/auto-insurance-claims-agentic-RAG/.env.example new file mode 100644 index 00000000..99c13086 --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/.env.example @@ -0,0 +1,4 @@ +GOOGLE_API_KEY="AI..." +LLAMA_CLOUD_API_KEY = "llx-..." +PROJECT_NAME ="..." +ORGANIZATION_ID="..." \ No newline at end of file diff --git a/auto-insurance-claims-agentic-RAG/.gitignore b/auto-insurance-claims-agentic-RAG/.gitignore new file mode 100644 index 00000000..dc12cb74 --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/.gitignore @@ -0,0 +1,2 @@ +.env +__pycache__ \ No newline at end of file diff --git a/auto-insurance-claims-agentic-RAG/README.md b/auto-insurance-claims-agentic-RAG/README.md new file mode 100644 index 00000000..38db97ca --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/README.md @@ -0,0 +1,86 @@ +![Architecture diagram](./images/Architecture_diagram.png) + +# Auto Insurance Claim Processor + +This application processes auto insurance claims using LlamaIndex and Gemini LLM. It can be run as a command-line tool or as a Streamlit web application. + +## Setup + +1. Clone this repository +2. Install the required dependencies: + ``` + pip install -r requirements.txt + ``` +3. Copy the `.env.example` file to `.env` and fill in your API keys: + ``` + cp .env.example .env + ``` +4. Edit the `.env` file with your actual API keys and configuration + +## Required API Keys + +- **Google API Key** for Gemini LLM +- **LlamaCloud API Key** for vector database access +- **Project Name** for your LlamaCloud project +- **Organization ID** for your LlamaCloud organization + +## Directory Structure + +Make sure you have a `data` directory with sample claim JSON files: + +``` +data/ + john.json + alice.json + # ... 
other claim files +``` + +## Running the Command Line Tool + +Test a claim from the command line: + +``` +python test_workflow.py --file data/john.json +``` + +## Running the Streamlit Web App + +Start the Streamlit app: + +``` +streamlit run app.py +``` + +The app will be available at `http://localhost:8501` + +## Features + +- Process claims from JSON files +- Manual claim entry form +- View processing logs +- View claim decisions with coverage, deductible, and payout information + +## Sample Claim JSON Format + +```json +{ + "claim_number": "CL1234567", + "policy_number": "POL987654", + "claimant_name": "John Doe", + "date_of_loss": "2023-11-15", + "loss_description": "Rear-end collision at stop light", + "estimated_repair_cost": 3500.00, + "vehicle_details": "2018 Honda Accord, VIN: 1HGCV1F34JA123456" +} +``` +## Sample claim decision + +```json +{ +"claim_number":"CLAIM-0001", +"covered":true, +"deductible":0, +"recommended_payout":0, +"notes":"Collision is covered, subject to exclusions. Exclusion 1 applies as the vehicle was used to deliver pizzas for compensation."
+} +``` \ No newline at end of file diff --git a/auto-insurance-claims-agentic-RAG/app.py b/auto-insurance-claims-agentic-RAG/app.py new file mode 100644 index 00000000..3d33d4a0 --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/app.py @@ -0,0 +1,139 @@ +import streamlit as st +import json +import os +from insurance_claim_processor import process_claim, ClaimInfo + +st.set_page_config(page_title="Auto Insurance Claim Processor", layout="wide") + +st.title("Auto Insurance Claim Processor") + +# Create tabs for different ways to input claims +tab1, tab2 = st.tabs(["Process File", "Manual Entry"]) + +with tab1: + st.header("Process Claim from JSON File") + + # File uploader section with its own button + st.subheader("Upload a claim file") + uploaded_file = st.file_uploader("Upload a claim JSON file", type=["json"]) + process_uploaded = st.button("Process Uploaded File", key="process_uploaded", disabled=uploaded_file is None) + + # Add some space between sections + st.markdown("---") + + # Sample files section with its own button + st.subheader("Or use a sample file") + sample_files = [] + + # Check for sample files in the data directory + if os.path.exists("data"): + sample_files = [f for f in os.listdir("data") if f.endswith(".json")] + + if sample_files: + selected_sample = st.selectbox("Select a sample claim", sample_files) + use_sample = st.button("Process Selected Sample", key="use_sample") + else: + st.warning("No sample files found in 'data' directory.") + use_sample = False + selected_sample = None + + # Handle file processing for uploaded file + if process_uploaded and uploaded_file is not None: + # Save uploaded file temporarily + with open("temp_claim.json", "wb") as f: + f.write(uploaded_file.getbuffer()) + + with st.spinner("Processing claim..."): + decision, logs = process_claim(claim_json_path="temp_claim.json") + + # Display results + st.success("Claim processed successfully!") + + # Create two columns + col1, col2 = st.columns(2) + + with col1: + 
st.subheader("Claim Decision") + st.json(json.loads(decision.model_dump_json())) + + with col2: + st.subheader("Processing Logs") + for log in logs: + st.text(log) + + # Clean up temp file + if os.path.exists("temp_claim.json"): + os.remove("temp_claim.json") + + # Handle sample file processing + elif use_sample and selected_sample: + sample_path = os.path.join("data", selected_sample) + + with st.spinner("Processing sample claim..."): + decision, logs = process_claim(claim_json_path=sample_path) + + # Display results + st.success(f"Sample claim {selected_sample} processed successfully!") + + # Create two columns + col1, col2 = st.columns(2) + + with col1: + st.subheader("Claim Decision") + st.json(json.loads(decision.model_dump_json())) + + with col2: + st.subheader("Processing Logs") + for log in logs: + st.text(log) + +with tab2: + st.header("Enter Claim Details Manually") + + # Form for manual entry + with st.form("claim_form"): + claim_number = st.text_input("Claim Number") + policy_number = st.text_input("Policy Number") + claimant_name = st.text_input("Claimant Name") + date_of_loss = st.date_input("Date of Loss") + loss_description = st.text_area("Loss Description") + estimated_repair_cost = st.number_input("Estimated Repair Cost", min_value=0.0, format="%.2f") + vehicle_details = st.text_input("Vehicle Details (Optional)") + + submit_form = st.form_submit_button("Submit Claim") + + if submit_form: + # Create claim data dictionary + claim_data = { + "claim_number": claim_number, + "policy_number": policy_number, + "claimant_name": claimant_name, + "date_of_loss": date_of_loss.strftime("%Y-%m-%d"), + "loss_description": loss_description, + "estimated_repair_cost": estimated_repair_cost + } + + if vehicle_details: + claim_data["vehicle_details"] = vehicle_details + + with st.spinner("Processing claim..."): + decision, logs = process_claim(claim_data=claim_data) + + # Display results + st.success("Claim processed successfully!") + + # Create two columns + 
col1, col2 = st.columns(2) + + with col1: + st.subheader("Claim Decision") + st.json(json.loads(decision.model_dump_json())) + + with col2: + st.subheader("Processing Logs") + for log in logs: + st.text(log) + +# Add footer with information +st.markdown("---") +st.markdown("Insurance Claim Processor | Powered by LlamaIndex and Gemini") \ No newline at end of file diff --git a/auto-insurance-claims-agentic-RAG/auto_insurance.ipynb b/auto-insurance-claims-agentic-RAG/auto_insurance.ipynb new file mode 100644 index 00000000..14223a45 --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/auto_insurance.ipynb @@ -0,0 +1,753 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import necessary files\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "from typing import List, Optional\n", + "from pydantic import BaseModel, Field\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environmental variables\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# LOAD Environmental variables\n", + "gemini_key = os.getenv(\"GOOGLE_API_KEY\")\n", + "llama_cloud_key = os.getenv(\"LLAMA_CLOUD_API_KEY\")\n", + "project_name= os.getenv(\"PROJECT_NAME\")\n", + "organization_id= os.getenv(\"ORGANIZATION_ID\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define claim schemas" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "# ClaimInfo Schema\n", + "class ClaimInfo(BaseModel):\n", + " \"\"\"Extracted Insurance claim information.\"\"\"\n", + " claim_number: str\n", + " policy_number: 
str\n", + " claimant_name: str\n", + " date_of_loss: str\n", + " loss_description: str\n", + " estimated_repair_cost: float\n", + " vehicle_details: Optional[str] = None\n", + "\n", + "# Policy Condition schema\n", + "class PolicyQueries(BaseModel):\n", + " queries: List[str] = Field(\n", + " default_factory=list,\n", + " description=\"A list of query strings to retrieve relevant policy sections.\"\n", + " )\n", + "\n", + "# Policy Guidelines\n", + "class PolicyRecommendation(BaseModel):\n", + " \"\"\"Policy recommendation regarding a given claim.\"\"\"\n", + " policy_section: str = Field(..., description=\"The policy section or clause that applies.\")\n", + " recommendation_summary: str = Field(..., description=\"A concise summary of coverage determination.\")\n", + " deductible: Optional[float] = Field(None, description=\"The applicable deductible amount.\")\n", + " settlement_amount: Optional[float] = Field(None, description=\"Recommended settlement payout.\")\n", + "\n", + "# Final Claim Schema\n", + "class ClaimDecision(BaseModel):\n", + " claim_number: str\n", + " covered: bool\n", + " deductible: float\n", + " recommended_payout: float\n", + " notes: Optional[str] = None\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# Now load claim docs\n", + "# Remember we have already stored the policy documents in a vector store (llama cloud)\n", + "import json\n", + "\n", + "def parse_claim(file_path: str) -> ClaimInfo:\n", + " with open(file_path, \"r\") as f:\n", + " data = json.load(f)\n", + " # Validate and return\n", + " return ClaimInfo.model_validate(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'claim_number': 'CLAIM-0001',\n", + " 'policy_number': 'POLICY-ABC123',\n", + " 'claimant_name': 'John Smith',\n", + " 'date_of_loss': '2024-04-10',\n", + " 'loss_description': 'While delivering pizzas, collided 
with a parked car, causing damage to the parked car’s door.',\n", + " 'estimated_repair_cost': 1500.0,\n", + " 'vehicle_details': '2022 Honda Civic'}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# example let's load John's claim\n", + "claim_info = parse_claim(\"data/john.json\")\n", + "claim_info.model_dump()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "| Field | Details |\n", + "|------------------------|---------|\n", + "| Claim Number | CLAIM-0001 |\n", + "| Policy Number | POLICY-ABC123 |\n", + "| Claimant Name | John Smith |\n", + "| Date of Loss | 2024-04-10 |\n", + "| Vehicle Details | 2022 Honda Civic |\n", + "| Estimated Repair Cost | $1500.00 |\n", + "| Loss Description | While delivering pizzas, collided with a parked car, causing damage to the parked car’s door. |\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing Policy Documents\n", + "\n", + "We will be indexing a sample [California Personal Automobile Policy](https://nationalgeneral.com/forms_catalog/CAIP400_03012006_CA.pdf) which we will validate the claims against.\n", + "\n", + "Make sure to download the document and upload it to [LlamaCloud](https://cloud.llamaindex.ai/). If you don't have access yet, you can use our open-source VectorStoreIndex."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Indexing policy documents\n", + "from llama_index.indices.managed.llama_cloud import LlamaCloudIndex\n", + "\n", + "index = LlamaCloudIndex(\n", + " name=\"auto_insurance_policies_0\", \n", + " project_name= project_name,\n", + " organization_id= organization_id,\n", + " api_key= llama_cloud_key\n", + ")\n", + "\n", + "retriever = index.as_retriever(rerank_top_n=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing Per-User Declarations Documents\n", + "\n", + "Besides the general auto-insurance policy, we need a separate index to store the per-user declarations pages. These include specific details for each policy holder. They need to be filtered according to the right policy number during retrieval.\n", + "\n", + "The declarations are stored in the `data` folder. In LlamaCloud, drag and drop the markdown files (not the JSON files) into a new LlamaCloud index. We will also attach the policy number as metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Indexing per-user declarations \n", + "from llama_index.indices.managed.llama_cloud import LlamaCloudIndex\n", + "import os\n", + "\n", + "declarations_index = LlamaCloudIndex(\n", + " name=\"auto_insurance_declarations_0\", \n", + " project_name= project_name,\n", + " organization_id= organization_id,\n", + " api_key= llama_cloud_key\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# We use the API endpoint to load custom documents into the index " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'file_size': 1410,\n", + " 'last_modified_at': '2025-03-12T14:36:28',\n", + " 'file_path': 'john-declarations.md',\n", + " 'file_name': 'john-declarations.md',\n", + " 'external_file_id': 'john-declarations.md',\n", + " 'pipeline_id': 'e695a413-25a1-4983-bee0-c7d4503cd53c',\n", + " 'policy_number': 'POLICY-ABC123'}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Use API to load custom documents into the index\n", + "from llama_cloud.client import LlamaCloud\n", + "\n", + "client = LlamaCloud(\n", + " base_url=\"https://api.cloud.llamaindex.ai\",\n", + " token=llama_cloud_key,\n", + ")\n", + "\n", + "# TODO: make this function not hidden\n", + "declarations_pipeline_id = declarations_index.pipeline.id\n", + "declarations_project_id = declarations_index.project.id\n", + "\n", + "person_policy_map = {}\n", + "for p in [\"alice\", \"john\"]:\n", + " claim_info = parse_claim(f\"data/{p}.json\")\n", + " policy_num = claim_info.policy_number\n", + " person_policy_map[f\"{p}-declarations.md\"] = policy_num\n", + "\n", + "pipeline_docs = client.pipelines.list_pipeline_documents(declarations_pipeline_id)\n", + "for doc in pipeline_docs:\n", + " doc.metadata[\"policy_number\"] = 
person_policy_map[doc.metadata[\"file_name\"]]\n", + "upserted_docs = client.pipelines.upsert_batch_pipeline_documents(declarations_pipeline_id, request=pipeline_docs)\n", + "upserted_docs[0].metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# Now get docs from the vector store\n", + "from llama_index.core.vector_stores.types import (\n", + " MetadataInfo,\n", + " MetadataFilters,\n", + ")\n", + "\n", + "def get_declarations_docs(policy_number: str, top_k: int = 1):\n", + " \"\"\"Get declarations retriever.\"\"\"\n", + " # build retriever and query engine\n", + " filters = MetadataFilters.from_dicts([\n", + " {\"key\": \"policy_number\", \"value\": policy_number}\n", + " ])\n", + " retriever = declarations_index.as_retriever(\n", + " # TODO: do file-level retrieval\n", + " # retrieval_mode=\"files_via_metadata\", \n", + " rerank_top_n=top_k, \n", + " filters=filters\n", + " )\n", + " # semantic query matters less here\n", + " return retriever.retrieve(f\"declarations page for {policy_number}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "file_size: 1410\n", + "last_modified_at: 2025-03-12T14:36:28\n", + "file_path: john-declarations.md\n", + "file_name: john-declarations.md\n", + "external_file_id: john-declarations.md\n", + "pipeline_id: e695a413-25a1-4983-bee0-c7d4503cd53c\n", + "policy_number: POLICY-ABC123\n", + "\n", + "# CALIFORNIA PERSONAL AUTO POLICY DECLARATIONS PAGE\n", + "**Policy Number:** CAP-ABC123-01 \n", + "**Policy Period:** 01/01/2024 to 07/01/2024 \n", + "(12:01 A.M. 
standard time at the address below)\n", + "\n", + "**Named Insured:** \n", + "John Smith \n", + "456 Delivery Lane \n", + "San Francisco, CA 94112\n", + "\n", + "**Vehicle Information:** \n", + "Vehicle: 2022 Honda Civic LX Sedan \n", + "VIN: 2HGFE2F54NH123456 \n", + "Principal Operator: John Smith \n", + "Usage: Personal\n", + "\n", + "**Coverages and Premiums:**\n", + "\n", + "- Bodily Injury Liability: $100,000/$300,000 [$450]\n", + "- Property Damage Liability: $50,000 [$295]\n", + "- Medical Payments: $5,000 [$80]\n", + "- Uninsured/Underinsured Motorist: $100,000/$300,000 [$115]\n", + "- Collision Coverage: $500 deductible [$425]\n", + "- Other Than Collision: $250 deductible [$210]\n", + "- Rental Reimbursement: $30/day, max $900 [$30]\n", + "- Towing and Labor: $75 per disablement [$20]\n", + "\n", + "**Total Semi-Annual Premium:** $1,625\n", + "\n", + "**Discounts Applied:**\n", + "- Safe Driver Discount\n", + "- Anti-theft Device Discount\n", + "- Automatic Payment Discount\n", + "\n", + "**Forms and Endorsements:**\n", + "- CAIP400 (03012006) - Personal Auto Policy\n", + "- CA401 - Towing and Labor Costs Coverage\n", + "- CA405 - Rental Reimbursement Coverage\n", + "- CA410 - Pollution Exclusion\n", + "\n", + "**Loss Payee/Additional Interest:**\n", + "Honda Financial Services \n", + "Account #: HFS-123456789 \n", + "P.O. Box 44444 \n", + "Dallas, TX 75202\n", + "\n", + "**Special Provisions:**\n", + "This is a summary of your coverages. 
Please refer to your policy for complete details of coverage, conditions, and exclusions.\n" + ] + } + ], + "source": [ + "# try it out \n", + "docs = get_declarations_docs(\"POLICY-ABC123\")\n", + "print(len(docs))\n", + "print(docs[0].get_content(metadata_mode=\"all\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Important PROMPTS\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "# General policy queries\n", + "\n", + "GENERATE_POLICY_QUERIES_PROMPT = \"\"\"\\\n", + "You are an assistant tasked with determining what insurance policy sections to consult for a given auto claim.\n", + "\n", + "**Instructions:**\n", + "1. Review the claim data, including the type of loss (rear-end collision), estimated repair cost, and policy number.\n", + "2. Identify what aspects of the policy we need:\n", + " - Collision coverage conditions\n", + " - Deductible application\n", + " - Any special endorsements related to rear-end collisions or no-fault scenarios\n", + "3. 
Produce 3-5 queries that can be used against a vector index of insurance policies to find relevant clauses.\n", + "\n", + "Claim Data:\n", + "{claim_info}\n", + "\n", + "Return a JSON object matching the PolicyQueries schema.\n", + "\"\"\"\n", + "\n", + "# Policy recomendation prompt\n", + "POLICY_RECOMMENDATION_PROMPT = \"\"\"\\\n", + "Given the retrieved policy sections for this claim, determine:\n", + "- If the collision is covered\n", + "- The applicable deductible\n", + "- Recommended settlement amount (e.g., cost minus deductible)\n", + "- Which policy section applies\n", + "\n", + "Claim Info:\n", + "{claim_info}\n", + "\n", + "Policy Text:\n", + "{policy_text}\n", + "\n", + "Return a JSON object matching PolicyRecommendation schema.\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Auto Insurance Processing workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.core.workflow import (\n", + " Event,\n", + " StartEvent,\n", + " StopEvent,\n", + " Context,\n", + " Workflow,\n", + " step\n", + ")\n", + "from llama_index.core.llms import LLM\n", + "from llama_index.core.prompts import ChatPromptTemplate\n", + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.core.retrievers import BaseRetriever\n", + "\n", + "class ClaimInfoEvent(Event):\n", + " claim_info: ClaimInfo\n", + "\n", + "class PolicyQueryEvent(Event):\n", + " queries: PolicyQueries\n", + "\n", + "class PolicyMatchedEvent(Event):\n", + " policy_text: str\n", + "\n", + "class RecommendationEvent(Event):\n", + " recommendation: PolicyRecommendation\n", + "\n", + "class DecisionEvent(Event):\n", + " decision: ClaimDecision\n", + "\n", + "class LogEvent(Event):\n", + " msg: str\n", + " delta: bool = False\n", + "\n", + "\n", + "def parse_claim(file_path: str) -> ClaimInfo:\n", + " import json\n", + " with open(file_path, \"r\") as f:\n", + " data = 
json.load(f)\n", + " return ClaimInfo.model_validate(data) # replace \"ClaimInfo\".model_validate with actual ClaimInfo class method\n", + "\n", + "class AutoInsuranceWorkflow(Workflow):\n", + " def __init__(\n", + " self, \n", + " policy_retriever: BaseRetriever, \n", + " llm: LLM | None = None, \n", + " output_dir: str = \"data_out\", \n", + " **kwargs\n", + " ) -> None:\n", + " super().__init__(**kwargs)\n", + " self.policy_retriever = policy_retriever\n", + " self.llm = llm\n", + "\n", + " @step\n", + " async def load_claim_info(self, ctx: Context, ev: StartEvent) -> ClaimInfoEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=\">> Loading Claim Info\"))\n", + " claim_info = parse_claim(ev.claim_json_path)\n", + " await ctx.set(\"claim_info\", claim_info)\n", + " return ClaimInfoEvent(claim_info=claim_info)\n", + "\n", + " @step\n", + " async def generate_policy_queries(self, ctx: Context, ev: ClaimInfoEvent) -> PolicyQueryEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=\">> Generating Policy Queries\"))\n", + " prompt = ChatPromptTemplate.from_messages([(\"user\", GENERATE_POLICY_QUERIES_PROMPT)])\n", + " queries = await self.llm.astructured_predict(\n", + " PolicyQueries,\n", + " prompt,\n", + " claim_info=ev.claim_info.model_dump_json()\n", + " )\n", + " return PolicyQueryEvent(queries=queries)\n", + "\n", + " @step\n", + " async def retrieve_policy_text(self, ctx: Context, ev: PolicyQueryEvent) -> PolicyMatchedEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=\">> Retrieving policy sections\"))\n", + " \n", + " # Get claim_info from context\n", + " claim_info = await ctx.get(\"claim_info\")\n", + " \n", + " combined_docs = {}\n", + " for query in ev.queries.queries:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=f\">> Query: {query}\"))\n", + " # fetch policy text\n", + " docs = await self.policy_retriever.aretrieve(query)\n", + " for d in 
docs:\n", + " combined_docs[d.id_] = d\n", + "\n", + " # Also fetch the declarations page for the policy holder\n", + " # Add error handling to deal with empty results\n", + " declaration_docs = get_declarations_docs(str(claim_info.policy_number))\n", + " if declaration_docs: # Check if list is not empty\n", + " d_doc = declaration_docs[0]\n", + " combined_docs[d_doc.id_] = d_doc\n", + " else:\n", + " # Log the issue if verbose mode is on\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=f\">> Warning: No declaration docs found for policy {claim_info.policy_number}\"))\n", + " \n", + " policy_text = \"\\n\\n\".join([doc.get_content() for doc in combined_docs.values()])\n", + " await ctx.set(\"policy_text\", policy_text)\n", + " return PolicyMatchedEvent(policy_text=policy_text)\n", + "\n", + " @step\n", + " async def generate_recommendation(self, ctx: Context, ev: PolicyMatchedEvent) -> RecommendationEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=\">> Generating Policy Recommendation\"))\n", + " claim_info = await ctx.get(\"claim_info\")\n", + " prompt = ChatPromptTemplate.from_messages([(\"user\", POLICY_RECOMMENDATION_PROMPT)])\n", + " recommendation = await self.llm.astructured_predict(\n", + " PolicyRecommendation,\n", + " prompt,\n", + " claim_info=claim_info.model_dump_json(),\n", + " policy_text=ev.policy_text\n", + " )\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=f\">> Recommendation: {recommendation.model_dump_json()}\"))\n", + " return RecommendationEvent(recommendation=recommendation)\n", + "\n", + " @step\n", + " async def finalize_decision(self, ctx: Context, ev: RecommendationEvent) -> DecisionEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=\">> Finalizing Decision\"))\n", + " claim_info = await ctx.get(\"claim_info\")\n", + " rec = ev.recommendation\n", + " covered = \"covered\" in rec.recommendation_summary.lower() or 
(rec.settlement_amount is not None and rec.settlement_amount > 0)\n", + " deductible = rec.deductible if rec.deductible is not None else 0.0\n", + " recommended_payout = rec.settlement_amount if rec.settlement_amount else 0.0\n", + " decision = ClaimDecision(\n", + " claim_number=claim_info.claim_number,\n", + " covered=covered,\n", + " deductible=deductible,\n", + " recommended_payout=recommended_payout,\n", + " notes=rec.recommendation_summary\n", + " )\n", + " return DecisionEvent(decision=decision)\n", + "\n", + " @step\n", + " async def output_result(self, ctx: Context, ev: DecisionEvent) -> StopEvent:\n", + " if self._verbose:\n", + " ctx.write_event_to_stream(LogEvent(msg=f\">> Decision: {ev.decision.model_dump_json()}\"))\n", + " return StopEvent(result={\"decision\": ev.decision})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run the workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.gemini import Gemini\n", + "from llama_index.core import Settings\n", + "\n", + "\n", + "\n", + "# Set Gemini as the default LLM\n", + "llm = Gemini(model=\"models/gemini-2.0-flash\", \n", + " google_api_key= gemini_key,\n", + " temperature=0.3)\n", + "llm = llm\n", + "workflow = AutoInsuranceWorkflow(\n", + " policy_retriever=retriever,\n", + " llm=llm,\n", + " verbose=True,\n", + " timeout=None, # don't worry about timeout to make sure it completes\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualize workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "workflow_all_flows.html\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "auto_insurance_workflow.html\n" + ] + } + ], + "source": [ + "from llama_index.utils.workflow import 
draw_all_possible_flows\n", + "draw_all_possible_flows(AutoInsuranceWorkflow)\n", + "draw_all_possible_flows(AutoInsuranceWorkflow, filename=\"auto_insurance_workflow.html\")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "# Run the workflow\n", + "from IPython.display import clear_output\n", + "\n", + "async def stream_workflow(workflow, **workflow_kwargs):\n", + " handler = workflow.run(**workflow_kwargs)\n", + " async for event in handler.stream_events():\n", + " if isinstance(event, LogEvent):\n", + " if event.delta:\n", + " print(event.msg, end=\"\")\n", + " else:\n", + " print(event.msg)\n", + "\n", + " return await handler\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running step load_claim_info\n", + "Step load_claim_info produced event ClaimInfoEvent\n", + ">> Loading Claim Info\n", + "Running step generate_policy_queries\n", + ">> Generating Policy Queries\n", + "Step generate_policy_queries produced event PolicyQueryEvent\n", + "Running step retrieve_policy_text\n", + ">> Retrieving policy sections\n", + ">> Query: Collision coverage conditions\n", + ">> Query: Deductible for collision claims\n", + ">> Query: Exclusions to collision coverage\n", + ">> Query: No-fault collision\n", + ">> Query: Endorsements for delivery drivers\n", + "Step retrieve_policy_text produced event PolicyMatchedEvent\n", + ">> Warning: No declaration docs found for policy POLICY-ABC123\n", + "Running step generate_recommendation\n", + ">> Generating Policy Recommendation\n", + "Step generate_recommendation produced event RecommendationEvent\n", + ">> Recommendation: {\"policy_section\":\"Collision\",\"recommendation_summary\":\"Collision is covered, subject to exclusions. 
Exclusion 1 applies as the vehicle was used to deliver pizzas for compensation.\",\"deductible\":null,\"settlement_amount\":0.0}\n", + "Running step finalize_decision\n", + "Step finalize_decision produced event DecisionEvent\n", + "Running step output_result\n", + "Step output_result produced event StopEvent\n", + ">> Finalizing Decision\n", + ">> Decision: {\"claim_number\":\"CLAIM-0001\",\"covered\":true,\"deductible\":0.0,\"recommended_payout\":0.0,\"notes\":\"Collision is covered, subject to exclusions. Exclusion 1 applies as the vehicle was used to deliver pizzas for compensation.\"}\n", + "claim_number='CLAIM-0001' covered=True deductible=0.0 recommended_payout=0.0 notes='Collision is covered, subject to exclusions. Exclusion 1 applies as the vehicle was used to deliver pizzas for compensation.'\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_19552\\2703443022.py:2: RuntimeWarning: coroutine 'stream_workflow' was never awaited\n", + " response_dict = await stream_workflow(workflow, claim_json_path=\"data/john.json\")\n", + "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n" + ] + } + ], + "source": [ + "# Now test the workflow\n", + "response_dict = await stream_workflow(workflow, claim_json_path=\"data/john.json\")\n", + "print(str(response_dict[\"decision\"]))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llama_index_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auto-insurance-claims-agentic-RAG/data/Chicago.pdf b/auto-insurance-claims-agentic-RAG/data/Chicago.pdf new file mode 100644 index 00000000..6023552d 
Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/Chicago.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/Houston.pdf b/auto-insurance-claims-agentic-RAG/data/Houston.pdf new file mode 100644 index 00000000..d68c7c73 Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/Houston.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/Los_Angeles.pdf b/auto-insurance-claims-agentic-RAG/data/Los_Angeles.pdf new file mode 100644 index 00000000..47dac05e Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/Los_Angeles.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/Miami.pdf b/auto-insurance-claims-agentic-RAG/data/Miami.pdf new file mode 100644 index 00000000..496a11e8 Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/Miami.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/New_York_City.pdf b/auto-insurance-claims-agentic-RAG/data/New_York_City.pdf new file mode 100644 index 00000000..7311efae Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/New_York_City.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/Seattle.pdf b/auto-insurance-claims-agentic-RAG/data/Seattle.pdf new file mode 100644 index 00000000..cde39784 Binary files /dev/null and b/auto-insurance-claims-agentic-RAG/data/Seattle.pdf differ diff --git a/auto-insurance-claims-agentic-RAG/data/alice-declarations.md b/auto-insurance-claims-agentic-RAG/data/alice-declarations.md new file mode 100644 index 00000000..a98b87fd --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/data/alice-declarations.md @@ -0,0 +1,49 @@ +# CALIFORNIA PERSONAL AUTO POLICY DECLARATIONS PAGE +**Policy Number:** CAP-XYZ789-01 +**Policy Period:** 03/01/2024 to 09/01/2024 +(12:01 A.M. 
standard time at the address below) + +**Named Insured:** +Alice Johnson +789 Commuter Avenue +San Francisco, CA 94115 + +**Vehicle Information:** +Vehicle: 2021 Toyota Camry SE +VIN: 4T1BF1FK5MU987654 +Principal Operator: Alice Johnson +Usage: Commute (15 miles one-way) + +**Coverages and Premiums:** + +- Bodily Injury Liability: $100,000/$300,000 [$435] +- Property Damage Liability: $50,000 [$290] +- Medical Payments: $5,000 [$85] +- Uninsured/Underinsured Motorist: $100,000/$300,000 [$120] +- Collision Coverage: $500 deductible [$410] +- Other Than Collision: $250 deductible [$205] +- Rental Reimbursement: $30/day, max $900 [$30] +- Towing and Labor: $75 per disablement [$20] + +**Total Semi-Annual Premium:** $1,595 + +**Discounts Applied:** +- Safe Driver Discount +- Anti-theft Device Discount +- Homeowner Discount +- Automatic Payment Discount + +**Forms and Endorsements:** +- CAIP400 (03012006) - Personal Auto Policy +- CA401 - Towing and Labor Costs Coverage +- CA405 - Rental Reimbursement Coverage +- CA410 - Pollution Exclusion + +**Loss Payee/Additional Interest:** +Toyota Financial Services +Account #: TFS-987654321 +P.O. Box 33333 +Dallas, TX 75203 + +**Special Provisions:** +This is a summary of your coverages. Please refer to your policy for complete details of coverage, conditions, and exclusions. 
diff --git a/auto-insurance-claims-agentic-RAG/data/alice.json b/auto-insurance-claims-agentic-RAG/data/alice.json new file mode 100644 index 00000000..e3af0cad --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/data/alice.json @@ -0,0 +1,9 @@ +{ + "claim_number": "CLAIM-0002", + "policy_number": "POLICY-XYZ789", + "claimant_name": "Alice Johnson", + "date_of_loss": "2024-05-20", + "loss_description": "Rear-ended by another driver at a red light, resulting in bumper damage and mild whiplash.", + "estimated_repair_cost": 2200.00, + "vehicle_details": "2021 Toyota Camry" +} diff --git a/auto-insurance-claims-agentic-RAG/data/john-declarations.md b/auto-insurance-claims-agentic-RAG/data/john-declarations.md new file mode 100644 index 00000000..3f44badb --- /dev/null +++ b/auto-insurance-claims-agentic-RAG/data/john-declarations.md @@ -0,0 +1,48 @@ +# CALIFORNIA PERSONAL AUTO POLICY DECLARATIONS PAGE +**Policy Number:** CAP-ABC123-01 +**Policy Period:** 01/01/2024 to 07/01/2024 +(12:01 A.M. 
standard time at the address below) + +**Named Insured:** +John Smith +456 Delivery Lane +San Francisco, CA 94112 + +**Vehicle Information:** +Vehicle: 2022 Honda Civic LX Sedan +VIN: 2HGFE2F54NH123456 +Principal Operator: John Smith +Usage: Personal + +**Coverages and Premiums:** + +- Bodily Injury Liability: $100,000/$300,000 [$450] +- Property Damage Liability: $50,000 [$295] +- Medical Payments: $5,000 [$80] +- Uninsured/Underinsured Motorist: $100,000/$300,000 [$115] +- Collision Coverage: $500 deductible [$425] +- Other Than Collision: $250 deductible [$210] +- Rental Reimbursement: $30/day, max $900 [$30] +- Towing and Labor: $75 per disablement [$20] + +**Total Semi-Annual Premium:** $1,625 + +**Discounts Applied:** +- Safe Driver Discount +- Anti-theft Device Discount +- Automatic Payment Discount + +**Forms and Endorsements:** +- CAIP400 (03012006) - Personal Auto Policy +- CA401 - Towing and Labor Costs Coverage +- CA405 - Rental Reimbursement Coverage +- CA410 - Pollution Exclusion + +**Loss Payee/Additional Interest:** +Honda Financial Services +Account #: HFS-123456789 +P.O. Box 44444 +Dallas, TX 75202 + +**Special Provisions:** +This is a summary of your coverages. Please refer to your policy for complete details of coverage, conditions, and exclusions. 
# ---------------------------------------------------------------------------
# insurance_claim_processor.py
#
# Agentic-RAG pipeline for auto-insurance claims. Workflow steps:
#   1. load claim JSON            -> ClaimInfo
#   2. LLM proposes lookup queries -> PolicyQueries
#   3. retrieve policy sections + the claimant's declarations page
#   4. LLM produces a structured   -> PolicyRecommendation
#   5. heuristics convert that to  -> ClaimDecision
# ---------------------------------------------------------------------------
import nest_asyncio
nest_asyncio.apply()  # permit re-entrant event loops (Jupyter / Streamlit)

from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
import os
import json
import asyncio
from dotenv import load_dotenv

# SCHEMAS
class ClaimInfo(BaseModel):
    """Extracted Insurance claim information."""
    claim_number: str
    policy_number: str
    claimant_name: str
    # ISO-8601 date string, e.g. "2024-04-10" (kept as str; never parsed here)
    date_of_loss: str
    loss_description: str
    estimated_repair_cost: float
    vehicle_details: Optional[str] = None

class PolicyQueries(BaseModel):
    """Search queries the LLM proposes for the policy vector index."""
    queries: List[str] = Field(
        default_factory=list,
        description="A list of query strings to retrieve relevant policy sections."
    )

class PolicyRecommendation(BaseModel):
    """Policy recommendation regarding a given claim."""
    policy_section: str = Field(..., description="The policy section or clause that applies.")
    recommendation_summary: str = Field(..., description="A concise summary of coverage determination.")
    deductible: Optional[float] = Field(None, description="The applicable deductible amount.")
    settlement_amount: Optional[float] = Field(None, description="Recommended settlement payout.")

class ClaimDecision(BaseModel):
    """Final structured decision emitted by the workflow."""
    claim_number: str
    covered: bool
    deductible: float
    recommended_payout: float
    notes: Optional[str] = None

# PROMPTS
GENERATE_POLICY_QUERIES_PROMPT = """\
You are an assistant tasked with determining what insurance policy sections to consult for a given auto claim.

**Instructions:**
1. Review the claim data, including the type of loss (rear-end collision), estimated repair cost, and policy number.
2. Identify what aspects of the policy we need:
   - Collision coverage conditions
   - Deductible application
   - Any special endorsements related to rear-end collisions or no-fault scenarios
3. Produce 3-5 queries that can be used against a vector index of insurance policies to find relevant clauses.

Claim Data:
{claim_info}

Return a JSON object matching the PolicyQueries schema.
"""

POLICY_RECOMMENDATION_PROMPT = """\
Given the retrieved policy sections for this claim, determine:
- If the collision is covered
- The applicable deductible
- Recommended settlement amount (e.g., cost minus deductible)
- Which policy section applies

Claim Info:
{claim_info}

Policy Text:
{policy_text}

Return a JSON object matching PolicyRecommendation schema.
"""

# WORKFLOW DEFINITIONS
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Context,
    Workflow,
    step
)
from llama_index.core.llms import LLM
from llama_index.core.prompts import ChatPromptTemplate
from llama_index.core.retrievers import BaseRetriever

# Events carrying intermediate state between workflow steps.
class ClaimInfoEvent(Event):
    claim_info: ClaimInfo

class PolicyQueryEvent(Event):
    queries: PolicyQueries

class PolicyMatchedEvent(Event):
    policy_text: str

class RecommendationEvent(Event):
    recommendation: PolicyRecommendation

class DecisionEvent(Event):
    decision: ClaimDecision

class LogEvent(Event):
    # delta=True means "streamed fragment" — printed without a trailing newline.
    msg: str
    delta: bool = False

def parse_claim(file_path: str) -> ClaimInfo:
    """Load a claim JSON file and validate it into a ClaimInfo model.

    Raises pydantic.ValidationError on schema mismatch and OSError if the
    file cannot be read.
    """
    # Explicit encoding: sample claims contain non-ASCII characters (curly
    # apostrophes), and the platform-default encoding is not always UTF-8.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return ClaimInfo.model_validate(data)

def parse_claim_dict(claim_data: Dict[str, Any]) -> ClaimInfo:
    """Validate an in-memory claim dict into a ClaimInfo model."""
    return ClaimInfo.model_validate(claim_data)

class AutoInsuranceWorkflow(Workflow):
    """Five-step claim pipeline: load -> query -> retrieve -> recommend -> decide."""

    def __init__(
        self,
        policy_retriever: BaseRetriever,
        declarations_retriever_func,
        llm: LLM | None = None,
        output_dir: str = "data_out",  # retained for interface compatibility; currently unused
        **kwargs
    ) -> None:
        super().__init__(**kwargs)
        self.policy_retriever = policy_retriever
        # Callable (policy_number: str) -> list of declaration-page nodes.
        self.declarations_retriever_func = declarations_retriever_func
        # Must be a structured-output-capable LLM before the predict steps run.
        self.llm = llm

    @step
    async def load_claim_info(self, ctx: Context, ev: StartEvent) -> ClaimInfoEvent:
        """Parse the claim from a JSON file path or an in-memory dict."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=">> Loading Claim Info"))

        # StartEvent may carry either a file path or a raw dict; file wins.
        if hasattr(ev, 'claim_json_path') and ev.claim_json_path:
            claim_info = parse_claim(ev.claim_json_path)
        elif hasattr(ev, 'claim_data') and ev.claim_data:
            claim_info = parse_claim_dict(ev.claim_data)
        else:
            raise ValueError("No claim data provided")

        await ctx.set("claim_info", claim_info)
        return ClaimInfoEvent(claim_info=claim_info)

    @step
    async def generate_policy_queries(self, ctx: Context, ev: ClaimInfoEvent) -> PolicyQueryEvent:
        """Ask the LLM for 3-5 vector-index queries tailored to this claim."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=">> Generating Policy Queries"))
        prompt = ChatPromptTemplate.from_messages([("user", GENERATE_POLICY_QUERIES_PROMPT)])
        queries = await self.llm.astructured_predict(
            PolicyQueries,
            prompt,
            claim_info=ev.claim_info.model_dump_json()
        )
        return PolicyQueryEvent(queries=queries)

    @step
    async def retrieve_policy_text(self, ctx: Context, ev: PolicyQueryEvent) -> PolicyMatchedEvent:
        """Run every query against the policy index, add the declarations page,
        and concatenate the de-duplicated document texts."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=">> Retrieving policy sections"))

        # Get claim_info from context
        claim_info = await ctx.get("claim_info")

        # Keyed by document id so overlapping query hits are de-duplicated.
        combined_docs = {}
        for query in ev.queries.queries:
            if self._verbose:
                ctx.write_event_to_stream(LogEvent(msg=f">> Query: {query}"))
            # fetch policy text
            docs = await self.policy_retriever.aretrieve(query)
            for d in docs:
                combined_docs[d.id_] = d

        # Also fetch the declarations page for the policy holder
        declaration_docs = self.declarations_retriever_func(str(claim_info.policy_number))
        if declaration_docs:  # Check if list is not empty
            d_doc = declaration_docs[0]
            combined_docs[d_doc.id_] = d_doc
        else:
            # Missing declarations is non-fatal; proceed on policy text alone.
            if self._verbose:
                ctx.write_event_to_stream(LogEvent(msg=f">> Warning: No declaration docs found for policy {claim_info.policy_number}"))

        policy_text = "\n\n".join([doc.get_content() for doc in combined_docs.values()])
        await ctx.set("policy_text", policy_text)
        return PolicyMatchedEvent(policy_text=policy_text)

    @step
    async def generate_recommendation(self, ctx: Context, ev: PolicyMatchedEvent) -> RecommendationEvent:
        """Ask the LLM for a structured coverage recommendation given the
        retrieved policy text."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=">> Generating Policy Recommendation"))
        claim_info = await ctx.get("claim_info")
        prompt = ChatPromptTemplate.from_messages([("user", POLICY_RECOMMENDATION_PROMPT)])
        recommendation = await self.llm.astructured_predict(
            PolicyRecommendation,
            prompt,
            claim_info=claim_info.model_dump_json(),
            policy_text=ev.policy_text
        )
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=f">> Recommendation: {recommendation.model_dump_json()}"))
        return RecommendationEvent(recommendation=recommendation)

    @step
    async def finalize_decision(self, ctx: Context, ev: RecommendationEvent) -> DecisionEvent:
        """Convert the LLM recommendation into a concrete ClaimDecision."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=">> Finalizing Decision"))
        claim_info = await ctx.get("claim_info")
        rec = ev.recommendation
        summary_lower = rec.recommendation_summary.lower()
        # BUGFIX: a bare `"covered" in summary` substring test also matches
        # "not covered", which flagged explicit denials as approvals. Check
        # for the negation first.
        if "not covered" in summary_lower:
            covered = False
        else:
            covered = "covered" in summary_lower or (
                rec.settlement_amount is not None and rec.settlement_amount > 0
            )
        deductible = rec.deductible if rec.deductible is not None else 0.0
        recommended_payout = rec.settlement_amount if rec.settlement_amount else 0.0
        decision = ClaimDecision(
            claim_number=claim_info.claim_number,
            covered=covered,
            deductible=deductible,
            recommended_payout=recommended_payout,
            notes=rec.recommendation_summary
        )
        return DecisionEvent(decision=decision)

    @step
    async def output_result(self, ctx: Context, ev: DecisionEvent) -> StopEvent:
        """Terminate the workflow, exposing the decision in the result dict."""
        if self._verbose:
            ctx.write_event_to_stream(LogEvent(msg=f">> Decision: {ev.decision.model_dump_json()}"))
        return StopEvent(result={"decision": ev.decision})

# HELPER FUNCTION
async def stream_workflow(workflow, **workflow_kwargs):
    """Run `workflow`, echoing LogEvents to stdout as they stream.

    Returns a (result_dict, log_messages) tuple, where result_dict is the
    StopEvent payload (contains key "decision").
    """
    handler = workflow.run(**workflow_kwargs)
    events = []
    async for event in handler.stream_events():
        if isinstance(event, LogEvent):
            events.append(event.msg)
            if event.delta:
                print(event.msg, end="")
            else:
                print(event.msg)

    result = await handler
    return result, events

# SETUP FUNCTION
def setup_workflow():
    """Build a ready-to-run AutoInsuranceWorkflow.

    Reads GOOGLE_API_KEY, LLAMA_CLOUD_API_KEY, PROJECT_NAME and
    ORGANIZATION_ID from the environment (via .env).
    """
    # Load environmental variables
    load_dotenv()

    # Load API keys and project info
    gemini_key = os.getenv("GOOGLE_API_KEY")
    llama_cloud_key = os.getenv("LLAMA_CLOUD_API_KEY")
    project_name = os.getenv("PROJECT_NAME")
    organization_id = os.getenv("ORGANIZATION_ID")

    # Set up the indices
    from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
    from llama_index.llms.gemini import Gemini

    # Initialize policy index
    policy_index = LlamaCloudIndex(
        name="auto_insurance_policies_0",
        project_name=project_name,
        organization_id=organization_id,
        api_key=llama_cloud_key
    )
    policy_retriever = policy_index.as_retriever(rerank_top_n=3)

    # Initialize declarations index
    declarations_index = LlamaCloudIndex(
        name="auto_insurance_declarations_0",
        project_name=project_name,
        organization_id=organization_id,
        api_key=llama_cloud_key
    )

    # Declarations retriever function
    from llama_index.core.vector_stores.types import MetadataFilters

    def get_declarations_docs(policy_number: str, top_k: int = 1):
        """Fetch the declarations page(s) for one policy via metadata filter."""
        # Filter on the policy_number metadata key so only this holder's
        # declarations page can match.
        filters = MetadataFilters.from_dicts([
            {"key": "policy_number", "value": policy_number}
        ])
        retriever = declarations_index.as_retriever(
            rerank_top_n=top_k,
            filters=filters
        )
        # semantic query matters less here — the metadata filter does the work
        return retriever.retrieve(f"declarations page for {policy_number}")

    # Set up LLM
    llm = Gemini(
        model="models/gemini-2.0-flash",
        google_api_key=gemini_key,
        temperature=0.3
    )

    # Create and return workflow
    workflow = AutoInsuranceWorkflow(
        policy_retriever=policy_retriever,
        declarations_retriever_func=get_declarations_docs,
        llm=llm,
        verbose=True,
        timeout=None,  # don't worry about timeout to make sure it completes
    )

    return workflow

# MAIN PROCESSING FUNCTION
def process_claim(claim_json_path=None, claim_data=None):
    """Process a claim either from a file path or from a dictionary.

    Returns (ClaimDecision, log_messages). Raises ValueError when neither
    source is supplied.
    """
    workflow = setup_workflow()

    # Run the workflow with async handling
    kwargs = {}
    if claim_json_path:
        kwargs['claim_json_path'] = claim_json_path
    elif claim_data:
        kwargs['claim_data'] = claim_data
    else:
        raise ValueError("Either claim_json_path or claim_data must be provided")

    result, events = asyncio.run(stream_workflow(workflow, **kwargs))

    return result["decision"], events


# ---------------------------------------------------------------------------
# test_workflow.py — command-line entry point (separate file in this diff)
# ---------------------------------------------------------------------------
import argparse
from insurance_claim_processor import process_claim

def main():
    """Parse --file and run one claim through the processor, printing the decision."""
    parser = argparse.ArgumentParser(description='Process an insurance claim')
    parser.add_argument('--file', type=str, help='Path to claim JSON file')
    args = parser.parse_args()

    if not args.file:
        print("Please provide a path to a claim JSON file using --file")
        return

    print(f"Processing claim from {args.file}...")

    try:
        decision, logs = process_claim(claim_json_path=args.file)

        print("\n" + "="*50)
        print("CLAIM DECISION:")
        print(decision.model_dump_json(indent=2))
        print("="*50)
    except Exception as e:
        # Surface the failure to the CLI user instead of a raw traceback.
        print(f"Error processing claim: {str(e)}")

if __name__ == "__main__":
    main()