diff --git a/python/03-integrate/tools/exa/.prompt b/python/03-integrate/tools/exa/.prompt index 7f0e3de2..bc10771b 100644 --- a/python/03-integrate/tools/exa/.prompt +++ b/python/03-integrate/tools/exa/.prompt @@ -15,10 +15,11 @@ Use exa_search with: - type: "auto" ← SHOWCASES: Intelligent search mode selection - num_results: 5 - text: {"maxCharacters": 2000} ← SHOWCASES: Content extraction with limits +- highlights: {"numSentences": 5, "highlightsPerUrl": 3} ← SHOWCASES: Token-efficient page excerpts - summary: {"query": "What are the key concepts, main points, and important details?"} ← SHOWCASES: AI summaries - subpages: 2 ← SHOWCASES: Subpage crawling to find related content - subpage_target: ["overview", "about", "introduction"] ← SHOWCASES: Targeted subpage discovery -- livecrawl: "fallback" ← SHOWCASES: Live crawling when cache misses +- max_age_hours: 168 ← SHOWCASES: Content freshness (livecrawl pages older than 7 days) PURPOSE: Get foundational understanding with AI-summarized key points. @@ -33,7 +34,7 @@ Use exa_search with: - start_published_date: [Use the date provided in the prompt] ← SHOWCASES: Date filtering - text: {"maxCharacters": 1500} - summary: {"query": "What are the key announcements, developments, and news?"} -- livecrawl: "fallback" ← SHOWCASES: Fresh news retrieval +- max_age_hours: 24 ← SHOWCASES: Fresh news (livecrawl pages older than 24h) PURPOSE: Capture recent developments and breaking news. 
@@ -43,7 +44,7 @@ STEP 3: ACADEMIC PAPERS - Demonstrating PDF Category & Structured Output Use exa_search with: - query: Academic/research-focused query (include terms like "research", "study", "analysis") -- category: "pdf" ← SHOWCASES: Academic/PDF document search +- category: "research paper" ← SHOWCASES: Academic/research paper search - num_results: 5 - text: {"maxCharacters": 2000} - summary: { @@ -82,10 +83,11 @@ STEP 5: DEEP DIVE - Demonstrating exa_get_contents for Detailed Extraction Select 2-3 of the most valuable URLs from previous searches, then use exa_get_contents with: - urls: [list of important URLs] - text: {"maxCharacters": 4000} ← SHOWCASES: Extended content extraction +- highlights: {"numSentences": 5, "highlightsPerUrl": 3} ← SHOWCASES: Key excerpts from important sources - summary: {"query": "Extract all important details, insights, and actionable information"} - subpages: 3 ← SHOWCASES: Deep subpage discovery (citations, references, docs) - subpage_target: ["references", "citations", "bibliography", "methodology"] ← SHOWCASES: Targeted crawling for academic context -- livecrawl: "always" ← SHOWCASES: Force fresh content retrieval +- max_age_hours: 0 ← SHOWCASES: Force fresh content (always livecrawl) PURPOSE: Get comprehensive detail from the most important sources, including citations and methodology. 
diff --git a/python/03-integrate/tools/exa/README.md b/python/03-integrate/tools/exa/README.md index d8651805..72e653d1 100644 --- a/python/03-integrate/tools/exa/README.md +++ b/python/03-integrate/tools/exa/README.md @@ -11,13 +11,14 @@ A conversational AI agent that demonstrates the full power of Exa's search and c | Feature | Description | |---------|-------------| | Auto Mode | Intelligent search mode selection for optimal results | -| Category Filtering | Specialized searches for news, PDF documents, and GitHub repositories | +| Category Filtering | Specialized searches for news, research papers, and GitHub repositories | | Date Filtering | Time-bound searches for recent content (e.g., last 30 days) | | AI Summaries | Automatic key insights extraction from search results | | Structured Output | JSON schema for structured summary extraction | | Subpage Crawling | Discover related pages (citations, methodology, references) | | Subpage Targeting | Keywords to find specific subpages (references, bibliography) | -| Live Crawling | Fresh content retrieval, bypassing cache | +| Highlights | Token-efficient page excerpts for key information | +| Content Freshness | max_age_hours for livecrawl control (replaces old livecrawl enum) | | Content Extraction | Full text retrieval with character limit control | ## Architecture @@ -42,9 +43,9 @@ The Deep Research Assistant implements a comprehensive 6-step research workflow: │ ├─────────────────────────────────────────────────────────────┤ │ │ │ 1. Overview Search │ Auto mode + subpages + AI summaries │ │ │ │ 2. News Search │ Category: news + date filtering │ │ -│ │ 3. Academic Papers │ Category: pdf + structured output │ │ +│ │ 3. Academic Papers │ Category: research paper + structured │ │ │ │ 4. GitHub Projects │ Category: github │ │ -│ │ 5. Deep Dive │ exa_get_contents + live crawling │ │ +│ │ 5. Deep Dive │ exa_get_contents + highlights │ │ │ │ 6. 
Synthesis │ Comprehensive research brief │ │ │ └─────────────────────────────────────────────────────────────┘ │ │ │ @@ -305,4 +306,4 @@ Store API keys (such as `EXA_API_KEY`) securely using environment variables and ### Clean up -Clean up the resources created on AWS to avoid undesired costs. \ No newline at end of file +Clean up the resources created on AWS to avoid undesired costs. diff --git a/python/03-integrate/tools/exa/deep_research_assistant.py b/python/03-integrate/tools/exa/deep_research_assistant.py index 1f230057..caef983f 100644 --- a/python/03-integrate/tools/exa/deep_research_assistant.py +++ b/python/03-integrate/tools/exa/deep_research_assistant.py @@ -7,19 +7,20 @@ Exa Capabilities Demonstrated: - Auto mode: Intelligent search mode selection for optimal results -- Category filtering: news, pdf, github for specialized searches +- Category filtering: news, research paper, github for specialized searches - Date filtering: Time-bound searches for recent content - AI summaries: Automatic key insights extraction +- Highlights: Token-efficient page excerpts - Structured output: JSON schema for structured summary extraction - Subpage crawling: Discover related pages (citations, methodology) - Subpage targeting: Keywords to find specific subpages (references, bibliography) -- Live crawling: Fresh content retrieval, bypassing cache +- Content freshness: max_age_hours for livecrawl control - Content extraction: Full text with character limit control Research Workflow: 1. Overview Search (Auto Mode) - General topic understanding 2. News Search (Category: news) - Recent developments with date filtering - 3. Academic Papers (Category: pdf) - Technical depth from research + 3. Academic Papers (Category: research paper) - Technical depth from research 4. Code & Projects (Category: github) - Practical implementations 5. Deep Dive (exa_get_contents) - Detailed extraction from key sources 6. 
Synthesis - Structured research brief with citations @@ -126,7 +127,7 @@ def main(): print("Deep Research Assistant") print("=======================") print("Demonstrating Exa capabilities: auto mode, category filters, date filtering,") - print("AI summaries, structured output (JSON schema), subpage crawling/targeting, and live crawling.\n") + print("AI summaries, highlights, structured output, subpage crawling/targeting, and content freshness.\n") agent = create_research_agent()