Skip to main content

Search Memory

Search through stored data and conversation history using various search methods.

Overview

The Memory system keeps data in both persistent storage and conversation history, and the patterns on this page show how to search across both. This enables building intelligent applications that can recall context and find related information efficiently.

Search Methods

While the current implementation doesn’t include built-in search functionality, you can implement search patterns using the existing memory methods:
async def search_memory(agent, query, keys_to_search):
    """Collect stored entries whose content contains ``query``.

    Every key in ``keys_to_search`` is recalled from ``agent`` in order.
    Truthy values of type ``str``, ``dict`` or ``list`` are handed to
    ``search_in_data``; each hit is reported as a
    ``{'key': ..., 'data': ...}`` dict, in the order the keys were given.
    """
    searchable_types = (str, dict, list)
    matches = []

    for name in keys_to_search:
        stored = await agent.recall(name)
        # Skip missing/empty entries and payload types we cannot search.
        if not stored or not isinstance(stored, searchable_types):
            continue
        if search_in_data(stored, query):
            matches.append({'key': name, 'data': stored})

    return matches

def search_in_data(data, query):
    """Return True when ``query`` occurs, ignoring case, inside ``data``.

    Strings are tested directly. Dict values and list items are searched
    recursively (dict keys are not inspected). Any other type never
    matches.
    """
    needle = query.lower()

    if isinstance(data, str):
        return needle in data.lower()

    # Pick the children to descend into; anything else cannot match.
    if isinstance(data, dict):
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        return False

    for child in children:
        if search_in_data(child, query):
            return True
    return False
Search through conversation messages:
def search_messages(memory, query, role=None):
    """Find conversation messages whose content contains ``query``.

    The match is case-insensitive. When ``role`` is truthy, only messages
    whose ``role`` attribute equals it are considered. Each hit is a dict
    holding the message's position in ``memory.get_messages()`` (``index``),
    the message object itself and its ``timestamp`` attribute.
    """
    return [
        {
            'index': position,
            'message': msg,
            'timestamp': msg.timestamp
        }
        for position, msg in enumerate(memory.get_messages())
        # Role filter first, then the substring test on the content.
        if not (role and msg.role != role)
        and query.lower() in msg.content.lower()
    ]

Examples

import asyncio
from zg_ai_sdk import create_agent

async def search_user_data(agent, search_term):
    """Search the fixed set of user-related keys for ``search_term``.

    Each key is recalled from ``agent``; truthy values that
    ``search_in_content`` accepts are scored with ``calculate_relevance``.
    Returns a list of ``{'key', 'data', 'relevance'}`` dicts ordered from
    highest to lowest relevance.
    """
    # Keys examined, in lookup order.
    candidate_keys = (
        'user_profile',
        'user_preferences',
        'user_settings',
        'user_history',
        'user_projects',
    )

    hits = []
    for name in candidate_keys:
        stored = await agent.recall(name)
        if not stored or not search_in_content(stored, search_term):
            continue
        hits.append({
            'key': name,
            'data': stored,
            'relevance': calculate_relevance(stored, search_term)
        })

    # Highest score first; ties keep the lookup order (stable sort).
    return sorted(hits, key=lambda hit: hit['relevance'], reverse=True)

def search_in_content(data, term):
    """Report whether ``term`` appears, ignoring case, anywhere in ``data``.

    Strings are tested directly. For dicts both the keys (via ``str``) and
    the values (recursively) are checked; list items are checked
    recursively. Any other type yields False.
    """
    needle = term.lower()

    if isinstance(data, str):
        return needle in data.lower()

    if isinstance(data, dict):
        # A hit on either the key text or the nested value is enough.
        return any(
            needle in str(name).lower() or search_in_content(child, term)
            for name, child in data.items()
        )

    if isinstance(data, list):
        for element in data:
            if search_in_content(element, term):
                return True

    return False

def calculate_relevance(data, term):
    """Count case-insensitive occurrences of ``term`` within ``data``.

    Strings contribute their non-overlapping occurrence count. Dict values
    and list items are summed recursively (dict keys are not counted).
    Any other type scores 0.
    """
    if isinstance(data, str):
        return data.lower().count(term.lower())

    if isinstance(data, dict):
        parts = data.values()
    elif isinstance(data, list):
        parts = data
    else:
        return 0

    score = 0
    for part in parts:
        score += calculate_relevance(part, term)
    return score

async def main():
    """Demo: create an agent, store sample data, then search it for 'Python'.

    Stores a sample profile and project list through ``agent.remember``,
    runs ``search_user_data`` and prints each hit with its relevance.
    """
    agent_config = {
        'name': 'Search Assistant',
        'provider_address': '0xf07240Efa67755B5311bc75784a061eDB47165Dd',
        'memory_bucket': 'search-demo',
        # Placeholder value: substitute a real key before running.
        'private_key': 'your-private-key'
    }
    agent = await create_agent(agent_config)

    # Store sample data
    sample_profile = {
        'name': 'Alice Johnson',
        'skills': ['Python', 'Machine Learning', 'Data Science'],
        'bio': 'Passionate about AI and Python development'
    }
    await agent.remember('user_profile', sample_profile)

    sample_projects = [
        {'name': 'ML Pipeline', 'tech': 'Python, TensorFlow'},
        {'name': 'Web Scraper', 'tech': 'Python, BeautifulSoup'},
        {'name': 'Data Viz', 'tech': 'Python, Matplotlib'}
    ]
    await agent.remember('user_projects', sample_projects)

    # Search for Python-related content
    results = await search_user_data(agent, 'Python')

    print("Search results for 'Python':")
    for result in results:
        print(f"Key: {result['key']}, Relevance: {result['relevance']}")
        print(f"Data: {result['data']}")
        print("---")

# Script entry point: run the main() coroutine to completion on a new event loop.
asyncio.run(main())

Search Patterns

def fuzzy_match(text, query, threshold=0.6):
    """Simple fuzzy string matching.

    Returns True when ``query`` approximately matches ``text`` as a whole,
    or approximately matches any run of consecutive words inside ``text``.

    The whole-string comparison alone is not enough for searching: the
    similarity ratio is ``2 * matches / (len(text) + len(query))``, so a
    short query compared against a long text can never reach the threshold
    even when the text contains the query verbatim. The word-window pass
    compares the query against every window of as many words as the query
    has, which lets short queries (including misspelled ones) be found
    inside longer content.

    Args:
        text: The text to search in.
        query: The text to look for.
        threshold: Minimum ``SequenceMatcher`` ratio (0.0-1.0) that counts
            as a match.

    Returns:
        bool: True if the whole text or any word window reaches
        ``threshold``; False otherwise.
    """
    import re
    from difflib import SequenceMatcher

    haystack = text.lower()
    needle = query.lower()

    # Original behaviour: compare the two strings in their entirety.
    if SequenceMatcher(None, haystack, needle).ratio() >= threshold:
        return True

    # Tokenise on word characters so punctuation and quoting around words
    # (e.g. from str() of a dict or list) does not dilute the ratio.
    needle_words = re.findall(r"\w+", needle)
    if not needle_words:
        return False
    haystack_words = re.findall(r"\w+", haystack)
    width = len(needle_words)

    # SequenceMatcher caches analysis of the second sequence, so set the
    # query once and only swap the first sequence per window.
    matcher = SequenceMatcher(None)
    matcher.set_seq2(" ".join(needle_words))
    for start in range(len(haystack_words) - width + 1):
        matcher.set_seq1(" ".join(haystack_words[start:start + width]))
        if matcher.ratio() >= threshold:
            return True

    return False

async def fuzzy_search(agent, query, keys, threshold=0.6):
    """Return stored entries whose string form fuzzily matches ``query``.

    Each key in ``keys`` is recalled from ``agent``; truthy values are
    converted with ``str`` and tested with ``fuzzy_match`` at the given
    ``threshold``. Hits are ``{'key': ..., 'data': ...}`` dicts in key
    order.
    """
    hits = []

    for name in keys:
        stored = await agent.recall(name)
        if stored and fuzzy_match(str(stored), query, threshold):
            hits.append({'key': name, 'data': stored})

    return hits
async def multi_field_search(agent, query_dict, keys):
    """Rank dict entries by how many of the given field criteria they meet.

    For every key in ``keys`` the value is recalled from ``agent``; only
    truthy dicts are considered. A criterion counts as met when the field
    exists and either (string criterion) the expected text occurs,
    ignoring case, in ``str`` of the stored value, or (any other criterion)
    the stored value equals it. Entries with at least one met criterion
    are returned as ``{'key', 'data', 'match_ratio'}`` dicts, best ratio
    first.
    """
    ranked = []

    for name in keys:
        record = await agent.recall(name)
        if not record or not isinstance(record, dict):
            continue

        hits = 0
        for field, wanted in query_dict.items():
            if field not in record:
                continue
            actual = record[field]
            if isinstance(wanted, str):
                # Text criteria: case-insensitive substring match.
                satisfied = wanted.lower() in str(actual).lower()
            else:
                satisfied = actual == wanted
            if satisfied:
                hits += 1

        if hits > 0:
            ranked.append({
                'key': name,
                'data': record,
                'match_ratio': hits / len(query_dict)
            })

    # Stable sort: equal ratios keep the order of ``keys``.
    return sorted(ranked, key=lambda entry: entry['match_ratio'], reverse=True)

# Usage
# NOTE: `agent` and `content_keys` are not defined in this snippet; supply
# your own agent and the list of memory keys to inspect.
# String criteria are matched as case-insensitive substrings of the stored
# field value by multi_field_search.
search_criteria = {
    'category': 'programming',
    'difficulty': 'beginner',
    'language': 'python'
}
# `await` is only valid inside an `async def` function (or an
# async-aware REPL/notebook); at the top level of a plain script this
# line is a SyntaxError.
results = await multi_field_search(agent, search_criteria, content_keys)
from datetime import datetime, timedelta

def search_by_time_range(memory, start_time, end_time, query=None):
    """Return messages timestamped within ``[start_time, end_time]``.

    Both bounds are inclusive. Messages with a falsy ``timestamp`` are
    skipped. When ``query`` is given, only messages whose content contains
    it (ignoring case) are kept. The result preserves the order of
    ``memory.get_messages()``.
    """
    return [
        msg
        for msg in memory.get_messages()
        if msg.timestamp
        and start_time <= msg.timestamp <= end_time
        # No query means every message in the window qualifies.
        and (query is None or query.lower() in msg.content.lower())
    ]

# Search last 24 hours
# NOTE: `memory` is not defined in this snippet; pass the memory object
# whose get_messages() you want to search.
# datetime.now() without a tz argument returns a naive local-time value;
# assumes message timestamps are naive datetimes too — TODO confirm, since
# comparing naive and timezone-aware datetimes raises TypeError.
now = datetime.now()
yesterday = now - timedelta(days=1)
recent_messages = search_by_time_range(memory, yesterday, now, 'python')

Performance Considerations

  1. Indexing: Build search indexes for frequently searched data
  2. Caching: Cache search results for repeated queries
  3. Pagination: Implement pagination for large result sets
  4. Async Operations: Use concurrent searches for multiple keys
  5. Memory Usage: Be mindful of memory usage with large datasets

Best Practices

  1. Normalize Text: Convert to lowercase and remove special characters
  2. Use Keywords: Extract meaningful keywords for better matching
  3. Score Results: Implement relevance scoring for better ranking
  4. Handle Edge Cases: Account for empty data and malformed content
  5. User Feedback: Allow users to refine searches based on results

Next Steps