Ambrogio: Code improvements

Modified files: - llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py - advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py - advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py - rag_tutorials/rag_chain/app.py - rag_tutorials/hybrid_search_rag/main.py
2025-02-13 21:59:11 +01:00 · 2025-02-13 21:59:11 +01:00 · 3a85fa8924
commit 3a85fa8924
parent 1a036b2813
5 changed files with 124 additions and 0 deletions
--- a/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
+++ b/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
@@ -7,6 +7,17 @@ import os
 gpt4_model = None

 def create_article_crew(topic):
+    """Creates a team of agents to research, write, and edit an article on a given topic.
+
+    This function sets up a crew consisting of three agents: a researcher, a writer, and an editor. 
+    Each agent is assigned a specific task to ensure the production of a well-researched, 
+    well-written, and polished article. The article is formatted using markdown standards.
+
+    Args:
+        topic (str): The subject matter on which the article will be based.
+
+    Returns:
+        Crew: A crew object that contains the agents and tasks necessary to complete the article."""
    # Create agents
    researcher = Agent(
        role='Researcher',
--- a/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
+++ b/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
@@ -13,6 +13,18 @@ if 'SERPAPI_API_KEY' not in os.environ:
    st.stop()

 def get_assistant(tools):
+    """Creates and returns a configured assistant agent.
+
+    This function initializes an assistant agent with a specific model and toolset. 
+    The assistant is capable of accessing tools selected by the user and includes 
+    additional features such as showing tool call details, running in debug mode, 
+    and appending the current datetime to its instructions.
+
+    Args:
+        tools (list): A list of tools that the assistant can access.
+
+    Returns:
+        Agent: A configured assistant agent with specified capabilities and settings."""
    return Agent(
        name="llama3_assistant",
        model=Ollama(id="llama3.1:8b"),
--- a/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
+++ b/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
@@ -29,6 +29,17 @@ if all(api_keys.values()):
    search_query = st.text_input("Research paper search query")

    def process_with_gpt4(result):
+        """Processes an arXiv search result to produce a structured markdown output.
+
+        This function takes a search result from arXiv and generates a markdown-formatted
+        table containing details about each paper. The table includes columns for the
+        paper's title, authors, a brief abstract, and a link to the paper on arXiv.
+
+        Args:
+            result (str): The raw search result from arXiv, typically in a text format.
+
+        Returns:
+            str: A markdown-formatted string containing a table with paper details."""
        prompt = f"""
        Based on the following arXiv search result, provide a proper structured output in markdown that is readable by the users. 
        Each paper should have a title, authors, abstract, and link.
--- a/rag_tutorials/hybrid_search_rag/main.py
+++ b/rag_tutorials/hybrid_search_rag/main.py
@@ -21,6 +21,23 @@ Instead, you MUST treat the context as if its contents are entirely part of your
 """.strip()

 def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_url: str) -> RAGLiteConfig:
+    """Initializes and returns a RAGLiteConfig object with the specified API keys and database URL.
+
+    This function sets the provided API keys in the environment variables and returns a 
+    RAGLiteConfig object configured with the given database URL and pre-defined settings for 
+    language model, embedder, and reranker.
+
+    Args:
+        openai_key (str): The API key for OpenAI services.
+        anthropic_key (str): The API key for Anthropic services.
+        cohere_key (str): The API key for Cohere services.
+        db_url (str): The database URL for connecting to the desired data source.
+
+    Returns:
+        RAGLiteConfig: A configuration object initialized with the specified parameters.
+
+    Raises:
+        ValueError: If setting up the configuration fails; the message includes the underlying error."""
    try:
        os.environ["OPENAI_API_KEY"] = openai_key
        os.environ["ANTHROPIC_API_KEY"] = anthropic_key
@@ -39,6 +56,17 @@ def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_u
        raise ValueError(f"Configuration error: {e}")

 def process_document(file_path: str) -> bool:
+    """Inserts the document at the given file path using the configuration stored in the session state.
+
+    This function checks if a configuration is initialized in the session state.
+    If the configuration is present, it attempts to insert the document located
+    at the given file path using this configuration.
+
+    Args:
+        file_path (str): The path to the document to be processed.
+
+    Returns:
+        bool: True if the document was successfully processed; False otherwise."""
    try:
        if not st.session_state.get('my_config'):
            raise ValueError("Configuration not initialized")
@@ -49,6 +77,18 @@ def process_document(file_path: str) -> bool:
        return False

 def perform_search(query: str) -> List[dict]:
+    """Conducts a hybrid search and returns a list of ranked chunks based on the query.
+
+    This function performs a search using a hybrid search method, retrieves the relevant 
+    chunks, and reranks them according to the query. It handles any exceptions that occur 
+    during the process and logs the errors.
+
+    Args:
+        query (str): The search query string.
+
+    Returns:
+        List[dict]: A list of dictionaries representing the ranked chunks. Returns an 
+        empty list if no results are found or if an error occurs."""
    try:
        chunk_ids, scores = hybrid_search(query, num_results=10, config=st.session_state.my_config)
        if not chunk_ids:
--- a/rag_tutorials/rag_chain/app.py
+++ b/rag_tutorials/rag_chain/app.py
@@ -20,9 +20,29 @@ db = Chroma(collection_name="pharma_database",
            persist_directory='./pharma_db')

 def format_docs(docs):
+    """Formats a list of document objects into a single string.
+
+    Args:
+        docs (list): A list of document objects, each having a 'page_content' attribute.
+
+    Returns:
+        str: A single string containing the page content from each document, 
+        separated by double newlines."""
    return "\n\n".join(doc.page_content for doc in docs)

 def add_to_db(uploaded_files):
+    """Processes and adds uploaded PDF files to the database.
+
+    This function checks if any files have been uploaded. If files are uploaded,
+    it saves each file to a temporary location, processes the content using a PDF loader,
+    and splits the content into smaller chunks. Each chunk, along with its metadata, 
+    is then added to the database. Temporary files are removed after processing.
+
+    Args:
+        uploaded_files (list): A list of uploaded file objects to be processed.
+
+    Returns:
+        None"""
    # Check if files are uploaded
    if not uploaded_files:
        st.error("No files uploaded!")
@@ -59,6 +79,18 @@ def add_to_db(uploaded_files):
        os.remove(temp_file_path)

 def run_rag_chain(query):
+    """Processes a query using a Retrieval-Augmented Generation (RAG) chain.
+
+    This function utilizes a RAG chain to answer a given query. It retrieves 
+    relevant context using similarity search and then generates a response 
+    based on this context using a chat model. The chat model is pre-configured 
+    with a prompt template specialized in pharmaceutical sciences.
+
+    Args:
+        query (str): The user's question that needs to be answered.
+
+    Returns:
+        str: A response generated by the chat model, based on the retrieved context."""
    # Create a Retriever Object and apply Similarity Search
    retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5})

@@ -98,6 +130,24 @@ def run_rag_chain(query):
    return response

 def main():
+    """Initialize and manage the PharmaQuery application interface.
+
+    This function sets up the Streamlit application interface for PharmaQuery,
+    a Pharmaceutical Insight Retrieval System. Users can enter queries related
+    to the pharmaceutical industry, upload research documents, and manage API 
+    keys for enhanced functionality.
+
+    The main features include:
+    - Query input area for users to ask questions about the pharmaceutical industry.
+    - Submission button to process the query and display the retrieved insights.
+    - Sidebar for API key input and management.
+    - File uploader for adding research documents to the database, enhancing query responses.
+
+    Args:
+        None
+
+    Returns:
+        None"""
    st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:")
    st.header("Pharmaceutical Insight Retrieval System")