From 3a85fa8924d8ffec1f4a425817c25ba1de4125de Mon Sep 17 00:00:00 2001
From: Raoul Scalise <scaliseraoul00@gmail.com>
Date: Thu, 13 Feb 2025 21:59:11 +0100
Subject: [PATCH] Ambrogio: Code improvements

Modified files:
- llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
- advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
- advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
- rag_tutorials/rag_chain/app.py
- rag_tutorials/hybrid_search_rag/main.py
---
 .../multi_agent_researcher.py                 | 11 ++++
 .../llama3_tool_use.py                        | 12 +++++
 .../ai_arxiv_agent_memory.py                  | 11 ++++
 rag_tutorials/hybrid_search_rag/main.py       | 40 +++++++++++++++
 rag_tutorials/rag_chain/app.py                | 50 +++++++++++++++++++
 5 files changed, 124 insertions(+)

diff --git a/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py b/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
index 6d54dae..ba7e6e4 100644
--- a/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
+++ b/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
@@ -7,6 +7,17 @@ import os
 gpt4_model = None
 
 def create_article_crew(topic):
+    """Creates a team of agents to research, write, and edit an article on a given topic.
+
+    This function sets up a crew consisting of three agents: a researcher, a writer, and an editor. 
+    Each agent is assigned a specific task to ensure the production of a well-researched, 
+    well-written, and polished article. The article is formatted using markdown standards.
+
+    Args:
+        topic (str): The subject matter on which the article will be based.
+
+    Returns:
+        Crew: A crew object that contains the agents and tasks necessary to complete the article."""
     # Create agents
     researcher = Agent(
         role='Researcher',
diff --git a/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py b/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
index a1a16e2..f52d356 100644
--- a/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
+++ b/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
@@ -13,6 +13,18 @@ if 'SERPAPI_API_KEY' not in os.environ:
     st.stop()
 
 def get_assistant(tools):
+    """Creates and returns a configured assistant agent.
+
+    This function initializes an assistant agent with a specific model and toolset. 
+    The assistant is capable of accessing tools selected by the user and includes 
+    additional features such as showing tool call details, running in debug mode, 
+    and appending the current datetime to its instructions.
+
+    Args:
+        tools (list): A list of tools that the assistant can access.
+
+    Returns:
+        Agent: A configured assistant agent with specified capabilities and settings."""
     return Agent(
         name="llama3_assistant",
         model=Ollama(id="llama3.1:8b"),
diff --git a/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py b/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
index 098c3bd..2fef9f1 100644
--- a/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
+++ b/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
@@ -29,6 +29,17 @@ if all(api_keys.values()):
     search_query = st.text_input("Research paper search query")
 
     def process_with_gpt4(result):
+        """Processes an arXiv search result to produce a structured markdown output.
+
+        This function takes a search result from arXiv and generates a markdown-formatted
+        table containing details about each paper. The table includes columns for the
+        paper's title, authors, a brief abstract, and a link to the paper on arXiv.
+
+        Args:
+            result (str): The raw search result from arXiv, typically in a text format.
+
+        Returns:
+            str: A markdown-formatted string containing a table with paper details."""
         prompt = f"""
         Based on the following arXiv search result, provide a proper structured output in markdown that is readable by the users. 
         Each paper should have a title, authors, abstract, and link.
diff --git a/rag_tutorials/hybrid_search_rag/main.py b/rag_tutorials/hybrid_search_rag/main.py
index b11bd80..07af8a0 100644
--- a/rag_tutorials/hybrid_search_rag/main.py
+++ b/rag_tutorials/hybrid_search_rag/main.py
@@ -21,6 +21,23 @@ Instead, you MUST treat the context as if its contents are entirely part of your
 """.strip()
 
 def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_url: str) -> RAGLiteConfig:
+    """Initializes and returns a RAGLiteConfig object with the specified API keys and database URL.
+
+    This function sets the provided API keys in the environment variables and returns a 
+    RAGLiteConfig object configured with the given database URL and pre-defined settings for 
+    language model, embedder, and reranker.
+
+    Args:
+        openai_key (str): The API key for OpenAI services.
+        anthropic_key (str): The API key for Anthropic services.
+        cohere_key (str): The API key for Cohere services.
+        db_url (str): The database URL for connecting to the desired data source.
+
+    Returns:
+        RAGLiteConfig: A configuration object initialized with the specified parameters.
+
+    Raises:
+        ValueError: If configuration setup fails; the message starts with 'Configuration error:' and includes the original error."""
     try:
         os.environ["OPENAI_API_KEY"] = openai_key
         os.environ["ANTHROPIC_API_KEY"] = anthropic_key
@@ -39,6 +56,17 @@ def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_u
         raise ValueError(f"Configuration error: {e}")
 
 def process_document(file_path: str) -> bool:
+    """Inserts the document at the given path using the configuration held in the session state.
+
+    This function checks if a configuration is initialized in the session state.
+    If the configuration is present, it attempts to insert the document located
+    at the given file path using this configuration.
+
+    Args:
+        file_path (str): The path to the document to be processed.
+
+    Returns:
+        bool: True if the document was successfully processed; False otherwise."""
     try:
         if not st.session_state.get('my_config'):
             raise ValueError("Configuration not initialized")
@@ -49,6 +77,18 @@ def process_document(file_path: str) -> bool:
         return False
 
 def perform_search(query: str) -> List[dict]:
+    """Conducts a hybrid search and returns a list of ranked chunks based on the query.
+
+    This function performs a search using a hybrid search method, retrieves the relevant 
+    chunks, and reranks them according to the query. It handles any exceptions that occur 
+    during the process and logs the errors.
+
+    Args:
+        query (str): The search query string.
+
+    Returns:
+        List[dict]: A list of dictionaries representing the ranked chunks. Returns an 
+        empty list if no results are found or if an error occurs."""
     try:
         chunk_ids, scores = hybrid_search(query, num_results=10, config=st.session_state.my_config)
         if not chunk_ids:
diff --git a/rag_tutorials/rag_chain/app.py b/rag_tutorials/rag_chain/app.py
index 92eea3f..714999d 100644
--- a/rag_tutorials/rag_chain/app.py
+++ b/rag_tutorials/rag_chain/app.py
@@ -20,9 +20,29 @@ db = Chroma(collection_name="pharma_database",
             persist_directory='./pharma_db')
 
 def format_docs(docs):
+    """Formats a list of document objects into a single string.
+
+    Args:
+        docs (list): A list of document objects, each having a 'page_content' attribute.
+
+    Returns:
+        str: A single string containing the page content from each document, 
+        separated by double newlines."""
     return "\n\n".join(doc.page_content for doc in docs)
 
 def add_to_db(uploaded_files):
+    """Processes and adds uploaded PDF files to the database.
+
+    This function checks if any files have been uploaded. If files are uploaded,
+    it saves each file to a temporary location, processes the content using a PDF loader,
+    and splits the content into smaller chunks. Each chunk, along with its metadata, 
+    is then added to the database. Temporary files are removed after processing.
+
+    Args:
+        uploaded_files (list): A list of uploaded file objects to be processed.
+
+    Returns:
+        None"""
     # Check if files are uploaded
     if not uploaded_files:
         st.error("No files uploaded!")
@@ -59,6 +79,18 @@ def add_to_db(uploaded_files):
         os.remove(temp_file_path)
 
 def run_rag_chain(query):
+    """Processes a query using a Retrieval-Augmented Generation (RAG) chain.
+
+    This function utilizes a RAG chain to answer a given query. It retrieves 
+    relevant context using similarity search and then generates a response 
+    based on this context using a chat model. The chat model is pre-configured 
+    with a prompt template specialized in pharmaceutical sciences.
+
+    Args:
+        query (str): The user's question that needs to be answered.
+
+    Returns:
+        str: A response generated by the chat model, based on the retrieved context."""
     # Create a Retriever Object and apply Similarity Search
     retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5})
 
@@ -98,6 +130,24 @@ def run_rag_chain(query):
     return response
 
 def main():
+    """Initialize and manage the PharmaQuery application interface.
+
+    This function sets up the Streamlit application interface for PharmaQuery,
+    a Pharmaceutical Insight Retrieval System. Users can enter queries related
+    to the pharmaceutical industry, upload research documents, and manage API 
+    keys for enhanced functionality.
+
+    The main features include:
+    - Query input area for users to ask questions about the pharmaceutical industry.
+    - Submission button to process the query and display the retrieved insights.
+    - Sidebar for API key input and management.
+    - File uploader for adding research documents to the database, enhancing query responses.
+
+    Args:
+        None
+
+    Returns:
+        None"""
     st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:")
     st.header("Pharmaceutical Insight Retrieval System")