From 3a85fa8924d8ffec1f4a425817c25ba1de4125de Mon Sep 17 00:00:00 2001 From: Raoul Scalise Date: Thu, 13 Feb 2025 21:59:11 +0100 Subject: [PATCH] Ambrogio: Code improvements Modified files: - llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py - advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py - advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py - rag_tutorials/rag_chain/app.py - rag_tutorials/hybrid_search_rag/main.py --- .../multi_agent_researcher.py | 11 ++++ .../llama3_tool_use.py | 12 +++++ .../ai_arxiv_agent_memory.py | 11 ++++ rag_tutorials/hybrid_search_rag/main.py | 40 +++++++++++++++ rag_tutorials/rag_chain/app.py | 50 +++++++++++++++++++ 5 files changed, 124 insertions(+) diff --git a/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py b/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py index 6d54dae..ba7e6e4 100644 --- a/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py +++ b/advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py @@ -7,6 +7,17 @@ import os gpt4_model = None def create_article_crew(topic): + """Creates a team of agents to research, write, and edit an article on a given topic. + + This function sets up a crew consisting of three agents: a researcher, a writer, and an editor. + Each agent is assigned a specific task to ensure the production of a well-researched, + well-written, and polished article. The article is formatted using markdown standards. + + Args: + topic (str): The subject matter on which the article will be based. 
+ + Returns: + Crew: A crew object that contains the agents and tasks necessary to complete the article.""" # Create agents researcher = Agent( role='Researcher', diff --git a/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py b/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py index a1a16e2..f52d356 100644 --- a/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py +++ b/advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py @@ -13,6 +13,18 @@ if 'SERPAPI_API_KEY' not in os.environ: st.stop() def get_assistant(tools): + """Creates and returns a configured assistant agent. + + This function initializes an assistant agent with a specific model and toolset. + The assistant is capable of accessing tools selected by the user and includes + additional features such as showing tool call details, running in debug mode, + and appending the current datetime to its instructions. + + Args: + tools (list): A list of tools that the assistant can access. + + Returns: + Agent: A configured assistant agent with specified capabilities and settings.""" return Agent( name="llama3_assistant", model=Ollama(id="llama3.1:8b"), diff --git a/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py b/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py index 098c3bd..2fef9f1 100644 --- a/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py +++ b/llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py @@ -29,6 +29,17 @@ if all(api_keys.values()): search_query = st.text_input("Research paper search query") def process_with_gpt4(result): + """Processes an arXiv search result to produce a structured markdown output. + + This function takes a search result from arXiv and generates a markdown-formatted + table containing details about each paper. 
The table includes columns for the + paper's title, authors, a brief abstract, and a link to the paper on arXiv. + + Args: + result (str): The raw search result from arXiv, typically in a text format. + + Returns: + str: A markdown-formatted string containing a table with paper details.""" prompt = f""" Based on the following arXiv search result, provide a proper structured output in markdown that is readable by the users. Each paper should have a title, authors, abstract, and link. diff --git a/rag_tutorials/hybrid_search_rag/main.py b/rag_tutorials/hybrid_search_rag/main.py index b11bd80..07af8a0 100644 --- a/rag_tutorials/hybrid_search_rag/main.py +++ b/rag_tutorials/hybrid_search_rag/main.py @@ -21,6 +21,23 @@ Instead, you MUST treat the context as if its contents are entirely part of your """.strip() def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_url: str) -> RAGLiteConfig: + """Initializes and returns a RAGLiteConfig object with the specified API keys and database URL. + + This function sets the provided API keys in the environment variables and returns a + RAGLiteConfig object configured with the given database URL and pre-defined settings for + language model, embedder, and reranker. + + Args: + openai_key (str): The API key for OpenAI services. + anthropic_key (str): The API key for Anthropic services. + cohere_key (str): The API key for Cohere services. + db_url (str): The database URL for connecting to the desired data source. + + Returns: + RAGLiteConfig: A configuration object initialized with the specified parameters. 
+ + Raises: + ValueError: If setting the API keys or building the configuration fails; the message includes the original error.""" try: os.environ["OPENAI_API_KEY"] = openai_key os.environ["ANTHROPIC_API_KEY"] = anthropic_key @@ -39,6 +56,17 @@ def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_u raise ValueError(f"Configuration error: {e}") def process_document(file_path: str) -> bool: + """Processes a document by inserting it with the configuration stored in the session state. + + This function checks if a configuration is initialized in the session state. + If the configuration is present, it attempts to insert the document located + at the given file path using this configuration. + + Args: + file_path (str): The path to the document to be processed. + + Returns: + bool: True if the document was successfully processed; False otherwise.""" try: if not st.session_state.get('my_config'): raise ValueError("Configuration not initialized") @@ -49,6 +77,18 @@ def process_document(file_path: str) -> bool: return False def perform_search(query: str) -> List[dict]: + """Conducts a hybrid search and returns a list of ranked chunks based on the query. + + This function performs a search using a hybrid search method, retrieves the relevant + chunks, and reranks them according to the query. It handles any exceptions that occur + during the process and logs the errors. + + Args: + query (str): The search query string. + + Returns: + List[dict]: A list of dictionaries representing the ranked chunks. 
Returns an + empty list if no results are found or if an error occurs.""" try: chunk_ids, scores = hybrid_search(query, num_results=10, config=st.session_state.my_config) if not chunk_ids: diff --git a/rag_tutorials/rag_chain/app.py b/rag_tutorials/rag_chain/app.py index 92eea3f..714999d 100644 --- a/rag_tutorials/rag_chain/app.py +++ b/rag_tutorials/rag_chain/app.py @@ -20,9 +20,29 @@ db = Chroma(collection_name="pharma_database", persist_directory='./pharma_db') def format_docs(docs): + """Formats a list of document objects into a single string. + + Args: + docs (list): A list of document objects, each having a 'page_content' attribute. + + Returns: + str: A single string containing the page content from each document, + separated by double newlines.""" return "\n\n".join(doc.page_content for doc in docs) def add_to_db(uploaded_files): + """Processes and adds uploaded PDF files to the database. + + This function checks if any files have been uploaded. If files are uploaded, + it saves each file to a temporary location, processes the content using a PDF loader, + and splits the content into smaller chunks. Each chunk, along with its metadata, + is then added to the database. Temporary files are removed after processing. + + Args: + uploaded_files (list): A list of uploaded file objects to be processed. + + Returns: + None""" # Check if files are uploaded if not uploaded_files: st.error("No files uploaded!") @@ -59,6 +79,18 @@ def add_to_db(uploaded_files): os.remove(temp_file_path) def run_rag_chain(query): + """Processes a query using a Retrieval-Augmented Generation (RAG) chain. + + This function utilizes a RAG chain to answer a given query. It retrieves + relevant context using similarity search and then generates a response + based on this context using a chat model. The chat model is pre-configured + with a prompt template specialized in pharmaceutical sciences. + + Args: + query (str): The user's question that needs to be answered. 
+ + Returns: + str: A response generated by the chat model, based on the retrieved context.""" # Create a Retriever Object and apply Similarity Search retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5}) @@ -98,6 +130,24 @@ def run_rag_chain(query): return response def main(): + """Initialize and manage the PharmaQuery application interface. + + This function sets up the Streamlit application interface for PharmaQuery, + a Pharmaceutical Insight Retrieval System. Users can enter queries related + to the pharmaceutical industry, upload research documents, and manage API + keys for enhanced functionality. + + The main features include: + - Query input area for users to ask questions about the pharmaceutical industry. + - Submission button to process the query and display the retrieved insights. + - Sidebar for API key input and management. + - File uploader for adding research documents to the database, enhancing query responses. + + Args: + None + + Returns: + None""" st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:") st.header("Pharmaceutical Insight Retrieval System")