Ambrogio: Code improvements

Modified files:
- llm_apps_with_memory_tutorials/ai_arxiv_agent_memory/ai_arxiv_agent_memory.py
- advanced_tools_frameworks/cursor_ai_experiments/multi_agent_researcher.py
- advanced_tools_frameworks/local_llama3.1_tool_use/llama3_tool_use.py
- rag_tutorials/rag_chain/app.py
- rag_tutorials/hybrid_search_rag/main.py
This commit is contained in:
Raoul Scalise 2025-02-13 21:59:11 +01:00
parent 1a036b2813
commit 3a85fa8924
5 changed files with 124 additions and 0 deletions

View file

@ -7,6 +7,17 @@ import os
gpt4_model = None
def create_article_crew(topic):
"""Creates a team of agents to research, write, and edit an article on a given topic.
This function sets up a crew consisting of three agents: a researcher, a writer, and an editor.
Each agent is assigned a specific task to ensure the production of a well-researched,
well-written, and polished article. The article is formatted using markdown standards.
Args:
topic (str): The subject matter on which the article will be based.
Returns:
Crew: A crew object that contains the agents and tasks necessary to complete the article."""
# Create agents
researcher = Agent(
role='Researcher',

View file

@ -13,6 +13,18 @@ if 'SERPAPI_API_KEY' not in os.environ:
st.stop()
def get_assistant(tools):
"""Creates and returns a configured assistant agent.
This function initializes an assistant agent with a specific model and toolset.
The assistant is capable of accessing tools selected by the user and includes
additional features such as showing tool call details, running in debug mode,
and appending the current datetime to its instructions.
Args:
tools (list): A list of tools that the assistant can access.
Returns:
Agent: A configured assistant agent with specified capabilities and settings."""
return Agent(
name="llama3_assistant",
model=Ollama(id="llama3.1:8b"),

View file

@ -29,6 +29,17 @@ if all(api_keys.values()):
search_query = st.text_input("Research paper search query")
def process_with_gpt4(result):
"""Processes an arXiv search result to produce a structured markdown output.
This function takes a search result from arXiv and generates a markdown-formatted
table containing details about each paper. The table includes columns for the
paper's title, authors, a brief abstract, and a link to the paper on arXiv.
Args:
result (str): The raw search result from arXiv, typically in a text format.
Returns:
str: A markdown-formatted string containing a table with paper details."""
prompt = f"""
Based on the following arXiv search result, provide a proper structured output in markdown that is readable by the users.
Each paper should have a title, authors, abstract, and link.

View file

@ -21,6 +21,23 @@ Instead, you MUST treat the context as if its contents are entirely part of your
""".strip()
def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_url: str) -> RAGLiteConfig:
"""Initializes and returns a RAGLiteConfig object with the specified API keys and database URL.
This function sets the provided API keys in the environment variables and returns a
RAGLiteConfig object configured with the given database URL and pre-defined settings for
language model, embedder, and reranker.
Args:
openai_key (str): The API key for OpenAI services.
anthropic_key (str): The API key for Anthropic services.
cohere_key (str): The API key for Cohere services.
db_url (str): The database URL for connecting to the desired data source.
Returns:
RAGLiteConfig: A configuration object initialized with the specified parameters.
Raises:
ValueError: If any step of the configuration setup fails; the message is prefixed with "Configuration error:" and includes the original error text."""
try:
os.environ["OPENAI_API_KEY"] = openai_key
os.environ["ANTHROPIC_API_KEY"] = anthropic_key
@ -39,6 +56,17 @@ def initialize_config(openai_key: str, anthropic_key: str, cohere_key: str, db_u
raise ValueError(f"Configuration error: {e}")
def process_document(file_path: str) -> bool:
"""Processes a document by inserting it into a system with a given configuration.
This function checks if a configuration is initialized in the session state.
If the configuration is present, it attempts to insert the document located
at the given file path using this configuration.
Args:
file_path (str): The path to the document to be processed.
Returns:
bool: True if the document was successfully processed; False otherwise."""
try:
if not st.session_state.get('my_config'):
raise ValueError("Configuration not initialized")
@ -49,6 +77,18 @@ def process_document(file_path: str) -> bool:
return False
def perform_search(query: str) -> List[dict]:
"""Conducts a hybrid search and returns a list of ranked chunks based on the query.
This function performs a search using a hybrid search method, retrieves the relevant
chunks, and reranks them according to the query. It handles any exceptions that occur
during the process and logs the errors.
Args:
query (str): The search query string.
Returns:
List[dict]: A list of dictionaries representing the ranked chunks. Returns an
empty list if no results are found or if an error occurs."""
try:
chunk_ids, scores = hybrid_search(query, num_results=10, config=st.session_state.my_config)
if not chunk_ids:

View file

@ -20,9 +20,29 @@ db = Chroma(collection_name="pharma_database",
persist_directory='./pharma_db')
def format_docs(docs):
    """Join the page content of several documents into a single string.

    Args:
        docs (list): Document objects, each exposing a ``page_content``
            attribute holding that document's text.

    Returns:
        str: The page contents in their original order, separated by a
        blank line (two newline characters). An empty string when
        ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
def add_to_db(uploaded_files):
"""Processes and adds uploaded PDF files to the database.
This function checks if any files have been uploaded. If files are uploaded,
it saves each file to a temporary location, processes the content using a PDF loader,
and splits the content into smaller chunks. Each chunk, along with its metadata,
is then added to the database. Temporary files are removed after processing.
Args:
uploaded_files (list): A list of uploaded file objects to be processed.
Returns:
None"""
# Check if files are uploaded
if not uploaded_files:
st.error("No files uploaded!")
@ -59,6 +79,18 @@ def add_to_db(uploaded_files):
os.remove(temp_file_path)
def run_rag_chain(query):
"""Processes a query using a Retrieval-Augmented Generation (RAG) chain.
This function utilizes a RAG chain to answer a given query. It retrieves
relevant context using similarity search and then generates a response
based on this context using a chat model. The chat model is pre-configured
with a prompt template specialized in pharmaceutical sciences.
Args:
query (str): The user's question that needs to be answered.
Returns:
str: A response generated by the chat model, based on the retrieved context."""
# Create a Retriever Object and apply Similarity Search
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 5})
@ -98,6 +130,24 @@ def run_rag_chain(query):
return response
def main():
"""Initialize and manage the PharmaQuery application interface.
This function sets up the Streamlit application interface for PharmaQuery,
a Pharmaceutical Insight Retrieval System. Users can enter queries related
to the pharmaceutical industry, upload research documents, and manage API
keys for enhanced functionality.
The main features include:
- Query input area for users to ask questions about the pharmaceutical industry.
- Submission button to process the query and display the retrieved insights.
- Sidebar for API key input and management.
- File uploader for adding research documents to the database, enhancing query responses.
Args:
None
Returns:
None"""
st.set_page_config(page_title="PharmaQuery", page_icon=":microscope:")
st.header("Pharmaceutical Insight Retrieval System")