Expand X's t.co links (#91)

This PR expands t.co links in user bios and in the user's link section.
This PR also expands t.co links that are contained within tweets.


![image](https://github.com/user-attachments/assets/15c72c7b-1950-46a3-872c-dae45119bc81)
This commit is contained in:
Eric Gustin 2024-10-04 12:42:11 -07:00 committed by GitHub
parent b52f6daa6f
commit 844403906d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 99 additions and 31 deletions

View file

@ -6,7 +6,11 @@ from arcade.core.errors import ToolExecutionError
from arcade.core.schema import ToolContext
from arcade.sdk import tool
from arcade.sdk.auth import X
from arcade_x.tools.utils import get_tweet_url, parse_search_recent_tweets_response
from arcade_x.tools.utils import (
expand_urls_in_tweets,
get_tweet_url,
parse_search_recent_tweets_response,
)
TWEETS_URL = "https://api.x.com/2/tweets"
@ -69,7 +73,7 @@ async def search_recent_tweets_by_username(
max_results: Annotated[
int, "The maximum number of results to return. Cannot be less than 10"
] = 10,
) -> Annotated[str, "JSON string of the search results"]:
) -> Annotated[dict, "Dictionary containing the search results"]:
"""Search for recent tweets (last 7 days) on X (Twitter) by username. Includes replies and reposts."""
headers = {
@ -80,9 +84,7 @@ async def search_recent_tweets_by_username(
"query": f"from:{username}",
"max_results": max(max_results, 10), # X API does not allow 'max_results' less than 10
}
url = (
"https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username"
)
url = "https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username,entities&tweet.fields=entities"
async with httpx.AsyncClient() as client:
response = await client.get(url, headers=headers, params=params, timeout=10)
@ -92,9 +94,14 @@ async def search_recent_tweets_by_username(
f"Failed to search recent tweets during execution of '{search_recent_tweets_by_username.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
)
tweets_data = parse_search_recent_tweets_response(response)
response_data = response.json()
return tweets_data
# Expand the urls that are in the tweets
expand_urls_in_tweets(response_data.get("data", []), delete_entities=True)
parse_search_recent_tweets_response(response_data)
return response_data
@tool(requires_auth=X(scopes=["tweet.read", "users.read"]))
@ -109,7 +116,7 @@ async def search_recent_tweets_by_keywords(
max_results: Annotated[
int, "The maximum number of results to return. Cannot be less than 10"
] = 10,
) -> Annotated[str, "JSON string of the search results"]:
) -> Annotated[dict, "Dictionary containing the search results"]:
"""
Search for recent tweets (last 7 days) on X (Twitter) by required keywords and phrases. Includes replies and reposts
One of the following input parametersMUST be provided: keywords, phrases
@ -124,7 +131,7 @@ async def search_recent_tweets_by_keywords(
"Authorization": f"Bearer {context.authorization.token}",
"Content-Type": "application/json",
}
query = " ".join([f'"{phrase}"' for phrase in (phrases or [])]) + " " + " "
query = "".join([f'"{phrase}" ' for phrase in (phrases or [])])
if keywords:
query += " ".join(keywords or [])
@ -132,9 +139,7 @@ async def search_recent_tweets_by_keywords(
"query": query,
"max_results": max(max_results, 10), # X API does not allow 'max_results' less than 10
}
url = (
"https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username"
)
url = "https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username,entities&tweet.fields=entities"
async with httpx.AsyncClient() as client:
response = await client.get(url, headers=headers, params=params, timeout=10)
@ -144,6 +149,11 @@ async def search_recent_tweets_by_keywords(
f"Failed to search recent tweets during execution of '{search_recent_tweets_by_keywords.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
)
tweets_data = parse_search_recent_tweets_response(response)
response_data = response.json()
return tweets_data
# Expand the urls that are in the tweets
expand_urls_in_tweets(response_data.get("data", []), delete_entities=True)
parse_search_recent_tweets_response(response_data)
return response_data

View file

@ -6,6 +6,7 @@ from arcade.core.errors import ToolExecutionError
from arcade.core.schema import ToolContext
from arcade.sdk import tool
from arcade.sdk.auth import X
from arcade_x.tools.utils import expand_urls_in_user_description, expand_urls_in_user_url
# Users Lookup Tools. See developer docs for additional available query parameters: https://developer.x.com/en/docs/x-api/users/lookup/api-reference
@ -13,13 +14,13 @@ from arcade.sdk.auth import X
async def lookup_single_user_by_username(
context: ToolContext,
username: Annotated[str, "The username of the X (Twitter) user to look up"],
) -> Annotated[str, "User information including id, name, username, and description"]:
) -> Annotated[dict, "User information including id, name, username, and description"]:
"""Look up a user on X (Twitter) by their username."""
headers = {
"Authorization": f"Bearer {context.authorization.token}",
}
url = f"https://api.x.com/2/users/by/username/{username}?user.fields=created_at,description,id,location,most_recent_tweet_id,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,verified_type,withheld"
url = f"https://api.x.com/2/users/by/username/{username}?user.fields=created_at,description,id,location,most_recent_tweet_id,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,verified_type,withheld,entities"
async with httpx.AsyncClient() as client:
response = await client.get(url, headers=headers, timeout=10)
@ -29,8 +30,14 @@ async def lookup_single_user_by_username(
f"Failed to look up user during execution of '{lookup_single_user_by_username.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
)
# Parse the response JSON
user_data = response.json()["data"]
expand_urls_in_user_description(user_data, delete_entities=False)
expand_urls_in_user_url(user_data, delete_entities=True)
"""
Example response.text structure:
Example response["data"] structure:
{
"data": {
"verified_type": str,
@ -55,4 +62,4 @@ async def lookup_single_user_by_username(
}
}
"""
return response.text
return {"data": user_data}

View file

@ -1,6 +1,4 @@
import json
from requests import Response
from typing import Any
def get_tweet_url(tweet_id: str) -> str:
@ -8,7 +6,7 @@ def get_tweet_url(tweet_id: str) -> str:
return f"https://x.com/x/status/{tweet_id}"
def parse_search_recent_tweets_response(response: Response) -> str:
def parse_search_recent_tweets_response(response_data: Any) -> dict:
"""
Parses response from the X API search recent tweets endpoint.
Returns a JSON string with the tweets data.
@ -28,22 +26,18 @@ def parse_search_recent_tweets_response(response: Response) -> str:
},
]
"""
if response.status_code != 200:
return json.dumps({"tweets": []})
tweets_data = json.loads(response.text)
if not sanity_check_tweets_data(response_data):
return {"data": []}
if not sanity_check_tweets_data(tweets_data):
return json.dumps({"tweets": []})
for tweet in tweets_data["data"]:
for tweet in response_data["data"]:
tweet["tweet_url"] = get_tweet_url(tweet["id"])
for tweet_data, user_data in zip(tweets_data["data"], tweets_data["includes"]["users"]):
for tweet_data, user_data in zip(response_data["data"], response_data["includes"]["users"]):
tweet_data["author_username"] = user_data["username"]
tweet_data["author_name"] = user_data["name"]
return json.dumps({"tweets": tweets_data["data"]})
return response_data
def sanity_check_tweets_data(tweets_data: dict) -> bool:
@ -54,3 +48,60 @@ def sanity_check_tweets_data(tweets_data: dict) -> bool:
if not tweets_data.get("data", []):
return False
return tweets_data.get("includes", {}).get("users", [])
def expand_urls_in_tweets(tweets_data: list[dict], delete_entities: bool = True) -> None:
    """
    Expands the shortened urls in the text of the provided tweets, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        tweets_data: Tweet dictionaries. Each tweet's "text" is rewritten using the
            "url" -> "expanded_url" pairs found under tweet["entities"]["urls"].
        delete_entities: When True, the "entities" key is removed from every tweet
            once its urls have been expanded.

    Returns:
        None. The tweet dictionaries are modified in place.
    """
    for tweet_data in tweets_data:
        # "entities" (or its "urls" list) may be absent or null when a tweet has no links
        url_entities = (tweet_data.get("entities") or {}).get("urls") or []
        text = tweet_data.get("text")

        if isinstance(text, str):
            for url_entity in url_entities:
                short_url = url_entity.get("url")
                expanded_url = url_entity.get("expanded_url")
                # Skip incomplete entities: a missing key must not abort the whole batch,
                # and replacing an empty needle would inject text between every character
                if short_url and expanded_url:
                    text = text.replace(short_url, expanded_url)
            tweet_data["text"] = text

        if delete_entities:
            # Now that we've expanded the urls in the tweet, we no longer need the entities
            tweet_data.pop("entities", None)
def expand_urls_in_user_description(user_data: dict, delete_entities: bool = True) -> None:
    """
    Expands the shortened urls in the description of the provided user, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        user_data: User dictionary. Its "description" is rewritten using the
            "url" -> "expanded_url" pairs found under
            user_data["entities"]["description"]["urls"]. The "description" key is
            always written back (as an empty string when the user has none).
        delete_entities: When True, the "entities" key is removed from the user
            after the description has been expanded. Pass False when another
            expansion step still needs to read the entities afterwards.

    Returns:
        None. The user dictionary is modified in place.
    """
    # Any level of the entities tree may be absent or null for users without links
    entities = user_data.get("entities") or {}
    description_urls = (entities.get("description") or {}).get("urls") or []
    description = user_data.get("description") or ""

    for url_info in description_urls:
        t_co_link = url_info.get("url")
        expanded_url = url_info.get("expanded_url")
        # Skip incomplete entries instead of raising KeyError or replacing an empty needle
        if t_co_link and expanded_url:
            description = description.replace(t_co_link, expanded_url)
    user_data["description"] = description

    if delete_entities:
        # Entities is no longer needed now that we have expanded the t.co links
        user_data.pop("entities", None)
def expand_urls_in_user_url(user_data: dict, delete_entities: bool = True) -> None:
    """
    Expands the shortened urls in the url section of the provided user, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        user_data: User dictionary. Its "url" is rewritten using the
            "url" -> "expanded_url" pairs found under
            user_data["entities"]["url"]["urls"]. The "url" key is always written
            back (as an empty string when the user has none).
        delete_entities: When True, the "entities" key is removed from the user
            after the url has been expanded.

    Returns:
        None. The user dictionary is modified in place.
    """
    # Any level of the entities tree may be absent or null for users without a profile link
    entities = user_data.get("entities") or {}
    url_urls = (entities.get("url") or {}).get("urls") or []
    url = user_data.get("url") or ""

    for url_info in url_urls:
        t_co_link = url_info.get("url")
        expanded_url = url_info.get("expanded_url")
        # Skip incomplete entries instead of raising KeyError or replacing an empty needle
        if t_co_link and expanded_url:
            url = url.replace(t_co_link, expanded_url)
    user_data["url"] = url

    if delete_entities:
        # Entities is no longer needed now that we have expanded the t.co links
        user_data.pop("entities", None)