Expand X's t.co links (#91)
This PR expands t.co links in user bios and in the user's link section. It also expands t.co links contained within tweets.
This commit is contained in:
parent
b52f6daa6f
commit
844403906d
3 changed files with 99 additions and 31 deletions
|
|
@ -6,7 +6,11 @@ from arcade.core.errors import ToolExecutionError
|
|||
from arcade.core.schema import ToolContext
|
||||
from arcade.sdk import tool
|
||||
from arcade.sdk.auth import X
|
||||
from arcade_x.tools.utils import get_tweet_url, parse_search_recent_tweets_response
|
||||
from arcade_x.tools.utils import (
|
||||
expand_urls_in_tweets,
|
||||
get_tweet_url,
|
||||
parse_search_recent_tweets_response,
|
||||
)
|
||||
|
||||
TWEETS_URL = "https://api.x.com/2/tweets"
|
||||
|
||||
|
|
@ -69,7 +73,7 @@ async def search_recent_tweets_by_username(
|
|||
max_results: Annotated[
|
||||
int, "The maximum number of results to return. Cannot be less than 10"
|
||||
] = 10,
|
||||
) -> Annotated[str, "JSON string of the search results"]:
|
||||
) -> Annotated[dict, "Dictionary containing the search results"]:
|
||||
"""Search for recent tweets (last 7 days) on X (Twitter) by username. Includes replies and reposts."""
|
||||
|
||||
headers = {
|
||||
|
|
@ -80,9 +84,7 @@ async def search_recent_tweets_by_username(
|
|||
"query": f"from:{username}",
|
||||
"max_results": max(max_results, 10), # X API does not allow 'max_results' less than 10
|
||||
}
|
||||
url = (
|
||||
"https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username"
|
||||
)
|
||||
url = "https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username,entities&tweet.fields=entities"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url, headers=headers, params=params, timeout=10)
|
||||
|
|
@ -92,9 +94,14 @@ async def search_recent_tweets_by_username(
|
|||
f"Failed to search recent tweets during execution of '{search_recent_tweets_by_username.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
|
||||
)
|
||||
|
||||
tweets_data = parse_search_recent_tweets_response(response)
|
||||
response_data = response.json()
|
||||
|
||||
return tweets_data
|
||||
# Expand the urls that are in the tweets
|
||||
expand_urls_in_tweets(response_data.get("data", []), delete_entities=True)
|
||||
|
||||
parse_search_recent_tweets_response(response_data)
|
||||
|
||||
return response_data
|
||||
|
||||
|
||||
@tool(requires_auth=X(scopes=["tweet.read", "users.read"]))
|
||||
|
|
@ -109,7 +116,7 @@ async def search_recent_tweets_by_keywords(
|
|||
max_results: Annotated[
|
||||
int, "The maximum number of results to return. Cannot be less than 10"
|
||||
] = 10,
|
||||
) -> Annotated[str, "JSON string of the search results"]:
|
||||
) -> Annotated[dict, "Dictionary containing the search results"]:
|
||||
"""
|
||||
Search for recent tweets (last 7 days) on X (Twitter) by required keywords and phrases. Includes replies and reposts
|
||||
One of the following input parametersMUST be provided: keywords, phrases
|
||||
|
|
@ -124,7 +131,7 @@ async def search_recent_tweets_by_keywords(
|
|||
"Authorization": f"Bearer {context.authorization.token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
query = " ".join([f'"{phrase}"' for phrase in (phrases or [])]) + " " + " "
|
||||
query = "".join([f'"{phrase}" ' for phrase in (phrases or [])])
|
||||
if keywords:
|
||||
query += " ".join(keywords or [])
|
||||
|
||||
|
|
@ -132,9 +139,7 @@ async def search_recent_tweets_by_keywords(
|
|||
"query": query,
|
||||
"max_results": max(max_results, 10), # X API does not allow 'max_results' less than 10
|
||||
}
|
||||
url = (
|
||||
"https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username"
|
||||
)
|
||||
url = "https://api.x.com/2/tweets/search/recent?expansions=author_id&user.fields=id,name,username,entities&tweet.fields=entities"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url, headers=headers, params=params, timeout=10)
|
||||
|
|
@ -144,6 +149,11 @@ async def search_recent_tweets_by_keywords(
|
|||
f"Failed to search recent tweets during execution of '{search_recent_tweets_by_keywords.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
|
||||
)
|
||||
|
||||
tweets_data = parse_search_recent_tweets_response(response)
|
||||
response_data = response.json()
|
||||
|
||||
return tweets_data
|
||||
# Expand the urls that are in the tweets
|
||||
expand_urls_in_tweets(response_data.get("data", []), delete_entities=True)
|
||||
|
||||
parse_search_recent_tweets_response(response_data)
|
||||
|
||||
return response_data
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from arcade.core.errors import ToolExecutionError
|
|||
from arcade.core.schema import ToolContext
|
||||
from arcade.sdk import tool
|
||||
from arcade.sdk.auth import X
|
||||
from arcade_x.tools.utils import expand_urls_in_user_description, expand_urls_in_user_url
|
||||
|
||||
|
||||
# Users Lookup Tools. See developer docs for additional available query parameters: https://developer.x.com/en/docs/x-api/users/lookup/api-reference
|
||||
|
|
@ -13,13 +14,13 @@ from arcade.sdk.auth import X
|
|||
async def lookup_single_user_by_username(
|
||||
context: ToolContext,
|
||||
username: Annotated[str, "The username of the X (Twitter) user to look up"],
|
||||
) -> Annotated[str, "User information including id, name, username, and description"]:
|
||||
) -> Annotated[dict, "User information including id, name, username, and description"]:
|
||||
"""Look up a user on X (Twitter) by their username."""
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {context.authorization.token}",
|
||||
}
|
||||
url = f"https://api.x.com/2/users/by/username/{username}?user.fields=created_at,description,id,location,most_recent_tweet_id,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,verified_type,withheld"
|
||||
url = f"https://api.x.com/2/users/by/username/{username}?user.fields=created_at,description,id,location,most_recent_tweet_id,name,pinned_tweet_id,profile_image_url,protected,public_metrics,url,username,verified,verified_type,withheld,entities"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(url, headers=headers, timeout=10)
|
||||
|
|
@ -29,8 +30,14 @@ async def lookup_single_user_by_username(
|
|||
f"Failed to look up user during execution of '{lookup_single_user_by_username.__name__}' tool. Request returned an error: {response.status_code} {response.text}"
|
||||
)
|
||||
|
||||
# Parse the response JSON
|
||||
user_data = response.json()["data"]
|
||||
|
||||
expand_urls_in_user_description(user_data, delete_entities=False)
|
||||
expand_urls_in_user_url(user_data, delete_entities=True)
|
||||
|
||||
"""
|
||||
Example response.text structure:
|
||||
Example response["data"] structure:
|
||||
{
|
||||
"data": {
|
||||
"verified_type": str,
|
||||
|
|
@ -55,4 +62,4 @@ async def lookup_single_user_by_username(
|
|||
}
|
||||
}
|
||||
"""
|
||||
return response.text
|
||||
return {"data": user_data}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
import json
|
||||
|
||||
from requests import Response
|
||||
from typing import Any
|
||||
|
||||
|
||||
def get_tweet_url(tweet_id: str) -> str:
|
||||
|
|
@ -8,7 +6,7 @@ def get_tweet_url(tweet_id: str) -> str:
|
|||
return f"https://x.com/x/status/{tweet_id}"
|
||||
|
||||
|
||||
def parse_search_recent_tweets_response(response: Response) -> str:
|
||||
def parse_search_recent_tweets_response(response_data: Any) -> dict:
|
||||
"""
|
||||
Parses response from the X API search recent tweets endpoint.
|
||||
Returns a JSON string with the tweets data.
|
||||
|
|
@ -28,22 +26,18 @@ def parse_search_recent_tweets_response(response: Response) -> str:
|
|||
},
|
||||
]
|
||||
"""
|
||||
if response.status_code != 200:
|
||||
return json.dumps({"tweets": []})
|
||||
|
||||
tweets_data = json.loads(response.text)
|
||||
if not sanity_check_tweets_data(response_data):
|
||||
return {"data": []}
|
||||
|
||||
if not sanity_check_tweets_data(tweets_data):
|
||||
return json.dumps({"tweets": []})
|
||||
|
||||
for tweet in tweets_data["data"]:
|
||||
for tweet in response_data["data"]:
|
||||
tweet["tweet_url"] = get_tweet_url(tweet["id"])
|
||||
|
||||
for tweet_data, user_data in zip(tweets_data["data"], tweets_data["includes"]["users"]):
|
||||
for tweet_data, user_data in zip(response_data["data"], response_data["includes"]["users"]):
|
||||
tweet_data["author_username"] = user_data["username"]
|
||||
tweet_data["author_name"] = user_data["name"]
|
||||
|
||||
return json.dumps({"tweets": tweets_data["data"]})
|
||||
return response_data
|
||||
|
||||
|
||||
def sanity_check_tweets_data(tweets_data: dict) -> bool:
|
||||
|
|
@ -54,3 +48,60 @@ def sanity_check_tweets_data(tweets_data: dict) -> bool:
|
|||
if not tweets_data.get("data", []):
|
||||
return False
|
||||
return tweets_data.get("includes", {}).get("users", [])
|
||||
|
||||
|
||||
def expand_urls_in_tweets(tweets_data: list[dict], delete_entities: bool = True) -> None:
    """
    Expands the shortened t.co urls in the text of the provided tweets, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        tweets_data: Tweet dictionaries. Each may carry an "entities" -> "urls" list
            whose items map the shortened "url" to its "expanded_url".
        delete_entities: When True, the "entities" key is removed from every tweet
            once its urls have been expanded.

    Returns:
        None. The tweet dictionaries are mutated in place.
    """
    for tweet_data in tweets_data or []:
        # Tolerate a missing or null "entities"/"urls" instead of raising.
        url_entities = (tweet_data.get("entities") or {}).get("urls") or []
        text = tweet_data.get("text")

        # Only rewrite when there is text to rewrite; a tweet without "text" is left alone.
        if isinstance(text, str):
            for url_entity in url_entities:
                short_url = url_entity.get("url")
                expanded_url = url_entity.get("expanded_url")
                # Skip incomplete entities rather than failing the whole batch.
                if short_url and expanded_url:
                    text = text.replace(short_url, expanded_url)
            tweet_data["text"] = text

        if delete_entities:
            # Now that we've expanded the urls in the tweet, we no longer need the entities
            tweet_data.pop("entities", None)
|
||||
|
||||
|
||||
def expand_urls_in_user_description(user_data: dict, delete_entities: bool = True) -> None:
    """
    Expands the shortened t.co urls in the description of the provided user, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        user_data: User dictionary. Shortened links are read from
            "entities" -> "description" -> "urls", where each item maps the
            shortened "url" to its "expanded_url".
        delete_entities: When True, the "entities" key is removed from the user
            after expansion.

    Returns:
        None. ``user_data`` is mutated in place; "description" is always set
        (to an empty string when the user had none).
    """
    # Tolerate missing or null "entities"/"description" sections instead of raising.
    entities = user_data.get("entities") or {}
    description_urls = (entities.get("description") or {}).get("urls") or []

    description = user_data.get("description", "")
    if isinstance(description, str):
        for url_info in description_urls:
            t_co_link = url_info.get("url")
            expanded_url = url_info.get("expanded_url")
            # Skip incomplete entities rather than failing the lookup.
            if t_co_link and expanded_url:
                description = description.replace(t_co_link, expanded_url)
    user_data["description"] = description

    if delete_entities:
        # Entities is no longer needed now that we have expanded the t.co links
        user_data.pop("entities", None)
|
||||
|
||||
|
||||
def expand_urls_in_user_url(user_data: dict, delete_entities: bool = True) -> None:
    """
    Expands the shortened t.co url in the url section of the provided user, in place.

    X shortens urls, and consequently, this can cause language models to hallucinate.
    See more about X's link shortener at https://help.x.com/en/using-x/url-shortener

    Args:
        user_data: User dictionary. Shortened links are read from
            "entities" -> "url" -> "urls", where each item maps the shortened
            "url" to its "expanded_url".
        delete_entities: When True, the "entities" key is removed from the user
            after expansion.

    Returns:
        None. ``user_data`` is mutated in place; "url" is always set
        (to an empty string when the user had none).
    """
    # Tolerate missing or null "entities"/"url" sections instead of raising.
    entities = user_data.get("entities") or {}
    url_urls = (entities.get("url") or {}).get("urls") or []

    url = user_data.get("url", "")
    if isinstance(url, str):
        for url_info in url_urls:
            t_co_link = url_info.get("url")
            expanded_url = url_info.get("expanded_url")
            # Skip incomplete entities rather than failing the lookup.
            if t_co_link and expanded_url:
                url = url.replace(t_co_link, expanded_url)
    user_data["url"] = url

    if delete_entities:
        # Entities is no longer needed now that we have expanded the t.co links
        user_data.pop("entities", None)
|
||||
|
|
|
|||
Loading…
Reference in a new issue