13. Using Tweets#
%%capture
#INCLUDING SCIENTIFIC AND NUMERICAL COMPUTING LIBRARIES
#Run this code to make sure that you have all the libraries at one go.
%pylab inline
import os
!pip install ipypublish
from ipypublish import nb_setup
import pandas as pd
%load_ext rpy2.ipython
# Basic lines of code needed to import a data file with permissions from Google Drive
from google.colab import drive
# drive.mount("/content/drive", force_remount=True)
drive.mount('/content/drive')
os.chdir("drive/My Drive/Books_Writings/NLPBook/")
Mounted at /content/drive
13.1. NLTK for tokenization#
Please install the nltk package:
pip install nltk
NLTK also relies on data packages (tokenizer models, lexicons, corpora) that must be downloaded separately from within NLTK. If you get the following error when using NLTK, run nltk.download() to install the missing resource.
In case of LookupError:
Resource ‘tokenizers/punkt/PY3/english.pickle’ not found. Please use the NLTK Downloader to obtain the resource: >>> nltk.download() Searched in: - ‘/Users/srdas/nltk_data’ - ‘/usr/share/nltk_data’ - ‘/usr/local/share/nltk_data’ - ‘/usr/lib/nltk_data’ - ‘/usr/local/lib/nltk_data’ - ‘’
import nltk
# Download the 'punkt' tokenizer models from within NLTK. Run this if a
# LookupError reports that the tokenizers/punkt resource cannot be found.
nltk.download('punkt')
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data] Unzipping tokenizers/punkt.zip.
True
# Sample sentence, assembled from two adjacent string literals rather than a
# backslash continuation inside a single literal (the resulting text is the same).
text = (
    "Ask not what your country can do for you, "
    "but ask what you can do for your country."
)
# Split the sentence into word and punctuation tokens; the list is the cell's output.
nltk.word_tokenize(text)
['Ask',
'not',
'what',
'your',
'country',
'can',
'do',
'for',
'you',
',',
'but',
'ask',
'what',
'you',
'can',
'do',
'for',
'your',
'country',
'.']
13.2. Twitter API#
We explore using the Twitter API here.
We can set up keys and tokens at: https://apps.twitter.com/
!pip install tweepy
Requirement already satisfied: tweepy in /usr/local/lib/python3.10/dist-packages (4.13.0)
Requirement already satisfied: oauthlib<4,>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from tweepy) (3.2.2)
Requirement already satisfied: requests<3,>=2.27.0 in /usr/local/lib/python3.10/dist-packages (from tweepy) (2.31.0)
Requirement already satisfied: requests-oauthlib<2,>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from tweepy) (1.3.1)
Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27.0->tweepy) (3.3.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27.0->tweepy) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27.0->tweepy) (2.0.6)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2.27.0->tweepy) (2023.7.22)
import os

import tweepy


def _require_env(name):
    """Return the value of environment variable *name*, failing clearly if it is unset."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            f"Set the {name} environment variable to your Twitter API credential "
            "before running this cell."
        )
    return value


# Authentication
# SECURITY: credentials are read from the environment and must never be written
# into the notebook. Any key that has ever been saved in a shared or published
# notebook should be revoked and regenerated in the Twitter developer portal.

# OAuth 2.0 client credentials are not used by the cells below, so they are optional.
client_id = os.environ.get("TWITTER_CLIENT_ID")
client_secret = os.environ.get("TWITTER_CLIENT_SECRET")

# OAuth 1.0a user-context credentials (used to build `api` below).
consumer_key = _require_env("TWITTER_CONSUMER_KEY")
consumer_secret = _require_env("TWITTER_CONSUMER_SECRET")
access_token = _require_env("TWITTER_ACCESS_TOKEN")
access_token_secret = _require_env("TWITTER_ACCESS_TOKEN_SECRET")

# App-only bearer token (used by the tweepy.Client search cell).
bearer_token = _require_env("TWITTER_BEARER_TOKEN")

# The later cells call `api.search_tweets(...)` and wrap `api` methods in
# tweepy.Cursor; those are Twitter API v1.1 methods of tweepy.API, not of
# tweepy.Client, so `api` is built as a tweepy.API authenticated as a user.
auth = tweepy.OAuth1UserHandler(
    consumer_key, consumer_secret, access_token, access_token_secret
)
api = tweepy.API(auth)

# To confirm that authentication succeeded, print the account's screen name:
# print(api.verify_credentials().screen_name)
import tweepy

# App-only client for the Twitter API v2, authenticated with the bearer token.
client = tweepy.Client(bearer_token)

# Search Recent Tweets
# This endpoint/method returns Tweets from the last seven days.
response = client.search_recent_tweets("Tweepy")

# The method returns a Response object, a named tuple with data, includes,
# errors, and meta fields.
print(response.meta)

# The data field is a list of Tweet objects, or None when nothing matched the
# query; fall back to an empty list so the loop below never iterates over None.
tweets = response.data or []

# Each Tweet object has default ID and text fields.
for tweet in tweets:
    print(tweet.id)
    print(tweet.text)

# By default, this endpoint/method returns 10 results.
# You can retrieve up to 100 Tweets by specifying max_results.
response = client.search_recent_tweets("Tweepy", max_results=100)
---------------------------------------------------------------------------
Forbidden Traceback (most recent call last)
<ipython-input-10-49f7529c7720> in <cell line: 9>()
7 # This endpoint/method returns Tweets from the last seven days
8
----> 9 response = client.search_recent_tweets("Tweepy")
10 # The method returns a Response object, a named tuple with data, includes,
11 # errors, and meta fields
/usr/local/lib/python3.10/dist-packages/tweepy/client.py in search_recent_tweets(self, query, user_auth, **params)
1264 """
1265 params["query"] = query
-> 1266 return self._make_request(
1267 "GET", "/2/tweets/search/recent", params=params,
1268 endpoint_parameters=(
/usr/local/lib/python3.10/dist-packages/tweepy/client.py in _make_request(self, method, route, params, endpoint_parameters, json, data_type, user_auth)
127 request_params = self._process_params(params, endpoint_parameters)
128
--> 129 response = self.request(method, route, params=request_params,
130 json=json, user_auth=user_auth)
131
/usr/local/lib/python3.10/dist-packages/tweepy/client.py in request(self, method, route, params, json, user_auth)
98 raise Unauthorized(response)
99 if response.status_code == 403:
--> 100 raise Forbidden(response)
101 if response.status_code == 404:
102 raise NotFound(response)
Forbidden: 403 Forbidden
When authenticating requests to the Twitter API v2 endpoints, you must use keys and tokens from a Twitter developer App that is attached to a Project. You can create a project via the developer portal.
# Search for tweets that match a user's handle.
# NOTE: this is a keyword search on the standard search endpoint, so it returns
# tweets matching the handle text rather than that user's own timeline.
screen_name = 'srdas'  # named screen_name so the built-in id() is not shadowed
new_tweets = api.search_tweets(screen_name)
print(len(new_tweets))
print(new_tweets)
---------------------------------------------------------------------------
Unauthorized Traceback (most recent call last)
<ipython-input-32-05c5b85318ee> in <cell line: 3>()
1 #Get all tweets from any user
2 id = 'srdas'
----> 3 new_tweets = api.search_tweets('srdas')
4 # new_tweets = api.search_recent_tweets(screen_name = id,count=20)
5 print(len(new_tweets))
/usr/local/lib/python3.10/dist-packages/tweepy/api.py in wrapper(*args, **kwargs)
31 @functools.wraps(method)
32 def wrapper(*args, **kwargs):
---> 33 return method(*args, **kwargs)
34 wrapper.pagination_mode = mode
35 return wrapper
/usr/local/lib/python3.10/dist-packages/tweepy/api.py in wrapper(*args, **kwargs)
44 kwargs['payload_list'] = payload_list
45 kwargs['payload_type'] = payload_type
---> 46 return method(*args, **kwargs)
47 wrapper.payload_list = payload_list
48 wrapper.payload_type = payload_type
/usr/local/lib/python3.10/dist-packages/tweepy/api.py in search_tweets(self, q, **kwargs)
1307 .. _Twitter's documentation on the standard search API: https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/overview
1308 """
-> 1309 return self.request(
1310 'GET', 'search/tweets', endpoint_parameters=(
1311 'q', 'geocode', 'lang', 'locale', 'result_type', 'count',
/usr/local/lib/python3.10/dist-packages/tweepy/api.py in request(self, method, endpoint, endpoint_parameters, params, headers, json_payload, parser, payload_list, payload_type, post_data, files, require_auth, return_cursors, upload_api, use_cache, **kwargs)
261 raise BadRequest(resp)
262 if resp.status_code == 401:
--> 263 raise Unauthorized(resp)
264 if resp.status_code == 403:
265 raise Forbidden(resp)
Unauthorized: 401 Unauthorized
32 - Could not authenticate you.
13.3. JSON#
JSON = JavaScript Object Notation. It is a lightweight, plain-text data format.
import json

# Pull the text out of every tweet object returned by the search.
tweets = [status.text for status in new_tweets]
print(tweets)

# Cleaner display: one tweet per row, rendered as the cell's output.
df_tweets = pd.DataFrame(tweets)
df_tweets
13.4. Using the NLTK package to conduct sentiment analysis without a dictionary#
When using tweets, it may be a good idea to install the Twython library: “pip3 install -U nltk[twitter]” (You can also simply use pip instead of pip3.)
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nltk.download('vader_lexicon') # required first time

# Analyzer instance reused by the scoring loops in this notebook.
sid = SentimentIntensityAnalyzer()

# Score every tweet, echoing its text followed by its polarity scores.
scores = []
for tw in tweets:
    print(tw)
    score = sid.polarity_scores(tw)
    print(score)
    scores.append(score)

# One row per tweet, one column per score key.
df = pd.DataFrame(scores)
df

# Bar chart of the mean negative and positive scores across the tweets.
x = ['neg','pos']
mean_scores = df[x].mean()
bar(x, mean_scores)
grid()
13.5. Extracting tweets with a hashtag#
htag = '#Ukraine'

# Fetch up to 10 tweets carrying the hashtag and keep only their text.
# Tweepy 4 renamed API.search to API.search_tweets; the old name no longer
# exists and raises AttributeError.
tweets = [status.text for status in tweepy.Cursor(api.search_tweets, q=htag).items(10)]

# Score every tweet, echoing its text followed by its polarity scores.
scores = []
for tw in tweets:
    print(tw)
    score = sid.polarity_scores(tw)
    scores.append(score)
    print(score)

# One row per tweet, one column per score key.
df = pd.DataFrame(scores)
df

# Bar chart of the mean negative, neutral and positive scores.
x = ['neg','neu','pos']
bar(x, df[x].mean())
grid()
htag = '#inflation'

# Fetch up to 10 tweets carrying the hashtag and keep only their text.
# Tweepy 4 renamed API.search to API.search_tweets; the old name no longer
# exists and raises AttributeError.
tweets = [status.text for status in tweepy.Cursor(api.search_tweets, q=htag).items(10)]

# Score every tweet, echoing its text followed by its polarity scores.
scores = []
for tw in tweets:
    print(tw)
    score = sid.polarity_scores(tw)
    scores.append(score)
    print(score)

# Rebuild the frame from this hashtag's scores; without this step the chart
# below would plot whatever `df` was left over from an earlier cell.
df = pd.DataFrame(scores)

# Bar chart of the mean negative, neutral and positive scores.
x = ['neg','neu','pos']
bar(x, df[x].mean())
grid()