-
Notifications
You must be signed in to change notification settings - Fork 0
/
noapiaccesstweetextraction.py
70 lines (55 loc) · 3.13 KB
/
noapiaccesstweetextraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# -*- coding: utf-8 -*-
"""NoAPIAccessTweetExtraction.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1YTTZ1zO71oYV8RpLJsgD0wbZU4yxCpoR
# Tweet Extraction with the Twint Library
Allows for the extraction of tweets from Twitter without the need to access [Twitter's API](https://developer.twitter.com/en/docs/twitter-api/getting-started/getting-access-to-the-twitter-api) which now costs money to access.
Relevant Links:<br>
[Official PyPi](https://pypi.org/project/twint/) <br>
[Medium Blog / Article](https://basilkjose.medium.com/twint-twitter-scraping-without-twitters-api-aca8ba1b210e#:~:text=What%20is%20Twint%20%3F,scraping%20Tweets%20from%20Twitter%20profiles%20.) <br>
[Kaggle Discussion Page / Post](https://www.kaggle.com/general/207512) <br>
[Twint Status](https://www.reddit.com/r/Python/comments/vb8cmu/twint_python_twitter_crawler_no_longer_being/) <br>
[Twint Usage](https://www.reddit.com/r/OSINT/comments/1101aud/anyone_familiar_with_the_opensource_twitter_api/)
"""
# Install commands — run these in a notebook cell or shell BEFORE executing
# this script (left commented so the file stays valid plain Python):
#!pip3 install twint
#!pip3 uninstall twint
#!pip3 install --user --upgrade "git+https://github.com/twintproject/twint.git@origin/master#egg=twint"
#!pip install nest_asyncio
import twint
import nest_asyncio
import pandas as pd

# nest_asyncio patches the running asyncio event loop so twint (which starts
# its own loop) can run inside Colab/Jupyter's already-running loop.
nest_asyncio.apply()

# Configure and set up the details of what is to be scraped.
user = twint.Config()

# twint expects an integer tweet limit, so convert the raw input() string.
tweetExtractCount = int(input("Enter the number of tweets you would like to scrape:"))
user.Limit = tweetExtractCount  # Limit : Number of tweets to pull.

twitterUsername = input("Enter the username of the Twitter user which you would like to extract tweets from:")
user.Username = twitterUsername

# Optional settings
user.Lang = "en"
user.Hide_output = True       # Suppress per-tweet console output while scraping.
user.Since = '2022-10-12'     # Since : Filter tweets from this date.
user.Until = '2023-01-20'     # Until : Filter tweets up to this date.
#user.Images = True           # Images : Display only tweets with images.
#user.Videos = True           # Videos : Display only tweets with videos.
user.Media = True             # Media : Display tweets with only images or videos.
user.Popular_tweets = True    # Popular_tweets : Scrape popular tweets instead of most recent (default=False).
user.Min_likes = 50           # Min_likes : Filter tweets by minimum number of likes.
user.Min_replies = 10         # Min_replies : Filter tweets by minimum number of replies.
user.Min_retweets = 10        # Min_retweets : Filter tweets by minimum number of retweets.

twint.run.Search(user)                     # Run twint with the configuration above.
Tweets_df = twint.storage.panda.Tweets_df  # Store scraped data in a pandas DataFrame.
print(Tweets_df)                           # Print out the pandas DataFrame.
# Optional Display - Displays Tweets using HTML and requests library
from IPython.display import HTML
import requests
def show_tweet(link):
    '''Render a single tweet inline via Twitter's public oEmbed endpoint.

    Parameters
    ----------
    link : str
        Canonical URL of the tweet to display.

    Raises
    ------
    requests.HTTPError
        If the oEmbed request returns a 4xx/5xx status (e.g. the tweet was
        deleted or the account is private).
    '''
    url = 'https://publish.twitter.com/oembed?url=%s' % link
    # timeout prevents hanging indefinitely on an unresponsive endpoint.
    response = requests.get(url, timeout=10)
    # Fail loudly on HTTP errors instead of an opaque KeyError("html") below.
    response.raise_for_status()
    html = response.json()["html"]
    display(HTML(html))
# Pick one random scraped tweet, show its URL, then render it inline.
random_row = Tweets_df.sample(1)
sample_tweet_link = random_row['link'].values[0]
display(sample_tweet_link)
show_tweet(sample_tweet_link)