import json
import os
from pymongo import MongoClient
from bson.objectid import ObjectId
from googleapiclient.discovery import build
from google.cloud import language_v1
def get_entity_sentiment(myText):
    """Run Google Cloud entity-sentiment analysis on a piece of text.

    Args:
        myText: Text to analyse; submitted as a PLAIN_TEXT document with
            UTF8 encoding type.

    Returns:
        A list with one dict per entity in the API response, each holding
        the keys 'entity', 'salience', 'score' and 'magnitude'.
    """
    language_client = language_v1.LanguageServiceClient()
    document = language_v1.Document(
        content=myText,
        type_=language_v1.types.Document.Type.PLAIN_TEXT,
    )
    request = {
        'document': document,
        'encoding_type': language_v1.EncodingType.UTF8,
    }
    result = language_client.analyze_entity_sentiment(request=request)

    # Flatten each entity and its sentiment into a plain dict.
    return [
        {
            'entity': entity.name,
            'salience': entity.salience,
            'score': entity.sentiment.score,
            'magnitude': entity.sentiment.magnitude,
        }
        for entity in result.entities
    ]
def get_videoID_by_title(video_title):
    """Return the YouTube video ID of the top search hit for a title.

    Builds a YouTube Data API v3 client with the module-level ``api_key``
    and runs a relevance-ordered search restricted to videos of one fixed
    channel (ID marked as the NFL channel below), asking for a single
    result.

    Args:
        video_title: Search query string.

    Returns:
        The ``videoId`` of the single returned item, or the sentinel string
        "NoQuota" when anything goes wrong. The sentinel is returned for
        every failure (API error, empty result list, unexpected response
        shape), not only for an exhausted quota; callers compare against
        it, so it is kept as-is.
    """
    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
        response = youtube.search().list(
            part='snippet',
            q=video_title,
            channelId='UCDVYQ4Zhbm3S2dlz7P1GBDg',  # NFL Channel ID
            type='video',
            order='relevance',  # Default Value = relevance
            maxResults=1,
        ).execute()
        # maxResults=1 means at most one item comes back, so the first item
        # is the most relevant hit.
        return response['items'][0]['id']['videoId']
    except Exception:
        # Deliberately broad best-effort, but no longer a bare ``except`` so
        # KeyboardInterrupt / SystemExit still propagate.
        return "NoQuota"
def get_comments(vidId, max_pages=4):
    """Collect top-level comment texts (no replies) for a YouTube video.

    Builds a YouTube Data API v3 client with the module-level ``api_key``
    and pages through ``commentThreads().list()``. Paging stops when the
    response carries no ``nextPageToken`` or when ``max_pages`` pages have
    been fetched, whichever comes first. The page cap exists to limit API
    quota consumption.

    Args:
        vidId: YouTube video ID as a string.
        max_pages: Maximum number of result pages to request. Defaults to
            4, the value that was previously hard-coded.

    Returns:
        A list of ``textDisplay`` strings, one per top-level comment, in
        the order the API returned them.
    """
    comments = []

    youtube = build('youtube', 'v3', developerKey=api_key)
    print('yt loaded')

    # The same arguments are sent for every page. Follow-up requests used to
    # omit textFormat, so later pages came back in the API's default format
    # instead of plain text.
    request_args = {
        'part': 'snippet',
        'videoId': vidId,
        'textFormat': 'plainText',
    }

    for _ in range(max_pages):
        response = youtube.commentThreads().list(**request_args).execute()
        comments.extend(
            item['snippet']['topLevelComment']['snippet']['textDisplay']
            for item in response['items']
        )

        next_token = response.get('nextPageToken')
        if not next_token:
            # No further pages: end of the comment list reached.
            break
        request_args['pageToken'] = next_token

    print('comments returned')
    return comments
# --- Configuration -----------------------------------------------------------
# Read the credentials file once; the context manager closes the handle
# (it used to be opened twice and never closed).
with open('API_Data.json') as credentials_file:
    _api_data = json.load(credentials_file)
api_key = _api_data['ytDataAPI']  # key for Youtube Data API
mongodb_pass = _api_data['mongoDB_pass']  # handed to MongoClient below as its connection argument

# expose google service account for sentiment analysis
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "green-reporter-369217-84ed773093b1.json"

# --- Database handles --------------------------------------------------------
client = MongoClient(mongodb_pass)
db = client.gc_nfl
mycoll = db.gc_games
# Alternative database / collection:
# db = client.nfl_data
# mycoll = db.games

# Process at most two game documents that have no videoID yet.
myList = mycoll.find({'videoID': {'$exists': False}})[0:2]

for doc in myList:
    mongo_obj_id = doc['_id']  # remember the MongoDB ObjectId of the source document
    myTitle = doc['team1'] + ' ' + doc['team2'] + ' ' + doc['week'] + ' Highlights | NFL 2021'

    vid_id = get_videoID_by_title(myTitle)
    if vid_id == "NoQuota":
        # The lookup failed; stop processing further games in this run.
        print(vid_id)
        break
    print(myTitle)

    # Fields copied onto every generated document. '_id' is left out so
    # each insert receives a fresh one; this replaces mutating the source
    # document and deleting '_id' before every insert.
    base_fields = {key: value for key, value in doc.items() if key != '_id'}
    base_fields['videoID'] = vid_id

    for comment_count, comment in enumerate(get_comments(vid_id), start=1):
        try:
            entList = get_entity_sentiment(comment)
            # One document per (comment, entity) pair.
            for count, ent in enumerate(entList, start=1):
                print('next Dict', count)
                myDict = {
                    **base_fields,
                    'comment': comment,
                    'entity': ent['entity'],
                    'salience': ent['salience'],
                    'score': ent['score'],
                    'magnitude': ent['magnitude'],
                }
                mycoll.insert_one(myDict)
        except Exception:
            # Best-effort: skip this comment on any failure. The message
            # assumes an unsupported-language error from the sentiment API,
            # but insert errors end up here as well.
            print('Comment is not in English.')
        print('next comment', comment_count)

    # Remove the original game document now that per-entity documents exist.
    # NOTE(review): this also runs when no comment produced any document, so
    # the game is then dropped without a replacement - confirm that is intended.
    print('delete document')
    mycoll.delete_one({'_id': ObjectId(mongo_obj_id)})