Using AWS Comprehend with IAM Role

10. Using AWS Comprehend with IAM Role

import pandas as pd
from collections import OrderedDict
import requests
import boto3
# Build the Comprehend client used by every call below, pinned to us-east-1.
# No access keys are passed here, so boto3 has to locate credentials itself
# (presumably the IAM role this page's title refers to — confirm for your
# environment).
comprehend = boto3.client(
    service_name='comprehend',
    region_name='us-east-1',
)

10.1. Sample Text

# Sample sentence analysed by every Comprehend call below. Adjacent string
# literals are joined by the parser, so this is still one single-line string.
text = (
    'A new statement from Boeing indicates that the aerospace manufacturer '
    'knew about a problem with the 737 Max aircraft well before the deadly '
    'October 2018 Lion Air crash, but decided not to do anything about it.'
)
print(text)
A new statement from Boeing indicates that the aerospace manufacturer knew about a problem with the 737 Max aircraft well before the deadly October 2018 Lion Air crash, but decided not to do anything about it.

10.2. Process Text

# Key phrases: run Comprehend's key-phrase detection on the English sample
# text. The response is a dict whose 'KeyPhrases' list has one entry per
# phrase ('Score', 'Text', 'BeginOffset', 'EndOffset'), plus a
# 'ResponseMetadata' entry describing the HTTP exchange.
phrases = comprehend.detect_key_phrases(
    LanguageCode='en',
    Text=text,
)
print(phrases)
{'KeyPhrases': [{'Score': 0.9999999403953552, 'Text': 'A new statement', 'BeginOffset': 0, 'EndOffset': 15}, {'Score': 1.0, 'Text': 'Boeing', 'BeginOffset': 21, 'EndOffset': 27}, {'Score': 1.0, 'Text': 'the aerospace manufacturer', 'BeginOffset': 43, 'EndOffset': 69}, {'Score': 1.0, 'Text': 'a problem', 'BeginOffset': 81, 'EndOffset': 90}, {'Score': 1.0, 'Text': 'the 737 Max aircraft', 'BeginOffset': 96, 'EndOffset': 116}, {'Score': 0.992807149887085, 'Text': 'the deadly October 2018 Lion Air crash', 'BeginOffset': 129, 'EndOffset': 167}], 'ResponseMetadata': {'RequestId': 'e0ef5b19-0d30-49a8-bb84-7caa7a2a1839', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'e0ef5b19-0d30-49a8-bb84-7caa7a2a1839', 'content-type': 'application/x-amz-json-1.1', 'content-length': '497', 'date': 'Wed, 30 Sep 2020 18:17:21 GMT'}, 'RetryAttempts': 0}}
# Entities: run Comprehend's entity detection on the same text. The response
# dict's 'Entities' list has one entry per entity, each carrying a 'Type'
# (e.g. ORGANIZATION, DATE, COMMERCIAL_ITEM) in addition to 'Score', 'Text'
# and the begin/end character offsets.
entities = comprehend.detect_entities(
    LanguageCode='en',
    Text=text,
)
print(entities)
{'Entities': [{'Score': 0.9907217025756836, 'Type': 'ORGANIZATION', 'Text': 'Boeing', 'BeginOffset': 21, 'EndOffset': 27}, {'Score': 0.8122062087059021, 'Type': 'COMMERCIAL_ITEM', 'Text': '737 Max aircraft', 'BeginOffset': 100, 'EndOffset': 116}, {'Score': 0.9706804752349854, 'Type': 'DATE', 'Text': 'October 2018', 'BeginOffset': 140, 'EndOffset': 152}, {'Score': 0.6601864099502563, 'Type': 'COMMERCIAL_ITEM', 'Text': 'Lion Air', 'BeginOffset': 153, 'EndOffset': 161}], 'ResponseMetadata': {'RequestId': 'bb8594ea-6ed5-41b3-baff-86716e4b495a', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'bb8594ea-6ed5-41b3-baff-86716e4b495a', 'content-type': 'application/x-amz-json-1.1', 'content-length': '432', 'date': 'Wed, 30 Sep 2020 18:18:04 GMT'}, 'RetryAttempts': 0}}
# Sentiment: classify the overall tone of the text. The response dict holds
# the winning label under 'Sentiment' and the per-class scores ('Positive',
# 'Negative', 'Neutral', 'Mixed') under 'SentimentScore'.
sentiments = comprehend.detect_sentiment(
    LanguageCode='en',
    Text=text,
)
print(sentiments)
{'Sentiment': 'NEGATIVE', 'SentimentScore': {'Positive': 0.019578900188207626, 'Negative': 0.6704141497612, 'Neutral': 0.310001015663147, 'Mixed': 5.94459106650902e-06}, 'ResponseMetadata': {'RequestId': '090907d6-25b1-4d2d-8d7d-10e30071ad58', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '090907d6-25b1-4d2d-8d7d-10e30071ad58', 'content-type': 'application/x-amz-json-1.1', 'content-length': '158', 'date': 'Wed, 30 Sep 2020 18:56:39 GMT'}, 'RetryAttempts': 0}}
## Prettify Things
# The raw response dicts are verbose (scores, offsets, HTTP metadata), so
# print only the fields of interest from each one.

# Print the phrases: the 'Text' of every detected key phrase, one per line.
print('------- phrases ---------')
for phrase in phrases['KeyPhrases']:
    print(phrase['Text'])

# Print the entities with entity type, formatted as "<text> : <type>".
print('------- entity : entity type ---------')
for entity in entities['Entities']:
    print(entity['Text'] + ' : ' + entity['Type'])

# Print the sentiment: only the overall label, not the per-class scores.
print('------- sentiment ---------')
print(sentiments['Sentiment'])
------- phrases ---------
A new statement
Boeing
the aerospace manufacturer
a problem
the 737 Max aircraft
the deadly October 2018 Lion Air crash
------- entity : entity type ---------
Boeing : ORGANIZATION
737 Max aircraft : COMMERCIAL_ITEM
October 2018 : DATE
Lion Air : COMMERCIAL_ITEM
------- sentiment ---------
NEGATIVE