Python Scraper – Google Play and App Store Reviews

This is a web scraper written in Python that collects all the reviews for an app from Google Play and the App Store.

The apps that are used in the video:

Google Play:

https://play.google.com/store/apps/details?id=com.apphup.passwordmanager&hl=en&gl=US
App Store:

https://apps.apple.com/ro/app/password-manager-myvault/id1138075747

The code:

from google_play_scraper import app, Sort, reviews_all
from app_store_scraper import AppStore
import pandas as pd
import numpy as np
import json, os, uuid

# --- 1. Download the raw reviews -------------------------------------------

# All Google Play reviews for the app, newest first.
g_reviews = reviews_all(
    "com.apphup.passwordmanager",
    sleep_milliseconds=0,  # defaults to 0
    lang="en",  # defaults to 'en'
    country="us",  # defaults to 'us'
    sort=Sort.NEWEST,  # defaults to Sort.MOST_RELEVANT
)

# App Store reviews: review() is called to fetch them, and the results are
# read back from a_reviews.reviews below.
a_reviews = AppStore("us", "password-manager-myvault", "1138075747")
a_reviews.review()

# --- 2. Google Play reviews -> common schema -------------------------------

# Assumes g_reviews is a list with one dict per review, so each dict key
# becomes a column.
g_df2 = pd.DataFrame(g_reviews)
g_df2 = g_df2.drop(columns=["userImage", "reviewCreatedVersion"])
g_df2 = g_df2.rename(
    columns={
        "score": "rating",
        "userName": "user_name",
        "reviewId": "review_id",
        "content": "review_description",
        "at": "review_date",
        "replyContent": "developer_response",
        "repliedAt": "developer_response_date",
        "thumbsUpCount": "thumbs_up",
    }
)
g_df2.insert(loc=0, column="source", value="Google Play")
# Google Play reviews carry no title; add an empty column so both sources
# expose the same set of columns.
g_df2.insert(loc=3, column="review_title", value=None)
g_df2["language_code"] = "en"
g_df2["country_code"] = "us"

# --- 3. App Store reviews -> common schema ---------------------------------

# Assumes a_reviews.reviews is a list with one dict per review.
a_df2 = pd.DataFrame(a_reviews.reviews)
a_df2 = a_df2.drop(columns=["isEdited"])
a_df2.insert(loc=0, column="source", value="App Store")
# Columns that only exist on the Google Play side are filled with None.
a_df2["developer_response_date"] = None
a_df2["thumbs_up"] = None
a_df2["language_code"] = "en"
a_df2["country_code"] = "us"
# No review id is taken from the App Store data, so generate one per row.
# Stored as a string so the column can be written to CSV/JSON without a
# custom encoder for uuid.UUID objects.
a_df2.insert(
    loc=1,
    column="review_id",
    value=[str(uuid.uuid4()) for _ in range(len(a_df2))],
)
a_df2 = a_df2.rename(
    columns={
        "review": "review_description",
        "userName": "user_name",
        "date": "review_date",
        "title": "review_title",
        "developerResponse": "developer_response",
    }
)
# Replace missing values (NaN) with None.
a_df2 = a_df2.where(pd.notnull(a_df2), None)

# --- 4. Combine both sources -----------------------------------------------

# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the row labels of both inputs are kept and every label appears twice.
result = pd.concat([g_df2, a_df2], ignore_index=True)
result  # bare expression: displays the frame when run in a notebook cell

Leave a Reply

Your email address will not be published. Required fields are marked *