Skip to content

Commit

Permalink
Add release date to scraping
Browse files (browse the repository at this point in the history)
  • Loading branch information
ddxv committed Oct 17, 2023
1 parent 7ff0f34 commit e90245b
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 1 deletion.
5 changes: 4 additions & 1 deletion adscrawler/app_stores/apple.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ def scrape_app_ios(store_id: str, country: str) -> dict:
# NOTE: averageUserRating, Rating_count, Histogram are country specific
scraper = AppStoreScraper()
result: dict = scraper.get_app_details(store_id, country=country, add_ratings=True)

return result


Expand All @@ -119,6 +118,7 @@ def clean_ios_app_df(df: pd.DataFrame) -> pd.DataFrame:
"minimum_OsVersion": "minimum_android",
"primaryGenreName": "category",
"bundleId": "bundle_id",
"releaseDate": "release_date",
"currentVersionReleaseDate": "store_last_updated",
"artistId": "developer_id",
"artistName": "developer_name",
Expand Down Expand Up @@ -150,6 +150,9 @@ def clean_ios_app_df(df: pd.DataFrame) -> pd.DataFrame:
store_last_updated=pd.to_datetime(df["store_last_updated"]).dt.strftime(
"%Y-%m-%d %H:%M"
),
release_date=pd.to_datetime(
df["release_date"], format="%Y-%m-%dT%H:%M:%SZ"
).dt.date,
)
try:
df["histogram"] = df["user_ratings"].apply(
Expand Down
3 changes: 3 additions & 0 deletions adscrawler/app_stores/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def clean_google_play_app_df(df: pd.DataFrame) -> pd.DataFrame:
"updated": "store_last_updated",
"reviews": "review_count",
"ratings": "rating_count",
"summary": "short_description",
"released": "release_date",
"containsAds": "ad_supported",
"offersIAP": "in_app_purchases",
"url": "store_url",
Expand All @@ -64,6 +66,7 @@ def clean_google_play_app_df(df: pd.DataFrame) -> pd.DataFrame:
store_last_updated=pd.to_datetime(
df["store_last_updated"], unit="s"
).dt.strftime("%Y-%m-%d %H:%M"),
release_date=pd.to_datetime(df["release_date"], format="%b %d, %Y").dt.date,
)
return df

Expand Down
1 change: 1 addition & 0 deletions adscrawler/app_stores/scrape_stores.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ def log_crawl_results(app_df: pd.DataFrame, database_connection: PostgresCon) ->
"size",
"minimum_android",
"review_count",
"release_date",
"content_rating",
"store_last_updated",
"developer_email",
Expand Down
1 change: 1 addition & 0 deletions pg-ddl/db.sql
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,7 @@ CREATE TABLE public.store_apps (
in_app_purchases bool NULL,
editors_choice bool NULL,
icon_url_512 varchar NULL,
release_date date NULL,
created_at timestamp NULL DEFAULT timezone('utc'::text, now()),
updated_at timestamp NULL DEFAULT timezone('utc'::text, now()),
crawl_result int4 NULL,
Expand Down

0 comments on commit e90245b

Please sign in to comment.