-
Notifications
You must be signed in to change notification settings - Fork 0
/
pullAppIds.js
168 lines (131 loc) · 5.58 KB
/
pullAppIds.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
const fs = require('fs');
const os = require('os');
const path = require('path');
const util = require('util');
const yargs = require('yargs/yargs');
// Ensure the log directory exists.
// HOME may be unset in some environments; fall back to os.homedir() so that
// path.join() is never handed `undefined` (which would throw a TypeError).
const logDir = path.join(process.env.HOME || os.homedir(), '.config/adscrawler/logs');
// With `recursive: true`, mkdirSync does not fail when the directory already
// exists, so no separate existsSync() check is needed.
fs.mkdirSync(logDir, { recursive: true });
// Create a write stream for logging; flag 'a' appends to any existing log file.
const logFile = path.join(logDir, 'pulljs.log');
const logStream = fs.createWriteStream(logFile, { flags: 'a' });
// Override console methods so every message is timestamped, appended to the
// log file, and echoed to the matching standard stream.

/**
 * Build a replacement for one console method.
 *
 * Arguments are rendered with util.format, so printf-style placeholders
 * (%s, %i, %d, ...) are substituted and objects/Errors are inspected instead
 * of being flattened with a plain join.
 *
 * @param {string} level - Tag written at the start of each line (e.g. 'LOG').
 * @param {{ write: Function }} consoleStream - process.stdout or process.stderr.
 * @returns {Function} A variadic logger with console.log-style arguments.
 */
function makeLogWriter(level, consoleStream) {
  return function (...args) {
    // Build the line once so the file and the console carry the same timestamp.
    const line = `[${level} ${new Date().toISOString()}] ${util.format(...args)}\n`;
    logStream.write(line);
    consoleStream.write(line);
  };
}

console.log = makeLogWriter('LOG', process.stdout);
// console.info is used elsewhere in this file; route it to the log file as well.
console.info = makeLogWriter('INFO', process.stdout);
console.error = makeLogWriter('ERROR', process.stderr);
console.warn = makeLogWriter('WARN', process.stderr);
(async () => {
// The scraper is loaded with a dynamic import(); its API is reached through
// `gplay.default` by the functions below.
// NOTE(review): presumably the package is ESM-only, hence import() rather than require() - confirm.
const gplay = await import('google-play-scraper');
const { hideBin } = require('yargs/helpers');

// Definition of the single supported CLI flag: --developers / -d.
const developersFlag = {
  alias: 'd',
  type: 'boolean',
  description: 'Set to true or false to include developers'
};

// Parse the process arguments (minus the node binary and script path).
const cliParser = yargs(hideBin(process.argv)).option('developers', developersFlag);
const argv = cliParser.argv;
/**
 * Fetch one Google Play list and turn it into rank records.
 *
 * Never rejects: failures are logged and reported as an empty array so the
 * caller can keep iterating over the remaining lists.
 *
 * @param {string} category - Category value passed to the scraper.
 * @param {string} collection - Collection value passed to the scraper.
 * @param {string} country - Country code passed to the scraper.
 * @param {number} numApps - Maximum number of apps to request.
 * @returns {Promise<Array<object>>} One record per returned app, in list
 *   order, or [] when the scraper returns a non-array or throws.
 */
async function pullRank(category, collection, country, numApps) {
  try {
    const result = await gplay.default.list({ category, collection, num: numApps, country });
    if (!Array.isArray(result)) {
      console.warn(`No results for Category: ${category}, Collection: ${collection}, Country: ${country}`);
      return [];
    }
    // Compute the date once per batch ("YYYY-MM-DD", UTC via toISOString) so
    // every record of the same list carries the same crawled_date.
    const crawledDate = new Date().toISOString().split('T')[0];
    return result.map((item, index) => ({
      crawled_date: crawledDate,
      store: 1, // NOTE(review): presumably the store id for Google Play - confirm against the consumer
      country,
      collection,
      category,
      rank: index + 1, // Assuming the list starts from rank 1
      store_id: item.appId,
    }));
  } catch (e) {
    // Include the request parameters so a failure can be traced to its list.
    console.error(`Error fetching Category: ${category}, Collection: ${collection}, Country: ${country}:`, e);
    return [];
  }
}
/**
 * Look up the apps of every developer listed in a text file and write the
 * collected app IDs to another file, one per line.
 *
 * A failing developer lookup is logged and skipped. The output file is only
 * written when at least one app ID was collected.
 *
 * @param {string} country - Country code passed to the scraper.
 * @param {number} numApps - Maximum number of apps requested per developer.
 * @param {string} [inputPath] - File with one developer ID per line.
 * @param {string} [outputPath] - Destination for the newline-separated app IDs.
 * @returns {Promise<void>}
 * @throws If the input file cannot be read or the output file cannot be written.
 */
async function loopDevelopers(
  country,
  numApps,
  inputPath = '/tmp/googleplay_developers.txt',
  outputPath = '/tmp/googleplay_developers_app_ids.txt'
) {
  // Read the file synchronously and split into developer IDs. Trimming drops
  // stray whitespace (e.g. '\r' from CRLF files); empty lines are discarded.
  const developerIds = fs
    .readFileSync(inputPath, 'utf8')
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean);

  const allAppIds = [];
  // Developers are queried one at a time, in file order.
  for (const devId of developerIds) {
    try {
      console.info(`devId=${devId}: start`);
      const apps = await gplay.default.developer({ devId, country, num: numApps });
      const appIds = apps.map((app) => app.appId);
      // A developer with exactly one app must be kept too, hence "> 0".
      if (appIds.length > 0) {
        allAppIds.push(...appIds);
        console.info(`devId=${devId}: added ${appIds.length}`);
      } else {
        console.info(`devId=${devId}: no appIds found`);
      }
    } catch (error) {
      console.error(`Error fetching apps for developer ${devId}:`, error);
    }
  }

  if (allAppIds.length > 0) {
    // Save the list of appIds to a file separated by newlines
    fs.writeFileSync(outputPath, allAppIds.join('\n'));
  }
}
/**
 * Pull the ranking for every category/collection pair and persist the results
 * one category at a time.
 *
 * Requests run sequentially. Each category's records are handed to
 * appendToFile, which is awaited so this function only resolves after the
 * last batch has been passed on and any error it reports propagates here.
 *
 * @param {object} categories - Map whose values are category identifiers.
 * @param {object} collections - Map whose values are collection identifiers.
 * @param {string} country - Country code passed to pullRank.
 * @param {number} numApps - Maximum number of apps requested per list.
 * @returns {Promise<void>}
 */
async function loopLists(categories, collections, country, numApps) {
  for (const category of Object.values(categories)) {
    const collectedAppRanks = [];
    for (const collection of Object.values(collections)) {
      console.log(`Category:${category}, Collection: ${collection}`);
      const appRanks = await pullRank(category, collection, country, numApps);
      collectedAppRanks.push(...appRanks);
    }
    // Await the append so batches are handed over in category order and the
    // promise is not left floating.
    await appendToFile(collectedAppRanks);
  }
}
/**
 * Append rank records to a file, one JSON document per line.
 *
 * The returned promise settles only after the data has been written, so
 * callers can await completion and catch write errors.
 *
 * @param {Array<object>} collectedAppRanks - Records to serialise.
 * @param {string} [filePath] - Destination file; created if missing.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying fs error when the append fails.
 */
async function appendToFile(collectedAppRanks, filePath = '/tmp/googleplay_json.txt') {
  // Nothing to write: avoid appending a lone newline, which would leave an
  // empty line in the line-per-record output.
  if (collectedAppRanks.length > 0) {
    // Convert each JSON object to a string and join them with newline characters
    const dataString = collectedAppRanks.map((rank) => JSON.stringify(rank)).join('\n');
    // Append the data string to the file with an additional newline at the end
    await fs.promises.appendFile(filePath, `${dataString}\n`);
  }
  // Logged after the write so the message reflects data that is on disk.
  console.log(`Appended ${collectedAppRanks.length} Ids`);
}
// Apps pulled per category per collection
const numApps = 500;
// Apps requested per developer in --developers mode
const developerNumApps = 60;
const country = 'us';

/**
 * Entry point: run the developer crawl when --developers is set, otherwise
 * crawl every category/collection list exposed by the scraper.
 *
 * Both branches are awaited so the returned promise reflects completion and
 * any failure of the selected crawl.
 *
 * @returns {Promise<void>}
 */
async function main() {
  if (argv.developers) {
    // Pass the developer limit as its own constant instead of reassigning
    // the shared numApps inside the call expression.
    await loopDevelopers(country, developerNumApps);
    return;
  }

  // Categories look like GAME_TRIVIA, EVENTS, TRAVEL, ...
  const categories = gplay.default.category;
  // Collections look like TOP_FREE, TOP_PAID, GROSSING
  const collections = gplay.default.collection;
  const categoryCount = Object.keys(categories).length;
  const collectionCount = Object.keys(collections).length;
  console.log(`Starting ${categoryCount} categories and ${collectionCount} collections`);
  // Every category is combined with every collection inside loopLists.
  await loopLists(categories, collections, country, numApps);
}
// Start the crawl. The promise is handled explicitly so a failure is logged
// through console.error and reported via a non-zero exit code instead of
// surfacing as an unhandled rejection.
main().catch((error) => {
  console.error('pullAppIds failed:', error);
  process.exitCode = 1;
});
})();