gsamat · skywinder · Feb 20, 2024 · Feb 20, 2024 · Feb 21, 2024 · Feb 21, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,5 @@
 .DS_Store
-.env
+.env
+*.mp4
+
+.python-version
diff --git a/README.md b/README.md
@@ -22,10 +22,26 @@ Use great [Cloud Storage Cost Calculator](http://coststorage.com) to check your
 ## Quick start guide
 
 1. Choose if you'd like to do it from your laptop or server. You'd need fast internet and free space on the disk to fit the largest of your recordings (files are deleted upon upload).
-1. Clone or download the repo 
+1. Clone or download the repo
 2. Get all dependencies `pip3 install -r requirements.txt`
 3. Rename `example.env` to `.env` and open it with an editor
-4. Register for [Zoom Dev account](https://developers.zoom.us) and create a new app, generate a login token and paste it into .env
+4. Register for [Zoom Dev account](https://developers.zoom.us) and create a new app
+    0. Create a Server-to-Server OAuth app:
+    1. Navigate to the [Zoom Marketplace](https://marketplace.zoom.us/develop)
+    2. Click on the `Develop` button in the top-right corner
+    3. Select `Build App` from the dropdown menu
+    4. Choose `Server-to-Server` as the app type
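+    5. Grant the app access to the `Recording: Read` scope, plus recording write access, because the script deletes each recording from Zoom after uploading it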
+    6. You need to provide your Zoom account details in the `.env` file for the application to function correctly.
+
+    In your `example.env` file, you will find the following section:
+
+    ```plaintext
+    # Zoom
+    ACCOUNT_ID=
+    CLIENT_ID=
+    CLIENT_SECRET=
+    ```
 5. [Register for Backblaze](https://www.backblaze.com/b2/sign-up.html), create a bucket, drop it's name into .env file
 6. Create a new auth key in Backblaze, drop it's ID and the Key into .env file
 7. Choose a range of dates you'd like to move. If not sure — export a list of all recording from zoom web interface and check it's first and last line — and drop them into `.env` file
@@ -45,7 +61,7 @@ I am not a professional python developer and code might be obscene without me re
 
 This script worked for my 3 years worth of recordings: 3k meetings, 1,3TB of storage in Zoom, but might break in your case. Please open an issue via GitHub and include tracebacks, I will do my best to fix them.
 
-## Automatically move new recordings 
+## Automatically move new recordings
 
 There are two approaches to move new recordings from Zoom to Backblaze automatically:
 

diff --git a/example.env b/example.env
@@ -6,7 +6,9 @@ B2_KEY=
 BUCKET=
 
 # Zoom
-ZOOM_KEY=
+ACCOUNT_ID=
+CLIENT_ID=
+CLIENT_SECRET=
 
 # latest date to fetch
 DATE_FROM_Y=2023

diff --git a/zoom-importer.py b/zoom-importer.py
@@ -1,133 +1,199 @@
-import requests
-import json
-from slugify import slugify 
-from datetime import date, timedelta
-import b2sdk.v2 as b2
-import b2sdk
+import base64
 import os
 import time
-from environs import Env
 import urllib
+from datetime import date, timedelta
+
+import b2sdk
+import b2sdk.v2 as b2
+import requests
+from environs import Env
+from slugify import slugify
+
 
 def download_file(url, filename):
-	response = requests.get(
-		url=url,
-		headers={
-			"Authorization": f"Bearer {ZOOM_KEY}",
-		},
-	)
-	with open(filename, 'wb') as f:
-		f.write(response.content)
+    """Download `url` to the local path `filename`, streaming it to disk.
+
+    The request is authorized with the module-level ZOOM_KEY bearer token.
+
+    Raises:
+        requests.HTTPError: if the server answers with an error status, so
+            that an error page is never saved as if it were the recording.
+    """
+    # stream=True keeps memory use flat: the body is written chunk by chunk
+    # instead of being loaded whole through response.content.
+    with requests.get(
+        url=url,
+        headers={
+            "Authorization": f"Bearer {ZOOM_KEY}",
+        },
+        stream=True,
+    ) as response:
+        response.raise_for_status()
+        with open(filename, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=1024 * 1024):
+                f.write(chunk)
+
 
 def b2_file_size(file):
+    """Return the content length the bucket reports for `file`, or 0 if absent.
+
+    `file` is the object name looked up in the module-level `bucket`.
+    """
     try:
-        return b.get_file_info_by_name(file).content_length
+        return bucket.get_file_info_by_name(file).content_length
     except (b2sdk.exception.FileNotPresent):
         return 0
 
-def delete_recordings(uuid):
+
+def delete_recording(uuid):
+    """Delete the cloud recordings of the meeting `uuid` from Zoom (best effort).
+
+    Any request failure, including an HTTP error status, is reported on
+    stdout and not raised, so one failed delete does not stop the run.
+    """
+    # The UUID is URL-encoded twice before being placed in the path; keep the
+    # original value untouched so the failure message shows the real UUID.
+    encoded_uuid = urllib.parse.quote(urllib.parse.quote(uuid, safe=''))
+    try:
+        response = requests.delete(
+            url=f"https://api.zoom.us/v2/meetings/{encoded_uuid}/recordings",
+            headers={
+                "Authorization": f"Bearer {ZOOM_KEY}",
+            },
+        )
+        # requests does not raise on 4xx/5xx by itself; HTTPError is a
+        # RequestException, so it lands in the handler below.
+        response.raise_for_status()
+    except requests.exceptions.RequestException:
+        print(f"delete failed for {uuid}")
+
+
+def process_file(file, name):
+	"""Copy one Zoom recording file into the bucket and drop the local copy.
+
+	file: recording-file dict; reads 'file_extension', 'file_size' and
+		'download_url'.
+	name: base name; it is slugified and used both as the local file name
+		and as the object name in the bucket.
+
+	Nothing is transferred when the bucket already holds at least
+	'file_size' bytes under that name. The download is skipped when a local
+	copy of at least that size exists.
+
+	Returns True when the file is in the bucket after the call (already
+	there or just uploaded), False when it could not be downloaded or read.
+	"""
+	filename = slugify(name) + '.' + file['file_extension'].lower()
+	size_zoom = file['file_size']
+	size_bb = b2_file_size(filename)
 	try:
-		uuid = urllib.parse.quote(urllib.parse.quote(uuid, safe = ''))
-		response = requests.delete(
-			url=f"https://api.zoom.us/v2/meetings/{uuid}/recordings",
-			headers={
-				"Authorization": f"Bearer {ZOOM_KEY}",
-			},
-		)
-	except requests.exceptions.RequestException:
-		print(f"delete failed for {uuid}")
-
-def send_request(date_from, date_to):
+		size_local = os.path.getsize(filename)
+	except FileNotFoundError:
+		size_local = 0
+	if size_zoom > size_bb:
+		if size_zoom > size_local:
+			file_url = file['download_url']
+			print('    start download: ' + filename)
+			download_file(file_url, filename)
+			# Check if the file was downloaded successfully
+			if not os.path.exists(filename):
+				print(f"Failed to download file: {filename}")
+				return False
+		# Check if the file exists and is accessible before upload
+		if not (os.path.exists(filename) and os.access(filename, os.R_OK)):
+			print(f"File {filename} doesn't exist or isn't accessible")
+			return False
+		# Upload only when the bucket copy is missing or smaller than Zoom's.
+		print('    start upload: ' + filename)
+		bucket.upload_local_file(filename, filename)
+
 	try:
-		response = requests.get(
-			url="https://api.zoom.us/v2/users/me/recordings",
-			params={
-				"from": date_from,
-				"to": date_to
-			},
-			headers={
-				"Authorization": f"Bearer {ZOOM_KEY}",
-			},
-		)
-		meetings = response.json()["meetings"]
-		for meeting in meetings:
-			name =  meeting['start_time'] + '-' + meeting['topic']
-			print('	' + name)
-			if meeting['recording_count'] > 0:
-				files = meeting['recording_files']
-				sizes = [x['file_size'] for x in files]
-				global processed_storage
-				processed_storage = processed_storage + sum(sizes)
-				types = [x['recording_type'] for x in files]
-				wanted_types = ['audio_only', 'chat_file']
-				if 'shared_screen_with_gallery_view' in types:
-					wanted_types.append('shared_screen_with_gallery_view')
-				elif 'gallery_view' in types:
-					wanted_types.append('gallery_view')
-				elif 'shared_screen_with_speaker_view' in types:
-					wanted_types.append('shared_screen_with_speaker_view')
-				elif 'speaker_view' in types:
-					wanted_types.append('speaker_view')
-				elif 'shared_screen' in types:
-					wanted_types.append('shared_screen')
-				print(f"		{wanted_types}")
-				for file in files:
-					if file['recording_type'] in wanted_types:
-						filename = slugify(name) + '.' + file['file_extension'].lower()
-						size_zoom = file['file_size']
-						size_bb = b2_file_size(filename)
-						try:
-							size_local = os.path.getsize(filename)
-						except FileNotFoundError:
-							size_local = 0
-						if size_zoom > size_bb:
-							if size_zoom > size_local:
-								file_url = file['download_url']
-								print('			start download: ' + filename)
-								download_file(file_url, filename)
-							print('			start upload: ' + filename)
-							b.upload_local_file(filename, filename)
-						try:
-							os.remove(filename)
-						except FileNotFoundError:
-							pass
-			delete_recordings(meeting['uuid'])
-			time_elapsed = time.time() - time_start
-			speed = processed_storage / (time_elapsed/60)
-			print(f"total processed {processed_storage:,} bytes, elapsed {time_elapsed:,.0f} sec, {speed:,.0f} bytes per minute")
-	except requests.exceptions.RequestException:
-		print('HTTP Request failed')
-
-def daterange(start_date, end_date):
-    if start_date < end_date:
-    	raise RuntimeError('start date should be > than end_date!')
-    for n in range(int((start_date - end_date).days)):
-        yield start_date - timedelta(n)
+		os.remove(filename)
+	except FileNotFoundError:
+		pass
+	return True
+
+def process_meeting(meeting):
+    """Archive the wanted recording files of one meeting, then delete it on Zoom.
+
+    meeting: meeting dict; reads 'start_time', 'topic', 'recording_count',
+        'uuid' and, when there are recordings, 'recording_files'.
+
+    The audio and chat files are always wanted, plus at most one video
+    layout. The sizes of all listed files are added to the module-level
+    `processed_storage` counter, and a throughput line is printed at the end.
+    A meeting without recordings skips the file handling entirely.
+    """
+    global processed_storage
+    print(f"	{meeting['start_time']} {meeting['topic']}")
+    all_archived = True
+    if meeting['recording_count'] > 0:
+        files = meeting['recording_files']
+        processed_storage += sum(x['file_size'] for x in files)
+        types = [x['recording_type'] for x in files]
+        wanted_types = ['audio_only', 'chat_file']
+        # Listed in order of preference: only the first layout present is kept.
+        extra_wanted_types = [
+            'shared_screen_with_gallery_view',
+            'gallery_view',
+            'shared_screen_with_speaker_view',
+            'speaker_view',
+            'shared_screen',
+        ]
+        for t in extra_wanted_types:
+            if t in types:
+                wanted_types.append(t)
+                break
+        print(f"		{wanted_types}")
+        name = meeting['start_time'] + '-' + meeting['topic']
+        for file in files:
+            if file['recording_type'] in wanted_types:
+                # Only an explicit False counts as a failure; a call that
+                # returns nothing is treated as having succeeded.
+                if process_file(file, name) is False:
+                    all_archived = False
+    if all_archived:
+        delete_recording(meeting['uuid'])
+    else:
+        # Never delete the only remaining copy of a file that was not archived.
+        print(f"    keeping Zoom recording {meeting['uuid']}: not every file was archived")
+    time_elapsed = time.time() - time_start
+    speed = processed_storage / (time_elapsed / 60)
+    print(
+        f"total processed {processed_storage:,} bytes, elapsed {time_elapsed:,.0f} sec, {speed:,.0f} bytes per minute")
+
+
+def get_recordings(date_from, date_to):
+    """Return every meeting with cloud recordings between two dates.
+
+    date_from, date_to: objects with `strftime` (e.g. `datetime.date`); they
+        are sent as YYYY-MM-DD.
+
+    Follows `next_page_token` until the listing is exhausted, so results
+    beyond the first page of 300 are not lost.
+
+    Raises:
+        requests.HTTPError: if Zoom answers with an error status.
+    """
+    # The maximum date range for this endpoint can be a month
+    print(f"getting recordings from {date_from} to {date_to}...")
+    params = {
+        "from": date_from.strftime("%Y-%m-%d"),
+        "to": date_to.strftime("%Y-%m-%d"),
+        'page_size': 300,
+    }
+    meetings = []
+    while True:
+        response = requests.get(
+            url="https://api.zoom.us/v2/users/me/recordings",
+            params=params,
+            headers={
+                "Authorization": f"Bearer {ZOOM_KEY}",
+            },
+        )
+        # Fail with the HTTP error itself rather than a KeyError on 'meetings'.
+        response.raise_for_status()
+        data = response.json()
+        meetings.extend(data['meetings'])
+        next_page_token = data.get('next_page_token')
+        if not next_page_token:
+            return meetings
+        params['next_page_token'] = next_page_token
+
+
+def date_span(date_start, date_end, delta=timedelta(days=30)):
+    """Yield (start, end) windows that cover date_start..date_end inclusive.
+
+    Both ends of a window are inclusive. Windows never overlap, so each day
+    belongs to exactly one window, and each spans at most `delta` days.
+    A range where date_start equals date_end yields one single-day window.
+
+    Raises (on first iteration):
+        RuntimeError: if date_start is after date_end.
+        ValueError: if delta is shorter than one day, which could never
+            advance through the range.
+    """
+    if date_start > date_end:
+        raise RuntimeError('start date should be lower than end date!')
+    one_day = timedelta(days=1)
+    if delta < one_day:
+        raise ValueError('delta should be at least one day!')
+
+    current_date = date_start
+    while current_date <= date_end:
+        span_end = min(current_date + delta - one_day, date_end)
+        yield (current_date, span_end)
+        # Start the next window the day after this one ends.
+        current_date = span_end + one_day
+
+
+# Get Zoom access token using Server-to-Server OAuth app
+def get_zoom_access_token():
+    """Request an access token from Zoom with the account-credentials grant.
+
+    Reads ACCOUNT_ID, CLIENT_ID and CLIENT_SECRET through `env`. The client
+    id and secret are sent as HTTP Basic credentials.
+
+    Returns the 'access_token' value of the JSON reply.
+
+    Raises:
+        Exception: carrying the decoded reply when it has no 'access_token'.
+    """
+    print('getting Zoom access token...')
+    query = {
+        'grant_type': 'account_credentials',
+        'account_id': env('ACCOUNT_ID'),
+    }
+    client_pair = f"{env('CLIENT_ID')}:{env('CLIENT_SECRET')}"
+    basic_auth = base64.b64encode(client_pair.encode()).decode()
+    reply = requests.post(
+        "https://zoom.us/oauth/token",
+        params=query,
+        headers={
+            "Authorization": f"Basic {basic_auth}",
+            'Content-Type': 'application/x-www-form-urlencoded',
+        },
+    ).json()
+    if 'access_token' in reply:
+        return reply['access_token']
+    raise Exception(reply)
+
+
+def get_cloud_bucket():
+    """Authorize against B2 and return the bucket named by the BUCKET setting.
+
+    Credentials are read through `env` (B2_KEY_ID, B2_KEY) and the account
+    info is kept in memory only.
+    """
+    print('preparing B2 bucket...')
+    api = b2.B2Api(b2.InMemoryAccountInfo())
+    key_id = env('B2_KEY_ID')
+    app_key = env('B2_KEY')
+    api.authorize_account("production", key_id, app_key)
+    bucket_name = env('BUCKET')
+    return api.get_bucket_by_name(bucket_name)
+
 
+# Settings reader used by the helpers above and by the script entry point.
 env = Env()
 env.read_env()
-B2_KEY_ID = env('B2_KEY_ID')
-B2_KEY = env('B2_KEY')
-BUCKET = env('BUCKET')
-ZOOM_KEY = env('ZOOM_KEY')
-DATE_FROM_Y = env.int('DATE_FROM_Y')
-DATE_FROM_M = env.int('DATE_FROM_M')
-DATE_FROM_D = env.int('DATE_FROM_D')
-DATE_TO_Y = env.int('DATE_TO_Y')
-DATE_TO_M = env.int('DATE_TO_M')
-DATE_TO_D = env.int('DATE_TO_D')
-
-
-info = b2.InMemoryAccountInfo()
-b2_api = b2.B2Api(info)
-b2_api.authorize_account("production", B2_KEY_ID, B2_KEY)
-b = b2_api.get_bucket_by_name(BUCKET)
+
+# Bucket handle shared by b2_file_size() and process_file().
+# NOTE(review): this runs at import time, so importing the module already
+# authorizes against B2.
+bucket = get_cloud_bucket()
+# Running total of recording file sizes, updated by process_meeting().
 processed_storage = 0
+# Reference point for the elapsed time and throughput that process_meeting() prints.
 time_start = time.time()
 
 if __name__ == "__main__":
-	start_date = date(DATE_FROM_Y, DATE_FROM_M, DATE_FROM_D)
-	end_date = date(DATE_TO_Y, DATE_TO_M, DATE_TO_D)
-	for single_date in daterange(start_date, end_date):
-		print(f"{single_date.strftime('%Y-%m-%d')}")
-		send_request(single_date.strftime("%Y-%m-%d"), single_date.strftime("%Y-%m-%d"))
+    # Date range to migrate, read from the DATE_FROM_* / DATE_TO_* settings.
+    date_start = date(
+        env.int('DATE_FROM_Y'),
+        env.int('DATE_FROM_M'),
+        env.int('DATE_FROM_D')
+    )
+    date_end = date(
+        env.int('DATE_TO_Y'),
+        env.int('DATE_TO_M'),
+        env.int('DATE_TO_D'),
+    )
+
+    # NOTE(review): the token is fetched once for the whole run; if Zoom
+    # tokens expire during a long migration the later calls will fail.
+    ZOOM_KEY = get_zoom_access_token()
+
+    # The same meeting can be listed for more than one date window; keep one
+    # entry per uuid so no meeting is processed (and deleted) twice.
+    meetings_by_uuid = {}
+    for period_start, period_end in date_span(date_start, date_end):
+        for period_meeting in get_recordings(period_start, period_end):
+            meetings_by_uuid.setdefault(period_meeting['uuid'], period_meeting)
+        # Later windows lie entirely in the future.
+        if period_end > date.today():
+            break
+    meetings = sorted(meetings_by_uuid.values(), key=lambda x: x['start_time'])
+
+    print(f'Total meetings in cloud: {len(meetings)}')
+    for i, meeting in enumerate(meetings):
+        print(i, meeting['start_time'], meeting['topic'])
+        process_meeting(meeting)