Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor code and update Zoom authentication process #3

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
.DS_Store
.env
.env
*.mp4

.python-version
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,26 @@ Use great [Cloud Storage Cost Calculator](http://coststorage.com) to check your
## Quick start guide

1. Choose if you'd like to do it from your laptop or server. You'd need fast internet and free space on the disk to fit the largest of your recordings (files are deleted upon upload).
1. Clone or download the repo
1. Clone or download the repo
2. Get all dependencies `pip3 install -r requirements.txt`
3. Rename `example.env` to `.env` and open it with an editor
4. Register for [Zoom Dev account](https://developers.zoom.us) and create a new app, generate a login token and paste it into .env
4. Register for [Zoom Dev account](https://developers.zoom.us) and create a new app
0. Create a Server-to-Server OAuth app:
1. Navigate to the [Zoom Marketplace](https://marketplace.zoom.us/develop)
2. Click on the `Develop` button in the top-right corner
3. Select `Build App` from the dropdown menu
4. Choose `Server-to-Server` as the app type
5. Grant the app access to `Recording: Read` scope
6. You need to provide your Zoom account details in the `.env` file for the application to function correctly.

In your `example.env` file, you will find the following section:

```plaintext
# Zoom
ACCOUNT_ID=
CLIENT_ID=
CLIENT_SECRET=
```

5. [Register for Backblaze](https://www.backblaze.com/b2/sign-up.html), create a bucket, and drop its name into the `.env` file
6. Create a new auth key in Backblaze, and drop its ID and the Key into the `.env` file
7. Choose a range of dates you'd like to move. If not sure — export a list of all recordings from the Zoom web interface and check its first and last lines — and drop them into the `.env` file
Expand All @@ -45,7 +61,7 @@ I am not a professional python developer and code might be obscene without me re

This script worked for my 3 years worth of recordings: 3k meetings, 1,3TB of storage in Zoom, but might break in your case. Please open an issue via GitHub and include tracebacks, I will do my best to fix them.

## Automatically move new recordings
## Automatically move new recordings

There are two approaches to move new recordings from Zoom to Backblaze automatically:

Expand Down
4 changes: 3 additions & 1 deletion example.env
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ B2_KEY=
BUCKET=

# Zoom
ZOOM_KEY=
ACCOUNT_ID=
CLIENT_ID=
CLIENT_SECRET=

# latest date to fetch
DATE_FROM_Y=2023
Expand Down
292 changes: 179 additions & 113 deletions zoom-importer.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,199 @@
import requests
import json
from slugify import slugify
from datetime import date, timedelta
import b2sdk.v2 as b2
import b2sdk
import base64
import os
import time
from environs import Env
import urllib
from datetime import date, timedelta

import b2sdk
import b2sdk.v2 as b2
import requests
from environs import Env
from slugify import slugify


def download_file(url, filename):
    """Download one Zoom recording file to local disk.

    The response body is streamed to ``filename`` in chunks instead of being
    held in memory as a whole, because recordings can be very large.

    Args:
        url: Download URL of the recording file.
        filename: Local path the content is written to (overwritten if it
            already exists).

    Raises:
        requests.exceptions.RequestException: On a network failure or a
            non-2xx HTTP status. Raising here keeps an error page from being
            saved (and later uploaded) as if it were the recording.
    """
    with requests.get(
        url=url,
        headers={
            "Authorization": f"Bearer {ZOOM_KEY}",
        },
        stream=True,
    ) as response:
        # Check the status before creating the file so that a failed request
        # leaves nothing on disk.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)


def b2_file_size(file):
    """Return the stored size of ``file`` in the B2 bucket, or 0 if absent.

    Args:
        file: Name of the file inside the bucket.

    Returns:
        The ``content_length`` reported by B2 for that file name, or ``0``
        when B2 reports that no such file exists.
    """
    try:
        return bucket.get_file_info_by_name(file).content_length
    except b2sdk.exception.FileNotPresent:
        return 0

def delete_recordings(uuid):

def delete_recording(uuid):
    """Ask Zoom to delete the cloud recordings of a meeting (best effort).

    Failures are reported on stdout and otherwise ignored, so one failed
    delete does not abort the whole run.

    Args:
        uuid: Meeting UUID as returned by the Zoom recordings listing.
    """
    # The UUID is URL-encoded twice before being placed in the path; it may
    # contain '/' characters that would otherwise break the URL.
    # Kept in a separate variable so the failure message shows the UUID the
    # caller passed in, not the encoded form.
    encoded_uuid = urllib.parse.quote(urllib.parse.quote(uuid, safe=''))
    try:
        response = requests.delete(
            url=f"https://api.zoom.us/v2/meetings/{encoded_uuid}/recordings",
            headers={
                "Authorization": f"Bearer {ZOOM_KEY}",
            },
        )
        # Turn non-2xx replies into an exception so they are reported too,
        # instead of being silently treated as success.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        print(f"delete failed for {uuid}")


def process_file(file, name):
filename = slugify(name) + '.' + file['file_extension'].lower()
size_zoom = file['file_size']
size_bb = b2_file_size(filename)
try:
uuid = urllib.parse.quote(urllib.parse.quote(uuid, safe = ''))
response = requests.delete(
url=f"https://api.zoom.us/v2/meetings/{uuid}/recordings",
headers={
"Authorization": f"Bearer {ZOOM_KEY}",
},
)
except requests.exceptions.RequestException:
print(f"delete failed for {uuid}")

def send_request(date_from, date_to):
size_local = os.path.getsize(filename)
except FileNotFoundError:
size_local = 0
if size_zoom > size_bb:
if size_zoom > size_local:
file_url = file['download_url']
print(' start download: ' + filename)
download_file(file_url, filename)
# Check if the file was downloaded successfully
if not os.path.exists(filename):
print(f"Failed to download file: {filename}")
return
print(' start upload: ' + filename)
# Check if the file exists and is accessible before upload
if os.path.exists(filename) and os.access(filename, os.R_OK):
bucket.upload_local_file(filename, filename)
else:
print(f"File {filename} doesn't exist or isn't accessible")
return

try:
response = requests.get(
url="https://api.zoom.us/v2/users/me/recordings",
params={
"from": date_from,
"to": date_to
},
headers={
"Authorization": f"Bearer {ZOOM_KEY}",
},
)
meetings = response.json()["meetings"]
for meeting in meetings:
name = meeting['start_time'] + '-' + meeting['topic']
print(' ' + name)
if meeting['recording_count'] > 0:
files = meeting['recording_files']
sizes = [x['file_size'] for x in files]
global processed_storage
processed_storage = processed_storage + sum(sizes)
types = [x['recording_type'] for x in files]
wanted_types = ['audio_only', 'chat_file']
if 'shared_screen_with_gallery_view' in types:
wanted_types.append('shared_screen_with_gallery_view')
elif 'gallery_view' in types:
wanted_types.append('gallery_view')
elif 'shared_screen_with_speaker_view' in types:
wanted_types.append('shared_screen_with_speaker_view')
elif 'speaker_view' in types:
wanted_types.append('speaker_view')
elif 'shared_screen' in types:
wanted_types.append('shared_screen')
print(f" {wanted_types}")
for file in files:
if file['recording_type'] in wanted_types:
filename = slugify(name) + '.' + file['file_extension'].lower()
size_zoom = file['file_size']
size_bb = b2_file_size(filename)
try:
size_local = os.path.getsize(filename)
except FileNotFoundError:
size_local = 0
if size_zoom > size_bb:
if size_zoom > size_local:
file_url = file['download_url']
print(' start download: ' + filename)
download_file(file_url, filename)
print(' start upload: ' + filename)
b.upload_local_file(filename, filename)
try:
os.remove(filename)
except FileNotFoundError:
pass
delete_recordings(meeting['uuid'])
time_elapsed = time.time() - time_start
speed = processed_storage / (time_elapsed/60)
print(f"total processed {processed_storage:,} bytes, elapsed {time_elapsed:,.0f} sec, {speed:,.0f} bytes per minute")
except requests.exceptions.RequestException:
print('HTTP Request failed')

def daterange(start_date, end_date):
if start_date < end_date:
raise RuntimeError('start date should be > than end_date!')
for n in range(int((start_date - end_date).days)):
yield start_date - timedelta(n)
os.remove(filename)
except FileNotFoundError:
pass

def process_meeting(meeting):
    """Transfer the wanted recording files of one meeting and print stats.

    For a meeting with at least one recording, the audio track, the chat
    file and the single highest-priority video view that is present are
    passed to ``process_file``; afterwards the meeting's recordings are
    deleted from Zoom. Running totals are kept in the module-level
    ``processed_storage`` and measured from ``time_start``.

    Args:
        meeting: Meeting dict from the Zoom recordings listing; the keys
            read here are ``start_time``, ``topic``, ``recording_count``,
            ``recording_files`` and ``uuid``.
    """
    global processed_storage

    print(f" {meeting['start_time']} {meeting['topic']}")

    if meeting['recording_count'] > 0:
        recording_files = meeting['recording_files']
        # Every file counts towards the total, not only the ones kept.
        processed_storage += sum([entry['file_size'] for entry in recording_files])

        available_types = [entry['recording_type'] for entry in recording_files]
        wanted_types = ['audio_only', 'chat_file']
        # Video views in order of preference; only the first one that exists
        # for this meeting is kept.
        view_priority = (
            'shared_screen_with_gallery_view',
            'gallery_view',
            'shared_screen_with_speaker_view',
            'speaker_view',
            'shared_screen',
        )
        best_view = next(
            (view for view in view_priority if view in available_types),
            None,
        )
        if best_view is not None:
            wanted_types.append(best_view)
        print(f" {wanted_types}")

        for entry in recording_files:
            if entry['recording_type'] not in wanted_types:
                continue
            process_file(entry, meeting['start_time'] + '-' + meeting['topic'])

        # NOTE(review): the delete is issued unconditionally after the loop;
        # process_file's outcome is not checked here.
        delete_recording(meeting['uuid'])

    time_elapsed = time.time() - time_start
    speed = processed_storage / (time_elapsed / 60)
    print(
        f"total processed {processed_storage:,} bytes, elapsed {time_elapsed:,.0f} sec, {speed:,.0f} bytes per minute")


def get_recordings(date_from, date_to):
    """Fetch all cloud-recording meetings between two dates.

    Result pages are followed through ``next_page_token`` until the API
    stops returning one, so periods with more meetings than fit on one page
    are returned completely.

    Args:
        date_from: First day of the period (object with ``strftime``).
        date_to: Last day of the period (object with ``strftime``).

    Returns:
        List of meeting dicts collected from the ``meetings`` key of every
        response page.

    Raises:
        requests.exceptions.RequestException: On a network failure or a
            non-2xx HTTP status.
        KeyError: If a successful response carries no ``meetings`` key.
    """
    # The maximum date range for this endpoint can be a month
    print(f"getting recordings from {date_from} to {date_to}...")
    params = {
        "from": date_from.strftime("%Y-%m-%d"),
        "to": date_to.strftime("%Y-%m-%d"),
        'page_size': 300,
    }
    meetings = []
    while True:
        response = requests.get(
            url="https://api.zoom.us/v2/users/me/recordings",
            params=params,
            headers={
                "Authorization": f"Bearer {ZOOM_KEY}",
            },
        )
        # Fail with the HTTP error itself rather than a KeyError on the
        # error payload.
        response.raise_for_status()
        data = response.json()
        meetings.extend(data['meetings'])
        next_page_token = data.get('next_page_token')
        if not next_page_token:
            return meetings
        params['next_page_token'] = next_page_token


def date_span(date_start, date_end, delta=timedelta(days=30)):
    """Split an inclusive date range into consecutive, non-overlapping spans.

    Each yielded ``(span_start, span_end)`` pair is inclusive on both ends
    and covers at most ``delta`` days. The next span starts the day after
    the previous one ends, so no day is covered twice and a range where
    ``date_start == date_end`` yields that single day.

    Args:
        date_start: First day of the range.
        date_end: Last day of the range (inclusive).
        delta: Maximum length of one span; must be at least one day.

    Yields:
        ``(span_start, span_end)`` tuples in ascending order.

    Raises:
        RuntimeError: If ``date_start`` is later than ``date_end``.
        ValueError: If ``delta`` is shorter than one day (the loop could
            not advance).
    """
    if date_start > date_end:
        raise RuntimeError('start date should be lower than end date!')

    one_day = timedelta(days=1)
    if delta < one_day:
        raise ValueError('delta should be at least one day!')

    current_date = date_start
    while current_date <= date_end:
        # Inclusive end: a span of `delta` days ends delta - 1 days after
        # its start, clamped to the end of the overall range.
        span_end = min(current_date + delta - one_day, date_end)
        yield (current_date, span_end)
        current_date = span_end + one_day


# Get Zoom access token using Server-to-Server OAuth app
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here is key changes with auth

def get_zoom_access_token():
    """Obtain a Zoom access token via the Server-to-Server OAuth flow.

    Reads ``ACCOUNT_ID``, ``CLIENT_ID`` and ``CLIENT_SECRET`` from the
    environment, sends the client credentials as HTTP Basic auth to Zoom's
    token endpoint and returns the token from the JSON reply.

    Returns:
        The ``access_token`` string from the response.

    Raises:
        RuntimeError: If the response contains no ``access_token``; the
            message includes the payload Zoom returned.
    """
    # NOTE(review): the token is fetched once per run; presumably it has a
    # limited lifetime, so very long runs may need a refresh — confirm.
    print('getting Zoom access token...')
    params = {
        'grant_type': 'account_credentials',
        'account_id': env('ACCOUNT_ID'),
    }
    credentials = base64.b64encode(
        f"{env('CLIENT_ID')}:{env('CLIENT_SECRET')}".encode()
    ).decode()
    headers = {
        "Authorization": f"Basic {credentials}",
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    response = requests.post(
        "https://zoom.us/oauth/token",
        params=params,
        headers=headers,
    )
    data = response.json()
    if 'access_token' not in data:
        raise RuntimeError(f"Zoom did not return an access token: {data}")
    return data['access_token']


def get_cloud_bucket():
    """Authorize against Backblaze B2 and return the configured bucket.

    Credentials (``B2_KEY_ID``, ``B2_KEY``) and the bucket name (``BUCKET``)
    are read from the environment; account info is kept in memory only.

    Returns:
        The bucket object looked up by name through the B2 API client.
    """
    print('preparing B2 bucket...')
    api = b2.B2Api(b2.InMemoryAccountInfo())
    key_id, key = env('B2_KEY_ID'), env('B2_KEY')
    api.authorize_account("production", key_id, key)
    bucket_name = env('BUCKET')
    return api.get_bucket_by_name(bucket_name)


env = Env()
env.read_env()
B2_KEY_ID = env('B2_KEY_ID')
B2_KEY = env('B2_KEY')
BUCKET = env('BUCKET')
ZOOM_KEY = env('ZOOM_KEY')
DATE_FROM_Y = env.int('DATE_FROM_Y')
DATE_FROM_M = env.int('DATE_FROM_M')
DATE_FROM_D = env.int('DATE_FROM_D')
DATE_TO_Y = env.int('DATE_TO_Y')
DATE_TO_M = env.int('DATE_TO_M')
DATE_TO_D = env.int('DATE_TO_D')


info = b2.InMemoryAccountInfo()
b2_api = b2.B2Api(info)
b2_api.authorize_account("production", B2_KEY_ID, B2_KEY)
b = b2_api.get_bucket_by_name(BUCKET)

bucket = get_cloud_bucket()
processed_storage = 0
time_start = time.time()

if __name__ == "__main__":
start_date = date(DATE_FROM_Y, DATE_FROM_M, DATE_FROM_D)
end_date = date(DATE_TO_Y, DATE_TO_M, DATE_TO_D)
for single_date in daterange(start_date, end_date):
print(f"{single_date.strftime('%Y-%m-%d')}")
send_request(single_date.strftime("%Y-%m-%d"), single_date.strftime("%Y-%m-%d"))
date_start = date(
env.int('DATE_FROM_Y'),
env.int('DATE_FROM_M'),
env.int('DATE_FROM_D')
)
date_end = date(
env.int('DATE_TO_Y'),
env.int('DATE_TO_M'),
env.int('DATE_TO_D'),
)

ZOOM_KEY = get_zoom_access_token()

meetings = []
for period_start, period_end in date_span(date_start, date_end):
period_meetings = get_recordings(period_start, period_end)
meetings.extend(period_meetings)
if period_end > date.today():
break
meetings.sort(key=lambda x: x['start_time'])

print(f'Total meetings in cloud: {len(meetings)}')
for i, meeting in enumerate(meetings):
print(i, meeting['start_time'], meeting['topic'])
process_meeting(meeting)