-
Notifications
You must be signed in to change notification settings - Fork 2
/
Backup.py
85 lines (73 loc) · 2.74 KB
/
Backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
from pymongo import MongoClient
from typing import Optional, Dict, List
def copy_collection(
    source_collection_name: str,
    target_collection_name: str,
    db_name: str,
    connection_string: str = "mongodb://localhost:27017/",
    query_filter: Optional[Dict] = None,
    indexes: bool = True
) -> int:
    """Copy documents, and optionally indexes, from one collection to another.

    Both collections are looked up in the database ``db_name`` on the server
    reached through ``connection_string``. Documents are read with ``find``
    and written to the target with ``insert_many`` in batches of 1000.

    Args:
        source_collection_name: Name of the collection to read from.
        target_collection_name: Name of the collection to write to.
        db_name: Name of the database holding both collections.
        connection_string: MongoDB URI handed to ``MongoClient``.
        query_filter: Filter passed to ``find``; ``None`` or an empty dict
            selects every document in the source.
        indexes: When true, every source index except ``_id_`` is re-created
            on the target after the documents have been inserted.

    Returns:
        The number of documents handed to ``insert_many``.

    Raises:
        ValueError: If the source and target collection names are identical.
    """
    # Fail fast, before connecting: re-inserting a collection's own documents
    # into itself cannot succeed and would only surface as a bulk write error.
    if source_collection_name == target_collection_name:
        raise ValueError(
            "source and target collection names must differ: "
            f"{source_collection_name!r}"
        )

    batch_size = 1000
    copied_count = 0

    # The context manager closes the client even if a read or write raises.
    with MongoClient(connection_string) as client:
        db = client[db_name]
        source = db[source_collection_name]
        target = db[target_collection_name]

        docs_to_copy = source.find(query_filter) if query_filter else source.find()

        # Insert in batches rather than one document per round trip.
        batch: List = []
        for doc in docs_to_copy:
            batch.append(doc)
            if len(batch) >= batch_size:
                target.insert_many(batch)
                copied_count += len(batch)
                batch = []

        # Flush the final, partially filled batch.
        if batch:
            target.insert_many(batch)
            copied_count += len(batch)

        if indexes:
            for index_name, index_spec in source.index_information().items():
                # The _id_ index is created automatically on the target.
                if index_name == '_id_':
                    continue
                # Everything except the key list and the 'ns'/'v' entries is
                # forwarded to create_index as keyword options.
                index_kwargs = {
                    option: value for option, value in index_spec.items()
                    if option not in ('ns', 'v', 'key')
                }
                target.create_index(index_spec['key'], **index_kwargs)

    return copied_count
# Demonstration runs showing the common ways to call copy_collection
if __name__ == "__main__":
    # 1) Whole collection, default local connection
    total_copied = copy_collection(
        "old_collection",
        "new_collection",
        "your_database",
    )
    print(f"Copied {total_copied} documents")

    # 2) Only the documents matching a filter
    filtered_copied = copy_collection(
        "users",
        "active_adult_users",
        "your_database",
        query_filter={"status": "active", "age": {"$gt": 25}},
    )
    print(f"Copied {filtered_copied} filtered documents")

    # 3) Same operation against a server named by an explicit connection string
    remote_copied = copy_collection(
        "products",
        "products_backup",
        "store",
        "mongodb://user:pass@remote-host:27017/",
    )
    print(f"Copied {remote_copied} documents to remote database")