Skip to content

Commit

Permalink
code clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
bishoyh committed Nov 11, 2024
1 parent a9762bb commit f24d3be
Showing 1 changed file with 38 additions and 4 deletions.
42 changes: 38 additions & 4 deletions mbox2eml.cc
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,36 @@
//////////////////////////////////////////////////////
///// main entry point for mbox2eml.cc
/////////////////////////////////////////////////////
// Copyright (c) Bishoy H.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// Description:
// This tool, mbox2eml, extracts individual email messages from an mbox file
// and saves them as separate .eml files in a given folder. It uses
// multithreading to speed up the processing of large mbox files by
// distributing the workload across multiple CPU cores, but it requires enough
// memory to load the mbox file. The tool takes two command-line arguments:
// the path to the mbox file and the output directory where the .eml files
// will be saved.

// Compile with g++ -O3 -std=c++23 -pthread -lstdc++fs -o mbox2eml mbox2eml.cc


#include <iostream>
#include <fstream>
#include <string>
Expand All @@ -9,8 +39,6 @@
#include <mutex>
#include <filesystem>

// Compile with g++ -O3 -std=c++23 -pthread -lstdc++fs -o mbox2eml mbox2eml.cc

namespace fs = std::filesystem;

// Structure to hold email data
Expand All @@ -26,7 +54,7 @@ std::vector<Email> extractEmails(const std::string& mbox_file) {
Email current_email;

while (std::getline(file, line)) {
if (line.starts_with("From ")) {
if (line.starts_with("From ")) { // use c++20 feature
// Start of a new email
if (!current_email.content.empty()) {
emails.push_back(current_email);
Expand Down Expand Up @@ -67,6 +95,8 @@ void workerThread(const std::vector<Email>& emails, const std::string& output_di
int main(int argc, char* argv[]) {
// Check for correct number of arguments
if (argc != 3) {
std::cerr << "mbox2eml: Extract individual email messages from an mbox file and save them as separate .eml files." << std::endl;
std::cerr << "Error: Incorrect number of arguments." << std::endl;
std::cerr << "Usage: " << argv[0] << " <mbox_file> <output_directory>" << std::endl;
return 1;
}
Expand All @@ -75,8 +105,12 @@ int main(int argc, char* argv[]) {
std::string output_dir = argv[2];

// Create the output directory if it doesn't exist
try {
fs::create_directory(output_dir);

} catch (const std::exception& e) {
std::cerr << "Error creating output directory: " << e.what() << std::endl;
return 1;
}
// Extract emails from the mbox file
std::vector<Email> emails = extractEmails(mbox_file);
std::cout << "Extracted " << emails.size() << " emails." << std::endl;
Expand Down

0 comments on commit f24d3be

Please sign in to comment.