-
Notifications
You must be signed in to change notification settings - Fork 1
/
SplitAndShuffle.cpp
99 lines (81 loc) · 2.4 KB
/
SplitAndShuffle.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>
/// Writes every string in `lines` to `filename`, each followed by one '\n'.
///
/// The file is opened in binary mode, so '\n' is emitted as-is without any
/// platform newline translation. The whole payload is assembled in memory
/// and handed to the stream in a single write.
///
/// @param lines    Lines to write, in order. An empty vector produces an
///                 empty file.
/// @param filename Path of the file to create.
///
/// Failure to open or to write the file is reported on std::cout; the
/// function then returns normally.
void writeLinesToFile(const std::vector<std::string> &lines, const std::string &filename)
{
    std::ofstream outputFile(filename, std::ios::binary);
    if (!outputFile)
    {
        std::cout << "无法创建文件:" << filename << std::endl;
        return;
    }

    // Compute the exact payload size first (text plus one terminator per
    // line) so the buffer is allocated once instead of growing repeatedly.
    std::size_t totalBytes = lines.size();
    for (const std::string &line : lines)
    {
        totalBytes += line.size();
    }

    std::string outputString;
    outputString.reserve(totalBytes);
    for (const std::string &line : lines)
    {
        // Two appends instead of `line + "\n"`: no temporary string per line.
        outputString += line;
        outputString += '\n';
    }

    outputFile.write(outputString.data(), static_cast<std::streamsize>(outputString.size()));
    outputFile.close();

    // close() flushes; a failed write or flush (e.g. disk full) leaves the
    // stream in a failed state, which would otherwise go unnoticed.
    if (!outputFile)
    {
        std::cout << "无法写入文件:" << filename << std::endl;
    }
}
/// Reads all lines of `filename`, shuffles their order randomly, and splits
/// them across `numFiles` output files named "output_0.txt" through
/// "output_<numFiles-1>.txt".
///
/// Lines are spread as evenly as possible: every file receives
/// floor(lineCount / numFiles) lines and the first (lineCount % numFiles)
/// files receive one additional line. Each output file is written by its own
/// thread via writeLinesToFile; all threads are joined before returning.
///
/// @param filename Path of the text file to read.
/// @param numFiles Number of output files to produce; must be positive.
///
/// Errors (non-positive `numFiles`, unreadable input file) are reported on
/// std::cout and the function returns without writing anything.
void shuffleLinesInFile(const std::string &filename, const int numFiles)
{
    // A zero count would divide by zero below; a negative one makes no sense.
    if (numFiles <= 0)
    {
        std::cout << "文件数量必须大于0:" << numFiles << std::endl;
        return;
    }

    // Open the source file for reading.
    std::ifstream inputFile(filename);
    if (!inputFile)
    {
        std::cout << "无法打开文件:" << filename << std::endl;
        return;
    }

    // Read every line of the source file.
    std::vector<std::string> lines;
    std::string line;
    while (std::getline(inputFile, line))
    {
        // getline overwrites `line` on the next iteration, so it is safe to
        // move its contents out instead of copying them.
        lines.push_back(std::move(line));
    }
    inputFile.close();

    // Shuffle the line order.
    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(lines.begin(), lines.end(), g);

    // Distribute the lines evenly across the output files. Counts are kept
    // in size_t so large inputs are not narrowed to int.
    const std::size_t fileCount = static_cast<std::size_t>(numFiles);
    const std::size_t baseCount = lines.size() / fileCount;
    const std::size_t extraCount = lines.size() % fileCount;

    using Difference = std::vector<std::string>::difference_type;

    // NOTE: one thread is started per output file, as in the original design.
    std::vector<std::thread> threads;
    auto chunkBegin = lines.begin();
    for (std::size_t i = 0; i < fileCount; ++i)
    {
        // The first `extraCount` files absorb the remainder, one line each.
        const std::size_t chunkSize = baseCount + (i < extraCount ? 1 : 0);
        const auto chunkEnd = chunkBegin + static_cast<Difference>(chunkSize);

        // The shuffled lines are not needed again after this point, so move
        // them into the chunk rather than copying.
        std::vector<std::string> linesPerFile(std::make_move_iterator(chunkBegin),
                                              std::make_move_iterator(chunkEnd));
        chunkBegin = chunkEnd;

        std::string outputFilename = "output_" + std::to_string(i) + ".txt";

        // Start a thread that joins the chunk into one string and writes it
        // out. The arguments are moved so the thread owns its own data.
        threads.emplace_back(writeLinesToFile, std::move(linesPerFile), std::move(outputFilename));
    }

    // Wait for all writer threads to finish.
    for (std::thread &thread : threads)
    {
        thread.join();
    }

    std::cout << "行次序已打乱并成功分配至" << numFiles << "个小文件中。" << std::endl;
}
/// Program entry point.
///
/// Usage: <program> <filename> <numFiles>
///
/// argv[1] is the path of the text file whose lines are shuffled and split;
/// argv[2] is the number of output files and must be a positive integer.
///
/// @return 0 after shuffleLinesInFile has been called, 1 if an argument is
///         missing or the file count is not a valid positive integer.
int main(int argc, char *argv[])
{
    // The input file path is taken from the command line.
    if (argc < 2)
    {
        std::cout << "请提供文件名" << std::endl;
        return 1; // Non-zero signals abnormal termination.
    }
    // argv[2] is read below, so it must be present as well.
    if (argc < 3)
    {
        std::cout << "请提供文件数量" << std::endl;
        return 1;
    }

    const std::string filename = argv[1];
    const std::string countArg = argv[2];

    // Parse the file count strictly: the whole argument must be a positive
    // integer. std::stoi throws on non-numeric or out-of-range input.
    int numFiles = 0;
    bool countIsValid = false;
    try
    {
        std::size_t consumed = 0;
        numFiles = std::stoi(countArg, &consumed);
        countIsValid = (consumed == countArg.size()) && (numFiles > 0);
    }
    catch (const std::exception &)
    {
        countIsValid = false;
    }
    if (!countIsValid)
    {
        std::cout << "文件数量无效:" << countArg << std::endl;
        return 1;
    }

    std::cout << "文件名: " << filename << std::endl;
    shuffleLinesInFile(filename, numFiles);
    return 0;
}