// Fileinfo.hh
/*
copyright 2006-2018 Paul Dreik (earlier Paul Sundvall)
Distributed under GPL v 2.0 or later, at your option.
See LICENSE for further details.
*/
#ifndef Fileinfo_hh
#define Fileinfo_hh
#include <array>
#include <cstdint>
#include <string>
// os specific headers
#include <sys/types.h> //for off_t and others.
/**
 * Describes a single file: its name, where it was found (command line
 * index and directory depth), what the filesystem reported about it, and a
 * small buffer with some of its bytes or a checksum.
 *
 * The object is deliberately kept compact: all found files are stored in
 * one large vector, and smaller elements are probably friendlier to the
 * cache.
 */
class Fileinfo
{
public:
  /// Type used for file sizes in bytes (off_t is provided by sys/types.h).
  using filesizetype = off_t;

  /// Tells fillwithbytes() what kind of data to place in the byte buffer.
  enum class readtobuffermode : signed char
  {
    NOT_DEFINED = -1,
    READ_FIRST_BYTES = 0,
    READ_LAST_BYTES = 1,
    CREATE_MD5_CHECKSUM = 2,
    CREATE_SHA1_CHECKSUM = 3,
    CREATE_SHA256_CHECKSUM = 4,
    CREATE_SHA512_CHECKSUM = 5,
  };

  /// Classification of a file with respect to its duplicates.
  enum class duptype : char
  {
    DUPTYPE_UNKNOWN,
    DUPTYPE_FIRST_OCCURRENCE,
    DUPTYPE_WITHIN_SAME_TREE,
    DUPTYPE_OUTSIDE_TREE
  };

  /**
   * Creates an entry for a file that has not been examined yet: the delete
   * flag is cleared, the duplicate type is DUPTYPE_UNKNOWN, the identity is
   * zero and the byte buffer is filled with '\0'.
   * @param name the file name, including path
   * @param cmdline_index position on the command line where it was found
   * @param depth directory depth at which it was found
   */
  Fileinfo(std::string name, int cmdline_index, int depth)
    : m_filename(std::move(name))
    , m_cmdline_index(cmdline_index)
    , m_depth(depth)
  {
  }

  // ---- duplicate type -----------------------------------------------------

  /**
   * Gets a string with the duptype of a file.
   * @param A the file to describe
   * @return a C string for the duplicate type of A
   */
  [[gnu::pure]] static const char* getduptypestring(const Fileinfo& A);

  /// Stores the duplicate type.
  void setduptype(duptype duptype_) { m_duptype = duptype_; }

  /// Returns the duplicate type that is currently stored.
  duptype getduptype() const { return m_duptype; }

  // ---- identity -----------------------------------------------------------

  /// Returns the number identifying this individual file.
  std::int64_t getidentity() const { return m_identity; }

  /// Free-standing form of getidentity(), handy where a callable is needed.
  static std::int64_t identity(const Fileinfo& A) { return A.m_identity; }

  /// Assigns the number identifying this individual file.
  void setidentity(std::int64_t id) { m_identity = id; }

  // ---- filesystem queries and actions -------------------------------------

  /**
   * Reads info about the file by querying the filesystem.
   * @return false if it was not possible to get the information.
   */
  bool readfileinfo();

  /// Makes a symlink of "this" that points to A.
  /// NOTE(review): presumably zero on success, like deletefile() - confirm.
  int makesymlink(const Fileinfo& A);

  /// Makes a hardlink of "this" that points to A.
  /// NOTE(review): presumably zero on success, like deletefile() - confirm.
  int makehardlink(const Fileinfo& A);

  /**
   * Deletes the file from the file system.
   * @return zero on success
   */
  int deletefile();

  /// Makes a symlink of A that points to B.
  static int static_makesymlink(Fileinfo& A, const Fileinfo& B);

  /// Makes a hard link of A that points to B.
  static int static_makehardlink(Fileinfo& A, const Fileinfo& B);

  /// Deletes file A, which is a duplicate of B.
  static int static_deletefile(Fileinfo& A, const Fileinfo& B);

  // ---- delete flag --------------------------------------------------------

  /// Sets the delete flag.
  void setdeleteflag(bool flag) { m_delete = flag; }

  /// Returns the delete flag.
  bool deleteflag() const { return m_delete; }

  // ---- simple accessors ---------------------------------------------------

  /// Returns the file size in bytes.
  filesizetype size() const { return m_info.stat_size; }

  /// Returns true if the file has size zero.
  bool isempty() const { return m_info.stat_size == 0; }

  /// File size comparison: true if the size is strictly below minsize.
  bool is_smaller_than(Fileinfo::filesizetype minsize) const
  {
    return m_info.stat_size < minsize;
  }

  /// Returns the inode number.
  unsigned long inode() const { return m_info.stat_ino; }

  /// Returns the device.
  unsigned long device() const { return m_info.stat_dev; }

  /// Returns the file name.
  const std::string& name() const { return m_filename; }

  /// Returns the command line index this item was found at.
  int get_cmdline_index() const { return m_cmdline_index; }

  /// Returns the directory depth this item was found at.
  int depth() const { return m_depth; }

  /// Returns true if the file is a regular file. Call readfileinfo first!
  bool isRegularFile() const { return m_info.is_file; }

  /// Returns true if the file is a directory. Call readfileinfo first!
  bool isDirectory() const { return m_info.is_directory; }

  // ---- byte buffer --------------------------------------------------------

  /**
   * Fills the buffer with bytes from the file. lasttype is used to see if
   * the file needs to be read again, which is useful when the file is
   * shorter than the length of the buffer.
   * @param filltype what to put into the buffer
   * @param lasttype what the buffer was filled with the previous time
   * @return zero on success
   */
  int fillwithbytes(readtobuffermode filltype, readtobuffermode lasttype);

  /// Returns a pointer to the first byte of the buffer.
  const char* getbyteptr() const { return m_somebytes.data(); }

  /// Returns the number of bytes the buffer holds (SomeByteSize).
  std::size_t getbuffersize() const { return m_somebytes.size(); }

private:
  /// What is stored about the file.
  struct Fileinfostat
  {
    filesizetype stat_size; ///< size
    unsigned long stat_ino; ///< inode
    unsigned long stat_dev; ///< device
    bool is_file;
    bool is_directory;
    /// Only declared here; the definition is not part of this header.
    Fileinfostat();
  };

  /// Info about the file.
  Fileinfostat m_info;

  /// The name of the file, including path.
  std::string m_filename;

  /// Whether the file is to be deleted or not.
  bool m_delete = false;

  /// The duplicate type.
  duptype m_duptype = duptype::DUPTYPE_UNKNOWN;

  // Ranking: if two files are found to be identical, the one with the
  // highest ranking is chosen. The rules are listed in the man page:
  // lowest command line index wins, followed by the lowest depth, then the
  // one that was found first.

  /**
   * The order in which this file appeared on the command line. It can't be
   * const, because then the implicitly defined assignment needed by the stl
   * would be ill-formed. An int is fine, because that is what argc/argv use.
   */
  int m_cmdline_index;

  /// The directory depth at which this file was found.
  int m_depth;

  /// A number identifying this individual file. Used for ranking.
  std::int64_t m_identity = 0;

  /// Number of bytes in the buffer below.
  static const int SomeByteSize = 64;

  /// A buffer that will be filled with some bytes of the file or a hash.
  /// The empty braces zero every element.
  std::array<char, SomeByteSize> m_somebytes{};
};
#endif