-
Notifications
You must be signed in to change notification settings - Fork 0
/
fingerprint_2n_test.py
340 lines (265 loc) · 12.4 KB
/
fingerprint_2n_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# Ben Williams '25
# benjamin.r.williams.25@dartmouth.edu or roaf676@gmail.com
# Randomized Algorithms, May 23rd, 2023
# Testing the "unique fingerprint experiment" with the largest highly composite numbers known to man
from fingerprinting_2n_hash_experiment import input_ab_experiment_deterministic
from fingerprinting_2n_hash_experiment import input_ab_experiment_rand
from fingerprinting_2n_hash_experiment import find_succeeds_2n
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
start_line = 779174
# For storing the output of the worst HCN from the test
# Reads the primes from the primes1.txt (so I don't have to calculate them)
def get_primes_from_file(file_loc, max_count):
file = open(file_loc, 'r')
primes_list = []
prime_count = 0
line_count = 0
for line in file:
if line_count % 2 != 1:
line = line.strip()
line = line.strip("\n")
nums = line.split()
for num in nums:
primes_list.append(int(num))
prime_count += 1
if prime_count > max_count:
return primes_list
line_count += 1
return primes_list
primes = get_primes_from_file("./files/primes1.txt", 100000)
def test_highly_composites_from_file(file_input, file_output, max_line=None, output_all=False, print_info=False, random_test=False, start_line=None, plot_failure=False):
# Open the files (we assume they are not None)
hcn_file = open(file_input, 'r')
output_file = open(file_output, "w")
# Init all the variables
highly_comps = []
line_count = 1
fail_rates = []
indices = []
log2_list = []
max_fail_rate = 0
worst_hcn = (0, 0, [])
# Read lines ahead of time and do nothing until we are at the start line
if start_line:
while line_count < start_line:
hcn_file.readline()
line_count += 1
else:
# Otherwise just ignore the first line since its 0
hcn_file.readline()
# Loop through every HCN (each line is an HCN)
for line in hcn_file:
# Stop if we are at the max-wanted HCN
if max_line:
if line_count > max_line:
break
# Init hcn
hcn = 1
# Handle the line
line = line.strip()
line = line.strip("\n")
num_pairs = line.split()
# The first entry is the number of unique primes
num_primes = num_pairs.pop(0)
relevant_primes = []
# Make a smaller list (maybe not necessary)
for i in range(int(num_primes)):
relevant_primes.append(primes[i])
# Keep track of which prime we are multiplying
prime_index = 0
prev_prime_index = 0
prime_list = [] # in the format of (prime, exponent)
# Loop through each set of exponent ^ number of primes it is repeated for
for pair in num_pairs:
# first entry is exponent, second is number of times we repeat said exponent
array = pair.split("^")
# If we are repeating the exponent for more than one prime...
if len(array) == 2:
while prime_index < int(array[1]) + prev_prime_index:
hcn *= relevant_primes[prime_index] ** int(array[0])
prime_list.append((relevant_primes[prime_index], int(array[0])))
prime_index += 1
else:
# If we are only using the exponent for this single prime
hcn *= relevant_primes[prime_index] ** int(array[0])
prime_list.append((relevant_primes[prime_index], int(array[0])))
prime_index += 1
prev_prime_index = prime_index
highly_comps.append(hcn)
# Calculate the log_2 of the hcn, so that we know what n to use for the experiment
log2 = math.log(hcn, 2)
log2_list.append(log2)
# Run the experiment, add one to the log2 to correspond with the binary
# Either run the test with every possible factor 1->2n, or with random r's
if not random_test:
fail_rate = input_ab_experiment_deterministic((0, hcn), math.ceil(log2) + 1)
else:
fail_rate = input_ab_experiment_rand((0, hcn), math.ceil(log2) + 1, 1000)
indices.append(line_count)
fail_rates.append(fail_rate)
# Keep track of which fail rate is the worst, and its respective hcn
if fail_rate > max_fail_rate:
max_fail_rate = fail_rate
worst_hcn = (max_fail_rate, hcn, prime_list)
# If we want to print info for every hcn
if print_info:
if hcn != 0:
print(f'log2 : {log2}, hcn: {hcn}')
print(f'Fail rate for hcn # {line_count} = {fail_rate}')
# If we want to output only the worst HCN per 1000 (obviously much faster)
if not output_all:
if line_count % 1000 == 0:
output_file.write(f'By HCN # {line_count} with log2 = {log2}, Worst Fail Rate = {worst_hcn[0]}, HCN = {worst_hcn[1]} \n')
print(f'Line number: {line_count}')
else:
# or output every HCN's fail rate to the file
output_file.write(str((fail_rate, hcn)) + "\n")
line_count += 1
log2_worst = math.ceil(math.log(worst_hcn[1], 2))
print(f'\nWorst hcn: {worst_hcn[1]} \nLog2: {log2_worst} \nFail Rate: {worst_hcn[0]} \nPrimes and Exponents:{worst_hcn[2]}')
# Plot the probability of failure for all the HCNs we have seen
if plot_failure:
percent_fail_rates = []
for fr in fail_rates:
percent_fail_rates.append(fr * 100)
plot = plt.subplot2grid((2, 2), (0, 0), rowspan=2, colspan=2)
plot.plot(log2_list, percent_fail_rates)
# Set the title of the plot accordingly
if start_line:
if max_line:
plot.set_title(f'Probability of failure from HCN #{start_line} to HCN #{max_line}')
else:
plot.set_title(f'Probability of failure from HCN #{start_line} to HCN #779674')
elif max_line:
plot.set_title(f'Probability of failure from the first HCN to HCN #{max_line}')
else:
plot.set_title(f'Probability of failure for HCNs as n grows')
plot.set_xlabel("Log_2(HCN)")
plot.yaxis.set_major_formatter(mtick.PercentFormatter())
plot.set_ylabel("Percentage of numbers <= 2n as factors")
plt.show()
# Only doable for smaller HCNs, since they just get too big to plot and matplotlib crashes
# plt.plot(highly_comps, fail_rates)
# plt.show()
return worst_hcn[1], log2_worst, worst_hcn[2]
# Test the first 10,000 HCNs
# test_highly_composites_from_file("./files/HCN.txt", "./files/null.txt", max_line=10000, plot_failure=True)
# test_highly_composites_from_file("./files/HCN.txt", "./files/null.txt", start_line=start_line, plot_failure=True)
# Use the information to optimize the HCN
# I changed the start line and max line to closely straddle the worst HCN in the last 500 for my own sanity
triple = test_highly_composites_from_file("./files/HCN.txt", "./outputs/null.txt", start_line=779374, max_line=779474)
# Recalculates a number given its list of primes and their exponents
# Needs to be done since we crash when dividing along the way
def recalc_candidate(primes_and_exponents):
result = 1
for pe in primes_and_exponents:
result *= (pe[0] ** pe[1])
return result
# Takes any number as the parameter and gets its prime factorization
def get_prime_factors(num, print_info=False):
copy = num
i = 2
index_map = dict()
prime_factors = []
if print_info:
print(f'Getting prime factors for {num}')
while i * i < copy:
while copy % i == 0:
if index_map.get(i) is None:
prime_factors.append((i, 1))
index_map[i] = len(prime_factors) - 1
else:
prime_factors[index_map[i]] = (i, prime_factors[index_map[i]][1] + 1)
copy /= i
i += 1
if len(prime_factors) == 0:
prime_factors.append((i, 1))
return prime_factors, None
return prime_factors, index_map
# Counts the number of primes from a start prime and a finish number
# (here, the last prime in the prime factorization, and 2*n as the finish)
def missing_primes_count(start, finish):
count = 0
# Define this to be the total number of primes and multiples of primes that we are missing below 2n
total_wrong_count = 0
# Find the start prime in the list
index = 0
while primes[index] != start:
index += 1
# to account for the "start" prime
index += 1
while primes[index] < finish:
total_wrong_count += 1
count += 1
mult = 2
while primes[index] * mult < finish:
total_wrong_count += 1
mult += 1
index += 1
return count, total_wrong_count
# Takes a HCN as input, prints out the info about the best
def optimize_hcn_for_2n_attempt(hcn, log2, primes_and_exponents):
log2 = math.ceil(log2)
old_fail_rate = input_ab_experiment_deterministic((0, hcn), log2)
succeeding_nums = find_succeeds_2n((0, hcn), log2)
candidate_pe = primes_and_exponents
remove_count = 0
# Get rid of all excess exponents
for i in range(len(candidate_pe)):
while candidate_pe[i][0] ** candidate_pe[i][1] > 2*log2:
candidate_pe[i] = (candidate_pe[i][0], candidate_pe[i][1] - 1)
remove_count += 1
print(f'Removed {remove_count} prime-exponents')
candidate = recalc_candidate(candidate_pe)
#print(f'Old log2: {log2}, New log2: {math.log(candidate, 2)}')
diff_count = 0
# Takes in the first number that succeeded (also the smallest)
remove = succeeding_nums.pop(0)
remove_prime_factors, index_map = get_prime_factors(remove)
while candidate * remove < 2**log2:
# Find the differences in prime factorization, and just multiply those, rather than the full "remove":
prime_differences = []
difference = False
for i in range(len(remove_prime_factors)):
for j in range(i, len(candidate_pe)):
# If we are at the same prime factor
if remove_prime_factors[i][0] == candidate_pe[j][0]:
if remove_prime_factors[i][1] > candidate_pe[j][1]:
prime_differences.append((remove_prime_factors[i][0], remove_prime_factors[i][1] - candidate_pe[j][1]))
difference = True
# The removed number is a prime
if not difference:
candidate_pe.append((remove, 1))
candidate *= remove
diff_count += 1
else: # Otherwise add the necessary different prime factors between
for diff in prime_differences:
for i in range(len(candidate_pe)):
if candidate_pe[i][0] == diff[0]:
candidate_pe[i][1] += diff[1]
candidate *= (diff[0] ** diff[1])
diff_count += diff[1]
# More than one number could have been removed by adding these prime
succeeding_nums = find_succeeds_2n((0, candidate), log2)
# print(f'Current Fail Rate: {1 - (len(succeeding_nums) / (2*log2))}')
if len(succeeding_nums) == 0: # If by some miracle we succeed...
print(f'Success with {candidate}')
else:
remove = succeeding_nums.pop(0)
remove_prime_factors, index_map = get_prime_factors(remove)
new_fail_rate = 1 - (len(succeeding_nums) / (2*log2))
print(f'Total prime differences = {diff_count}')
print(f'Best optimized fail rate: {1 - (len(succeeding_nums) / (2*log2))}\nNumber: {candidate}\nImproved(?) by: {new_fail_rate - old_fail_rate}\n New prime factorization:\n{candidate_pe}')
# To figure out how many primes we are missing before 2*n
final_prime = candidate_pe[len(candidate_pe) - 1][0]
missing_count = missing_primes_count(final_prime, 2*log2)
print(f'Missing {missing_count[0]} primes before 2n = {2*log2}')
print(f'Missing {missing_count[1]} primes and prime-multiples before 2n = {2 * log2}')
return new_fail_rate, candidate
# Only run the final algorithm if we can
if triple:
output = open("../outputs/optimized_hcn_final_last_500", 'w')
info = optimize_hcn_for_2n_attempt(triple[0], triple[1], triple[2])
output.write("Optimized HCN: " + str(info[1]) + "\nFail Rate:" + str(info[0]) + "\nLog2: " + str(math.ceil(math.log(info[1], 2))))