Skip to content

Commit

Permalink
[TST] Commit the manually reconstructed test flake. (#3124)
Browse files Browse the repository at this point in the history
The actual test flake shows that record segments were not found during
the test run.  I cannot reproduce it locally, so I am submitting the
extracted test as an exemplar so that we keep testing it in the future.
  • Loading branch information
rescrv authored Nov 21, 2024
1 parent b58fa86 commit 232db53
Showing 1 changed file with 384 additions and 0 deletions.
384 changes: 384 additions & 0 deletions chromadb/test/property/test_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,390 @@ def test_add_then_delete_n_minus_1(client: ClientAPI) -> None:
state.teardown()


def test_embeddings_flake1(client: ClientAPI) -> None:
    """Replay a hand-reconstructed operation trace that flaked once.

    The trace drives an ``EmbeddingStateMachine`` through a fixed sequence of
    add / upsert / update calls on a 664-dimensional collection and re-runs
    the four state-machine checks (``ann_accuracy``, ``count``,
    ``fields_match``, ``no_duplicates``) after initialization and after every
    operation.  Every embedding in the trace is an all-zero vector.

    The inputs are a recorded exemplar: the order of the operations, the ids
    and the metadata values are part of the reproduction and must not be
    reordered or "cleaned up".

    Args:
        client: API client handed to the state machine.
    """
    dim = 664

    def zeros(count: int) -> list:
        # One fresh, independent all-zero vector per record.
        return [[0] * dim for _ in range(count)]

    state = EmbeddingStateMachine(client)

    def check_invariants() -> None:
        # Same four checks, in the same order, as the recorded trace.
        state.ann_accuracy()
        state.count()
        state.fields_match()
        state.no_duplicates()

    state.initialize(
        collection=strategies.Collection(
            name="fOIBy\n",
            metadata={
                "-7n": False,
                "92WhVE_": "HtmY",
                "J-sW": "RTip",
                "wPGA8hY7uX": -171,
                "4rA": "5KdoaYsUQ_EWStV4",
                "hnsw:construction_ef": 128,
                "hnsw:search_ef": 128,
                "hnsw:M": 128,
            },
            embedding_function=None,
            id=uuid.UUID("ff006990-82c3-494b-97d5-cbb05092c861"),
            dimension=dim,
            dtype=np.float16,
            known_metadata_keys={},
            known_document_keywords=[],
            has_documents=False,
            has_embeddings=True,
        )
    )
    check_invariants()

    # Step 1: add six records.
    (
        embedding_ids_0,
        embedding_ids_1,
        embedding_ids_2,
        embedding_ids_3,
        embedding_ids_4,
        embedding_ids_5,
    ) = state.add_embeddings(
        record_set={
            "ids": ["kgaT4d", "C2h2YoNSgUqRyE-Tmxf3MT", "ODI-yO", "t", "b", "vC"],
            "embeddings": zeros(6),
            "metadatas": [
                {
                    "s": False,
                    "d1wQJV-9": -2_021_928_494,
                    "hWf7gwQ": "5DkqA9o6",
                    "rbyHg": 0.0,
                    "Pe": 251,
                    "0r6qQ5XYxeq": -0.3333333432674408,
                    "PzXpiqB": "VT",
                },
                None,
                {
                    "hqTZ6Ok767eCSwyvGEuig8a": -659321220,
                    "TRGxN": -0.3333333432674408,
                    "1h8I": "E",
                },
                {"ATRs": -0.3333333432674408, "KF0P": -23106},
                {
                    "PcFwu": -14169,
                    "PS": 0.0,
                    "WCgx": -13116,
                    "EQt": False,
                    "upcOfhu": -1.5,
                    "e": "vReD",
                    "U": -2147,
                    "zI4tO": True,
                    "MfHM7uU58tW_muctZf": -22,
                    "SvOy": 2.220446049250313e-16,
                },
                {
                    "iuTAKznMg6IdUKxaPi": -58907,
                    "oy": "uDC",
                    "c0Zb3VTUktBu-uW": "OcywKhsi",
                    "6i": -42181,
                    "nn": 5.960464477539063e-08,
                    "bs": "-",
                    "om": -1000000.0,
                    "MXnpsEEE": True,
                    "Ful8JRj": -304752924,
                    "Hi7lrY": True,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 6, accuracy threshold 1e-06
    check_invariants()

    # Step 2: add one record; the id is passed as a bare string, as recorded.
    (embedding_ids_6,) = state.add_embeddings(
        record_set={
            "ids": "ua",
            "embeddings": zeros(1),
            "metadatas": None,
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 7, accuracy threshold 1e-06
    check_invariants()

    # Step 3: add two records.
    embedding_ids_7, embedding_ids_8 = state.add_embeddings(
        record_set={
            "ids": ["K_", "yFsH"],
            "embeddings": zeros(2),
            "metadatas": [
                None,
                {
                    "RiaaN9MNpq": -634040344,
                    "g9Wx": True,
                    "uexOH": -2.220446049250313e-16,
                    "h2": True,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 9, accuracy threshold 1e-06
    check_invariants()

    # Step 4: upsert three new ids.
    state.upsert_embeddings(
        record_set={
            "ids": ["SCeelWyLAWG_oHa", "lY", "3"],
            "embeddings": zeros(3),
            "metadatas": [
                {
                    "0ZbYq40P": 448094799,
                    "OT9sTxkM": 9.999999747378752e-06,
                    "-j": 158,
                    "rqsBEfrELJctJoVeLqtsPZp": -100,
                    "5M4": 64676,
                    "XFt": 227,
                    "ii": 168135.75,
                    "ly": True,
                },
                {"Dy6": "q7LZUW"},
                {
                    "fP": "KuQG8m-T",
                    "APtmt": False,
                    "xKb6": -2_147_483_647,
                    "C": "xGw",
                    "G18V": False,
                    "s": True,
                    "c-": "k",
                    "G92n": -7024,
                    "YTTBWs31rbM_L_PQDSCu": False,
                    "xOGzFeG": True,
                    "gh7cuT_ruA3mn": 883101.75,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 12, accuracy threshold 1e-06
    check_invariants()

    # Step 5: upsert three new ids together with two previously added ones.
    state.upsert_embeddings(
        record_set={
            "ids": [
                "O3m3-X1",
                "ZNt2PF6M5_q",
                "Ij0Yh6",
                embedding_ids_1,
                embedding_ids_7,
            ],
            "embeddings": zeros(5),
            "metadatas": [
                {
                    "2fDAuv7": -46139,
                    "4Et": 19926,
                    "5hqGH60G-yZ6PWyM1B": False,
                    "OkMjjG": "34oWsr93EUl",
                    "yTk": 999999.0,
                    "wZvpmS5HbTAI": -9.999999747378752e-06,
                    "bvq": "Xc80e",
                    "zPhL": "e-QXuDdnxYMd",
                },
                {
                    "WK": -9.999999747378752e-06,
                    "y": "g",
                    "GNZphPCKay88gsh3x_": 1.899999976158142,
                },
                {"_zVO2i-N": -40, "tWHxo": False, "ltu_E_fg": "JDc", "9yGpik": -153},
                {
                    "otM8": "ZnQ3ALwA",
                    "EGeKm": 50,
                    "skf71O0UKT": True,
                    "S8Kc8-l95Rpc": True,
                    "4bGz1QmzbKVySN1yrXFl56CmDS08F": 1_284_815_517,
                },
                None,
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 15, accuracy threshold 1e-06
    check_invariants()

    # Step 6: update five existing records.
    state.update_embeddings(
        record_set={
            "ids": [
                embedding_ids_1,
                embedding_ids_3,
                embedding_ids_8,
                embedding_ids_5,
                embedding_ids_6,
            ],
            "embeddings": zeros(5),
            "metadatas": [
                {
                    "hBFXAIA": False,
                    "Wx4dcB5": -35,
                    "8w": False,
                    "8": False,
                    "mwQ5": "c7",
                    "G9g2": "J",
                    "VY": True,
                    "VQGb_r-hzoA": -0.9999899864196777,
                    "M0lMig": True,
                    "F": True,
                    "J": 1.100000023841858,
                    "d": "R",
                    "DugrcoZv": False,
                    "45B": -2.0000100135803223,
                    "UG-sSV": False,
                    "cri4cT1G": -1_067_180_133,
                    "I": -4411,
                    "FqFWR__": False,
                    "4": -23,
                    "vwo4WERBljY3aWjWnqL": "xM0jUV4U2r",
                    "WF": "msuFYMwj_SXc",
                },
                None,
                {"m": -49054, "f4": 239658268, "Ut": False, "V_NVCw": "5"},
                {"VWuP": -9.999999747378752e-06, "7uF8": 127, "3": False},
                {
                    "a1": -6.103515625e-05,
                    "ML_Zl2Ir85KolESaX": False,
                    "iJvA": -1.5,
                    "O8o": 1_287_175_929,
                    "rMS": 200,
                    "0": -1000000.0,
                    "5AeE": 9.999999747378752e-06,
                    "2q": True,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 15, accuracy threshold 1e-06
    check_invariants()

    # Step 7: update four existing records; one metadata key is itself an id.
    state.update_embeddings(
        record_set={
            "ids": [
                embedding_ids_1,
                embedding_ids_2,
                embedding_ids_8,
                embedding_ids_3,
            ],
            "embeddings": zeros(4),
            "metadatas": [
                {"Yx": "6T9tEEC84", "lGe5GMX": 3054},
                {
                    "UvsAljL5V5ELRv": True,
                    embedding_ids_3: False,
                    "yeLTrhAIq": 1.5,
                    "iP": -0.5,
                },
                {"C": "Ri"},
                {
                    "pzHn2": -9.999999747378752e-06,
                    "YfdftMEd0C5ekByb7mhdb": 9735,
                    "LJCViu": 333447280,
                    "LT": True,
                    "5Y": False,
                    "OoVwE": False,
                    "vq": 1.899999976158142,
                    "8Wf6": False,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 15, accuracy threshold 1e-06
    check_invariants()

    # Step 8: update one record; metadatas is a bare dict, as recorded.
    state.update_embeddings(
        record_set={
            "ids": [embedding_ids_5],
            "embeddings": zeros(1),
            "metadatas": {
                "C1KbOOlKkzzLo9CGU2": -1_379_550_593,
                "NH": "d",
                "M": "ebEKOx",
                "fpu77F70Icl": True,
                "dz6fI-Gpp": True,
                "qVVW": -63204,
                "Qrcq645F": 296029.46875,
            },
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 15, accuracy threshold 1e-06
    check_invariants()

    # Step 9: add four records.
    (
        embedding_ids_9,
        embedding_ids_10,
        embedding_ids_11,
        embedding_ids_12,
    ) = state.add_embeddings(
        record_set={
            "ids": ["F7", "Rig1", "RXi", "_nC8-"],
            "embeddings": zeros(4),
            "metadatas": [
                {
                    "FBtaPcQWV24v": -25365,
                    "ddLq1My3mbUL9I": 2019,
                    "fI": 908902.125,
                    "HLxuosT": False,
                },
                {"ATUP1": -1.5},
                {"AhC": True, "wm9AwP": -0.9999899864196777},
                {"K": -33427},
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 19, accuracy threshold 1e-06
    check_invariants()

    # Step 10: upsert three new ids together with one previously added one.
    state.upsert_embeddings(
        record_set={
            "ids": ["4GJ", "r", "Aunf5", embedding_ids_5],
            "embeddings": zeros(4),
            "metadatas": [
                {"J8O0R8VGaY": True},
                {
                    "K2cCg": 5.960464477539063e-08,
                    "oObAcp": -2.0000100135803223,
                    "ax": "nK67g",
                    "afzp": 1000000.0,
                    "xnRCSPJUF4JZ2sKOIRDc": True,
                    "nBaQ6F1O38etVMhss2angu-": 158622.671875,
                },
                {
                    "UwbDWM2_": 9.999999747378752e-06,
                    "3": -452142.625,
                    "nfoovt": 214128.375,
                    "elaMLbhEvW": 1.100000023841858,
                    "0": "iSNcMrT",
                    "UO": True,
                    "I": 176,
                    "3ssGS4rSKXsKqRPFTBGrRPPsu": 1000000.0,
                    "Gw": False,
                    "V": True,
                },
                {"F": "tTw"},
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 22, accuracy threshold 1e-06
    check_invariants()

    # Step 11: update two existing records.
    state.update_embeddings(
        record_set={
            "ids": [embedding_ids_1, embedding_ids_9],
            "embeddings": zeros(2),
            "metadatas": [
                {
                    "ei": -6.103515625e-05,
                    "_": "qscyRBC_",
                    "TP": "IXd",
                    "N0FG7Nta1": -745247.375,
                    "woD": 66,
                    "IV": "0L3xImGg",
                    "9N--JBl0uH_au_": -0.5,
                    "KVmhtcA": -9.999999747378752e-06,
                    "qr": False,
                    "NfL6": -0.9999899864196777,
                    "taIVpC": True,
                    "XJX": "l",
                    "5": 66,
                    "8YaEynJznB": True,
                    "k": -177,
                    "N": 671709.375,
                    "ebB": 53239,
                    "fJ": 65709.09375,
                    "QK8l3l4yP-": False,
                    "2": "cRl59jW_O",
                    "-XP899RRn": -999999.0,
                    "A9": 1.1754943508222875e-38,
                    "UlxNwmc": True,
                    "G": 128,
                    "1NoCd": False,
                    "WRn5cD": -175840.15625,
                },
                {
                    "zAbCKkEvE4s": True,
                    "hnFN": "HExeVM0iM",
                    "Uc9": False,
                    "v": 1_759_514_963,
                    "X": False,
                    "W": 1.100000023841858,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 22, accuracy threshold 1e-06
    check_invariants()

    # Step 12: update one record's embedding only (no metadata).
    state.update_embeddings(
        record_set={
            "ids": [embedding_ids_2],
            "embeddings": zeros(1),
            "metadatas": None,
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 22, accuracy threshold 1e-06
    check_invariants()

    # Step 13: update five existing records.
    state.update_embeddings(
        record_set={
            "ids": [
                embedding_ids_10,
                embedding_ids_2,
                embedding_ids_4,
                embedding_ids_12,
                embedding_ids_3,
            ],
            "embeddings": zeros(5),
            "metadatas": [
                {"Y": "-iRt8"},
                {"55m28": "8MxYq", "krQsTFdqMhYjhF": False},
                None,
                {
                    "9SnviLf": -6.103515625e-05,
                    "Y0Jw4pLTwr": -184,
                    "v3E": 6.103515625e-05,
                    "Fx3jsbcdqy": "VG7E7xm",
                    "H": 9071,
                    "-U": "1xXUHLklmIVSVgQd7EHUCu5wa",
                    "S": "kl6",
                },
                {
                    "U": -12,
                    "Qfm_6duL": False,
                    "Sh0LkduZt5qsRJrF": "sB",
                    "8DM": -64114,
                    "MZ": "xtLNrNyRo2",
                    "lY": -922831.5,
                    "7": False,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 22, accuracy threshold 1e-06
    check_invariants()

    # Step 14: upsert two new ids together with three previously added ones.
    state.upsert_embeddings(
        record_set={
            "ids": [embedding_ids_0, embedding_ids_7, "Oia", "iD", embedding_ids_5],
            "embeddings": zeros(5),
            "metadatas": [
                None,
                {
                    "tVs": True,
                    "B": "4eK",
                    "zTR": True,
                    "bq6VslBBo2_12hgyKNPddxify34-np-": -22311,
                    "F7FcZpODwCTHg91o4mKTjBL": False,
                    "1Zjfys": -13897,
                    "lg3": -866314519,
                },
                {
                    "1qr": "_TG-YhAQ",
                    "TKV": "Q",
                    "8tLu": 1000000.0,
                    "QHsxa": 1.100000023841858,
                    "F": True,
                },
                {
                    "p": True,
                    "rR": "UepiV6K_",
                    "UDZ_uR": -1.5,
                    "fFG6cZvICaGc": True,
                    "unTbxz0qd2-AV1": -332950.25,
                },
                {
                    "EXXVBZU": 2_147_483_647,
                    "tJMO": "C9OePg",
                    "4o": False,
                    "F8g8n": -999999.0,
                    "5": "aBY",
                    "hv3i": -48091,
                },
            ],
            "documents": None,
        }
    )
    # recorded: recall 1.0, missing 0 out of 24, accuracy threshold 1e-06
    check_invariants()

    state.teardown()


def test_update_none(caplog: pytest.LogCaptureFixture, client: ClientAPI) -> None:
state = EmbeddingStateMachine(client)
state.initialize(
Expand Down

0 comments on commit 232db53

Please sign in to comment.