From 8ee429fe40201cf3b42a125b6f045990dc6dc747 Mon Sep 17 00:00:00 2001 From: nkumar2 Date: Fri, 25 Oct 2024 12:07:41 +0100 Subject: [PATCH] test for checking upload of variants with invalid characters in ref/alt --- .../steps/LoadVariantsStepAsteriskTest.java | 98 ++++++++++++++++++ .../input-files/vcf/small_asterisk.vcf.gz | Bin 0 -> 8037 bytes 2 files changed, 98 insertions(+) create mode 100644 src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepAsteriskTest.java create mode 100644 src/test/resources/input-files/vcf/small_asterisk.vcf.gz diff --git a/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepAsteriskTest.java b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepAsteriskTest.java new file mode 100644 index 00000000..a51098a1 --- /dev/null +++ b/src/test/java/uk/ac/ebi/eva/pipeline/configuration/jobs/steps/LoadVariantsStepAsteriskTest.java @@ -0,0 +1,98 @@ +/* + * Copyright 2015-2017 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package uk.ac.ebi.eva.pipeline.configuration.jobs.steps;
+
+import com.mongodb.client.model.Filters;
+import org.bson.Document;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.batch.core.JobExecution;
+import org.springframework.batch.core.JobParameters;
+import org.springframework.batch.test.JobLauncherTestUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.test.context.ActiveProfiles;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.TestPropertySource;
+import org.springframework.test.context.junit4.SpringRunner;
+import uk.ac.ebi.eva.pipeline.Application;
+import uk.ac.ebi.eva.pipeline.configuration.BeanNames;
+import uk.ac.ebi.eva.pipeline.configuration.jobs.GenotypedVcfJobConfiguration;
+import uk.ac.ebi.eva.test.configuration.BatchTestConfiguration;
+import uk.ac.ebi.eva.test.configuration.TemporaryRuleConfiguration;
+import uk.ac.ebi.eva.test.rules.TemporaryMongoRule;
+import uk.ac.ebi.eva.utils.EvaJobParameterBuilder;
+
+import static org.junit.Assert.assertEquals;
+import static uk.ac.ebi.eva.test.utils.JobTestUtils.assertCompleted;
+import static uk.ac.ebi.eva.utils.FileUtils.getResource;
+
+/**
+ * Test for {@link LoadVariantsStepConfiguration} using a VCF whose variants have special characters ('*', '>') in ref/alt
+ */
+@RunWith(SpringRunner.class)
+@ActiveProfiles({Application.VARIANT_WRITER_MONGO_PROFILE, Application.VARIANT_ANNOTATION_MONGO_PROFILE})
+@TestPropertySource({"classpath:common-configuration.properties", "classpath:test-mongo.properties"})
+@ContextConfiguration(classes = {GenotypedVcfJobConfiguration.class, BatchTestConfiguration.class, TemporaryRuleConfiguration.class})
+public class LoadVariantsStepAsteriskTest {
+
+    private static final int EXPECTED_VARIANTS = 2;
+
+    private static final String SMALL_VCF_FILE = "/input-files/vcf/small_asterisk.vcf.gz";
+
+    private static final String COLLECTION_VARIANTS_NAME = "variants";
+
+    @Autowired
+    @Rule
+    public TemporaryMongoRule mongoRule;
+
+    @Autowired
+    private JobLauncherTestUtils jobLauncherTestUtils;
+
+    private String input;
+
+    @Before
+    public void setUp() throws Exception {
+        input = getResource(SMALL_VCF_FILE).getAbsolutePath();
+    }
+
+    @Test
+    public void loaderStepShouldLoadAllVariants() throws Exception {
+        String databaseName = "test_invalid_variant_db";
+
+        // When the load variants step is launched with the test VCF as input
+        JobParameters jobParameters = new EvaJobParameterBuilder()
+                .collectionVariantsName(COLLECTION_VARIANTS_NAME)
+                .databaseName(databaseName)
+                .inputStudyId("1")
+                .inputVcf(input)
+                .inputVcfAggregation("NONE")
+                .inputVcfId("1")
+                .toJobParameters();
+
+        JobExecution jobExecution = jobLauncherTestUtils.launchStep(BeanNames.LOAD_VARIANTS_STEP, jobParameters);
+
+        // Then the variantsLoad step should complete correctly
+        assertCompleted(jobExecution);
+
+        // And the number of documents in the DB should be equal to the expected number of variants
+        assertEquals(EXPECTED_VARIANTS, mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).countDocuments());
+        // And each expected variant id should be stored exactly once
+        assertEquals(1, mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).countDocuments(Filters.eq("_id", "20_60343_G_*")));
+        assertEquals(1, mongoRule.getCollection(databaseName, COLLECTION_VARIANTS_NAME).countDocuments(Filters.eq("_id", "20_60419_A_>")));
+    }
+}
diff --git a/src/test/resources/input-files/vcf/small_asterisk.vcf.gz b/src/test/resources/input-files/vcf/small_asterisk.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..61d2c364da87c15dcb3da8325fd976d181989d04 GIT binary patch literal 8037 zcmZvB2UrtX_x`#TRuR!fK%@k9DWZtVQlv#hM5U=nFF{yAX#wew@}avF5zs|IrDQD# z(orF_B#SfwA@mj?LJ~q~ffSNS{Rj8^_P5VI|I9Np_hx49y!XsG@0|AxDE)^W-);TG zyS}?%_5F93L2sr!p@h%!QtKSp!zAH*=>~?Eamz2|I}Z|e?`?lrdvAL4_@O5@Ls2$A z+3pJs2Jt4PZ~SgQXt8nNMag+G9jsXNvh$#$qvBZWtJ1N~EV`Zrc*n7gxsUwyp}0e~
zr@mJ^E}CPpmJke5y^cuc9qNU_BgZwDKX*_I;7KiRR(&xx?u};+v|W<6J8e(guazSB zap~jxVzl+_+ld#WeX!8wDTQ;k>C?*j<14K<$H+yaV^)23MWK=U^}n{P=l>W~udNeM z;Qw~RdM0{KG485&_9U!(uagn;eSrK)!n9$6v;6YfMa`e`A0T&pc8?nVO&Y;G(fh}H z*~hik&K1v*fiMF>Ns`&lTqWqG6`ao30t28bj}QjFu(RTRbj0H+z5 z8mp1PHqWc-v!5vRTP|^ij~K|hWxEb+9``rIuRQ2GOj?@Rc${NBX*}#=iz(cjt&Dyb z+NWmUSEdX1Frd7!VrSb~O;@Ij#o}HXSX@R)`PPFP$p?2%eC^eV=_s3K*&m zayjYNPSrS}Q(y5%VeQ~<|K_VjD^20s8dHK`_eO78@74@Nh1_DUCcMAfSH+z2B*snX zCAoV%J?&%EPryw!-LVm>Bn%t+Whg@&wD|b7FT-q#@$PuzJ3xPe6e*j(s`F0D`UtR$ zbto`kpXsfac`Ep2e`HMp9ilswHwu*5H2+av@}myHrE})(uN&Q9m){=rSKdNehG5$r za0v0R;LCQmLZ?mSl-BVB)YnV|S3;BjK}M98?c z(Zzgy;!N$ISrb168ggnXnx;aJHE9gx-k!>_+f>fChc17puWAj|($*a-exT)2{ji5l zzt^0sI0jUmLd(w>nl|HZ1+SO;p?$H`&242q8 zjT%s=)@fTQp(mHw7f^pz!%dH4RREnpmsnrFeJmytj5*z3c!soe|MvQKRoX9OuNvX` zwzuEI1A`z2@1Qy)A5Bi69K?wB@cE0-aWdk~>h0jj<`bwF9k0UO`xY{yv8%0EVzcKb z>T-3gEAo*T0Y8T={1Ol-m17tYW;ASY!rt2|hQY5oD{g3dKAq`kfmV$9qJR~|A+YO| zQp|qB&g)seVK$no2FM~5_@KkbF1-1h^69sog{}kg;d`hvBRwEINN^|L13$CSCDM$= zmN`GA4Ssr*i>ym00n;0K>D{ukBDrnj$AojNk3zNj-muH_As_=Y`jsIlp1)6@JlcB1`Uq z)Y&~!5xmrfQbk4P#XCy-5*5`BD01eH9{F|Q*3YTwnW?r35he$8_O;Cwr_v%0&^va% z+_U`@Mg7#dqYf<6uLpImwKd5JQq3lNojl6b_atX)wI7IIQqN*uOAmjhmGxPg7q2rW zIrX%o>XUJZ(uMTXX|Lm+DXK_1hs|zSzS?f_W!Q#nEi`+Vb1h9};WryHTfyd2&c36S z$q|DGA}}2Xq@Ub5?vt^Z+HruRU~`bE{d;S9`V*A{Md`dDHk!q~hJhFSJ#el`mV(>wL>UfbKggnjbOy3+yPHz_@1%VF8I zu6eEy*-=VZZ%o`6htTS7PYVh1DU35Ogts*Ei~*i-U9jAeTqcAz%N4?e{eN0EM zm?EDv)cP71|9t=_8?`wm^^a!r z1_@^okWOeX8U9>7`n>j$q|yDwsXP2MTCcY2+;>iu`N8mfg{E7^#A9Ep_DegIj~HrB z9ID;jdNf*NceKVizn!gn+HZti`?2<+ue{&(`!f5>ce@-j{Apr;?Sa-ysTsQsFNIvF zx$J%Xg7@)EH_MvuYk&8xgGa9yhHh@;n9g7YR|NGrrQ6Y@UHwn z*Ic*RRH(Zpxm~H*8oAb7{ocO^--Q~xMixo%Rew+A@ z@Amef`yHZvZv;mU{R>FyHP-|G1oF=*x~=TqPB~!p>%=o(|8~tC=_Af|rRC3#x$Lhv zU-6S$nwxdap4x4#yHZDYlpj$3F(h>&)wed#^=XRhe~}y+asKrGLo%8wv)k}u$iDavl} z%v-sKok1C_{c6ZPjtP79PU{}`@bkF-@l(|K!EJ91tb^@R4yyW@yjuKW_>6VeGY9R% zMTZnTyf<8=5!U z24I8Of#(-ELji1G6@BMHvQqO4pBVb@glKomAVMA_7Am!m&?GeEgSWP?>)6weCk%gP 
z=Je&ItbS?Y>?$Ifsj6OLX8Cd3a8r%yWntHU~Mm zBn1V(;L||JFp*tt{xyGo?d>3?T=P47;B=%;_uDgJ76_f!vr$9#8EzN3nV=4ommo_Ka;py%vOd z)mZ0}IA%w+7{FEnA+C)ELOi6I)i_|}0j}luXKgH`6V@nnmU^{#*|msVf}FofpCG|O z=~*T50!EjO9uZSQ0)$zZyR$InS&&$mqKE6a*5W)J#+OBdkeJpi2#%@i!5^3axb?+A zx?(j_*ioW~eXm5FeIABOjv!lD7SzM&cLt(jsY@%qmL{DS|T#*~cpWr$a1AP=?O7aQcnZ79a_p!X=Ooj<+Oi=5?BpEr= zeFY8I60g_z`L<81nJ0#l_Di0l1?P@%0njjrOQ3K*f+$?aaFhVp)P% zPd+RD58UZmk_25{wqzcfF)E)+a36i1R1e4UEC!>ch+mbH(p04!U|esc%ifTQ2z4TP zSaz!H(8T(*O@dxvyozi=gDeGH?jLVYAAE{_n({Qojy+#EUvMQ4wzVsC^!hNIk-Qc= zWKZqT=+MwYHAIOmqJ`+?EN|qBJw`O9?ERVvn_))d$IT(JVhlJFFhNmiGr~vrRcX0Q z#yF+2*ApVh50e^N&D^f2Mtn@XRUiwfc5?Qk=7^Kmf%^`#HUnwI(?vEX$#VEXZhK@Z z=?=^NG1hb~i7<+kpTZ-WA*;2w%6TC_LQo z7&=V9$TQzq?29c2!YVatNf&wgvt>Z20j_cZJ=#+DqS}Qy>T&FPbEUd>@a0(Ebpuf4 z!pl*!k1vp8I!J@jUK`aV%Ih90RTjh7$GzO_nX3!4$3qLn9k)iSQR~psE;sf1ukX>^ zY^ykA{G^&0-86ez^5B#lM+H?cP^0Sw3xT7sie#$3xNHG3+ESh@8f9g<9FFly;aQbO zSt>@oR0Tqdn&ZQm&x0(S@~O}wTyz4SXoA^l#fuENm(+=ipN$r^X6)X`ftXK&)NT-7 zc?z0+)o0d>*FR>@Ki^tLfhAa<73rsZ-X$pPm*J|7Cs$B1a1{-C`K*CAD;wcyjMiEga8xi2;DbCR`XG!Qr7kUxb%Co{OkS3>GwKapXy^@ z6>P{C54rNPyR>tv9mo<}g6V*^B}kfvPqery{RV$u4*0#goW4aV4rCDQ0dzp7>80?B z(H2!o0>GKH4|Z@IQcO?tVulo6n+;9OKDa6|sacIBShZ@~4iY#Ng| zFC>w>`O*wf+dt+%mw0}}PNlT)8w*mGVVGm$aD*|qATLEn7BLHQwkU($1WFm#B@tqi zlcghj-E^%j+{eobob|cwE%x1b_3Obp36TUryk0S*zj3MX4-o4?*g)UIrNE;z1&bC6 z!dDO(KmpazAu(;@TqDj!N7KRUGSz%or?3Hi6+(O9FkN}#by{NZlIQ%z@he!e?dnoW z<(5A-r$s8KO%=kjp=F9hL+@Dmy>B3_bRVxut+s@-FN}>M!t$ZaP^@{x++8h7Yl6O@ z!0d*_4U6{!iQ;bzWcomFWSKar5~vP0AxfKob%xTbqXWoqP*IUzFRq?qZv~bu#(3c^ zL59|$*HNABs#v{-A#}WRWjkXyRcwDZa5CH1P4&vZ}Dm=yCb#Q$BEG zJqLK%IEZgI(*MdcDQf;UTnVll1)2Y)P|3TXLd!ojkNwMD(oEBtOU{r{SN2PbkrZCs zh~9KcUp#&AB`Sm5+XW;^1Buoa8)}7)EjCpBwXUzq+yA0h`*Ba$d?-fAf1@%D)~rr^ zOWaP&SUxTqdwHe5TaL0xOM(+hVRt_dp+ zN%#{XA95MJ%VA{gEinm}I5+4HO=btU1tv!axIvQHx7}Pj%V6_C-b&>`M%))X$rE*C zWBhzy@s*+;gqbKPF4w!+3in7}gHIe`V+({R-uM`}0{6*8rvl|y!d!?p1->lTQ)_)5 z>e2~9Xgpe!rb{0K-xA6Zn4x&IInhG+hdcV?J;qpt=L`1L6D4A!<)<$Ff|u?kAMbTq 
zov=i9T7B@jtD(s-z-HqJ#Qe6Ts4L^!c3r?#jW;Hu#)7oJ9_W<*upqJ$m!lNx;#A<(5lmR4h+TcJ6PJ&e)gS^g@;-`D$ zND(EQ6ccXe1>O~-I$Qy9r*fI14KXPUIltMjos?Pbk5}gTFQ&}Juj&WQ0V{ezV{*DI zCt{ArVhQ#{-C|S`vFYXH`0yuY=SjCGh;z6?Vw$Q1#0!z1`M?W=zxRMgG}Z(u@`f%f z_X%3+)?ifSCCa--v8H365Z!G=dz8I!?n`fed|a?Ch8kQ{9bd&#S?KOFYb-V^M%?k4 zz9F`aKkFDqJJ*-(sp(zDnWi=N<$G?XRh5WeI2th*bLSsBVT{Qk#^FbVS650AiWPAz zZ&B06Oa?s+7E5YYCN6YW8c{o5Ye{@v%7h4_(kQsB%%~53fAE0P@&2Gv#NAj^J^BKs zav8sX>4Tx4(+!FCQ~R@y>}$hWS%J^sLXVB4Ff*#(L1FHvYmRuwz!vMqX{JJLu5r;~ zqo(jCxb6ejrDKaa8KKxd^tS}Zcw;&b2+f~gHMPxO*oOV^GtqtOpxB+p?)|m`EL$ci zz+(Ds%a^x4h%f1cr>Mz~*-=-tAo?^)aL@SD>Pf%^UP5OTV!^C6=(-Hn(rCny-Pu+U z#R#6A`O-`%kR@Kg_52J-V8H=hX$2yw;-n05A z4i9ylU;RuEN@8VH*+;?Sz1~@XsC8PX1t2t<=b(qgN*OQ(_2dX86juB*5k7SoMw*K> zkKY<6zMJ_wRRbUV4$@yOiA*_fo7pi5Wo5O5@`M8@{uHB;64^`IEShFCo+ylH&LHSk z6B=f6s)NAXqcz9icW};jBMu5TQ8KjmBrAGTp(ItnI)F!Hsa+&N`)^a(Xmc8+x2e=! zvCbSI2kWtIO&MgqBOyPGS=jQ+Ic6TqTpKQXb-9dHs8#uTD7NTh=GEm=7Uo+ACFv01 zL!6?M_HZYnLK*)2_;T8I+NIWqFe|>3aOJbaqh*Ka7n?BaUibv9axPc&yR#V#e1&K~ z6#Inb{S#d33ej?ilgYvygHu0Gf!NaC#7GxB&GhmVI(M`3HoIW6{5HD)38#Rb-9knC-#&nhDZwY_DK8lJc7}HncIo*_`zDw-9)U>61Ca-(dw7Z;2 zFdUOO$0$8fc(PcfKK|KqjcW@(pw|gW0BfCEXew&Fm}PB zK;i|GKNbf7gYel&hnD$zG5z%Vb=*DxN<0(xINIM@lzArwNM=X^3^Aj{9biy2YyncfH$Ti_C~ zp^i!o;h?ZCz6ucwRoSH(QxFG33!_qCTG(>erm~qo(j`I!xX_$|btVVHbhiXRR*V<; z#9#VB;gROn}RrfRgy9( z4lT=CArBe+PmAbD+&|>mH~BIZyz~FYz5PFl6gQPW^*qW+D0q(mEkSH?p75`Oy24$Ck9 z%qr#oB7XHv(?#W+T70!7vS3trtt}~7o~8_c4P{-)OR5_GCiez}nG$<#HA@TKPd|n- z?6KWOAd7LxzwaPPe%86=qU#h~t`j7w?X+f%!p;9XQ zF0vyfDswGSKIGYF+$jBFRDKvZO3tzQxR{1{&a+Gq?KKgaNoJf)eM%h>JHc*X^`#BT zw%9k02^+SgBZ)^L)a5FpMJeH(%SLJ@yh@oEFo^;@=+8VS0Hz!KwD}QTXFHs|r zZSV|B@JA}#l=a{Y@RsL%W}RCSVMdAQA)Uc4 z3|Oyw2;+5!z}z37l5!YvD3K!`ulg{YzP!Sk4>re?a2+BFoiJv2pcG_$Ju%&}g*vZ4 zyD;x54Hya?%y<;@kL|H(y+7vZW_hZi1+9JAS>Nf37DUY;%rI(WYWlC@s*} z1)R`nu|r4UmAs|MKVlL(GMD%lS09n^32_LA1I5HH$EQr@Lv1Qm{(6Fvam1WO#XtNa z%|m0~=$>J#mm z(!c{fuHV;-ytfvnLmJ*e)ru9LOTZ_Ex7dK&D;gP42_|2D;8SB2ZA8vgG8cqu{i@b 
z&7cF5)%S96?F^_vfVi?MF{d1cW6Z=z3nutLmQYm1nCXQ;9F{p1VrDK5%Z2=H50F#i z!eOK;rH2H$07O*kBSI)kZOGYs9sF+=6Rl97BJc?F(LlNAyYE2%`jD>t-S%(a)weeP E55{j