diff --git a/Bitmapper_main.cpp b/Bitmapper_main.cpp index 2285e88..e0eb99e 100644 --- a/Bitmapper_main.cpp +++ b/Bitmapper_main.cpp @@ -53,7 +53,6 @@ int main(int argc, char *argv[]) double loadingTime; double mappingTime; char outputFileName[NAME_LENGTH]; - char bam_outputFileName[NAME_LENGTH]; @@ -69,18 +68,17 @@ int main(int argc, char *argv[]) return 1; } + if(Mapped_File) + { + sprintf(outputFileName, "%s%s",Mapped_FilePath , Mapped_File); + } + else + { + outputFileName[0] = '\0'; + } + - sprintf(outputFileName, "%s%s",Mapped_FilePath , Mapped_File); - sprintf(bam_outputFileName, "%s.tmp", outputFileName); - - - - if (output_methy == 0) - { - Output_gene(outputFileName); - } - - + if (!is_pairedEnd) { @@ -105,14 +103,16 @@ int main(int argc, char *argv[]) if (output_methy == 0) { - //这个Prepare_alignment()就是将这个文件里的变量各种配置到新文件中 - OutPutSAM_Nounheader(chhy_ih_refGenName, refChromeCont, argc, argv); - - - if (bam_output == 1) + if (bam_output == 0) { - init_bam_file_from_sam(outputFileName, bam_outputFileName); + Output_gene(outputFileName); + OutPutSAM_Nounheader(chhy_ih_refGenName, refChromeCont, argc, argv); } + else + { + init_bam_header(outputFileName, chhy_ih_refGenName, refChromeCont, argc, argv); + } + } @@ -200,19 +200,20 @@ int main(int argc, char *argv[]) fprintf(stderr, "Start alignment in sensitive mode.\n"); } - - ///fprintf(stderr, "is_local: %d\n", is_local); if (output_methy == 0) { - - OutPutSAM_Nounheader(chhy_ih_refGenName, refChromeCont, argc, argv); - - if (bam_output == 1) + if (bam_output == 0) { - init_bam_file_from_sam(outputFileName, bam_outputFileName); + Output_gene(outputFileName); + OutPutSAM_Nounheader(chhy_ih_refGenName, refChromeCont, argc, argv); } + else + { + init_bam_header(outputFileName, chhy_ih_refGenName, refChromeCont, argc, argv); + } + } @@ -224,7 +225,7 @@ int main(int argc, char *argv[]) if(THREAD_COUNT==1) { - Map_Pair_Seq(0); + Map_Pair_Seq(0); } else { @@ -247,9 +248,7 @@ int main(int argc, char *argv[]) if (bam_output == 1) { - ///close_bam_file(); - close_bam_file_rename(bam_outputFileName, outputFileName); - + close_bam_file(); } ///这个一打开速度就奇慢,不知道为什么 @@ -284,8 +283,16 @@ int main(int argc, char *argv[]) { char mapstatsFileName[NAME_LENGTH]; - sprintf(mapstatsFileName, "%s.mapstats", outputFileName); - + if (outputFileName[0] == '\0') + { + sprintf(mapstatsFileName, "stdout.mapstats"); + } + else + { + sprintf(mapstatsFileName, "%s.mapstats", outputFileName); + } + + FILE* mapstats_fp = fopen(mapstatsFileName, "w"); if (mapstats_fp != NULL) diff --git a/Process_CommandLines.cpp b/Process_CommandLines.cpp index d45614e..84eda0c 100644 --- a/Process_CommandLines.cpp +++ b/Process_CommandLines.cpp @@ -21,7 +21,7 @@ int is_index = 0; int is_search = 0; int is_methy = 0; -int is_pairedEnd; +int is_pairedEnd = 0; int cropSize = 0; int minDistance_pair=0; int maxDistance_pair=500; @@ -29,8 +29,8 @@ int over_all_seed_length = 30; unsigned int THREAD_COUNT = 1; char *Read_File1; char *Read_File2; -char *Mapped_File = "output"; -///char *Mapped_File = NULL; +///char *Mapped_File = "output"; +char *Mapped_File = NULL; char *Mapped_FilePath = ""; char *folder_path = NULL; char *bmm_folder_path = NULL; @@ -272,19 +272,25 @@ int CommandLine_process (int argc, char *argv[]) } - if (!is_pairedEnd && Read_File2 != NULL) + + if (Read_File1 != NULL && Read_File2 != NULL) + { + is_pairedEnd = 1; + } + + if (!is_pairedEnd && Read_File2 != NULL) { fprintf(stderr, "Please not indicate the read2 files for single-end mode!\n"); return 0; } - if (is_pairedEnd && (minDistance_pair <0 || maxDistance_pair < 0 || minDistance_pair > maxDistance_pair)) + if (is_pairedEnd && (minDistance_pair <0 || maxDistance_pair < 0 || minDistance_pair > maxDistance_pair)) { fprintf(stderr, "Please set a valid range for paired-end mode.\n"); return 0; } - if (is_pairedEnd && Read_File1 == NULL) + if (is_pairedEnd && Read_File1 == NULL) { fprintf(stderr, "Please indicate the read1 file for single-end mode.\n"); return 0; @@ -414,11 +420,11 @@ void Print_H() fprintf(stderr," --search [file/folder]\tSearch in the specified genome. If the indexes of this genome are built without \"--index_folder\", \n\t\t\tplease provide the path to the fasta file of genome (index files should be in the same folder). \n\t\t\tOtherwise please provide the path to the index folder (set by \"--index_folder\" during indexing).\n"); fprintf(stderr," --fast \t\tSet bitmapperBS in fast mode (default). This option is only available in paired-end mode.\n"); fprintf(stderr," --sensitive \t\tSet bitmapperBS in sensitive mode. This option is only available in paired-end mode.\n"); - fprintf(stderr," --pe \t\t\tSearch will be done in paired-end mode.\n"); + //fprintf(stderr," --pe \t\t\tSearch will be done in paired-end mode.\n"); fprintf(stderr," --seq [file]\t\tInput sequences in fastq/fastq.gz format [file]. This option is used \n\t\t\tfor single-end reads.\n"); fprintf(stderr," --seq1 [file]\t\tInput sequences in fastq/fastq.gz format [file] (First file). \n\t\t\tUse this option to indicate the first file of \n\t\t\tpaired-end reads. \n"); fprintf(stderr," --seq2 [file]\t\tInput sequences in fastq/fastq.gz format [file] (Second file). \n\t\t\tUse this option to indicate the second file of \n\t\t\tpaired-end reads. \n"); - fprintf(stderr," -o [file]\t\tOutput of the mapped sequences in SAM or BAM format. The default is \"output\" in SAM format.\n"); + fprintf(stderr," -o [file]\t\tOutput of the mapped sequences in SAM or BAM format. The default is \"stdout\" (standard output) in SAM format.\n"); fprintf(stderr, " --sam \t\t\tOutput mapping results in SAM format (default).\n"); fprintf(stderr, " --bam \t\t\tOutput mapping results in BAM format.\n"); fprintf(stderr, " --methy_out \t\tOutput the intermediate methylation result files, instead of SAM or BAM files.\n"); diff --git a/Process_Reads.cpp b/Process_Reads.cpp index a8c44e6..bcd12dd 100644 --- a/Process_Reads.cpp +++ b/Process_Reads.cpp @@ -188,21 +188,6 @@ int inputReads_paired_directly( //****************************读第一个read************************************* /*************************处理第一个read********************************/ - ///处理read name - read1_length = strlen(seqList1->name); - ///去回车 - seqList1->name[read1_length - 1] = '\0'; - for (j = 0; j < read1_length; j++) - { - if (seqList1->name[j] == ' ' || seqList1->name[j] == '/') - { - seqList1->name[j] = '\0'; - break; - } - } - - - ///处理read seq read1_length = strlen(seqList1->seq); ///去回车 @@ -268,21 +253,6 @@ int inputReads_paired_directly( /*************************处理第二个read********************************/ - ///处理read name - read2_length = strlen(seqList2->name); - ///去回车 - seqList2->name[read2_length - 1] = '\0'; - for (j = 0; j < read2_length; j++) - { - if (seqList2->name[j] == ' ' || seqList2->name[j] == '/') - { - seqList2->name[j] = '\0'; - break; - } - } - - - ///处理read seq read2_length = strlen(seqList2->rseq); ///去回车 @@ -311,6 +281,30 @@ int inputReads_paired_directly( + ///处理read name + read1_length = strlen(seqList1->name); + ///去回车 + seqList1->name[read1_length - 1] = '\0'; + + ///处理read name + read2_length = strlen(seqList2->name); + ///去回车 + seqList2->name[read2_length - 1] = '\0'; + + + + for (j = 0; j < read1_length && j < read2_length; j++) + { + if (seqList1->name[j] != seqList2->name[j]|| + seqList1->name[j] == ' ' || + seqList1->name[j] == '/' + ) + { + seqList1->name[j] = '\0'; + seqList2->name[j] = '\0'; + break; + } + } return 1; @@ -331,21 +325,6 @@ inline int post_process_paired_reads(Read_buffer_pe_sub_block* curr_sub_block) for (i = 0; i < curr_sub_block->sub_block_read_number; i++) { /*************************处理第一个read********************************/ - ///处理read name - seq_length1 = strlen(curr_sub_block->read1[i].name); - ///去回车 - curr_sub_block->read1[i].name[seq_length1 - 1] = '\0'; - for (j = 0; j < seq_length1; j++) - { - if (curr_sub_block->read1[i].name[j] == ' ' || curr_sub_block->read1[i].name[j] == '/') - { - curr_sub_block->read1[i].name[j] = '\0'; - break; - } - } - - - ///处理read seq seq_length1 = strlen(curr_sub_block->read1[i].seq); ///去回车 @@ -376,21 +355,6 @@ inline int post_process_paired_reads(Read_buffer_pe_sub_block* curr_sub_block) /*************************处理第二个read********************************/ - ///处理read name - seq_length2 = strlen(curr_sub_block->read2[i].name); - ///去回车 - curr_sub_block->read2[i].name[seq_length2 - 1] = '\0'; - for (j = 0; j < seq_length2; j++) - { - if (curr_sub_block->read2[i].name[j] == ' ' || curr_sub_block->read2[i].name[j] == '/') - { - curr_sub_block->read2[i].name[j] = '\0'; - break; - } - } - - - ///处理read seq seq_length2 = strlen(curr_sub_block->read2[i].rseq); ///去回车 @@ -417,9 +381,28 @@ inline int post_process_paired_reads(Read_buffer_pe_sub_block* curr_sub_block) /*************************处理第二个read********************************/ + ///处理read name + seq_length1 = strlen(curr_sub_block->read1[i].name); + ///去回车 + curr_sub_block->read1[i].name[seq_length1 - 1] = '\0'; + ///处理read name + seq_length2 = strlen(curr_sub_block->read2[i].name); + ///去回车 + curr_sub_block->read2[i].name[seq_length2 - 1] = '\0'; - - + for (j = 0; j < seq_length1; j++) + { + if ( + curr_sub_block->read1[i].name[j] != curr_sub_block->read2[i].name[j]|| + curr_sub_block->read1[i].name[j] == ' ' || + curr_sub_block->read1[i].name[j] == '/' + ) + { + curr_sub_block->read1[i].name[j] = '\0'; + curr_sub_block->read2[i].name[j] = '\0'; + break; + } + } } } diff --git a/Process_sam_out.cpp b/Process_sam_out.cpp index 796082a..f836af1 100644 --- a/Process_sam_out.cpp +++ b/Process_sam_out.cpp @@ -1140,8 +1140,7 @@ void OutPutSAM_Nounheader(_rg_name_l *_ih_refGenName, int refChromeCont, int ar int i = 0; for (i = 0; i --seq1 --seq2 --pe [options] + ./bitmapperBS --search --seq1 --seq2 [options] output mapping results in BAM format @@ -148,11 +148,10 @@ We recommend users to first remove the duplicates by Picard or samtools, and the | --search | NULL| String | NULL | Search in the specified genome. If the indexes of this genome are built without "--index_folder", please provide the path to the fasta file when aligning. Otherwise please provide the path to the index folder (set by "--index_folder" during indexing).| | --fast | NULL| String | NULL | Set bitmapperBS in fast mode (default). Only available for paired-end mode.| | --sensitive | NULL| String | NULL | Set bitmapperBS in sensitive mode. Only available for paired-end mode.| -| --pe | NULL| NULL | NULL | Searching will be done in paired-end mode. | | --seq | NULL| String | NULL | Provide the name of single-end read file (.fastq/.fq/.fastq.gz/.fq.gz format). | | --seq1 | NULL| String | NULL | Provide the name of paired-end read_1 file (.fastq/.fq/.fastq.gz/.fq.gz format). | | --seq2 | NULL| String | NULL | Provide the name of paired-end read_2 file (.fastq/.fq/.fastq.gz/.fq.gz format). | -| -o | -o | String | output (SAM format) | Provide the name of output file (SAM or BAM format). | +| -o | -o | String | stdout (Standard output) | Provide the name of output file (SAM or BAM format). | | --sam | NULL| String | NULL | Output mapping results in SAM format (default). | | --bam | NULL| String | NULL | Output mapping results in BAM format. | | -e | -e | Double | 0.08 | Set the edit distance rate of read length, which is between 0 and 1. | @@ -197,7 +196,7 @@ If map the reads from the *_2 reads file or the pbat protocol, the --pbat option to map reads to human genome (GRCH38) in paired-end mode using 6 CPU threads: - ./bitmapperBS --search ../../ssd/human_genome.fa --seq1 ../../ssd/read_1.fq --seq2 ../../ssd/read_1.fq --pe -t 6 + ./bitmapperBS --search ../../ssd/human_genome.fa --seq1 ../../ssd/read_1.fq --seq2 ../../ssd/read_1.fq -t 6 @@ -218,6 +217,13 @@ The output file of BitMapperBS must be first sorted into a coordinate-sorted BAM ### Changelog ### +(15) June 29, 2019: version 1.0.1.6 released. + + >> Remove the `--pe` option. The paired-end mode or the single-end mode can be determined automatically. + >> All logging information is printed to standard error. + >> If `-o` is not specified, the alignment results would be printed to standard output. + >> Fix a minor bug of the paired-end read name. + (14) June 28, 2019: version 1.0.1.5 released. >> Fix a minor bug when mapping. diff --git a/Schema.cpp b/Schema.cpp index 59eebbd..0be0c62 100644 --- a/Schema.cpp +++ b/Schema.cpp @@ -60,7 +60,7 @@ char char_to_3bit[128] = { _rg_name_l *_ih_refGenName; int refChromeCont; -char *versionN = "1.0.1.5"; +char *versionN = "1.0.1.6"; long long mappingCnt[MAX_Thread]; unsigned int done; long long mappedSeqCnt[MAX_Thread]; @@ -541,6 +541,7 @@ void Prepare_alignment(char* outputFileName, char *genFileName, _rg_name_l *chhy cut_length = cut_length + 1; } + ///outputFileName一定是bmm_folder/output open_methy_output(&methy_out, outputFileName, cut_length); diff --git a/Schema.h b/Schema.h index 476dbf6..b5e78e8 100644 --- a/Schema.h +++ b/Schema.h @@ -202,8 +202,8 @@ typedef struct char cigar[MAX_CIGAR_SIZE]; unsigned short iVal; char sVal[MAX_CIGAR_SIZE]; -} tmp_result; - +} tmp_result; + typedef struct { int flag; @@ -374,19 +374,19 @@ extern Methylation_Output methy_out; void Prepare_alignment(char* outputFileName, char *genFileName, _rg_name_l *chhy_ih_refGenName, int chhy_refChromeCont, int i_read_format, int is_pairedEnd); int Map_Single_Seq( int thread_id); int Map_Single_Seq_pbat(int thread_id); -///void* Map_Single_Seq_split(int thread_id); +///void* Map_Single_Seq_split(int thread_id); void* Map_Single_Seq_split(void* arg); void* Map_Single_Seq_split_pbat(void* arg); void* Map_Single_Seq_split_one_by_one(void* arg); int Map_Single_Seq_muti_thread(int thread_id); int Map_Single_Seq_pbat_muti_thread(int thread_id); - - -int Map_Pair_Seq(int thread_id); -void* Map_Pair_Seq_split(void* arg); + + +int Map_Pair_Seq(int thread_id); +void* Map_Pair_Seq_split(void* arg); int Map_Pair_Seq_muti_thread(int thread_id); void Prepare_methy(char *genFileName, _rg_name_l *chhy_ih_refGenName, int chhy_refChromeCont); - + void get_mapping_informations(long long* number_of_read, long long* number_of_unique_mapped_read, long long* number_of_ambiguous_mapped_read, long long* number_of_unmapped_read, long long* number_of_mapped_bases, @@ -401,212 +401,212 @@ void get_mapping_informations(long long* number_of_read, long long* number_of_un unsigned int get_total_reads_number(); void open_methy_output(Methylation_Output& methy_out); - - -inline void correct_read_using_cigar(char* cigar, char* input_read, char* output_read) -{ - ///这下面是遍历cigar用的 - int i = 0; - int cur_i = 0; - - ///这下面是遍历read用的 - ///int r_length = 0; - int input_length = 0; - int gap_size = 0; - int convert_length; - int output_length = 0; - - while (cigar[i] != '\0') - { - - switch (cigar[i]){ - case 'M': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - memcpy(output_read + output_length, input_read + input_length, convert_length); - input_length = input_length + convert_length; - output_length = output_length + convert_length; - cur_i = i + 1; - break; - case 'I': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - input_length = input_length + convert_length; - gap_size = gap_size - convert_length; - cur_i = i + 1; - break; - case 'D': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - memset(output_read + output_length, '_', convert_length); - output_length = output_length + convert_length; - gap_size = gap_size + convert_length; - cur_i = i + 1; - break; - } - - i++; - } - - output_read[output_length] = '\0'; - - -} - - - -inline void correct_read_using_cigar_3_bit(char* cigar, char* input_read, - bitmapper_bs_iter* output_read_3_bit) -{ - ///这下面是遍历cigar用的 - int i = 0; - int cur_i = 0; - - ///这下面是遍历read用的 - ///int r_length = 0; - int input_length = 0; - int gap_size = 0; - int convert_length; - int output_length = 0; - - int word64_index = 0; - int word64_inner_bit_index = 0; - int inner_i = 0; - bitmapper_bs_iter tmp; - - - while (cigar[i] != '\0') - { - - switch (cigar[i]){ - case 'M': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - - /******************这里要改***********************/ - ///memcpy(output_read + output_length, input_read + input_length, convert_length); - for (inner_i = 0; inner_i < convert_length; inner_i++) - { + + +inline void correct_read_using_cigar(char* cigar, char* input_read, char* output_read) +{ + ///这下面是遍历cigar用的 + int i = 0; + int cur_i = 0; + + ///这下面是遍历read用的 + ///int r_length = 0; + int input_length = 0; + int gap_size = 0; + int convert_length; + int output_length = 0; + + while (cigar[i] != '\0') + { + + switch (cigar[i]){ + case 'M': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + memcpy(output_read + output_length, input_read + input_length, convert_length); + input_length = input_length + convert_length; + output_length = output_length + convert_length; + cur_i = i + 1; + break; + case 'I': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + input_length = input_length + convert_length; + gap_size = gap_size - convert_length; + cur_i = i + 1; + break; + case 'D': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + memset(output_read + output_length, '_', convert_length); + output_length = output_length + convert_length; + gap_size = gap_size + convert_length; + cur_i = i + 1; + break; + } + + i++; + } + + output_read[output_length] = '\0'; + + +} + + + +inline void correct_read_using_cigar_3_bit(char* cigar, char* input_read, + bitmapper_bs_iter* output_read_3_bit) +{ + ///这下面是遍历cigar用的 + int i = 0; + int cur_i = 0; + + ///这下面是遍历read用的 + ///int r_length = 0; + int input_length = 0; + int gap_size = 0; + int convert_length; + int output_length = 0; + + int word64_index = 0; + int word64_inner_bit_index = 0; + int inner_i = 0; + bitmapper_bs_iter tmp; + + + while (cigar[i] != '\0') + { + + switch (cigar[i]){ + case 'M': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + + /******************这里要改***********************/ + ///memcpy(output_read + output_length, input_read + input_length, convert_length); + for (inner_i = 0; inner_i < convert_length; inner_i++) + { if (word64_inner_bit_index == 0) { output_read_3_bit[word64_index] = 0; - } - - tmp = char_to_3bit[input_read[input_length + inner_i]]; - tmp = tmp << word64_inner_bit_index * 3; - - output_read_3_bit[word64_index] = output_read_3_bit[word64_index] | tmp; - - word64_inner_bit_index++; - + } + + tmp = char_to_3bit[input_read[input_length + inner_i]]; + tmp = tmp << word64_inner_bit_index * 3; + + output_read_3_bit[word64_index] = output_read_3_bit[word64_index] | tmp; + + word64_inner_bit_index++; + if (word64_inner_bit_index == 21) { word64_inner_bit_index = 0; word64_index++; - } - - - - } - /******************这里要改***********************/ - - - - input_length = input_length + convert_length; - output_length = output_length + convert_length; - cur_i = i + 1; - break; - case 'I': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - input_length = input_length + convert_length; - gap_size = gap_size - convert_length; - cur_i = i + 1; - break; - case 'D': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - - /******************这里要改***********************/ - ///memset(output_read + output_length, '_', convert_length); - for (inner_i = 0; inner_i < convert_length; inner_i++) - { + } + + + + } + /******************这里要改***********************/ + + + + input_length = input_length + convert_length; + output_length = output_length + convert_length; + cur_i = i + 1; + break; + case 'I': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + input_length = input_length + convert_length; + gap_size = gap_size - convert_length; + cur_i = i + 1; + break; + case 'D': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + + /******************这里要改***********************/ + ///memset(output_read + output_length, '_', convert_length); + for (inner_i = 0; inner_i < convert_length; inner_i++) + { if (word64_inner_bit_index == 0) { output_read_3_bit[word64_index] = 0; - } - - tmp = char_to_3bit['N']; - tmp = tmp << word64_inner_bit_index * 3; - - output_read_3_bit[word64_index] = output_read_3_bit[word64_index] | tmp; - - word64_inner_bit_index++; - + } + + tmp = char_to_3bit['N']; + tmp = tmp << word64_inner_bit_index * 3; + + output_read_3_bit[word64_index] = output_read_3_bit[word64_index] | tmp; + + word64_inner_bit_index++; + if (word64_inner_bit_index == 21) { word64_inner_bit_index = 0; word64_index++; - } - - - - } - - - - /******************这里要改***********************/ - - - - - - - - - output_length = output_length + convert_length; - gap_size = gap_size + convert_length; - cur_i = i + 1; - break; - } - - i++; - } - - ///output_read[output_length] = '\0'; - - -} - - - -inline void correct_read_using_cigar_3_bit_over_lap_length_r2(char* cigar, char* input_read, - bitmapper_bs_iter* output_read_3_bit, - long long min1, long long len1, long long min2, long long len2) -{ - ///这下面是遍历cigar用的 - int i = 0; - int cur_i = 0; - - ///这下面是遍历read用的 - ///int r_length = 0; - int input_length = 0; - int gap_size = 0; - int convert_length; - int output_length = 0; - - int word64_index = 0; - int word64_inner_bit_index = 0; - int inner_i = 0; - bitmapper_bs_iter tmp; - - int is_overlap = 0; - - - - - - + } + + + + } + + + + /******************这里要改***********************/ + + + + + + + + + output_length = output_length + convert_length; + gap_size = gap_size + convert_length; + cur_i = i + 1; + break; + } + + i++; + } + + ///output_read[output_length] = '\0'; + + +} + + + +inline void correct_read_using_cigar_3_bit_over_lap_length_r2(char* cigar, char* input_read, + bitmapper_bs_iter* output_read_3_bit, + long long min1, long long len1, long long min2, long long len2) +{ + ///这下面是遍历cigar用的 + int i = 0; + int cur_i = 0; + + ///这下面是遍历read用的 + ///int r_length = 0; + int input_length = 0; + int gap_size = 0; + int convert_length; + int output_length = 0; + + int word64_index = 0; + int word64_inner_bit_index = 0; + int inner_i = 0; + bitmapper_bs_iter tmp; + + int is_overlap = 0; + + + + + + long long min = min1; long long max = min1 + len1; @@ -640,375 +640,375 @@ inline void correct_read_using_cigar_3_bit_over_lap_length_r2(char* cigar, char* ///memset(read2 + region_start2, 'N', region_length2); - } - - - - - - - - - - while (cigar[i] != '\0') - { - - switch (cigar[i]){ - case 'M': - cigar[i] = '\0'; - convert_length = atoi(cigar + cur_i); - - /******************这里要改***********************/ - ///memcpy(output_read + output_length, input_read + input_length, convert_length); - ///input_length = input_length + convert_length; - ///output_length = output_length + convert_length; - for (inner_i = 0; inner_i < convert_length; inner_i++, output_length++, input_length++) - { + } + + + + + + + + + + while (cigar[i] != '\0') + { + + switch (cigar[i]){ + case 'M': + cigar[i] = '\0'; + convert_length = atoi(cigar + cur_i); + + /******************这里要改***********************/ + ///memcpy(output_read + output_length, input_read + input_length, convert_length); + ///input_length = input_length + convert_length; + ///output_length = output_length + convert_length; + for (inner_i = 0; inner_i < convert_length; inner_i++, output_length++, input_length++) + { if (word64_inner_bit_index == 0) { output_read_3_bit[word64_index] = 0; - } - + } + if (is_overlap && output_length >= region_start2&&output_lengthtotal_size = size; - methy->current_size = 0; - (*methy).genome_cut = genome_cuts; - (*methy).cut_length = cut_length; - (*methy).cut_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); - (*methy).tmp_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); - - - memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); - - - - - - /**read1**/ - (*methy).R[0].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); - /******************这里改了********************/ - /** - (*methy).R[0].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].reads = (char**)malloc(sizeof(char*)*size); - **/ - (*methy).R[0].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); - /******************这里改了********************/ - int i; - for (i = 0; i < size; i++) - { - /******************这里改了********************/ - /** - (*methy).R[0].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); - (*methy).R[0].r_size[i] = INIT_READ_LENGTH; - (*methy).R[0].r_length[i] = 0; - **/ - (*methy).R[0].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); - (*methy).R[0].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; - (*methy).R[0].r_length[i] = 0; - (*methy).R[0].r_real_length[i] = 0; - - /******************这里改了********************/ - } - /**read1**/ - - - - - - /**read2**/ - (*methy).R[1].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[1].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); - /******************这里改了********************/ - /** - (*methy).R[1].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[1].reads = (char**)malloc(sizeof(char*)*size); - **/ - (*methy).R[1].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[1].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[1].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); - /******************这里改了********************/ - for (i = 0; i < size; i++) - { - /******************这里改了********************/ - /** - (*methy).R[1].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); - (*methy).R[1].r_size[i] = INIT_READ_LENGTH; - (*methy).R[1].r_length[i] = 0; - **/ - (*methy).R[1].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); - (*methy).R[1].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; - (*methy).R[1].r_length[i] = 0; - (*methy).R[1].r_real_length[i] = 0; - /******************这里改了********************/ - } - /**read2**/ - - - -} - - - -inline void init_single_methylation(Pair_Methylation* methy, int size) -{ - methy->total_size = size; - methy->current_size = 0; - (*methy).genome_cut = genome_cuts; - (*methy).cut_length = cut_length; - (*methy).cut_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); - (*methy).tmp_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); - - - memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); - - - - /**read1**/ - (*methy).R[0].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); - /******************这里改了********************/ - /** - (*methy).R[0].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].reads = (char**)malloc(sizeof(char*)*size); - **/ - (*methy).R[0].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); - (*methy).R[0].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); - /******************这里改了********************/ - int i; - for (i = 0; i < size; i++) - { - /******************这里改了********************/ - /** - (*methy).R[0].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); - (*methy).R[0].r_size[i] = INIT_READ_LENGTH; - (*methy).R[0].r_length[i] = 0; - **/ - (*methy).R[0].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); - (*methy).R[0].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; - (*methy).R[0].r_length[i] = 0; - (*methy).R[0].r_real_length[i] = 0; - - /******************这里改了********************/ - } - /**read1**/ - -} - - - -/** -inline void output_methylation(Methylation* methy, FILE *schema_out_fp) -{ + +} +**/ + + +inline void init_pair_methylation(Pair_Methylation* methy, int size) +{ + methy->total_size = size; + methy->current_size = 0; + (*methy).genome_cut = genome_cuts; + (*methy).cut_length = cut_length; + (*methy).cut_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); + (*methy).tmp_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); + + + memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); + + + + + + /**read1**/ + (*methy).R[0].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); + /******************这里改了********************/ + /** + (*methy).R[0].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].reads = (char**)malloc(sizeof(char*)*size); + **/ + (*methy).R[0].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); + /******************这里改了********************/ + int i; + for (i = 0; i < size; i++) + { + /******************这里改了********************/ + /** + (*methy).R[0].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); + (*methy).R[0].r_size[i] = INIT_READ_LENGTH; + (*methy).R[0].r_length[i] = 0; + **/ + (*methy).R[0].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); + (*methy).R[0].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; + (*methy).R[0].r_length[i] = 0; + (*methy).R[0].r_real_length[i] = 0; + + /******************这里改了********************/ + } + /**read1**/ + + + + + + /**read2**/ + (*methy).R[1].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[1].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); + /******************这里改了********************/ + /** + (*methy).R[1].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[1].reads = (char**)malloc(sizeof(char*)*size); + **/ + (*methy).R[1].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[1].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[1].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); + /******************这里改了********************/ + for (i = 0; i < size; i++) + { + /******************这里改了********************/ + /** + (*methy).R[1].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); + (*methy).R[1].r_size[i] = INIT_READ_LENGTH; + (*methy).R[1].r_length[i] = 0; + **/ + (*methy).R[1].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); + (*methy).R[1].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; + (*methy).R[1].r_length[i] = 0; + (*methy).R[1].r_real_length[i] = 0; + /******************这里改了********************/ + } + /**read2**/ + + + +} + + + +inline void init_single_methylation(Pair_Methylation* methy, int size) +{ + methy->total_size = size; + methy->current_size = 0; + (*methy).genome_cut = genome_cuts; + (*methy).cut_length = cut_length; + (*methy).cut_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); + (*methy).tmp_index = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*(genome_cuts + 1)); + + + memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); + + + + /**read1**/ + (*methy).R[0].r_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].sites = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*size); + /******************这里改了********************/ + /** + (*methy).R[0].r_size = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].reads = (char**)malloc(sizeof(char*)*size); + **/ + (*methy).R[0].r_real_length = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].r_size_3_bit = (uint16_t*)malloc(sizeof(uint16_t)*size); + (*methy).R[0].reads_3_bit = (bitmapper_bs_iter**)malloc(sizeof(bitmapper_bs_iter*)*size); + /******************这里改了********************/ + int i; + for (i = 0; i < size; i++) + { + /******************这里改了********************/ + /** + (*methy).R[0].reads[i] = (char*)malloc(sizeof(char)*INIT_READ_LENGTH); + (*methy).R[0].r_size[i] = INIT_READ_LENGTH; + (*methy).R[0].r_length[i] = 0; + **/ + (*methy).R[0].reads_3_bit[i] = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*INIT_READ_3_BIT_LENGTH); + (*methy).R[0].r_size_3_bit[i] = INIT_READ_3_BIT_LENGTH; + (*methy).R[0].r_length[i] = 0; + (*methy).R[0].r_real_length[i] = 0; + + /******************这里改了********************/ + } + /**read1**/ + +} + + + +/** +inline void output_methylation(Methylation* methy, FILE *schema_out_fp) +{ int i = 0; fprintf(schema_out_fp, "#%llu\t%llu\t%llu", (*methy).current_size, (*methy).genome_cut, (*methy).cut_length); - for (i = 0; i <= (*methy).genome_cut; i++) - { - + for (i = 0; i <= (*methy).genome_cut; i++) + { + fprintf(schema_out_fp, "\t%llu", - (*methy).cut_index[i]); + (*methy).cut_index[i]); } fprintf(schema_out_fp, "\n"); @@ -1017,25 +1017,25 @@ inline void output_methylation(Methylation* methy, FILE *schema_out_fp) fprintf(schema_out_fp, "%llu\t%s\n", (*methy).sites[i], (*methy).reads[i]); } -} -**/ - -inline void output_methylation(Methylation* methy) -{ - bitmapper_bs_iter i; - for (i = 0; i <= (*methy).genome_cut; i++) - { - methy_out.cut_index[i] += Get_Count((*methy).cut_index, i); - } - - bitmapper_bs_iter index; - bitmapper_bs_iter end_i; - FILE* file; - - int read_length; - - ///外层循环是每个cut - for (index = 0; index < (*methy).genome_cut; index++) +} +**/ + +inline void output_methylation(Methylation* methy) +{ + bitmapper_bs_iter i; + for (i = 0; i <= (*methy).genome_cut; i++) + { + methy_out.cut_index[i] += Get_Count((*methy).cut_index, i); + } + + bitmapper_bs_iter index; + bitmapper_bs_iter end_i; + FILE* file; + + int read_length; + + ///外层循环是每个cut + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).cut_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1058,27 +1058,27 @@ inline void output_methylation(Methylation* methy) /**********这里要改*********/ } - } - -} - - - -inline void output_methylation_pair(Pair_Methylation* methy) -{ - bitmapper_bs_iter i; - for (i = 0; i <= (*methy).genome_cut; i++) - { - methy_out.cut_index[i] += Get_Count((*methy).cut_index, i); - } - - bitmapper_bs_iter index; - bitmapper_bs_iter end_i; - FILE* file; - int read_length; - - ///外层循环是每个cut - for (index = 0; index < (*methy).genome_cut; index++) + } + +} + + + +inline void output_methylation_pair(Pair_Methylation* methy) +{ + bitmapper_bs_iter i; + for (i = 0; i <= (*methy).genome_cut; i++) + { + methy_out.cut_index[i] += Get_Count((*methy).cut_index, i); + } + + bitmapper_bs_iter index; + bitmapper_bs_iter end_i; + FILE* file; + int read_length; + + ///外层循环是每个cut + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).cut_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1115,149 +1115,149 @@ inline void output_methylation_pair(Pair_Methylation* methy) /**********这里要改*********/ } - } - -} - - - -inline int get_cut_id(Methylation* methy, bitmapper_bs_iter site) -{ - return site / (*methy).cut_length; -} - - -inline int get_cut_id_pair(Pair_Methylation* methy, bitmapper_bs_iter site) -{ - return site / (*methy).cut_length; -} - - -inline void swap(Methylation* methy, bitmapper_bs_iter i, bitmapper_bs_iter j) -{ - - (*methy).k_sites = (*methy).sites[i]; - (*methy).k_r_length = (*methy).r_length[i]; - /************这个要改**************/ - ///(*methy).k_r_size = (*methy).r_size[i]; - ///(*methy).k_reads = (*methy).reads[i]; - (*methy).k_r_size_3_bit = (*methy).r_size_3_bit[i]; - (*methy).k_reads_3_bit = (*methy).reads_3_bit[i]; - (*methy).k_r_real_length = (*methy).r_real_length[i]; - /************这个要改**************/ - - (*methy).sites[i] = (*methy).sites[j]; - (*methy).r_length[i] = (*methy).r_length[j]; - /************这个要改**************/ - ///(*methy).r_size[i] = (*methy).r_size[j]; - ///(*methy).reads[i] = (*methy).reads[j]; - (*methy).r_size_3_bit[i] = (*methy).r_size_3_bit[j]; - (*methy).reads_3_bit[i] = (*methy).reads_3_bit[j]; - (*methy).r_real_length[i] = (*methy).r_real_length[j]; - /************这个要改**************/ - - (*methy).sites[j] = (*methy).k_sites; - (*methy).r_length[j] = (*methy).k_r_length; - /************这个要改**************/ - ///(*methy).r_size[j] = (*methy).k_r_size; - ///(*methy).reads[j] = (*methy).k_reads; - (*methy).r_size_3_bit[j] = (*methy).k_r_size_3_bit; - (*methy).reads_3_bit[j] = (*methy).k_reads_3_bit; - (*methy).r_real_length[j] = (*methy).k_r_real_length; - /************这个要改**************/ - - -} - - -inline void swap_pair(Pair_Methylation* methy, bitmapper_bs_iter i, bitmapper_bs_iter j) -{ - - (*methy).R[0].k_sites = (*methy).R[0].sites[i]; - (*methy).R[0].k_r_length = (*methy).R[0].r_length[i]; - /************这个要改**************/ - ///(*methy).R[0].k_r_size = (*methy).R[0].r_size[i]; - ///(*methy).R[0].k_reads = (*methy).R[0].reads[i]; - (*methy).R[0].k_r_size_3_bit = (*methy).R[0].r_size_3_bit[i]; - (*methy).R[0].k_reads_3_bit = (*methy).R[0].reads_3_bit[i]; - (*methy).R[0].k_r_real_length = (*methy).R[0].r_real_length[i]; - /************这个要改**************/ - - (*methy).R[0].sites[i] = (*methy).R[0].sites[j]; - (*methy).R[0].r_length[i] = (*methy).R[0].r_length[j]; - /************这个要改**************/ - //(*methy).R[0].r_size[i] = (*methy).R[0].r_size[j]; - //(*methy).R[0].reads[i] = (*methy).R[0].reads[j]; - (*methy).R[0].r_size_3_bit[i] = (*methy).R[0].r_size_3_bit[j]; - (*methy).R[0].reads_3_bit[i] = (*methy).R[0].reads_3_bit[j]; - (*methy).R[0].r_real_length[i] = (*methy).R[0].r_real_length[j]; - /************这个要改**************/ - - (*methy).R[0].sites[j] = (*methy).R[0].k_sites; - (*methy).R[0].r_length[j] = (*methy).R[0].k_r_length; - /************这个要改**************/ - ///(*methy).R[0].r_size[j] = (*methy).R[0].k_r_size; - ///(*methy).R[0].reads[j] = (*methy).R[0].k_reads; - (*methy).R[0].r_size_3_bit[j] = (*methy).R[0].k_r_size_3_bit; - (*methy).R[0].reads_3_bit[j] = (*methy).R[0].k_reads_3_bit; - (*methy).R[0].r_real_length[j] = (*methy).R[0].k_r_real_length; - /************这个要改**************/ - - - - - - /************这个要改**************/ - (*methy).R[1].k_sites = (*methy).R[1].sites[i]; - (*methy).R[1].k_r_length = (*methy).R[1].r_length[i]; - ///(*methy).R[1].k_r_size = (*methy).R[1].r_size[i]; - ///(*methy).R[1].k_reads = (*methy).R[1].reads[i]; - (*methy).R[1].k_r_size_3_bit = (*methy).R[1].r_size_3_bit[i]; - (*methy).R[1].k_reads_3_bit = (*methy).R[1].reads_3_bit[i]; - (*methy).R[1].k_r_real_length = (*methy).R[1].r_real_length[i]; - /************这个要改**************/ - - - /************这个要改**************/ - (*methy).R[1].sites[i] = (*methy).R[1].sites[j]; - (*methy).R[1].r_length[i] = (*methy).R[1].r_length[j]; - ///(*methy).R[1].r_size[i] = (*methy).R[1].r_size[j]; - ///(*methy).R[1].reads[i] = (*methy).R[1].reads[j]; - (*methy).R[1].r_size_3_bit[i] = (*methy).R[1].r_size_3_bit[j]; - (*methy).R[1].reads_3_bit[i] = (*methy).R[1].reads_3_bit[j]; - (*methy).R[1].r_real_length[i] = (*methy).R[1].r_real_length[j]; - /************这个要改**************/ - - - /************这个要改**************/ - (*methy).R[1].sites[j] = (*methy).R[1].k_sites; - (*methy).R[1].r_length[j] = (*methy).R[1].k_r_length; - ///(*methy).R[1].r_size[j] = (*methy).R[1].k_r_size; - ///(*methy).R[1].reads[j] = (*methy).R[1].k_reads; - (*methy).R[1].r_size_3_bit[j] = (*methy).R[1].k_r_size_3_bit; - (*methy).R[1].reads_3_bit[j] = (*methy).R[1].k_reads_3_bit; - (*methy).R[1].r_real_length[j] = (*methy).R[1].k_r_real_length; - /************这个要改**************/ - - -} - - - - - - - -inline void assign_cuts_pair(Pair_Methylation* methy) -{ + } + +} + + + +inline int get_cut_id(Methylation* methy, bitmapper_bs_iter site) +{ + return site / (*methy).cut_length; +} + + +inline int get_cut_id_pair(Pair_Methylation* methy, bitmapper_bs_iter site) +{ + return site / (*methy).cut_length; +} + + +inline void swap(Methylation* methy, bitmapper_bs_iter i, bitmapper_bs_iter j) +{ + + (*methy).k_sites = (*methy).sites[i]; + (*methy).k_r_length = (*methy).r_length[i]; + /************这个要改**************/ + ///(*methy).k_r_size = (*methy).r_size[i]; + ///(*methy).k_reads = (*methy).reads[i]; + (*methy).k_r_size_3_bit = (*methy).r_size_3_bit[i]; + (*methy).k_reads_3_bit = (*methy).reads_3_bit[i]; + (*methy).k_r_real_length = (*methy).r_real_length[i]; + /************这个要改**************/ + + (*methy).sites[i] = (*methy).sites[j]; + (*methy).r_length[i] = (*methy).r_length[j]; + /************这个要改**************/ + ///(*methy).r_size[i] = (*methy).r_size[j]; + ///(*methy).reads[i] = (*methy).reads[j]; + (*methy).r_size_3_bit[i] = (*methy).r_size_3_bit[j]; + (*methy).reads_3_bit[i] = (*methy).reads_3_bit[j]; + (*methy).r_real_length[i] = (*methy).r_real_length[j]; + /************这个要改**************/ + + (*methy).sites[j] = (*methy).k_sites; + (*methy).r_length[j] = (*methy).k_r_length; + /************这个要改**************/ + ///(*methy).r_size[j] = (*methy).k_r_size; + ///(*methy).reads[j] = (*methy).k_reads; + (*methy).r_size_3_bit[j] = (*methy).k_r_size_3_bit; + (*methy).reads_3_bit[j] = (*methy).k_reads_3_bit; + (*methy).r_real_length[j] = (*methy).k_r_real_length; + /************这个要改**************/ + + +} + + +inline void swap_pair(Pair_Methylation* methy, bitmapper_bs_iter i, bitmapper_bs_iter j) +{ + + (*methy).R[0].k_sites = (*methy).R[0].sites[i]; + (*methy).R[0].k_r_length = (*methy).R[0].r_length[i]; + /************这个要改**************/ + ///(*methy).R[0].k_r_size = (*methy).R[0].r_size[i]; + ///(*methy).R[0].k_reads = (*methy).R[0].reads[i]; + (*methy).R[0].k_r_size_3_bit = (*methy).R[0].r_size_3_bit[i]; + (*methy).R[0].k_reads_3_bit = (*methy).R[0].reads_3_bit[i]; + (*methy).R[0].k_r_real_length = (*methy).R[0].r_real_length[i]; + /************这个要改**************/ + + (*methy).R[0].sites[i] = (*methy).R[0].sites[j]; + (*methy).R[0].r_length[i] = (*methy).R[0].r_length[j]; + /************这个要改**************/ + //(*methy).R[0].r_size[i] = (*methy).R[0].r_size[j]; + //(*methy).R[0].reads[i] = (*methy).R[0].reads[j]; + (*methy).R[0].r_size_3_bit[i] = (*methy).R[0].r_size_3_bit[j]; + (*methy).R[0].reads_3_bit[i] = (*methy).R[0].reads_3_bit[j]; + (*methy).R[0].r_real_length[i] = (*methy).R[0].r_real_length[j]; + /************这个要改**************/ + + (*methy).R[0].sites[j] = (*methy).R[0].k_sites; + (*methy).R[0].r_length[j] = (*methy).R[0].k_r_length; + /************这个要改**************/ + ///(*methy).R[0].r_size[j] = (*methy).R[0].k_r_size; + ///(*methy).R[0].reads[j] = (*methy).R[0].k_reads; + (*methy).R[0].r_size_3_bit[j] = (*methy).R[0].k_r_size_3_bit; + (*methy).R[0].reads_3_bit[j] = (*methy).R[0].k_reads_3_bit; + (*methy).R[0].r_real_length[j] = (*methy).R[0].k_r_real_length; + /************这个要改**************/ + + + + + + /************这个要改**************/ + (*methy).R[1].k_sites = (*methy).R[1].sites[i]; + (*methy).R[1].k_r_length = (*methy).R[1].r_length[i]; + ///(*methy).R[1].k_r_size = (*methy).R[1].r_size[i]; + ///(*methy).R[1].k_reads = (*methy).R[1].reads[i]; + (*methy).R[1].k_r_size_3_bit = (*methy).R[1].r_size_3_bit[i]; + (*methy).R[1].k_reads_3_bit = (*methy).R[1].reads_3_bit[i]; + (*methy).R[1].k_r_real_length = (*methy).R[1].r_real_length[i]; + /************这个要改**************/ + + + /************这个要改**************/ + (*methy).R[1].sites[i] = (*methy).R[1].sites[j]; + (*methy).R[1].r_length[i] = (*methy).R[1].r_length[j]; + ///(*methy).R[1].r_size[i] = (*methy).R[1].r_size[j]; + ///(*methy).R[1].reads[i] = (*methy).R[1].reads[j]; + (*methy).R[1].r_size_3_bit[i] = (*methy).R[1].r_size_3_bit[j]; + (*methy).R[1].reads_3_bit[i] = (*methy).R[1].reads_3_bit[j]; + (*methy).R[1].r_real_length[i] = (*methy).R[1].r_real_length[j]; + /************这个要改**************/ + + + /************这个要改**************/ + (*methy).R[1].sites[j] = (*methy).R[1].k_sites; + (*methy).R[1].r_length[j] = (*methy).R[1].k_r_length; + ///(*methy).R[1].r_size[j] = (*methy).R[1].k_r_size; + ///(*methy).R[1].reads[j] = (*methy).R[1].k_reads; + (*methy).R[1].r_size_3_bit[j] = (*methy).R[1].k_r_size_3_bit; + (*methy).R[1].reads_3_bit[j] = (*methy).R[1].k_reads_3_bit; + (*methy).R[1].r_real_length[j] = (*methy).R[1].k_r_real_length; + /************这个要改**************/ + + +} + + + + + + + +inline void assign_cuts_pair(Pair_Methylation* methy) +{ bitmapper_bs_iter i = 0; (*methy).tmp_index[0] = 0; - for (i = 0; i < (*methy).genome_cut; i++) - { - (*methy).tmp_index[i + 1] = (*methy).tmp_index[i] + (*methy).cut_index[i]; + for (i = 0; i < (*methy).genome_cut; i++) + { + (*methy).tmp_index[i + 1] = (*methy).tmp_index[i] + (*methy).cut_index[i]; } memcpy((*methy).cut_index, (*methy).tmp_index, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); @@ -1268,7 +1268,7 @@ inline void assign_cuts_pair(Pair_Methylation* methy) bitmapper_bs_iter tmp_index; ///外层循环是循环每个桶的 - for (index = 0; index < (*methy).genome_cut; index++) + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).tmp_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1293,24 +1293,24 @@ inline void assign_cuts_pair(Pair_Methylation* methy) } } - -} - - - - - - -inline void assign_cuts(Methylation* methy) -{ + +} + + + + + + +inline void assign_cuts(Methylation* methy) +{ bitmapper_bs_iter i = 0; (*methy).tmp_index[0] = 0; - for (i = 0; i < (*methy).genome_cut; i++) - { - (*methy).tmp_index[i + 1] = (*methy).tmp_index[i] + (*methy).cut_index[i]; + for (i = 0; i < (*methy).genome_cut; i++) + { + (*methy).tmp_index[i + 1] = (*methy).tmp_index[i] + (*methy).cut_index[i]; } memcpy((*methy).cut_index, (*methy).tmp_index, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); @@ -1321,7 +1321,7 @@ inline void assign_cuts(Methylation* methy) bitmapper_bs_iter tmp_index; ///外层循环是循环每个桶的 - for (index = 0; index < (*methy).genome_cut; index++) + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).tmp_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1347,7 +1347,7 @@ inline void assign_cuts(Methylation* methy) /** - for (index = 0; index < (*methy).genome_cut; index++) + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).cut_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1369,7 +1369,7 @@ inline void assign_cuts(Methylation* methy) **/ /** - for (index = 0; index < (*methy).genome_cut; index++) + for (index = 0; index < (*methy).genome_cut; index++) { i = (*methy).cut_index[index]; end_i = (*methy).cut_index[index + 1]; @@ -1396,19 +1396,19 @@ inline void assign_cuts(Methylation* methy) /** - for (i = 0; i < (*methy).genome_cut + 1; i++) - { + for (i = 0; i < (*methy).genome_cut + 1; i++) + { if ((*methy).tmp_index[i] != (*methy).cut_index[i]) { fprintf(stderr, "error\n"); - } - + } + } **/ /** - for (i = 0; i < (*methy).genome_cut; i++) + for (i = 0; i < (*methy).genome_cut; i++) { if (Get_Count((*methy).tmp_index, i) != (*methy).cut_index[i]) fprintf(stderr, "error\n"); @@ -1419,9 +1419,9 @@ inline void assign_cuts(Methylation* methy) /** fprintf(stderr, "*************************************\n"); - for (i = 0; i < (*methy).genome_cut; i++) - { - fprintf(stderr, "i: %d, %llu\n", i, (*methy).cut_index[i]); + for (i = 0; i < (*methy).genome_cut; i++) + { + fprintf(stderr, "i: %d, %llu\n", i, (*methy).cut_index[i]); } fprintf(stderr, "\n"); @@ -1429,55 +1429,55 @@ inline void assign_cuts(Methylation* methy) for (i = 0; i < (*methy).current_size; i++) { (*methy).cut_index[get_cut_id(methy, (*methy).sites[i])]--; - } - - for (i = 0; i < (*methy).genome_cut; i++) - { + } + + for (i = 0; i < (*methy).genome_cut; i++) + { if ((*methy).cut_index[i]!=0) { fprintf(stderr, "error\n"); - } - fprintf(stderr, "i: %d, %llu\n", i, (*methy).cut_index[i]); + } + fprintf(stderr, "i: %d, %llu\n", i, (*methy).cut_index[i]); + } + + fprintf(stderr, "\n"); + + + + + fprintf(stderr, "*************************************\n"); + **/ + +} + + + +inline void clear_methylation(Methylation* methy) +{ + (*methy).current_size = 0; + + /** + int i; + for (i = 0; i < (*methy).genome_cut; i++) + { + (*methy).cut_index[i] = 0; } + **/ + + memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); +} + + + +inline void clear_methylation_pair(Pair_Methylation* methy) +{ + (*methy).current_size = 0; + + + memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); +} + - fprintf(stderr, "\n"); - - - - - fprintf(stderr, "*************************************\n"); - **/ - -} - - - -inline void clear_methylation(Methylation* methy) -{ - (*methy).current_size = 0; - - /** - int i; - for (i = 0; i < (*methy).genome_cut; i++) - { - (*methy).cut_index[i] = 0; - } - **/ - - memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); -} - - - -inline void clear_methylation_pair(Pair_Methylation* methy) -{ - (*methy).current_size = 0; - - - memset((*methy).cut_index, 0, sizeof(bitmapper_bs_iter)*((*methy).genome_cut + 1)); -} - - inline void C_to_T(char *Seq, char *bsSeq, int length, int* C_site) { diff --git a/bam_prase.cpp b/bam_prase.cpp index 985fd44..945db81 100644 --- a/bam_prase.cpp +++ b/bam_prase.cpp @@ -59,43 +59,69 @@ void init_bam_header_back(char* tmp_sam_file_name, char* bam_file_name) } -void init_bam_header(char* tmp_sam_file_name, char* bam_file_name) +void init_bam_header(char* bam_file_name, _rg_name_l *_ih_refGenName, int refChromeCont, int argc, char *argv[]) { - samFile* in; - in = sam_open(tmp_sam_file_name, "r"); - bitmapperBS_bam.bam_file = sam_open_format(bam_file_name, "wb", NULL); - ///bitmapperBS_bam.bam_file = sam_open_format("stdout", "wb", NULL); + if (bam_file_name[0] == '\0') + { + bitmapperBS_bam.bam_file = sam_open_format("-", "wb", NULL); + } + else + { + bitmapperBS_bam.bam_file = sam_open_format(bam_file_name, "wb", NULL); + } + + bitmapperBS_bam.bam_header = NULL; - bitmapperBS_bam.output_name = tmp_sam_file_name; + bitmapperBS_bam.output_name = bam_file_name; + + + char** chrome_name = (char**)malloc(sizeof(char*)*refChromeCont); + bitmapper_bs_iter* chrome_length = (bitmapper_bs_iter*)malloc(sizeof(bitmapper_bs_iter)*refChromeCont); + + int i = 0; + + for (i = 0; i < refChromeCont; i++) + { + chrome_name[i]=_ih_refGenName[i]._rg_chrome_name; + chrome_length[i] = _ih_refGenName[i]._rg_chrome_length; + } + ///读入sam的首部 - if ((bitmapperBS_bam.bam_header = sam_hdr_read(in)) == 0) { - fprintf(stderr, "fail to read %s\n", tmp_sam_file_name); + //if ((bitmapperBS_bam.bam_header = sam_hdr_read(in)) == 0) { + if ((bitmapperBS_bam.bam_header = chhy_sam_hdr_read(chrome_name, chrome_length, refChromeCont, + argc, argv, versionN)) == 0) { + fprintf(stderr, "fail to phrase %s\n", bam_file_name); return; } + + ///写入sam的首部 if (sam_hdr_write(bitmapperBS_bam.bam_file, bitmapperBS_bam.bam_header) != 0) { fprintf(stderr, "fail to write %s\n", bam_file_name); return; } - ///一定要关闭,不关的话写不到文件里去 - sam_close(in); + free(chrome_name); + free(chrome_length); } -void init_bam_file_from_sam(char* file_name, char* outputFileName) +///void init_bam_file_from_sam(char* file_name, char* outputFileName) +void init_bam_file_from_sam(char* file_name, char* outputFileName, +_rg_name_l *_ih_refGenName, int refChromeCont, int argc, char *argv[]) { finalizeOutput(); - init_bam_header(file_name, outputFileName); + ///init_bam_header(file_name, outputFileName); + ///init_bam_header(file_name, outputFileName, _ih_refGenName, refChromeCont, argc, argv); } diff --git a/bam_prase.h b/bam_prase.h index 18b1b06..a00bb53 100644 --- a/bam_prase.h +++ b/bam_prase.h @@ -30,7 +30,7 @@ #include #include #include - +#include "Schema.h" typedef struct @@ -89,13 +89,16 @@ void write_alignment_directly(char* alignment, long long alignment_length, bam_o void close_bam_file_rename(char* old_name, char* new_name); -void init_bam_header(char* tmp_sam_file_name, char* bam_file_name); +void init_bam_header(char* bam_file_name, _rg_name_l *_ih_refGenName, int refChromeCont, int argc, char *argv[]); +void init_bam_file_from_sam(char* file_name, char* outputFileName, +_rg_name_l *_ih_refGenName, int refChromeCont, int argc, char *argv[]); + + void close_bam_file(); void write_alignment(const char* alignment); void write_alignment_muti_thread(const char* alignment); void write_alignment_group(char* alignment, const long long length); void write_alignment_group_pre_allocate(char* alignment, const long long length, const long long number); -void init_bam_file_from_sam(char* file_name, char* outputFileName); void convert_string_to_bam(char* alignment, long long alignment_length, bam_phrase* bam_groups); diff --git a/htslib/htslib/sam.h b/htslib/htslib/sam.h index 47b0a74..afa4f6d 100644 --- a/htslib/htslib/sam.h +++ b/htslib/htslib/sam.h @@ -289,6 +289,8 @@ typedef struct { int bam_write1(BGZF *fp, const bam1_t *b) HTS_RESULT_USED; int chhy_bam_write1(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b) HTS_RESULT_USED; int chhy_bam_write1_pure(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b, bgzf_buffer* j) HTS_RESULT_USED; + bam_hdr_t *chhy_sam_hdr_read(char** _ih_refGenName, uint64_t* _ih_refGenLength, int refChromeCont, + int argc, char *argv[], char* versionN); bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc); diff --git a/htslib/sam.c b/htslib/sam.c index 1e839f2..07913d6 100644 --- a/htslib/sam.c +++ b/htslib/sam.c @@ -518,8 +518,8 @@ int bam_read1(BGZF *fp, bam1_t *b) int bam_write1(BGZF *fp, const bam1_t *b) { - ///bgzf_flush_trybgzf_writeﶼӲ, bgzf.cļ - ///еlazy_flush + ///bgzf_flush_try��bgzf_write�ﶼ����Ӳ���, ������������bgzf.c����ļ��� + ///�����������е�lazy_flush��� const bam1_core_t *c = &b->core; uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; int i, ok; @@ -533,7 +533,7 @@ int bam_write1(BGZF *fp, const bam1_t *b) x[5] = c->mtid; x[6] = c->mpos; x[7] = c->isize; - ok = (bgzf_flush_try(fp, 4 + block_len) >= 0); ///е + ok = (bgzf_flush_try(fp, 4 + block_len) >= 0); ///�������������е� if (fp->is_be) { for (i = 0; i < 8; ++i) ed_swap_4p(x + i); y = block_len; @@ -572,8 +572,8 @@ int bam_write1(BGZF *fp, const bam1_t *b) int chhy_bam_write1(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b) { - ///bgzf_flush_trybgzf_writeﶼӲ, bgzf.cļ - ///еlazy_flush + ///bgzf_flush_try��bgzf_write�ﶼ����Ӳ���, ������������bgzf.c����ļ��� + ///�����������е�lazy_flush��� const bam1_core_t *c = &b->core; uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; int i, ok; @@ -587,7 +587,7 @@ int chhy_bam_write1(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b) x[5] = c->mtid; x[6] = c->mpos; x[7] = c->isize; - ok = (chhy_bgzf_flush_try(main_fp, buff_fp, 4 + block_len) >= 0); ///е + ok = (chhy_bgzf_flush_try(main_fp, buff_fp, 4 + block_len) >= 0); ///�������������е� if (main_fp->is_be) { for (i = 0; i < 8; ++i) ed_swap_4p(x + i); y = block_len; @@ -635,8 +635,8 @@ int chhy_bam_write1(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b) int chhy_bam_write1_pure(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b, bgzf_buffer* j) { - ///bgzf_flush_trybgzf_writeﶼӲ, bgzf.cļ - ///еlazy_flush + ///bgzf_flush_try��bgzf_write�ﶼ����Ӳ���, ������������bgzf.c����ļ��� + ///�����������е�lazy_flush��� const bam1_core_t *c = &b->core; uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y; int i, ok; @@ -650,7 +650,7 @@ int chhy_bam_write1_pure(BGZF *main_fp, BGZF *buff_fp, const bam1_t *b, bgzf_buf x[5] = c->mtid; x[6] = c->mpos; x[7] = c->isize; - ok = (chhy_bgzf_flush_try_pure(main_fp, buff_fp, 4 + block_len,j) >= 0); ///е + ok = (chhy_bgzf_flush_try_pure(main_fp, buff_fp, 4 + block_len,j) >= 0); ///�������������е� if (main_fp->is_be) { for (i = 0; i < 8; ++i) ed_swap_4p(x + i); y = block_len; @@ -1136,6 +1136,79 @@ static bam_hdr_t *sam_hdr_sanitise(bam_hdr_t *h) { return h; } + + + +bam_hdr_t *chhy_sam_hdr_read(char** _ih_refGenName, uint64_t* _ih_refGenLength, +int refChromeCont, int argc, char *argv[], char* versionN) +{ + + kstring_t str = { 0, 0, NULL }; + bam_hdr_t *h = NULL; + int ret, has_SQ = 0; + + uint64_t refName_length; + int i = 0; + int max_length_size = 20 + 15; + char* tmp_str; + int tmp_str_size = 1000; + tmp_str = (char*)malloc(sizeof(char*)*tmp_str_size); + + sprintf(tmp_str, "@HD\tVN:1.4\tSO:unsorted"); + kputsn(tmp_str, strlen(tmp_str), &str); + kputc('\n', &str); + + for (i = 0; i < refChromeCont; i++) + { + refName_length = strlen(_ih_refGenName[i]) + max_length_size; + if (refName_length > tmp_str_size) + { + tmp_str_size = refName_length; + tmp_str = (char*)realloc(tmp_str, sizeof(char*)*tmp_str_size); + } + + sprintf(tmp_str, "@SQ\tSN:%s\tLN:%llu", _ih_refGenName[i], _ih_refGenLength[i]); + kputsn(tmp_str, strlen(tmp_str), &str); + kputc('\n', &str); + } + + sprintf(tmp_str, "@PG\tID:BitMapperBS\tVN:%s\tCL:", versionN); + refName_length = strlen(tmp_str); + for (i = 0; i < argc; i++) + { + refName_length = strlen(argv[i]) + 2; + if (refName_length > tmp_str_size) + { + tmp_str_size = refName_length; + tmp_str = (char*)realloc(tmp_str, sizeof(char*)*tmp_str_size); + } + + sprintf(tmp_str + strlen(tmp_str), "%s ", argv[i]); + } + + kputsn(tmp_str, strlen(tmp_str), &str); + kputc('\n', &str); + + free(tmp_str); + + + if (str.l == 0) kputsn("", 0, &str); + h = sam_hdr_parse(str.l, str.s); + h->l_text = str.l; h->text = str.s; + return sam_hdr_sanitise(h); +} + + + + + + + + + + + + bam_hdr_t *sam_hdr_read(htsFile *fp) { if (!fp) {