#!/bin/bash
#
# Filter the reads of paired FASTQ files.
#
# Usage: <this script> <input_folder> <output_folder> <max_rate>
#
#   input_folder   directory (or path prefix) in front of the *R1*.fastq and
#                  *R2*.fastq files to process
#   output_folder  directory that receives the results; it must not exist yet
#   max_rate       value handed unchanged to float_compare.pl together with
#                  the length of each read and its number of 'N' characters
#
# Requires perl and a float_compare.pl script in the current working
# directory. A read is kept only when float_compare.pl exits with status 1.
#
# Exit status: 0 on success, non-zero on a usage error or unreadable input.

#######################################
# Filter every FASTQ file named in a list file.
#
# Each FASTQ file is read as a sequence of 4-line records:
#   line 0 (header)   - the record is skipped when it matches *1:Y:*
#   line 1 (sequence) - length and number of 'N's go to float_compare.pl
#   lines 2 and 3     - ignored
#
# Globals:
#   output_folder (read) - existing directory receiving the file_<n> outputs
#   max_rate      (read) - third argument passed to float_compare.pl
# Arguments:
#   $1 - text file with one FASTQ path per line
# Outputs:
#   stdout: one "processing file ..." line per FASTQ file and one
#           "... was removed" message per rejected read
#   files:  kept reads are appended as "<header>||<sequence>" to
#           $output_folder/file_<n>, <n> being the zero-based position of
#           the FASTQ path inside the list file
# Returns:
#   0 on success, 1 if the list file or any FASTQ file could not be opened
#######################################
function process_file() {
  local file_name="$1"
  local -a fastq_files=()
  local i line line_index out ns r
  local status=0

  # "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    fastq_files+=("$line")
  done < "$file_name" || return 1

  for ((i = 0; i < ${#fastq_files[@]}; i++)); do
    printf 'processing file %s\n' "${fastq_files[i]}"

    # Parser state is reset for every file so that a truncated file cannot
    # shift the record boundaries of the files that follow it.
    line_index=0
    out=""

    while IFS= read -r line || [[ -n "$line" ]]; do
      case "$line_index" in
        0)
          # NOTE(review): the same pattern is applied to the R2 files; if
          # their headers flag filtered reads as "2:Y:" those reads are not
          # dropped here - confirm this is intended.
          if [[ "$line" == *1:Y:* ]]; then
            out=""
          else
            out="$line"
          fi
          ;;
        1)
          # An empty $out means the header line rejected this record.
          if [[ -n "$out" ]]; then
            ns="${line//[^N]/}"
            # stdin is redirected so perl can never consume lines of the
            # FASTQ file this loop is reading from.
            perl float_compare.pl "${#line}" "${#ns}" "${max_rate}" < /dev/null
            r=$?
            if [[ "$r" -eq 1 ]]; then
              # NOTE(review): the R1 pass and the R2 pass both append to the
              # same file_<n>; confirm that mixing both readings is wanted.
              printf '%s||%s\n' "$out" "$line" >> "$output_folder/file_$i"
            else
              printf '%s||%s was removed\n\n' "$out" "$line"
            fi
            out=""
          fi
          ;;
      esac
      line_index=$(((line_index + 1) % 4))
    done < "${fastq_files[i]}" || status=1
  done

  return "$status"
}

#######################################
# Validate the command line, collect the R1/R2 files and filter them.
#
# Globals:
#   input_folder, output_folder, max_rate (written)
# Arguments:
#   $1 - input folder, $2 - output folder, $3 - max rate
# Returns:
#   0 on success, 1 on a usage error or when a file could not be processed
#######################################
function main() {
  input_folder="${1-}"
  output_folder="${2-}"
  max_rate="${3-}"

  printf 'max_rate is %s \n\n' "$max_rate"

  # Check that input folder, output folder and max rate were all given.
  if [[ -z "$input_folder" ]]; then
    printf 'Input folder is not defined \n\n' >&2
    return 1
  fi
  if [[ -z "$output_folder" ]]; then
    printf 'Output folder is not defined \n\n' >&2
    return 1
  fi
  if [[ -z "$max_rate" ]]; then
    printf 'max rate is not defined \n\n' >&2
    return 1
  fi
  if [[ -d "$output_folder" ]]; then
    printf '%s\n' "output directory already exists. Exiting now!" >&2
    return 1
  fi

  # The globs below simply prepend $input_folder, so a directory given
  # without a trailing slash would otherwise match nothing inside it.
  if [[ -d "$input_folder" && "$input_folder" != */ ]]; then
    input_folder="$input_folder/"
  fi

  # Pathname expansion returns the matches already sorted, which keeps the
  # n-th R1 file aligned with the n-th R2 file.
  local -a reads1=() reads2=()
  shopt -s nullglob
  reads1=("$input_folder"*R1*.fastq)
  reads2=("$input_folder"*R2*.fastq)
  shopt -u nullglob

  # If both lists are not of equal size, some files are missing.
  if ((${#reads1[@]} != ${#reads2[@]})); then
    printf 'some readings are missing\n\n' >&2
    return 1
  fi
  if ((${#reads1[@]} == 0)); then
    printf 'no *R1*.fastq / *R2*.fastq files found for %s\n' \
      "$input_folder" >&2
    return 1
  fi

  # Created only after validation, so a rejected run does not leave behind
  # an empty directory that would block the next attempt.
  mkdir -- "$output_folder" || return 1

  # The file lists are handed over through process substitution, so no
  # temporary files are left in the working directory.
  local status=0
  process_file <(printf '%s\n' "${reads1[@]}") || status=1
  process_file <(printf '%s\n' "${reads2[@]}") || status=1
  return "$status"
}

main "$@"