Preparation
import pandas as pd
import numpy as np
# Collect, for every chromosome, the column-6 entries (other than the literal
# 'is') and the column-10 entries of GFE_chr<N>_info.txt, arrange them as one
# column per chromosome, and write the two tables as header-less TSV files.
df_notFound=pd.DataFrame()
df_minorAmb=pd.DataFrame()
chrmList=list(map(str, range(1,23)))+['X','Y']
maxChrm1=0
maxChrm2=0
kind='kind'
path='./'+kind+'/GFE_Result/'


def _read_info(nb):
    """Read GFE_chr<nb>_info.txt as whitespace-separated columns, minus its last row."""
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    info=pd.read_csv(path+'GFE_chr'+nb+'_info.txt',sep=r'\s+',header=None)
    return info.drop(info.shape[0]-1,axis=0)


# Pass 1: find the chromosome with the most entries of each kind.  On ties the
# later chromosome in chrmList wins (the comparison below is '==' after max()).
for nb in chrmList:
    df=_read_info(nb)
    notFound=df[6][df[6]!='is'].dropna().reset_index(drop=True).astype(int)
    minorAmb=df[10].dropna().reset_index(drop=True).astype(int)
    maxChrm1=max(maxChrm1,notFound.shape[0])
    maxChrm2=max(maxChrm2,minorAmb.shape[0])
    if notFound.shape[0]==maxChrm1:
        nf_chrm=nb
    if minorAmb.shape[0]==maxChrm2:
        ma_chrm=nb

# The longest column must be inserted first: column assignment aligns on the
# frame's existing index, so a longer Series added later would be truncated,
# whereas shorter ones are simply padded with NaN.
chrmList1=[nb for nb in chrmList if nb!=nf_chrm]
chrmList2=[nb for nb in chrmList if nb!=ma_chrm]

# Pass 2: column-6 table (values are kept exactly as read, without an int cast).
for nb in [nf_chrm]+chrmList1:
    df=_read_info(nb)
    notFound=df[6][df[6]!='is'].dropna().reset_index(drop=True)
    df_notFound['chr'+nb]=notFound

# Pass 3: column-10 table.
for nb in [ma_chrm]+chrmList2:
    df=_read_info(nb)
    # Built-in int replaces np.int, which was removed in NumPy 1.24.  The former
    # pd.to_numeric(errors='ignore') pre-pass is dropped: it is deprecated, and
    # pass 1 already applies this same astype(int) to the raw column.
    # Re-aligning to the full index re-introduces NaN, so the column is float.
    df[10]=df[10].dropna().astype(int)
    minorAmb=df[10].dropna().reset_index(drop=True)
    df_minorAmb['chr'+nb]=minorAmb

# Restore the natural chromosome order and pad short columns with 0.
new_chrmList=['chr'+nb for nb in chrmList]
df_notFound=df_notFound[new_chrmList].fillna(0)
df_minorAmb=df_minorAmb[new_chrmList].fillna(0)

# Append one all-zero row so that every column, including the longest one,
# ends with a 0.
df_notFound.loc[df_notFound.shape[0]]=0
df_minorAmb.loc[df_minorAmb.shape[0]]=0
print(df_notFound)
print(df_minorAmb)
df_notFound.to_csv(path+'MLnotFound_site.txt',index=None,sep='\t',header=None)
# float_format='%.0f' prints the float-typed values without a decimal part.
df_minorAmb.to_csv(path+'ambiguous_site.txt',index=None,sep='\t', float_format='%.0f',header=None)
Search
#!/bin/bash
# Usage: <script> CHRM        (CHRM is 1..22, X or Y)
#
# Reads ambiguous_site.txt row by row, takes the field that belongs to CHRM
# (fields 0..21 for chromosomes 1..22, field 22 for X, field 23 for Y) and
# prints the first line of Out_GFE_chr<CHRM>.txt that contains that value.
# Exits successfully at the first row whose field is 0, or at end of input.
chrm=$1
input="ambiguous_site.txt"

if [ -z "$chrm" ]; then
    echo "usage: $0 <1..22|X|Y>" >&2
    exit 1
fi

# Zero-based field index of this chromosome within each input row.
case "$chrm" in
    X) col=22 ;;
    Y) col=23 ;;
    *) col=$((chrm-1)) ;;
esac

# The split into fields happens inside `read` itself.  Previously the array
# assignment sat in the `while` condition list, so the loop status was that of
# the assignment (always 0) and the loop could never end at end-of-file.
while read -r -a arg
do
    site="${arg[$col]}"
    if [ "$site" == 0 ]; then exit 0; fi
    # -e keeps a value that starts with '-' from being parsed as an option.
    grep -m 1 -e "$site" "Out_GFE_chr$chrm.txt"
done < "$input"
Run script
#!/bin/bash
# Start one background ambiguous.sh job per chromosome label (1-22, X, Y).
# Each job's stdout and stderr are written to amb_chr<label>.txt in the
# current directory; this script does not wait for the jobs to finish.
chroms=({1..22} X Y)
for chrom in "${chroms[@]}"; do
    /awork04-2/PTSCAN_WYC/ADNI/GFE_Result/ambiguous.sh "$chrom" &> "amb_chr${chrom}.txt" &
done
Replace
#!/bin/bash
# Edit pro2chrms.txt in place: on every line, replace the first '-' with '"'.
target=pro2chrms.txt
sed -i -e 's/-/"/' "$target"
Combine
#!/bin/bash
# Usage: <script> LABEL NAME
#
# Pastes hs38_<NAME> side by side with every In_GFE_<NAME>.* file into
# In_GFE_<NAME>.txt.  On success, calls sm.sh with a "Done_to_combine_..."
# message; otherwise prints a "Not combined" message.
arg=("$@")
name=${arg[1]}
out="In_GFE_${name}.txt"

# The output file itself matches the In_GFE_<NAME>.* glob.  On a re-run it
# would be truncated by the redirection and read back as an input, so it is
# left out of the input list.
parts=()
for f in In_GFE_"${name}".*; do
    [ "$f" == "$out" ] || parts+=("$f")
done

# An explicit if/else replaces `a && b || c`, which also printed "Not combined"
# when paste succeeded but sm.sh failed.
if [ "${#parts[@]}" -gt 0 ] && paste "hs38_${name}" "${parts[@]}" > "$out"; then
    sm.sh "Done_to_combine_${arg[0]}_In_GFE_${name}_files"
else
    echo "Not combined In_GFE_${name}_files"
fi
Send email
#!/bin/bash
# Usage: <script> MESSAGE...
#
# Runs `mail` on the remote host over ssh to send a "Your job is completed."
# notification; the arguments, joined by spaces, form the message body.
#
# ssh passes the command line to the remote shell, which parses it again.
# The body is therefore joined into a single string and shell-quoted with
# printf %q.  Unquoted, any word after the first became an extra argument to
# `mail`, shell metacharacters were interpreted remotely, and an empty
# argument list produced a remote syntax error.
body=$(printf '%q' "$*")
ssh userID@192.168.0.1 "mail -s \"Your job is completed.\" mailID@mail.com <<< $body"
Split jobs
#!/bin/bash
# Writes the commands "python test.py <j> <step>" for j = 0..1346 into the
# files split0..split22, in two stages:
#   1. job j goes to the temporary file splits<j mod 368>;
#   2. the 368 temporary files are distributed over the 23 split<i> files.
# The temporary splits* files are removed at the end.
step=1
file=cmdlist.txt   # not referenced anywhere in this script
n_chunks=368       # number of temporary files: splits0 .. splits367
last_job=1346      # highest job index
n_groups=23        # number of final files: split0 .. split22

# Stage 1: temporary file i receives jobs i, i+368, i+736, ...
for ((i=0; i<n_chunks; i++))
do
    for ((j=i; j<=last_job; j+=n_chunks))
    do
        echo "python test.py $j $step"
    done > "splits${i}"
done

# Stage 2: group i receives temporary files i, i+23, i+46, ...
# The stride has to equal the number of groups so that every temporary file
# lands in exactly one group.  The earlier stride of 16 with 23 groups copied
# splits16 and above into two groups, so those jobs were listed twice.
# NOTE(review): this assumes 23 groups is the intended count — confirm.
for ((i=0; i<n_groups; i++))
do
    a=split${i}
    # -f: starting without a leftover file from a previous run is not an error.
    rm -f "$a"
    for ((j=i; j<n_chunks; j+=n_groups))
    do
        cat "splits$j" >> "$a"
    done
done
rm splits*