04叶绿体流程4-个性化分析

重复序列圈图

mkdir repeat_circos && cd repeat_circos
cp ../annotation/*/ssr_analysis/ssr.info.xls ./
cut -f 4,5 ssr.info.xls | awk '{print "Melicope_pteleifolia""\t"$0}' > ssr.plot.txt

 cp ../annotation/Melicope_pteleifolia/reputer/Melicope_pteleifolia.dispered.repeat.region.xls  ./


一.基因分化热图 遗传距离计算

1.把gb文件放到gbk文件夹中

2.获取同源序列
$ perl /share/nas6/xul/program/chloroplast/phytree/gene_tree/find_total_gene.pl  -o gene -i gbk/ -p 
$ perl /share/nas6/xul/program/chloroplast/phytree/gene_tree/find_total_gene.pl  -i gbk/ -p Gentianella_pygmaea  -o gene   
# -p后面参数名字和gbk文件名里面名字都要一致
$ cd gene/mafft/
$ rm rrn* trn* && nu=`ls ../../gbk/*.gbk |wc |awk '{print $1}'` && rm `grep -c ">" * |grep -v ":$nu"|cut -d ":" -f 1 `

3.计算每个基因的pairwise distance
$ for i in *.fasta ;do echo Rscript /share/nas6/xul/program/chloroplast/pairwise_distance/pairwise_distance.R $i `basename $i .fasta`.distance.xls ;done > pairwise_distance.sh && thread.pl pairwise_distance.sh  63

4.整理结果
$ perl /share/nas6/xul/program/chloroplast/pairwise_distance/get_result.pl *.distance.xls  Gentianella_pygmaea >  all.dis.xls
$ realpath all.dis.xls # 这个文件修改第一行的物种名,不要登陆号

5.作图
$ Rscript /share/nas6/xul/program/chloroplast/pairwise_distance/pheatmap.R all.dis*.xls && convert -density 300 dis.heatmap.pdf  dis.heatmap.png

二.RNA编辑预测 xxx

# perl  /share/nas6/xul/program/mt2/annotation/pipline/src/get_cds_seq_use_gene_annotation_file.pl  -i final_gene_annotaion.info  -f *_FULLCP.fsa -p $sample -o tmp_for_prep # 线粒体

## 叶绿体
perl /share/nas6/xul/program/mt2/annotation/pipline/src/get_cds_seq_use_gene_annotation_file_for_chl.pl -i final_gene_annotaion.info  -f *_FULLCP.fsa -p $sample -o ./ 
生成文件*.cds.for_perp-cp.txt
上传到云平台
[生信云](http://112.86.217.82:9919/#/tool/alltool/detail/336)
用不了,结果不对

---------------------------------网站已废弃----------------------------------------------------
打开http://prep.unl.edu/上传文件
下载文件并解压 tar xf Results.tar.gz 
perl /share/nas6/xul/program/mt2/annotation/pipline/src/get_edit_site_info.pl -i *_Edit_Sites.txt -p Gentianella_pygmaea -o  final_rna_edit_info.xls # 合并表格

三.SSR

3.1 流程里ssr及ssr引物设计

1.SSR分析(/share/nas1/yuj/project/GP-20211130-3777-1_20220325/ssr_analysis下有misa.ini misa.pl)

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/src/gb2tbl.pl -i 物种.gbk -o tbl
# gbk转tbl

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/src/tbl_stat.exon.pl -i tbl/物种.tbl -f 物种.fasta -o 物种.exon.intron.stat.xls
# 统计各基因外显子内含子的情况

perl misa.pl 物种.fasta cpSSR
# misa分析ssr,生成cpSSR.misa

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/stat.ssr.pl -i cpSSR.misa -t tbl/物种.tbl -ir 83137-109333,127168-153364 -o1 ssr.info.xls -o2 ssr.stat.xls
# 解析成更容易看的格式 stat是对info的统计

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/plot.ssr.stat.pl -i ssr.stat.xls -o ./ssr_analysis/ 
# 统计后画图
# region.barssr	ssr在不同分区上的内含子外显子间区的分布情况
# region.pie	ssr在不同分区上比例饼状图

perl /share/nas6/xul/program/chloroplast/ssr/draw_ssr.pl -i number.tmp -o ssr.number.bar
# ssr.number.bar	各类ssr计数柱状图


2.引物设计(4286给定ssr 设计其引物)

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/p3_in.pl cpSSR.misa 物种.fasta ./ssr_analysis/
# 生成cpSSR.ssr.p3in	
# 前置文件cpssr.misa(ID 编号 产物长度 开始 终止)

	单纯设计引物,可以直接从填写cpSSR.ssr.p3in开始

perl /share/nas6/xul/program/chloroplast/ssr/ssr_primer_designer.pl -i cpSSR.ssr.p3in -t 20 -o ../ssr_analysis
# 生成cpSSR.ssr.p3out
# 前置文件cpSSR.ssr.p3in  可手动填写,标签详情见 http://www.chenlianfu.com/?p=284 	primer3设计引物详解

perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/p3_out.pl cpSSR.ssr.p3out cpSSR.misa cpSSR.ssr.primer.xls 
# 解析成更容易看的格式

3.2 查找多态性好的ssr(分子标记)

脚本路径:
/share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/MISA/misa.pl # misa程序
/share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/MISA/misa.ini # misa配置文件
/share/nas6/xul/program/mt2/phytree/gene_tree/src/fasta2line.pl

misa.ini参数设置:
def 1-10,2-6,3-5,4-5,5-5,6-5 或 1-10,2-5,3-4,4-3,5-3,6-3	
int 100

definition(unit_size,min_repeats):                  1-10 2-5 3-4 4-3 5-3 6-3	
interruptions(max_difference_between_2_SSRs):        100

具体步骤:
1.简单看一下共线性
调整为同一起点

2.比对
cat fasta/*.fasta > allinone.fa
mafft --auto --quiet --thread 30 allinone.fa > all.aln.tmp && perl /share/nas6/xul/program/mt2/phytree/gene_tree/src/fasta2line.pl -i all.aln.tmp -o all.aln &

3.对每一个物种进行查找
## perl misa.pl 物种.fasta cpssr(输出文件前缀,自定义)
cd fasta && cp /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/MISA/misa*  ./
for i in *.fasta;do echo $i; perl misa.pl $i ${i%.fasta} ;done

4.统计合并
cd ..
perl /share/nas6/xul/program/chloroplast/blast/mafft/aln_ssr2marker.pl -i all.aln -o ssr.xls -d fasta(misa结果所在文件夹)

TBtools对应功能:
SSRMiner && Batch Target Region Primer Design

3.3 批量设计ssr引物

cd misa_result(包含所有.fasta文件)

for i in `ls *.fasta`;do echo $i; perl misa.pl $i ${i%.fasta};done
# 生成misa

for i in *.fasta;do echo $i;mkdir ../${i%.fasta}_ssr_analysis && echo 'done';done
# 生成目录

for i in *.fasta;do echo $i;perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/p3_in.pl ${i%.fasta}.misa $i ../${i%.fasta}_ssr_analysis/  & done
# 生成cpSSR.ssr.p3in	
# 前置文件cpssr.misa(ID 编号 产物长度 开始 终止)

(可选)cd ../ && rm */cpSSR.ssr.p3out ; cd misa_result
# 删除所有目录里已有的p3out

for i in *.fasta;do echo $i;perl /share/nas6/xul/program/chloroplast/ssr/ssr_primer_designer.pl -i ../${i%.fasta}_ssr_analysis/cpSSR.ssr.p3in -t 20 -o ../${i%.fasta}_ssr_analysis/  & done
# 生成cpSSR.ssr.p3out
# 前置文件cpSSR.ssr.p3in  可手动填写,标签详情见 http://www.chenlianfu.com/?p=284 	primer3设计引物详解

for i in *.fasta;do echo $i;perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/p3_out.pl ../${i%.fasta}_ssr_analysis/cpSSR.ssr.p3out ${i%.fasta}.misa ../${i%.fasta}_ssr_analysis/cpSSR.ssr.primer.xls & done
# 解析成更容易看的格式

四.PCR引物设计

3321全长基因组来设计

## 比对
cat ref/fasta/*.fasta > all.fa
mafft --auto --quiet --thread 30 all.fa > all.aln
/share/nas6/xul/soft/trimal/trimal-1.4.1/source/trimal -automated1 -in all.aln -out all.trim.aln
perl /share/nas6/xul/program/chloroplast/blast/mafft/aln2indel_tmp.pl -i all.trim.aln > all.trim.aln.faa # 把非保守位点替换成N

vim 9.fa # 取出.faa中最后一条序列
---确保输入的序列两端至少各有一个GAP---
python3 /share/nas1/yuj/script/Modify-short-consecutive-conserved-sites-to-N/primer_all_ATGC2N.py -i 9.fa -n 20 -o all.trim.aln.faa.fa # 进一步把非连续的保守位点替换成N/较短连续保守位点修改为N


## 设计
1.填cpssr.misa文件 ID 编号 产物长度 开始 终止,生成引物设计软件的输入文件
$ perl primer3_in.pl cpSSR.misa 物种名 .../Personalisation_analysis/pcr

2.上一步生成cpSSR.ssr.p3in,或者手动填写,标签详情见 http://www.chenlianfu.com/?p=284 	primer3设计引物详解
$ perl /share/nas6/xul/program/chloroplast/ssr/ssr_primer_designer.pl -i cpSSR.ssr.p3in -t 20 -o .../Personalisation_analysis/pcr

3.合并设计好的输出结果
$ perl /share/nas6/pub/pipline/genome-assembly-seq/chloroplast-genome-seq/v1.2/annotation/etc/misa/src/p3_out.pl cpSSR.ssr.p3out cpSSR.misa  cpSSR.ssr.primer.xls