#Since these SFF files came from Engencore with the barcodes already removed, we began with separate files by sample. #Three plates run, four samples each, so 12 individual SFF files #So processed SFF run on 12 individual files; then split libraries run on each of 12; then concatenated 12 fna files to make combined.fna #Then ran denoiser preprocess. All of this happened on the DIAG, so QIIME 1.8 > denoiser_preprocess.py -o Denoised_preprocess/ -i Output_ROV007Q1/454Reads.MID1.txt,Output_ROV009Q1/454Reads.MID1.txt,Output_3731K3/454Reads.MID1.txt,Output_ROV008Q3/454Reads.MID2.txt,Output_ROV002Q1/454Reads.MID2.txt,Output_ROV005Q2/454Reads.MID2.txt,Output_3705K10/454Reads.MID3.txt,Output_3705K3/454Reads.MID3.txt,Output_ROV002Q2/454Reads.MID3.txt,Output_ROV003Q3/454Reads.MID4.txt,Output_ROV006Q3/454Reads.MID4.txt,Output_3705K6/454Reads.MID4.txt -f combined.fna -p AYTGGGYDTAAAGNG #Now uploaded files to EC2 (8/21/15). > denoiser.py -i Output_ROV007Q1/454Reads.MID1.txt,Output_ROV009Q1/454Reads.MID1.txt,Output_3731K3/454Reads.MID1.txt,Output_ROV008Q3/454Reads.MID2.txt,Output_ROV002Q1/454Reads.MID2.txt,Output_ROV005Q2/454Reads.MID2.txt,Output_3705K10/454Reads.MID3.txt,Output_3705K3/454Reads.MID3.txt,Output_ROV002Q2/454Reads.MID3.txt,Output_ROV003Q3/454Reads.MID4.txt,Output_ROV006Q3/454Reads.MID4.txt,Output_3705K6/454Reads.MID4.txt -f combined.fna -p Denoised_preprocess -o Denoised_final --titanium #Checked process, had thrown Errno 12 "Cannot allocate memory" in spite of using m3.xlarge instance #Queued up again with --low_memory added to the end of the command #Completed; took approximately a week > inflate_denoiser_output.py -c Denoised_final/centroids.fasta -s Denoised_final/singletons.fasta -f combined.fna -d Denoised_final/denoiser_mapping.txt -o inflated_seqs.fna #1004996 : inflated_seqs.fna (Sequence lengths (mean +/- std): 382.8881 +/- 16.1882) > truncate_reverse_primer.py -f inflated_seqs.fna -m Lophelia1_map.txt -o reverse_primer_removed/ #Details for removal of reverse 
primers #Original fasta filepath: inflated_seqs.fna #Total seqs in fasta: 1004996 #Mapping filepath: Lophelia1_map.txt #Truncation option: truncate_only #Mismatches allowed: 2 #Total seqs written: 1004996 #SampleIDs not found: 0 #Reverse primers not found: 113833 > split_sequence_file_on_sample_ids.py -i reverse_primer_removed/inflated_seqs_rev_primer_truncated.fna -o Out_countseqs/ > cd Out_countseqs/ > count_seqs.py -i "*.fasta" #40118 : ROV06Q3.fasta (Sequence lengths (mean +/- std): 345.3359 +/- 17.1153) #47225 : 3705K3.fasta (Sequence lengths (mean +/- std): 343.7573 +/- 16.4963) #47650 : ROV02Q1.fasta (Sequence lengths (mean +/- std): 342.8119 +/- 12.4049) #50165 : ROV09Q1.fasta (Sequence lengths (mean +/- std): 343.3982 +/- 15.7234) #65991 : ROV05Q2.fasta (Sequence lengths (mean +/- std): 343.5821 +/- 21.4583) #71882 : 3731K3.fasta (Sequence lengths (mean +/- std): 344.2564 +/- 18.4919) #75171 : 3705K10.fasta (Sequence lengths (mean +/- std): 343.5717 +/- 17.5902) #90148 : ROV08Q3.fasta (Sequence lengths (mean +/- std): 345.4847 +/- 14.8019) #111295 : ROV03Q3.fasta (Sequence lengths (mean +/- std): 345.0060 +/- 20.2655) #122359 : ROV07Q1.fasta (Sequence lengths (mean +/- std): 344.5020 +/- 14.9540) #135562 : 3705K6.fasta (Sequence lengths (mean +/- std): 344.6009 +/- 28.8038) #147430 : ROV02Q2.fasta (Sequence lengths (mean +/- std): 344.2056 +/- 13.8439) #1004996 : Total #All have good numbers, so no need to run extract_seqs_by_sample_id.py > pick_open_reference_otus.py -i reverse_primer_removed/inflated_seqs_rev_primer_truncated.fna -m usearch61 -o usearch61_openref_Green/ -f #142 : rep_set_failures.fasta (Sequence lengths (mean +/- std): 225.4577 +/- 52.1775) #Examined the top three sequences with BLAST; all seem like legit bacteria with good e-values, so will keep moving forward > filter_taxa_from_otu_table.py -i otu_table_mc2_w_tax.biom -o otu_table_final.biom -n c__Chloroplast,f__mitochondria #To check, converted input and output biom tables to text and 
used wc -l to count the lines. 1364 - 1345 = 19 Chloroplast and mitochondria #Confirmed by using grep -c in rep_set_tax_assignments.txt : 12 Chloroplasts & 7 mitochondria > biom summarize_table -i otu_table_final.biom -o summary_otu_table_final.txt #Num samples: 12 #Num observations: 1343 #Total count: 1003600 #Table density (fraction of non-zero values): 0.191 #Counts/sample summary: #Min: 40064.0 #Max: 147292.0 #Median: 73381.500 #Mean: 83633.333 #Std. dev.: 35634.877 #Counts/sample detail: #ROV06Q3: 40064.0 #3705K3: 47200.0 #ROV02Q1: 47538.0 #ROV09Q1: 50019.0 #ROV05Q2: 65582.0 #3731K3: 71832.0 #3705K10: 74931.0 #ROV08Q3: 90135.0 #ROV03Q3: 111249.0 #ROV07Q1: 122278.0 #3705K6: 135480.0 #ROV02Q2: 147292.0 > single_rarefaction.py -i otu_table_final.biom -o otu_table_final_rarefied40064.biom -d 40064 > biom summarize_table -i otu_table_final_rarefied40064.biom -o summary_otu_table_rarefied40064.txt #Confirmed all samples are now 40,064 > mkdir Alpha_Diversity_rare40064/ > alpha_diversity.py -i otu_table_final_rarefied40064.biom -m ace,chao1,observed_otus,simpson_reciprocal,shannon,simpson,simpson_e -o Alpha_Diversity_rare40064/Alpha_Diversity_rare40064.txt -t rep_set.tre # ace chao1 observed_otus simpson_reciprocal shannon simpson simpson_e #ROV05Q2 815.69014732 783.02189781 712.0 3.87789926843 3.7625033485 0.742128422948 0.00544648773656 #ROV02Q2 284.161025656 334.647058824 158.0 1.23069443204 0.866484001449 0.187450618148 0.00778920526606 #3705K10 635.003363823 604.628571429 526.0 6.55109315986 4.26187206745 0.84735372012 0.0124545497336 #ROV07Q1 207.331563707 193.789473684 124.0 1.71778785806 1.38598868353 0.417855938785 0.0138531278876 #3731K3 151.985512204 153.066666667 126.0 2.36748961736 1.96510927228 0.577611663989 0.0187896001378 #ROV09Q1 116.694773176 106.0 94.0 2.26097269312 1.61320633726 0.557712482313 0.0240529009907 #ROV06Q3 117.765023594 116.8125 101.0 1.34428223541 0.98248651588 0.256108595606 0.0133097251031 #3705K6 221.102760852 175.8 117.0 
3.18793118178 2.22979959599 0.686316942562 0.0272472750579 #ROV02Q1 373.222457363 348.5 296.0 1.81866733299 1.72865756502 0.450146829021 0.00614414639524 #ROV03Q3 226.699736925 230.565217391 166.0 1.73984377675 1.74600527238 0.42523575199 0.0104809866069 #ROV08Q3 111.081929525 108.4 97.0 1.62961776267 1.41447880022 0.386359167833 0.0168001831203 #3705K3 155.124427259 150.166666667 109.0 1.94863649052 1.66683239848 0.486820653897 0.0178773989956 > summarize_taxa_through_plots.py -o Alpha_Diversity_rare40064/taxa_summary40064/ -i otu_table_final_rarefied40064.biom > beta_diversity.py -i otu_table_final_rarefied40064.biom -m unweighted_unifrac,weighted_unifrac,binary_sorensen_dice,bray_curtis -o compar_div_rare40064/ -t rep_set.tre > beta_significance.py -i otu_table_final_rarefied40064.biom -t rep_set.tre -s unweighted_unifrac -o unw_sig.txt #Determining if samples are statistically significantly different from each other #Default 100 monte carlo randomizations #Unweighted unifrac (so abundance doesn't matter, just presence/absence of taxa) #unweighted unifrac significance test #sample 1 sample 2 p value p value (Bonferroni corrected) #3705K10 3705K3 0.0 <=1.0e-02 #3705K10 3705K6 0.0 <=1.0e-02 #3705K10 3731K3 0.0 <=1.0e-02 #3705K10 ROV02Q1 0.0 <=1.0e-02 #3705K10 ROV02Q2 0.0 <=1.0e-02 #3705K10 ROV03Q3 0.0 <=1.0e-02 #3705K10 ROV05Q2 0.0 <=1.0e-02 #3705K10 ROV06Q3 0.0 <=1.0e-02 #3705K10 ROV07Q1 0.0 <=1.0e-02 #3705K10 ROV08Q3 0.0 <=1.0e-02 #3705K10 ROV09Q1 0.0 <=1.0e-02 #3705K3 3705K6 0.0 <=1.0e-02 #3705K3 3731K3 0.07 1.0 #3705K3 ROV02Q1 0.0 <=1.0e-02 #3705K3 ROV02Q2 0.0 <=1.0e-02 #3705K3 ROV03Q3 0.0 <=1.0e-02 #3705K3 ROV05Q2 0.0 <=1.0e-02 #3705K3 ROV06Q3 0.0 <=1.0e-02 #3705K3 ROV07Q1 0.01 0.66 #3705K3 ROV08Q3 0.01 0.66 #3705K3 ROV09Q1 0.0 <=1.0e-02 #3705K6 3731K3 0.0 <=1.0e-02 > beta_significance.py -i otu_table_final_rarefied40064.biom -t rep_set.tre -s weighted_unifrac -o w_sig.txt #Determining if samples are statistically significantly different from each other 
#Default 100 monte carlo randomizations #Weighted unifrac (so abundance matters) #weighted unifrac significance test #sample 1 sample 2 p value p value (Bonferroni corrected) #3705K10 3705K3 0.57 1.0 #3705K10 3705K6 0.55 1.0 #3705K10 3731K3 0.18 1.0 #3705K10 ROV02Q1 0.22 1.0 #3705K10 ROV02Q2 0.34 1.0 #3705K10 ROV03Q3 0.11 1.0 #3705K10 ROV05Q2 0.26 1.0 #3705K10 ROV06Q3 0.26 1.0 #3705K10 ROV07Q1 0.28 1.0 #3705K10 ROV08Q3 0.35 1.0 #3705K10 ROV09Q1 0.21 1.0 #3705K3 3705K6 0.51 1.0 #3705K3 3731K3 0.34 1.0 #3705K3 ROV02Q1 0.46 1.0 #3705K3 ROV02Q2 0.42 1.0 #3705K3 ROV03Q3 0.42 1.0 #3705K3 ROV05Q2 0.43 1.0 #3705K3 ROV06Q3 0.42 1.0 #3705K3 ROV07Q1 0.42 1.0 #3705K3 ROV08Q3 0.55 1.0 #3705K3 ROV09Q1 0.54 1.0 #3705K6 3731K3 0.43 1.0 > principal_coordinates.py -i compar_div_rare40064/ -o compar_div_rare40064_PCoA/ > make_2d_plots.py -i compar_div_rare40064_PCoA/pcoa_weighted_unifrac_otu_table_final_rarefied40064.txt -m ../Lophelia1_map.txt -o PCoA_2D_plot_WU/ > make_2d_plots.py -i compar_div_rare40064_PCoA/pcoa_unweighted_unifrac_otu_table_final_rarefied40064.txt -m ../Lophelia1_map.txt -o PCoA_2D_plot_UU/ > make_2d_plots.py -i compar_div_rare40064_PCoA/pcoa_binary_sorensen_dice_otu_table_final_rarefied40064.txt -m ../Lophelia1_map.txt -o PCoA_2D_plot_BSD/ > make_2d_plots.py -i compar_div_rare40064_PCoA/pcoa_bray_curtis_otu_table_final_rarefied40064.txt -m ../Lophelia1_map.txt -o PCoA_2D_plot_BC/ > make_otu_heatmap.py -i Alpha_Diversity_rare40064/taxa_summary40064/otu_table_final_rarefied40064_L2.biom -o heatmap_L2_rare40064.pdf > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_everybody > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_gulf --mapping_fp Lophelia1_map.txt --valid_states "Location:Gulf" #Looking at core of Gulf of Mexico samples only > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_atl1 --mapping_fp Lophelia1_map.txt --valid_states "Location:Atl" #Looking at core of Atlantic 
samples only > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_VK826 --mapping_fp Lophelia1_map.txt --valid_states "Site:VK826" #Looking at core of VK826 samples only > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_VK906 --mapping_fp Lophelia1_map.txt --valid_states "Site:VK906" #Looking at core of VK906 samples only > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_WFS --mapping_fp Lophelia1_map.txt --valid_states "Site:WFS1" #Looking at core of WFS1 samples only > compute_core_microbiome.py -i otu_table_final_rarefied40064.biom -o otu_core_atl2 --mapping_fp Lophelia1_map.txt --valid_states "Site:ATL" #Looking at core of Atlantic samples only; using as double check; should be identical to Location:Atl results