Create FeatureData[Sequence]
import hashlib
import tempfile
import pandas as pd
import requests
import qiime2
data = requests.get("https://www.dropbox.com/s/uqj79nepvub1cxc/tblASVtaxonomy_silva132_v4v5_filter.csv?dl=1")
with tempfile.NamedTemporaryFile() as f:
    f.write(data.content)
    f.flush()
    pd_orig_taxa = pd.read_csv(f.name)
# Use the MD5 hash of each ASV sequence as its feature ID (the same convention
# QIIME 2's denoising plugins use), and name the index so it can be recovered
# as a 'Feature ID' column later.
pd_orig_taxa.index = pd_orig_taxa['Sequence'].str.encode('ascii').apply(lambda x: hashlib.md5(x).hexdigest())
pd_orig_taxa.index.name = 'Feature ID'
pd_orig_taxa
pd_seqs = pd_orig_taxa['Sequence']
pd_seqs
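Before importing, an optional defensive check can confirm the sequences contain only unambiguous DNA characters. This is purely illustrative (it uses pandas ≥ 1.1 for str.fullmatch); widen the character class if your data legitimately contains IUPAC ambiguity codes.
# Optional sanity check (not required by QIIME 2): every sequence should be
# plain A/C/G/T before we import it.
assert pd_seqs.str.fullmatch('[ACGT]+').all()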
q2_rep_seqs = qiime2.Artifact.import_data('FeatureData[Sequence]', pd_seqs)
q2_rep_seqs
!head {str(q2_rep_seqs._archiver.path / str(q2_rep_seqs.uuid) / 'data' / 'dna-sequences.fasta')}
q2_rep_seqs.save('rep-seqs.qza')
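As an optional round-trip check, the saved artifact can be loaded back and viewed as a pandas Series; roundtrip_seqs below is just a throwaway name for this sketch.
# Optional check: Artifact.load and the pd.Series view are part of the
# QIIME 2 Artifact API; this confirms the sequences survived the import.
roundtrip_seqs = qiime2.Artifact.load('rep-seqs.qza')
roundtrip_seqs.view(pd.Series).head()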
Create FeatureData[Taxonomy]
def collate_taxa_columns(row):
    # Collapse the per-level taxonomy columns into a single QIIME 2-style
    # string such as 'k__Bacteria; p__Firmicutes; ...'. Levels are appended
    # until one falls below a 0.7 confidence cutoff; the reported Confidence
    # is that of the deepest level that passed the cutoff.
    assignments = []
    confidence = 0.0  # fallback in case even the Kingdom-level call is below the cutoff
    for taxon in ('Kingdom', 'Phylum', 'Class', 'Order', 'Family', 'Genus'):
        prefix = taxon[0].lower() + '__'
        assignment = row[taxon]
        if assignment == '<not present>':
            assignment = ''  # keep just the bare prefix, e.g. 'k__'
        assignments.append(prefix + assignment)
        confidence_col = f'Confidence{taxon}'
        if row[confidence_col] < .7:
            break
        else:
            confidence = row[confidence_col]
    new_series = pd.Series(dict(Taxon='; '.join(assignments), Confidence=confidence))
    new_series.name = row.name
    return new_series
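To see what the helper produces, here is a quick sketch on a hand-built row; the column names mirror the ones the function expects, but the values and the 'example-feature-id' name are made up. Note that the Class call is still appended (a level is added before its confidence is checked), while the reported Confidence stays at the Phylum value.
# Illustrative only: a synthetic row with made-up assignments and confidences.
example_row = pd.Series({
    'Kingdom': 'Bacteria',      'ConfidenceKingdom': 0.99,
    'Phylum': 'Firmicutes',     'ConfidencePhylum': 0.95,
    'Class': 'Clostridia',      'ConfidenceClass': 0.65,  # below the 0.7 cutoff
    'Order': 'Clostridiales',   'ConfidenceOrder': 0.60,
    'Family': '<not present>',  'ConfidenceFamily': 0.0,
    'Genus': '<not present>',   'ConfidenceGenus': 0.0,
}, name='example-feature-id')
collate_taxa_columns(example_row)
# -> Taxon: 'k__Bacteria; p__Firmicutes; c__Clostridia', Confidence: 0.95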
pd_taxonomy = pd_orig_taxa.apply(collate_taxa_columns, axis=1)
pd_taxonomy.index.name = 'Feature ID'
pd_taxonomy
q2_taxonomy = qiime2.Artifact.import_data('FeatureData[Taxonomy]', pd_taxonomy)
q2_taxonomy
!head {str(q2_taxonomy._archiver.path / str(q2_taxonomy.uuid) / 'data' / 'taxonomy.tsv')}
q2_taxonomy.save('taxonomy.qza')
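Optionally, the imported taxonomy can be viewed back as a DataFrame to confirm the Taxon and Confidence columns came through as expected (the pd.DataFrame view for FeatureData[Taxonomy] is a standard QIIME 2 transformer).
# Optional check: view the artifact's contents as a pandas DataFrame.
q2_taxonomy.view(pd.DataFrame).head()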
Create FeatureTable[Frequency]
asv_map = pd_orig_taxa['ASV'].reset_index().set_index('ASV')['Feature ID']
asv_map
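Before using this map to rename columns, a small optional assertion can confirm it is one-to-one; this is purely defensive and not part of the original workflow.
# Each ASV label should map to exactly one feature ID, and vice versa.
assert asv_map.index.is_unique and asv_map.is_unique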
data = requests.get("https://www.dropbox.com/s/r5ag9d0lwlcg91n/tblcounts_asv_wide.csv?dl=1")
with tempfile.NamedTemporaryFile() as f:
    f.write(data.content)
    f.flush()
    pd_asv_counts = pd.read_csv(f.name, index_col='ASV').transpose()
# Alternatively, read from a local copy of the file:
# pd_asv_counts = pd.read_csv('tblcounts_asv_wide.csv', index_col='ASV').transpose()
pd_asv_counts = pd_asv_counts.rename(columns=asv_map)
pd_asv_counts
q2_table = qiime2.Artifact.import_data('FeatureTable[Frequency]', pd_asv_counts)
q2_table
!biom summarize-table -i {str(q2_table._archiver.path / str(q2_table.uuid) / 'data' / 'feature-table.biom')} | head
q2_table.save('table.qza')
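As a final optional check, the table can be viewed as a pandas DataFrame (samples as rows, features as columns) and its per-sample totals compared against the biom summary above.
# Optional check: round-trip the table through the pd.DataFrame view and look
# at per-sample read totals.
q2_table.view(pd.DataFrame).sum(axis=1).describe()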