If I understood your question correctly, you just need the sequence of each gene. You could try another parser for this purpose, i.e. SeqIO.parse(gb_file, "genbank"): the loop for record in SeqIO.parse(gb_file, "genbank"): iterates through the records. Then use a second loop over the features of each record, as follows:
from Bio import SeqIO
from Bio.SeqFeature import FeatureLocation
def extract_gene_name_seq(gb_file):
    """Collect name, strand, coordinates and sequence for every CDS feature.

    ``SeqIO.parse(gb_file, "genbank")`` takes a filename or file handle plus
    the format name and yields one record per sequence in the file.  Every
    feature of type ``"CDS"`` in each record is turned into one result tuple.
    Compound (``join(...)``) locations are extracted the same way as simple
    ones, so spliced genes are included; only features whose location is
    missing are skipped.

    Args:
        gb_file: Filename or file handle handed to ``SeqIO.parse``.

    Returns:
        A list of ``(name, strand, start, end, gene_sequence)`` tuples, one
        per CDS that could be processed.  ``name`` is the first value of the
        ``gene`` qualifier, or ``""`` when the feature has none.  When
        parsing the file fails, the error is printed and ``[]`` is returned.
    """
    gene_name_seq = []
    try:
        for record in SeqIO.parse(gb_file, "genbank"):
            for feature in record.features:
                if feature.type != "CDS":
                    continue
                try:
                    name = feature.qualifiers.get('gene', [''])[0]
                    location = feature.location
                    if location is None:
                        # Nothing to extract without a usable location.
                        print(f"Skipping feature with non-standard location: {feature}")
                        continue
                    # Simple and compound locations both provide start, end,
                    # strand and extract(), so no type check is needed here.
                    gene_sequence = location.extract(record.seq)
                    gene_name_seq.append(
                        (name, location.strand, location.start, location.end, gene_sequence)
                    )
                except (KeyError, IndexError, AttributeError, ValueError) as e:
                    # A single bad feature (e.g. one whose sequence cannot be
                    # extracted from this record) must not discard the
                    # results already collected.
                    print(f"Error processing feature: {feature}. Error: {e}")
    except Exception as e:
        # Boundary handler: callers rely on an empty list when the file
        # cannot be read or parsed.
        print(f"Error parsing GenBank file: {e}")
        return []
    return gene_name_seq
# Example usage: point gb_file at your own GenBank file before running.
gb_file = "Capsicum_annuum.gb"
gene_name_seq_data = extract_gene_name_seq(gb_file)
print('Length of gene_name_seq_data', len(gene_name_seq_data))

# An empty result means either no CDS entries were collected or the call
# reported an error, so a single fallback message covers both cases.
if not gene_name_seq_data:
    print("No gene information found or an error occurred.")
else:
    # One line per collected entry.
    for name, strand, start, end, gene_sequence in gene_name_seq_data:
        print(f"Gene: {name}, Strand {strand}, Start {start}, End {end}, Gene_sequence: {gene_sequence}")