So what I'm seeing is that your opening parenthesis/bracket is preceded by a space -- if you look for that space followed by the punctuation, maybe that will help?
So, something like this?
def extract_main_protein_name(name):
    """
    Extract the main protein name by removing descriptive parentheses/brackets
    while preserving functional ones (like ion charges).

    Everything from the first run of whitespace that is immediately followed
    by ``(`` or ``[`` is removed. Brackets attached directly to the preceding
    text, e.g. the charge in ``Ca(2+)``, are kept because no whitespace
    precedes them.

    Args:
        name: The raw protein name. Non-string values (e.g. ``None`` or the
            NaN that pandas uses for missing cells) are returned unchanged.

    Returns:
        The cleaned name with surrounding whitespace stripped, or ``name``
        itself when it is not a string.
    """
    # Missing cells arrive as None/NaN when this is applied to a column;
    # re.sub would raise TypeError on them, so pass them through untouched.
    if not isinstance(name, str):
        return name

    # Whitespace followed by an opening bracket/parenthesis marks the start of
    # descriptive content; drop from there to the end of the line.
    pattern = r'\s+[\(\[].*'
    return re.sub(pattern, '', name).strip()
# Apply the function to clean protein names: overwrite the 'Protein names'
# column with the result of calling extract_main_protein_name on each value.
# NOTE(review): assumes df is a pandas DataFrame defined earlier -- confirm.
df['Protein names'] = df['Protein names'].apply(extract_main_protein_name)