import numpy as np
import pandas as pd
import os #for getting the working directory
from Bio.PDB import * #only used for downloading PDB files
import matplotlib.pyplot as plt
from read_process_PDB import *
import seaborn as sns; sns.set(); sns.set_context('talk')


# Working directory; used below to shorten the stored .pdb paths.
directory = os.getcwd()

# One row per PDB entry to analyse.
prot = pd.DataFrame({'ID': ['1EMA','5HMP','4B50','5JVM','1BOM','6FQF','1OED']})

# Fetch every entry in PDB format and store the returned path with the
# working-directory prefix swapped for '.'.
file_names = []
for pdb_id in prot['ID']:
    local_path = PDBList().retrieve_pdb_file(pdb_id, file_format='pdb')
    file_names.append(local_path.replace(directory, '.'))
prot['file_name'] = file_names

# Keep one open file handle per downloaded file in its own column.
# NOTE: nothing in this section closes these handles.
prot['pointer'] = list(map(open, prot['file_name']))

Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/em/pdb1ema.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/hm/pdb5hmp.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/b5/pdb4b50.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/jv/pdb5jvm.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/bo/pdb1bom.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/fq/pdb6fqf.ent' 
Structure exists: '/Users/shivamchitnis/Documents/GitHub/rad-gyres/oe/pdb1oed.ent'


# Plot 1EMA, 5JVM and 5HMP (rows 0, 3 and 1 of `prot`), in that order.
for ID in (0, 3, 1):
    plot_protein(atomize(prot['pointer'][ID]), title=prot['ID'][ID])


def com(structure):
    """Return the mass-weighted centre of the atoms in ``structure``.

    Parameters
    ----------
    structure : pandas.DataFrame
        One row per atom, with an ``element`` column holding element symbols
        and numeric ``x``, ``y`` and ``z`` coordinate columns.

    Returns
    -------
    numpy.ndarray
        Length-3 array ``[x, y, z]`` of the centre of mass.

    Raises
    ------
    ValueError
        If ``element`` contains a value that has no entry in the mass table.
    """
    # Integer masses per element symbol; 'O1-' is given the same mass as 'O'.
    masses = {'N': 14, 'C': 12, 'H': 1, 'O': 16, 'S': 32, 'O1-': 16}

    weights = structure['element'].map(masses)
    unknown = weights.isna()
    if unknown.any():
        # Fail with the offending symbols instead of an opaque cast error.
        missing = sorted(set(structure.loc[unknown, 'element'].astype(str)))
        raise ValueError('no atomic mass defined for element(s): {}'.format(missing))

    weights = weights.to_numpy(dtype=float)
    coords = structure[['x', 'y', 'z']].to_numpy(dtype=float)
    # Weighted sum of the coordinates divided by the total mass.
    return weights.dot(coords) / weights.sum()


# Plot 6FQF and 1OED (rows 5 and 6 of `prot`), passing each protein's centre
# of mass to plot_protein as `point`.
for ID in (5, 6):
    # Parse the file once and reuse the result for both the centre of mass
    # and the plot, rather than running atomize() twice on the same handle.
    structure = atomize(prot['pointer'][ID])
    COM = com(structure)
    plot_protein(structure, title=prot['ID'][ID], point=COM)


def radius_of_gyration(structure,com):
    """Return the root-mean-square distance of the atoms in ``structure`` from ``com``.

    ``structure`` must provide ``x``, ``y`` and ``z`` columns; ``com`` is the
    reference point subtracted from every row. Every atom counts equally
    (no mass weighting is applied here).
    """
    offsets = (structure[['x','y','z']] - com).to_numpy()
    # Squared distance of each atom from the reference point.
    squared_distances = np.square(offsets).sum(axis=-1)
    atom_count = len(structure['x'])
    return np.sqrt(squared_distances.sum() / atom_count)


# Plot 4B50 and 1BOM (rows 2 and 4 of `prot`), passing the centre of mass as
# `point` and the radius of gyration as `sphere`.
for ID in (2, 4):
    # Parse the file and compute the centre of mass once, then reuse both,
    # instead of re-running atomize()/com() for every argument.
    structure = atomize(prot['pointer'][ID])
    centre = com(structure)
    r_g = radius_of_gyration(structure, centre)
    plot_protein(structure, title=prot['ID'][ID], point=centre, sphere=r_g)


# Parse every file once and reuse the parsed structure for both columns
# (previously each file was parsed three times).
structures = [atomize(p) for p in prot['pointer']]
# Chain length = number of atoms whose name is "CA".
prot['chain_length'] = [len(s.query('name == "CA"')) for s in structures]
# Radius of gyration about each structure's own centre of mass.
prot['Rg'] = [radius_of_gyration(s, com(s)) for s in structures]


# Radius of gyration against chain length, one point per protein.
plt.figure(figsize=(5.5,5))
plt.plot(prot['chain_length'],prot['Rg'],'o',alpha=0.5)
plt.xlabel(r'$chain\ length\ (N_{C_\alpha})$')
# prot['Rg'] holds R_G itself (radius_of_gyration already takes the square
# root), so the axis is labelled R_G rather than <R_G^2>; \AA renders the
# angstrom symbol instead of an "A" followed by a degree sign.
plt.ylabel(r'$R_G\ (\AA)$')
plt.tight_layout()
plt.show()


# Radius of gyration and "CA" atom count for every chain of every protein.
R_g, sub_len = [], []
for p in prot['pointer']:
    structure = atomize(p)
    chain_column = structure['chain']
    # factorize() lists the distinct chain labels in order of first appearance.
    for chain_id in chain_column.factorize()[1]:
        sub = structure[chain_column == chain_id]
        R_g.append(radius_of_gyration(sub, com(sub)))
        sub_len.append(len(sub.query('name == "CA"')))
# Convert the collected lists to arrays for plotting.
R_g, sub_len = np.array(R_g), np.array(sub_len)


# Radius of gyration against chain length, one point per chain (subunit).
plt.figure(figsize=(5.5,5))
plt.plot(sub_len,R_g,'o',alpha=0.5)
plt.xlabel(r'$chain\ length\ (N_{C_\alpha})$')
# R_g holds R_G itself (radius_of_gyration already takes the square root),
# so the axis is labelled R_G rather than <R_G^2>; \AA renders the angstrom
# symbol instead of an "A" followed by a degree sign.
plt.ylabel(r'$R_G\ (\AA)$')
plt.tight_layout()
plt.show()

Protein Structure — Radius of Gyration

Downloading Files

Parsing and Visualizing

1EMA

5JVM

5HMP

Centre of Mass

6FQF

1OED

Radius of Gyration

4B50

1BOM

Radius of Gyration vs. Chain Length

Subunits

Takeaways