################################################################################ # # Questions? Problems? Complaints? Better Ideas? # Damian Allis, damian@somewhereville.com, www.somewhereville.com # # This script takes the double helix output from NAMOT and NAMOT2 (a and b # strands) and converts them into a format that the current ffAMBER # implementation for GROMACS can use in the generation of the GROMACS .top file. # ################################################################################ # # Generally, the following list of GROMACS runs should get you through an # energy minimization without problem. Note only 10 cations are added # to your structure. Change accordingly (or don't. It doesn't matter for # the test). # # Run these in order: # # pdb2gmx -nomerge -f DNA.pdb -o DNA_pdb2gmx.gro -p DNA_pdb2gmx.top # editconf -f DNA_pdb2gmx.gro -o DNA_editconf.gro -d 1.0 -bt triclinic # genbox -cp DNA_editconf.gro -cs -o DNA_genbox.gro -p DNA_pdb2gmx.top # grompp -f em -c DNA_genion.gro -p DNA_pdb2gmx.top -o DNA_grompp2em.tpr # genion -np 10 -norandom -pname Na -o DNA_genion.gro -s DNA_gromppem.tpr # -p DNA_pdb2gmx.top (this .top goes in the same line as the genion) # grompp -f em -c DNA_genion.gro -p DNA_pdb2gmx.top -o DNA_grompp2em.tpr2 # mdrun -s DNA_grompp2em.tpr -o DNA_md_em.trr -c DNA_md_em.pdb -v # ################################################################################ # # In case you don't have one handy, here's the contents of an em.mpd file # for use in the energy minimization test. # # Copy this content below, remove the "#", save as a text filed named # -> em.mpd # # cpp = /usr/bin/cpp # define = -DFLEXIBLE # integrator = steep # nsteps = 5000 # emtol = 10.0 # emstep = 0.01 # nstcgsteep = 100 # coulombtype = PME # rvdw = 1.0 # rlist = 1.1 # rcoulomb = 1.1 # pme_order = 4 # ewald_rtol = 1e-5 # vdwtype = shift # ns_type = grid # nstlist = 10 # ################################################################################ # # Here's the command line: # # ./NAMOT_to_ffAMBER_in_GROMACS.sed $1 $2 # # $1 = file name (including the .pdb, as I often forget to not include it) # $2 = number of the 3' base for conversion into Dn3 (n = A,T,G,C) # the number in $2 will automatically do the 3' and 5' conversion (keep the # terminal hydrogens on the PO4- groups) # ################################################################################ ################################################################################ # # The magic happens below. # ################################################################################ ################################################################################ # # First thing first, make a backup of the original pdb file in case you goof. # cp $1 $1_original # ################################################################################ # # This section converts all of the "*" with "z" so that you're not using the # asterisk during the editing. Replacing with the ffAMBER-requisite # "single-quote" (') makes the sed script more complicated than it needs to be. # sed 's/*/z/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # This section changes the nitrogen hydrogen (NH2) labels to those expected by # ffMABER. Hn2/1, where n is the atom number in the formal labeling scheme. # # THYIME is its own problem, as the methyl carbon needs modification. # This is addressed by a separate ffAMBER modification. Check # http://www.somewhereville.com/?cat=74 (my AMBER category) for details. # # # HN2A/B occurs in GUANINE. # sed 's/HN2A/ H21/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/HN2B/ H22/' $1 > $1_temp rm $1 mv $1_temp $1 # # # HN4A/B occurs in CYTOSINE. # sed 's/HN4A/ H41/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/HN4B/ H42/' $1 > $1_temp rm $1 mv $1_temp $1 # # # HN6A/B occurs in ADENINE. # sed 's/HN6A/ H61/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/HN6B/ H62/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # This section converts ADE, CYT, GUA, THY in the NAMOT output to DA, DC, DG, DT # in accord with the topology labels used by ffAMBER in GROMACS for the nucleic # acids. # # sed 's/ADE / DA /' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/CYT / DC /' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/GUA / DG /' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/THY / DT /' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # This is the bulk of the conversion, moving atoms around and formatting. # Mostly, just moving atom labels over one column to the left. This doesn't # necessarily have to be done, but conforms to pdb format better and some # program may need the atom labels in the columns as defined below. # # This section changes all of the hydrogen atom labels. # sed 's/1H5z/H5z1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/2H5z/H5z2/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/H2Az/H2z1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/H2Bz/H2z2/' $1 > $1_temp rm $1 mv $1_temp $1 # # These are global changes in column position and fix all nucleic acids. # sed 's/ P D/P D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ N9 D/N9 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ N7 D/N7 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ N6 D/N6 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ N3 D/N3 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ N1 D/N1 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ C8 D/C8 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ C6 D/C6 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ C5 D/C5 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ C4 D/C4 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ C2 D/C2 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ H8 D/H8 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ H2 D/H2 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ H3 D/H3 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ H6 D/H6 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ O3 D/O3 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ O4 D/O4 D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ O2 D/O D/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/C5M D/C7 D/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # This section converts the 5' end of both chains into ffAMBER format. Always # begins with "1". If you deleted some of the double strand at the 5' end and # the first base number is NOT 1, this script will still run but give you # a final structure that will require additional modification before running # the pdb2gmx topology generator. # ################################################################################ # # chain a ADENINE adjustment # sed 's/ DA a 1/ DA5 a 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DA5/H5T DA5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DA5/O5z DA5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DA/H3T DA3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DA/O3z DA3/' $1 > $1_temp rm $1 mv $1_temp $1 # # chain b ADENINE adjustment # sed 's/ DA b 1/ DA5 b 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DA5/H5T DA5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DA5/O5z DA5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DA/H3T DA3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DA/O3z DA3/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # chain a THYMINE adjustment # sed 's/ DT a 1/ DT5 a 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DT5/H5T DT5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DT5/O5z DT5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DT/H3T DT3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DT/O3z DT3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG a 1/ DG5 a 1/' $1 > $1_temp rm $1 mv $1_temp $1 # # chain b THYMINE adjustment # sed 's/ DT b 1/ DT5 b 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DT5/H5T DT5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DT5/O5z DT5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DT/H3T DT3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DT/O3z DT3/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # chain a GUANINE adjustment # sed 's/ HB DG5/H5T DG5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DG5/O5z DG5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DG/H3T DG3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DG/O3z DG3/' $1 > $1_temp rm $1 mv $1_temp $1 # # chain b GUANINE adjustment # sed 's/ DG b 1/ DG5 b 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DG5/H5T DG5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DG5/O5z DG5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DG3/H3T DG3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DG3/O3z DG3/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # chain a CYTOSINE adjustment # sed 's/ DC a 1/ DC5 a 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DC5/H5T DC5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DC5/O5z DC5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DC/H3T DC3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DC/O3z DC3/' $1 > $1_temp rm $1 mv $1_temp $1 # # chain b CYTOSINE adjustment # sed 's/ DC b 1/ DC5 b 1/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HB DC5/H5T DC5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O5T DC5/O5z DC5/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ HE DC3/H3T DC3/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/O3T DC3/O3z DC3/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # This section changes the last base in each chain (a and b) from the default # "Dn" to "Dn3" so that the topology generation gets the 3' end correct. # Goes by units, tens, hun, thou and searches specifically for the pattern # in question (taking care to follow the standard format for base number. # # NOTE: We do the junction, crossover, etc. generation outside of NAMOT. # Therefore, each file output by NAMOT only has chain "a" and chain "b". # ################################################################################ # # changes the 3' strand if the length is from 1 to 9 (units) # strand 1/a # sed 's/ DA a '$2'/DA3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC a '$2'/DC3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG a '$2'/DG3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT a '$2'/DT3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # # changes the 3' strand if the length is from 1 to 9 (units) # strand 2/b # sed 's/ DA b '$2'/DA3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC b '$2'/DC3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG b '$2'/DG3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT b '$2'/DT3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # changes the 3' strand if the length is from 10 to 99 (tens) # strand 1/a # sed 's/ DA a '$2'/DA3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC a '$2'/DC3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG a '$2'/DG3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT a '$2'/DT3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # # changes the 3' strand if the length is from 10 to 99 (tens) # strand 2/b # sed 's/ DA b '$2'/DA3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC b '$2'/DC3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG b '$2'/DG3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT b '$2'/DT3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # changes the 3' strand if the length is from 100 to 999 (hund) # strand 1/a # sed 's/ DA a '$2'/DA3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC a '$2'/DC3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG a '$2'/DG3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT a '$2'/DT3 a '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # # changes the 3' strand if the length is from 100 to 999 (hund) # strand 2/b # sed 's/ DA b '$2'/DA3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC b '$2'/DC3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG b '$2'/DG3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT b '$2'/DT3 b '$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # changes the 3' strand if the length is from 1000 to 9999 (thou) # strand 1/a # sed 's/ DA a'$2'/DA3 a'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC a'$2'/DC3 a'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG a'$2'/DG3 a'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT a'$2'/DT3 a'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # # changes the 3' strand if the length is from 1000 to 9999 (thou) # strand 2/b # sed 's/ DA b'$2'/DA3 b'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DC b'$2'/DC3 b'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DG b'$2'/DG3 b'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 sed 's/ DT b'$2'/DT3 b'$2'/' $1 > $1_temp rm $1 mv $1_temp $1 # ################################################################################ # # Home stretch. Changes all of the "z" atoms in the pdb file to ' (single- # quotes) for ffMABER. # sed s/\z/\'/g $1 > $1_temp rm $1 mv $1_temp $1_proper_pdb # # ################################################################################ # # Questions? Problems? Complaints? Better Ideas? # Damian Allis, damian@somewhereville.com, www.somewhereville.com # ################################################################################