!=============================================================================================
! Automatic state decomposition algorithm for the construction of Markov / master equation
! models.
!
! Written by John D. Chodera <jchodera@gmail.com>, Dill lab, UCSF, 2006.
!
! Copyright (c) 2006 The Regents of the University of California. All Rights Reserved.
!
! This program is free software; you can redistribute it and/or modify it under the terms of
! the GNU General Public License as published by the Free Software Foundation; either version 2
! of the License, or (at your option) any later version.
!
! This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
! without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
! See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with this program;
! if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
! Boston, MA 02110-1301, USA.
!=============================================================================================
! TODO:
! - Add an option to the XML control file to NOT perform least-squares alignment before
!   computation of the RMSD.  This could be useful for certain systems.
! - Compute state normalized fluctuation autocorrelation functions within decompose after each
!   iteration, and report an estimate of the number of independent transitions out of each
!   microstate and macrostate.  This correlation time is also an upper bound on the internal
!   equilibration time of each state.
! - Use the estimate of independent transitions out of each state to lump microstates that have
!   been split too small.
! - Compute a lower-bound estimate of the internal equilibration time from the transition matrix
!   constructed from the restriction to each macrostate.
! - Also write the eigenvalues mu, for reference, and the theoretical and achieved
!   metastabilities as a function of iteration.
! - Write the current and optimal metastability of the splitting and lumping stages as a
!   function of iteration, for convenience.
! - For MCSA lumping, can we speed things up by only computing the diagonal elements of the
!   lumped transition matrix?
! - Replace the eigensolver used for eigs with an efficient method like ARPACK, using sparse
!   matrices and specialized sparse matrix-vector multiplication routines?  This will require
!   changing the transition matrix computation routines to generate sparse matrices instead of
!   full ones.
! - Split off the k-medoid routine for easy reuse.
! - Change from using 'rmsd_mode' to allocating/defining 'rmsd_atomindices(:)' for use in
!   computeDistance at the beginning of each iteration?  The current method appears to involve
!   creating many temporary arrays, which may incur a performance hit.
! - Eliminate temporary copies in the distance calculation for efficiency.
! - Can we pass snapshots directly, rather than indices of snapshots?
! - Improve the efficiency of the OpenMP parallelization of the updateGenerators code.
! - Generalize the splitting and lumping subroutines to allow the user to focus on splitting
!   particular states more finely?
!=============================================================================================
! PUBLIC SUBROUTINES:
! * initialize
!     Parses the control file, initializes the various data structures, and loads trajectories.
!
! * iterate
!     Performs cycles of splitting and lumping to refine the initial state decomposition.
!
! * finalize
!     Frees allocated data.
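!=============================================================================================
! EXAMPLE USAGE (illustrative sketch only, not part of the module):
!
! A minimal driver might look like the following, assuming 'iterate' takes no arguments (its
! interface is declared later in this module) and that the control file is named 'control.xml'
! (hypothetical filename):
!
!   program run_decompose
!     use decompose, only : initialize, iterate, finalize
!     implicit none
!     call initialize('control.xml')   ! parse control file, load trajectories
!     call iterate()                   ! perform splitting/lumping iterations
!     call finalize()                  ! free allocated data
!   end program run_decompose
!
! The XML control file is parsed in 'initialize' below; a minimal example might contain
! elements such as the following (values are illustrative only, wrapped in whatever root
! element your control files use):
!
!   <title>my system</title>
!   <reference_pdb_filename>reference.pdb</reference_pdb_filename>
!   <trajectory_index_filename>trajectory-index</trajectory_index_filename>
!   <trajectory_format>netCDF</trajectory_format>
!   <niterations>10</niterations>
!   <tau>1</tau>
!   <target_microstate_size>100</target_microstate_size>
!   <target_nmacrostates>10</target_nmacrostates>
!   <output_directory>output</output_directory>
!=============================================================================================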
!============================================================================================= !============================================================================================= ! A module to store simulation data and perform decomposition operations on state space. !============================================================================================= module decompose use numeric_kinds ! for precision use pdbio, only : pdbatom_t ! PDB atom type, for storage of PDB file metadata use transitionmatrix, only : trajectory_t ! Trajectory type use constants ! global constants implicit none ! TODO: Sort out permissions. !private public :: initialize, iterate, finalize, readStateAssignments, writeTimescales !============================================================================================= ! Constants. !============================================================================================= logical, parameter :: debug = .TRUE. ! Flag to enable printing of debugging information. integer, parameter :: MAX_PCDATA_LENGTH = 1024 !============================================================================================= ! Data types. !============================================================================================= !============================================================================================= ! Private module data. !============================================================================================= ! Molecular topology data. integer :: natoms ! total number of atoms per snapshot integer, dimension(:), pointer :: ca_atomindices ! alpha carbon atom indices integer, dimension(:), pointer :: heavy_atomindices ! heavy atom and polar hydrogen atom indices (excluding symmetry-related atoms) integer, dimension(:), pointer :: bb_atomindices ! backbone atom atomindices integer, dimension(:), pointer :: user_atomindices ! user-specified atom indices integer, dimension(:), pointer :: ls_align_atomindices ! atom indices to use when aligning structures for writing PDBs ! Reference PDB data for reading and writing of PDB files. type(pdbatom_t), dimension(:), allocatable :: pdbatoms ! atom information from PDB file so that PDB files can be written real(sp), dimension(:,:), allocatable :: reference_snapshot ! reference snapshot for aligning all snapshots upon writing ! Snapshot (conformation) storage data. integer :: nsnapshots ! total number of snapshots real(sp), dimension(:,:,:), allocatable :: snapshots ! snapshots(i,n,k) is dimension k of atom n of snapshot i ! Trajectory (temporal connectivity of snapshots) data. ! NOTE: Trajectories are stored as contiguous sets of conformations in 'snapshots'. ! See transitionmatrix.f90 for a definition of type(trajectory_t). integer :: ntrajectories ! number of trajectories type(trajectory_t), dimension(:), allocatable :: trajectories ! Trajectory data. ! Distance metric data. ! integer, dimension(:), allocatable :: rmsd_atomindices ! atom indices to use for computing RMSD integer, parameter :: allatom_rmsd = 1, ca_rmsd = 2, backbone_rmsd = 3, heavyatom_rmsd = 4, user_rmsd = 5 integer :: rmsd_mode = allatom_rmsd ! choice of above mode for rmsd ! Run parameters. character(len = MAX_PCDATA_LENGTH) :: title ! the title of the system integer :: tau ! observation interval for computing transition matrix for lumping real(sp) :: tau_unit ! unit timestep (in ps or ns) integer :: niterations ! number of iterations of splitting and lumping to carry out integer :: iteration ! 
current iteration of splitting and lumping integer :: resume_from_iteration ! specified iteration to resume from, or -1 if none specified integer :: target_microstate_size ! the target size (number of snapshots) that is used in determining how many states to split to integer :: max_microstates_per_macrostate_first_iteration ! the maximum number of microstates to split each macrostate into, or no limit if negative integer :: max_microstates_per_macrostate ! the maximum number of microstates to split each macrostate into, or no limit if negative integer :: rmsd_mode_first_iteration ! RMSD mode to use in first iteration integer :: rmsd_mode_subsequent_iterations ! RMSD mode to use in subsequent iterations logical :: full_kmedoid_update ! if .true., the exact data point that minimizes the intracluster variance is used -- otherwise, stochastic update with ntries = ngenerators is used. logical :: use_timereversed ! if .true., will use time-reversed trajectories integer :: target_nmacrostates ! target number of macrostates integer :: mcsa_ntrials ! number of MC/SA lumping optimization runs per lump step integer :: mcsa_nsteps ! number of steps per MC/SA lumping optimization run character(len=MAX_FILENAME_LENGTH) :: output_directory ! directory to write output files to integer :: max_tau ! maximum lag time to write timescales for integer :: tau_step ! lag time step to use integer :: max_ntimescales ! maximum number of timescales of interest (may be less) real(sp) :: timescales_confidence_interval ! confidence interval to use in computing confidence bounds on timescales integer :: timescales_bootstrap_ntrials ! number of bootstrap trials to use in computing timescales logical :: use_ev_initial_lumping ! flag to set whether eigenvector scheme is to be used for initial lumping real(sp) :: generator_refinement_fraction ! fraction of snapshots from 0 to 1 to use for generator refinement integer :: generator_refinement_minsize ! minimum number of snapshots per microstate to use in generator refinement logical :: write_split_timescales ! .true. if we should write timescales each iteration for splitting logical :: write_merged_timescales ! .true. if we should write timescales each iteration for lumping logical :: write_merged_eigenvectors ! .true. if we should write merged eigenvectors after lumping logical :: write_split_pdbs ! .true. if we should write state PDB files each iteration after splitting integer :: split_pdb_nmodels ! number of models to write logical :: write_merged_pdbs ! .true. if we should write state PDB files each iteration after merging integer :: merged_pdb_nmodels ! number of models to write integer :: state_pdb_nmodels ! the number of models to be included in each state PDB file logical :: write_split_table ! .true. if a translation of lumped states -> split states is to be written logical :: write_lump_table ! .true. if a translation of split states -> lumped states is to be written real(sp) :: kmedoid_fraction ! fraction of data to use in k-medoid iterations integer :: kmedoid_iterations ! number of iterations of k-medoids to conduct (can be zero) ! State assignment data. integer :: nmicrostates ! number of microstates integer, dimension(:), allocatable :: microstate_assignments ! microstate assignments for each snapshot, each having the snapshot index of the generator integer, dimension(:), allocatable :: compacted_microstate_assignments ! microstate assignments for each snapshot, from 1...nmicrostates integer, dimension(:), pointer :: microstates ! 
unique ids of microstates integer :: nmacrostates ! number of macrostates integer, dimension(:), allocatable :: macrostate_assignments ! macrostate assignments for each snapshot, each having the snapshot index of a member of the state integer, dimension(:), allocatable :: compacted_macrostate_assignments ! macrostate assignments for each snapshot, from 1...nmacrostates integer, dimension(:), pointer :: macrostates ! unique ids of macrostates contains !============================================================================================= ! Read reference PDB file, determining number of atoms, saving atom metadata, and storing reference snapshot. ! ! After this call, the following module data is set: ! natoms, pdbatoms, reference_snapshot !============================================================================================= subroutine readReferencePDB(reference_pdb_filename) use pdbio ! for PDB I/O ! Parameters. character(len=*), intent(in) :: reference_pdb_filename ! The filename of the reference PDB file. ! Local variables. integer :: nmodels ! number of models in the PDB file real(sp), dimension(:,:,:), allocatable :: pdbmodels ! models read from the PDB file ! Query for number of atoms and models. call pdb_read(reference_pdb_filename, natoms=natoms, nmodels=nmodels) if(debug) print *, 'The reference PDB files contains ', natoms, ' atoms, and ', nmodels, ' models.' ! Allocate storage for atom records (metadata) and model coordinates. allocate(pdbatoms(natoms),pdbmodels(nmodels,natoms,3)) ! Read atom records and model coordinates. call pdb_read(reference_pdb_filename, atoms=pdbatoms, models=pdbmodels) ! Store the first model as the reference snapshot. ! TODO: Average the models instead? allocate(reference_snapshot(natoms,3)) reference_snapshot = pdbmodels(1,:,:) ! Free model coordinates. deallocate(pdbmodels) end subroutine readReferencePDB !============================================================================================= ! Print out which atoms have been selected by the given mask. !============================================================================================= subroutine showMask(mask) ! Parameters. logical, dimension(natoms), intent(in) :: mask ! Local variables. integer :: n do n = 1,natoms if(mask(n)) then write(*,'(I5,1X,A3,1X,I5,1X,A4,1X,A1)') & pdbatoms(n)%serial, pdbatoms(n)%resName, pdbatoms(n)%resSeq, pdbatoms(n)%name, '*' else write(*,'(I5,1X,A3,1X,I5,1X,A4,1X,A1)') & pdbatoms(n)%serial, pdbatoms(n)%resName, pdbatoms(n)%resSeq, pdbatoms(n)%name, ' ' end if end do end subroutine showMask !============================================================================================= ! Set up atom indices lists used for computing RMSD, LS-RMSD alignments, etc. ! ! NOTE: This subroutine is fairly ugly because these sorts of selection operations are inelegant ! in Fortran 90. Is there a better way to do this? !============================================================================================= subroutine setupAtomIndices() use utilities, only : find ! Local variables. logical, dimension(natoms) :: mask integer :: nindices integer :: n ! alpha carbons (CA) or terminal methyl groups (CH3) mask = ((pdbatoms%name == " CA ") .or. (pdbatoms%name == " CH3")) ! Exclude ions. mask = mask .and. .not. (pdbatoms%name == "Cl- " .or. pdbatoms%name == "Na+ ") ! Select. ca_atomindices => find(mask) if(debug) then print *, 'Selecting alpha carbons:' call showMask(mask) print *, 'atom indices: ', ca_atomindices end if ! 
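! NOTE: PDB atom names are fixed-width four-character fields (e.g. " CA ", " CH3"), which is
! why the comparisons in this subroutine use space-padded literals; Fortran blank-pads the
! shorter operand in character comparisons, so padded and unpadded forms compare equally.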
! backbone atoms (including H and O)
    mask = ((pdbatoms%name == " CA ") .or. (pdbatoms%name == " CH3") .or. (pdbatoms%name == " N ") &
      .or. (pdbatoms%name == " C ") .or. (pdbatoms%name == " O ") .or. (pdbatoms%name == " H "))

    ! Exclude ions.
    mask = mask .and. .not. (pdbatoms%name == "Cl- " .or. pdbatoms%name == "Na+ ")

    ! Select.
    bb_atomindices => find(mask)
    if(debug) then
       print *, 'Selecting backbone atoms:'
       call showMask(mask)
       print *, 'atom indices: ', bb_atomindices
    end if

    ! heavy atoms plus polar hydrogens
    ! select all nonhydrogens
    mask = ((pdbatoms%name(2:2) /= 'H'))

    ! Exclude ions.
    mask = mask .and. .not. (pdbatoms%name == "Cl- " .or. pdbatoms%name == "Na+ ")

    ! remove symmetry-related heavy atoms
    ! (symmetric pairs: VAL CG1/CG2, LEU CD1/CD2, PHE/TYR CD1/CD2 and CE1/CE2, GLU OE1/OE2,
    !  ASP OD1/OD2, HIP ND1/NE2, ARG NH1/NH2, and the C-terminal O/OXT)
    mask = mask .and. .not. &
         (     (pdbatoms%resName == "VAL" .and. (pdbatoms%name(1:3) == " CG")) &
          .or. (pdbatoms%resName == "LEU" .and. (pdbatoms%name(1:3) == " CD")) &
          .or. (pdbatoms%resName == "PHE" .and. (pdbatoms%name(1:3) == " CD" .or. pdbatoms%name(1:3) == " CE")) &
          .or. (pdbatoms%resName == "TYR" .and. (pdbatoms%name(1:3) == " CD" .or. pdbatoms%name(1:3) == " CE")) &
          .or. (pdbatoms%resName == "GLU" .and. (pdbatoms%name == " OE1" .or. pdbatoms%name == " OE2")) &
          .or. (pdbatoms%resName == "ASP" .and. (pdbatoms%name == " OD1" .or. pdbatoms%name == " OD2")) &
          .or. (pdbatoms%resName == "HIP" .and. (pdbatoms%name == " ND1" .or. pdbatoms%name == " NE2")) &
          .or. (pdbatoms%resName == "ARG" .and. (pdbatoms%name == " NH1" .or. pdbatoms%name == " NH2")) &
          .or. (pdbatoms%resSeq == pdbatoms(natoms)%resSeq .and. (pdbatoms%name == " O " .or. pdbatoms%name == " OXT")) )

    ! add in polar hydrogens
    mask = mask .or. &
         (     (pdbatoms%resName == "HID" .and. pdbatoms%name == " HD1") &
          .or. (pdbatoms%resName == "HIE" .and. pdbatoms%name == " HE1") &
          .or. (pdbatoms%resName == "SER" .and. pdbatoms%name == " HG ") &
          .or. (pdbatoms%resName == "THR" .and. pdbatoms%name == " HG ") &
          .or. (pdbatoms%resName == "TYR" .and. pdbatoms%name == " HH ") &
          .or. (pdbatoms%resName == "TRP" .and. pdbatoms%name == " HE1") )

    ! Select.
    heavy_atomindices => find(mask)
    if(debug) then
       print *, 'Selecting heavy atoms and polar hydrogens, excluding symmetry-related atoms:'
       call showMask(mask)
       print *, 'atom indices: ', heavy_atomindices
    end if

  end subroutine setupAtomIndices

  !=============================================================================================
  ! Read trajectories indexed in a trajectory-index file.
  !
  ! Trajectories can either be stored as AMBER netCDF trajectories or as multiple models in
  ! a PDB file.  The parameter trajectory_format should be set to either 'netCDF' or 'PDB'.
  !
  ! This subroutine sets the following module data:
  !   ntrajectories, trajectories, nsnapshots, snapshots
  !=============================================================================================
  subroutine readTrajectories(trajectory_index_filename, trajectory_format)

    use pdbio    ! for automatic unit number determination and PDB file reading
    use netcdfio ! for netCDF trajectory reading
    use timer    ! for timing info

    ! Parameters.
    character(len=*), intent(in) :: trajectory_index_filename ! name of trajectory index file
    character(len=*), intent(in) :: trajectory_format         ! trajectory format -- one of 'netCDF' or 'PDB'

    ! Local variables.
    integer :: iunit                                          ! unit number for reading file
    character(len=MAX_FILENAME_LENGTH) :: line                ! an entire line from the index file
    character(len=MAX_FILENAME_LENGTH) :: trajectory_filename ! filename of current trajectory
    integer :: trajectory_index                               !
Current trajectory index real(dp) :: log_weight ! Log weight of current trajectory integer :: trajectory_length ! Current length (in snapshots) of trajectory integer :: initial_snapshot, final_snapshot ! Indices of first and last snapshot from the trajectory in 'snapshots' type(netcdfio_t) :: nc ! netcdf file handle ! Open trajectory index file. if(debug) print *, 'opening trajectory index file ', trim(trajectory_index_filename) call resetTimer() call new_unit(iunit) open(unit=iunit, file=trajectory_index_filename, status='OLD', err=29, position='REWIND', & form='FORMATTED', action='READ') ! Read trajectory file once to determine number of trajectories. ntrajectories = 0 do ! Read a line. read(unit=iunit, fmt='(a)', end=20, err=29) line ! Extract weight and trajectory filename. read(line, fmt = '(F16.8,1X,A)') log_weight, trajectory_filename ! Increment trajectory counter. ntrajectories = ntrajectories + 1 end do 20 continue if(debug) print *, 'ntrajectories = ', ntrajectories ! Rewind file. rewind(unit=iunit) ! Allocate trajectory storage. allocate(trajectories(ntrajectories)) ! Read and store trajectory file information, and count number of snapshots. trajectory_index = 1 nsnapshots = 0 do ! Read a line. read(unit=iunit, fmt='(a)', end=21, err=29) line ! Extract weight and trajectory filename. read(line, fmt = '(F16.8,1X,A)') log_weight, trajectory_filename ! Query trajectory file for length of trajectory. if(trim(trajectory_format) == 'netCDF') then nc = netcdf_open(trim(trajectory_filename), nframes=trajectory_length) call netcdf_close(nc) elseif(trim(trajectory_format) == 'PDB') then ! TODO: As a check, make sure number of atoms matches file. call pdb_read(trim(trajectory_filename), nmodels=trajectory_length) else write(*,*) 'Trajectory format "', trim(trajectory_format), '" not supported.' stop end if ! Store trajectory start and stop indices. trajectories(trajectory_index)%filename = trajectory_filename trajectories(trajectory_index)%log_weight = log_weight trajectories(trajectory_index)%length = trajectory_length if (trajectory_index == 1) then trajectories(trajectory_index)%start = 1 else trajectories(trajectory_index)%start = trajectories(trajectory_index-1)%start + trajectories(trajectory_index-1)%length end if ! Accumulate total number of snapshots. nsnapshots = nsnapshots + trajectory_length ! Increment trajectory counter. trajectory_index = trajectory_index + 1 end do 21 continue ! Close file. close(iunit) if(debug) write(*,*) readTimer(), ' seconds elapsed.' ! Allocate storage for snapshots. if(debug) write(*,*) 'Allocating ', (nsnapshots*natoms*3*4/(1024.0*1024.0)), ' MB for storage for ', nsnapshots, ' snapshots...' call resetTimer() allocate(snapshots(nsnapshots,natoms,3)) ! Read and store snapshots. ! NOTE: This section could be parallelized if netCDF was thread-safe for opening and reading separate files. ! NOTE: We could potentially try opening all the trajectory files first, storing their 'nc' entries in an array, and then read them in parallel... if(debug) write(*,*) 'Reading ', nsnapshots,' snapshots from ', ntrajectories, ' trajectories...' do trajectory_index = 1,ntrajectories ! Extract relevant information from trajectory data structure. trajectory_filename = trajectories(trajectory_index)%filename initial_snapshot = trajectories(trajectory_index)%start final_snapshot = initial_snapshot + trajectories(trajectory_index)%length - 1 ! Read snapshots from trajectory into their appropriate position in 'snapshots'. 
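! NOTE: Each line of the trajectory-index file parsed above is read with format (F16.8,1X,A):
! a log weight in the first 16 columns, a space, then the trajectory filename.  For example
! (hypothetical filename):
!       0.00000000 trajectories/traj0001.nc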
if(trim(trajectory_format) == 'netCDF') then nc = netcdf_open(trim(trajectory_filename)) call netcdf_read(nc, snapshots(initial_snapshot:final_snapshot,:,:)) call netcdf_close(nc) elseif(trim(trajectory_format) == 'PDB') then call pdb_read(trim(trajectory_filename), models = snapshots(initial_snapshot:final_snapshot,:,:)) else write(*,*) 'Trajectory format "', trim(trajectory_format), '" not supported.' stop end if end do if(debug) write(*,*) readTimer(), ' seconds elapsed.' return ! ERROR in read. Execution should only reach here upon error. 29 continue write(*,*) 'decompose: readTrajectories: Error opening ', trim(trajectory_index_filename) stop end subroutine readTrajectories !============================================================================================= ! Write trajectories in "old style" trajectory file format. !============================================================================================= subroutine writeTrajectoriesOldFormat(filename) use pdbio, only : new_unit ! Parameters. character(len=*), intent(in) :: filename ! Local variables. integer :: iunit ! unit number for reading file character(len=MAX_LINE_LENGTH) :: line, text ! an entire line from the PDB file integer :: trajectory_index ! Current trajectory index integer :: trajectory_length ! Current length (in snapshots) of trajectory integer :: initial_snapshot, final_snapshot ! Indices of first and last snapshot from the trajectory in 'snapshots' integer :: i integer :: snapshot_index ! Open trajectory index file. if(debug) print *, 'opening trajectories output ', trim(filename) call new_unit(iunit) open(unit=iunit, file=filename, status='REPLACE', err=39, position='REWIND', & form='FORMATTED', action='WRITE') ! Loop over trajectories. snapshot_index = 1 do trajectory_index = 1,ntrajectories ! Construct line. write(text,'(I12)') trajectory_index write(iunit,'(A)',advance='no') trim(adjustl(text)) write(iunit,'(A)',advance='no') ' 1' do i = 1,trajectories(trajectory_index)%length write(text,'(I12)') snapshot_index write(iunit,'(A)',advance='no') ' '//trim(adjustl(text)) snapshot_index = snapshot_index + 1 end do write(iunit,'(A)') '' end do ! Close file. close(iunit) return ! ERROR in read. Execution should only reach here upon error. 39 continue write(*,*) 'decompose: writeTrajectoriesOldFormat: Error opening ', trim(filename) stop end subroutine writeTrajectoriesOldFormat !============================================================================================= ! If tag is present, set flag to .true., otherwise is .false. !============================================================================================= subroutine get_xml_flag(fxml, key, flag) use flib_xpath ! for XML files ! Parameters. type(xml_t), intent(inout) :: fxml ! XML file. character(len=*), intent(in) :: key ! Key logical, intent(out) :: flag ! Local variables. integer :: status character(len=MAX_PCDATA_LENGTH) :: pcdata ! Assign default. flag = .false. ! Rewind. call rewind_xmlfile(fxml) ! Parse. call get_node(fxml, path="//"//trim(key), pcdata=pcdata, status=status) if(status >= 0) flag = .true. if(debug) print *, key, ' = ', flag end subroutine get_xml_flag !============================================================================================= ! Extract an integer value from XML file. !============================================================================================= subroutine get_xml_integer(fxml, key, value, default) use flib_xpath ! for XML files ! Parameters. type(xml_t), intent(inout) :: fxml ! 
XML file. character(len=*), intent(in) :: key ! Key integer, intent(out) :: value integer, intent(in) :: default ! Local variables. integer :: status character(len=MAX_PCDATA_LENGTH) :: pcdata ! Assign default. value = default ! Rewind. call rewind_xmlfile(fxml) ! Parse. call get_node(fxml, path="//"//trim(key), pcdata=pcdata, status=status) if(status >= 0) read(pcdata,'(I16)') value if(debug) print *, key, ' = ', value end subroutine get_xml_integer !============================================================================================= ! Extract an integer value from XML file. !============================================================================================= function get_xml_integers(fxml, key, default, nmax) result(value) use flib_xpath ! for XML files ! Parameters. type(xml_t), intent(inout) :: fxml ! XML file. character(len=*), intent(in) :: key ! Key integer, dimension(:), intent(in) :: default integer, intent(in) :: nmax ! maximum possible size for integer array ! Return variable. integer, dimension(:), pointer :: value ! Local variables. integer :: status character(len=MAX_PCDATA_LENGTH) :: pcdata integer, dimension(nmax) :: temparray integer :: nelements ! Rewind. call rewind_xmlfile(fxml) ! Parse. call get_node(fxml, path="//"//trim(key), pcdata=pcdata, status=status) if(status >= 0) then nelements = 0 call build_data_array(pcdata, temparray, nelements) allocate(value(nelements)) value = temparray else allocate(value(size(default,1))) value = default end if if(debug) print *, key, ' = ', value end function get_xml_integers !============================================================================================= ! Extract an integer value from XML file. !============================================================================================= subroutine get_xml_realsp(fxml, key, value, default) use flib_xpath ! for XML files ! Parameters. type(xml_t), intent(inout) :: fxml ! XML file. character(len=*), intent(in) :: key ! Key real(sp), intent(out) :: value real(sp), intent(in) :: default ! Local variables. integer :: status character(len=MAX_PCDATA_LENGTH) :: pcdata ! Assign default. value = default ! Rewind. call rewind_xmlfile(fxml) ! Parse. call get_node(fxml, path="//"//trim(key), pcdata=pcdata, status=status) if(status >= 0) read(pcdata,'(F16.8)') value if(debug) print *, key, ' = ', value end subroutine get_xml_realsp !============================================================================================= ! Extract a string from an XML file. !============================================================================================= subroutine get_xml_string(fxml, key, value, default) use flib_xpath ! for XML files ! Parameters. type(xml_t), intent(inout) :: fxml ! XML file. character(len=*), intent(in) :: key ! Key character(len=*), intent(out) :: value character(len=*), intent(in) :: default ! Local variables. integer :: status character(len=MAX_PCDATA_LENGTH) :: pcdata ! Assign default. value = default ! Rewind. call rewind_xmlfile(fxml) ! Parse. call get_node(fxml, path='//'//trim(key), pcdata=pcdata, status=status) if(status == 0) value = pcdata if(debug) print *, key, ' = ', trim(value) end subroutine get_xml_string function determine_rmsd_mode(string) result(rmsd_mode) ! Parameters. character(len=*), intent(in) :: string ! Return value. integer :: rmsd_mode if(string(1:2) .eq. 'ca') then rmsd_mode = ca_rmsd elseif(string(1:8) .eq. 'backbone') then rmsd_mode = backbone_rmsd elseif(string(1:) .eq. 
'allatom') then rmsd_mode = allatom_rmsd elseif(string(1:9) .eq. 'heavyatom') then rmsd_mode = heavyatom_rmsd elseif(string(1:4) .eq. 'user') then rmsd_mode = user_rmsd else write(*,*) 'Unrecognized RMSD option: ', string stop end if end function determine_rmsd_mode !============================================================================================= ! Check to make sure no states are assigned to be zero. !============================================================================================= subroutine checkstates(stateassignments) ! Paramters. integer, dimension(:), intent(in) :: stateassignments ! Local variables. integer :: i if(any(stateassignments==0)) then write(*,*) 'some states assigned state 0!' do i = 1,size(stateassignments,1) write(19,'(I8,X,I8)') i, stateassignments(i) end do stop end if end subroutine checkstates !============================================================================================= ! Initialize the algorithm. ! ! Read reference PDB data, setup atom indices lists, read snapshot data. !============================================================================================= subroutine initialize(control_filename, nocheck) use netcdfio ! for netCDF snapshot I/O use utilities, only : unique use flib_xpath ! for XML files ! Parameters. character(len=*), intent(in) :: control_filename ! name of XML control file logical, intent(in), optional :: nocheck ! if .TRUE., no checking of input states is done ! Local variables. character(len=MAX_FILENAME_LENGTH) :: filename ! Temporary filename storage. type(xml_t) :: fxml ! XML file. type(dictionary_t) :: attributes ! storage for attributes associated with a node integer :: status ! integer indicating success or failure of xmlf90 call character(len=MAX_PCDATA_LENGTH) :: pcdata ! storage for node data character(len=MAX_PCDATA_LENGTH) :: trajectory_format ! trajectory format type logical :: no_timereversed ! If .true., time-reversed trajectories will not be included. ! Open XML control file. if(debug) write(*,*) 'Opening XML control file ', trim(control_filename) call open_xmlfile(control_filename, fxml, status) ! DEBUG. ! call enable_debug(sax=.false.) ! Get title. call get_xml_string(fxml, 'title', title, '') ! Set target microstate sizes. call get_xml_integer(fxml, 'target_microstate_size', target_microstate_size, 100) call get_xml_integer(fxml, 'max_microstates_per_macrostate_first_iteration', max_microstates_per_macrostate_first_iteration, -1) call get_xml_integer(fxml, 'max_microstates_per_macrostate', max_microstates_per_macrostate, -1) ! Set splitting parameters. call get_xml_realsp(fxml, 'kmedoid_fraction', kmedoid_fraction, 1.0) if(kmedoid_fraction <= 0 .or. kmedoid_fraction > 1) then write(*,*) 'kmedoid_fraction must be in interval (0,1].' stop end if call get_xml_integer(fxml, 'kmedoid_iterations', kmedoid_iterations, 5) call get_xml_string(fxml, 'rmsd_mode_first_iteration', pcdata, 'ca') rmsd_mode_first_iteration = determine_rmsd_mode(pcdata) call get_xml_string(fxml, 'rmsd_mode_subsequent_iterations', pcdata, 'heavyatom') rmsd_mode_subsequent_iterations = determine_rmsd_mode(pcdata) call get_xml_flag(fxml, 'full_kmedoid_update', full_kmedoid_update) ! Set lumping parameters. 
call get_xml_integer(fxml, 'tau', tau, 1) call get_xml_realsp(fxml, 'tau_unit', tau_unit, 1.0) call get_xml_flag(fxml, 'use_ev_initial_lumping', use_ev_initial_lumping) call get_xml_integer(fxml, 'mcsa_ntrials', mcsa_ntrials, 20) call get_xml_integer(fxml, 'mcsa_nsteps', mcsa_nsteps, 20000) call get_xml_integer(fxml, 'target_nmacrostates', target_nmacrostates, 10) ! Set output directory. call get_xml_string(fxml, 'output_directory', output_directory, 'output') ! Set writing of splitting and lumping tables. write_split_table = .true. write_lump_table = .true. ! Read reference PDB file, storing atom metadata (for atom indices generation, PDB writing), ! and storing reference snapshot (for aligning states written to PDB files). call get_xml_string(fxml, 'reference_pdb_filename', filename, 'reference.pdb') if(debug) write(*,*) 'Reading reference PDB file ', trim(filename) call readReferencePDB(filename) ! Set up atom indices. call setupAtomIndices() ! Set up user-specified RMSD atom indices. user_atomindices => get_xml_integers(fxml, 'user_atomindices', heavy_atomindices, natoms) ! Set up atom indices for alignment. ls_align_atomindices => get_xml_integers(fxml, 'ls_align_atomindices', bb_atomindices, natoms) ! Read MD trajectories, setting up both trajectory and snapshot information. call get_xml_string(fxml, 'trajectory_index_filename', filename, 'trajectory-index') call get_xml_string(fxml, 'trajectory_format', trajectory_format, 'netCDF') call readTrajectories(trim(filename), trim(trajectory_format)) ! Write which snapshots are in which trajectories if instructed to do so. call get_xml_string(fxml, 'write_trajectories', filename, '') if(len_trim(filename) > 0) call writeTrajectoriesOldFormat(filename) ! Set max_tau and tau_step for writing timescales. call get_xml_flag(fxml, 'write_split_timescales', write_split_timescales) call get_xml_flag(fxml, 'write_merged_timescales', write_merged_timescales) call get_xml_flag(fxml, 'write_merged_eigenvectors', write_merged_eigenvectors) call get_xml_integer(fxml, 'max_ntimescales', max_ntimescales, target_nmacrostates-1) call get_xml_integer(fxml, 'tau_step', tau_step, 1) call get_xml_integer(fxml, 'max_tau', max_tau, ceiling((maxval(trajectories%length)-1) / 2.0)) call get_xml_realsp(fxml, 'timescales_confidence_interval', timescales_confidence_interval, 0.68) call get_xml_integer(fxml, 'timescales_bootstrap_ntrials', timescales_bootstrap_ntrials, 40) ! Determine whether time-reversibility is to be exploited. call get_xml_flag(fxml, 'no_timereversed', no_timereversed) use_timereversed = .not. no_timereversed if(no_timereversed) then ! Disable all options that have to do with computing timescales, because current code assumes ! detailed balance is satisfied by using time-reversed trajectories. write(*,*) 'Trajectories are not time-reversible, so disabling computation of eigenvalues/vectors.' write_split_timescales = .false. write_merged_timescales = .false. write_merged_eigenvectors = .false. end if ! Ensure that max_tau is shorter than the longest trajectory lengths. if(max_tau > maxval(trajectories%length)-1) then write(*,*) 'max_tau is longer than longest trajectory -- adjusting to a more sensible value.' max_tau = ceiling((maxval(trajectories%length)-1) / 2.0) end if ! Set state PDB ensemble options. write_split_pdbs = .false. split_pdb_nmodels = 30 write_merged_pdbs = .false. 
merged_pdb_nmodels = 30 call rewind_xmlfile(fxml) call get_node(fxml, path="//write_split_pdbs", pcdata=pcdata, attributes=attributes, status=status) if(status >= 0) then write_split_pdbs = .true. call get_value(attributes, 'nmodels', pcdata, status) if(status >= 0) read(pcdata,'(I8)') split_pdb_nmodels if(debug) write(*,*) 'Will write split PDBs with nmodels = ', split_pdb_nmodels end if call rewind_xmlfile(fxml) call get_node(fxml, path="//write_merged_pdbs", pcdata=pcdata, attributes=attributes, status=status) if(status >= 0) then write_merged_pdbs = .true. call get_value(attributes, 'nmodels', pcdata, status) if(status >= 0) read(pcdata,'(I8)') merged_pdb_nmodels if(debug) write(*,*) 'Will write merged PDBs with nmodels = ', merged_pdb_nmodels end if ! Allocate storage for microstate and macrostate assignments. allocate(microstate_assignments(nsnapshots),macrostate_assignments(nsnapshots)) allocate(compacted_microstate_assignments(nsnapshots),compacted_macrostate_assignments(nsnapshots)) ! Nullify microstates. nmicrostates = 0 microstates => null() ! Initialize by assigning all snapshots to one macrostate. macrostate_assignments(:) = 1 ! Start from iteration 1 (unless overridden). iteration = 1 ! Get number of iterations to execute. call get_xml_integer(fxml, 'niterations', niterations, 10) ! Determine initialization of iteration and state assignments. call rewind_xmlfile(fxml) call get_node(fxml, path="//initialize", pcdata=pcdata, attributes=attributes, status=status) if(status >= 0) then ! Determine iteration to start from. call get_value(attributes, 'iteration', pcdata, status) if(status >= 0) read(pcdata,'(I8)') iteration ! Determine statassignments to start from. call get_value(attributes, 'initial_stateassignments', filename, status) if(status >= 0) then ! Attempt to load macrostate stateassignments. if(debug) write(*,*) 'Attempting to resume from end of iteration ', iteration call readStateassignments(filename, macrostate_assignments) end if end if ! Determine macrostates and compacted macrostates. macrostates => unique(macrostate_assignments, compacted=compacted_macrostate_assignments) nmacrostates = size(macrostates,1) if(debug) write(*,*) 'There are ', nmacrostates, ' macrostates:' if(debug) write(*,*) macrostates ! DEBUG if(.not. present(nocheck)) then call checkstates(macrostate_assignments) call checkstates(compacted_macrostate_assignments) end if ! Close XML control file. call close_xmlfile(fxml) end subroutine initialize !============================================================================================= ! Clean up allocated data. !============================================================================================= subroutine finalize ! Free snapshot storage. deallocate(snapshots) ! Free trajectory storage. deallocate(trajectories) ! Free atomindices. deallocate(ca_atomindices, bb_atomindices, heavy_atomindices) ! Free PDB atoms. deallocate(pdbatoms) ! Free state assignments. deallocate(microstate_assignments, macrostate_assignments) end subroutine finalize !============================================================================================= ! Compute the distance between two snapshots. !============================================================================================= function computeDistance(snapshot_index_1, snapshot_index_2) use theobald_rmsd, only : ls_rmsd ! for LS-RMSD using fast Newton iteration quaternion-based method !use kabsch_rmsd, only : ls_rmsd ! for LS-RMSD using Kabsch eigensolver method use pdbio ! Parameters. 
integer, intent(in) :: snapshot_index_1 ! Index of first snapshot. integer, intent(in) :: snapshot_index_2 ! Index of second snapshot. ! Return value. real(sp) :: computeDistance ! The RMSD between the two snapshots. ! Compute snapshot RMSD. select case(rmsd_mode) case(heavyatom_rmsd) computeDistance = ls_rmsd(size(heavy_atomindices,1), snapshots(snapshot_index_1,heavy_atomindices,:), & snapshots(snapshot_index_2,heavy_atomindices,:)) case(backbone_rmsd) computeDistance = ls_rmsd(size(bb_atomindices,1), snapshots(snapshot_index_1,bb_atomindices,:), & snapshots(snapshot_index_2,bb_atomindices,:)) case(ca_rmsd) computeDistance = ls_rmsd(size(ca_atomindices,1), snapshots(snapshot_index_1,ca_atomindices,:), & snapshots(snapshot_index_2,ca_atomindices,:)) case(user_rmsd) computeDistance = ls_rmsd(size(user_atomindices,1), snapshots(snapshot_index_1,user_atomindices,:), & snapshots(snapshot_index_2,user_atomindices,:)) case default ! Compute all-atom RMSD by default. computeDistance = ls_rmsd(natoms, snapshots(snapshot_index_1,:,:), snapshots(snapshot_index_2,:,:)) end select end function computeDistance !============================================================================================= ! Assign the specified snapshots to closest generators. !============================================================================================= subroutine assignSnapshotsToGenerators(nsnapshot_indices, snapshot_indices, ngenerators, generator_indices, stateassignments) ! Parameters. integer, intent(in) :: nsnapshot_indices integer, dimension(nsnapshot_indices), intent(in) :: snapshot_indices integer, intent(in) :: ngenerators integer, dimension(ngenerators), intent(in) :: generator_indices integer, dimension(nsnapshots), intent(out) :: stateassignments ! Local variables. integer :: i, j ! Loop variables. real(sp), dimension(ngenerators) :: generator_distances ! distances to all generators ! Assign each snapshot to the closest generator. if(debug) write(*,*) 'Assigning ', nsnapshot_indices, ' snapshots to generators...' !$omp parallel do default(shared) private(generator_distances) do i = 1, nsnapshot_indices if(debug .and. mod(i,1000).eq.0) write(*,*) 'snapshot ', i, ' / ', nsnapshots ! Compute distances to all generators. do j = 1, ngenerators generator_distances(j) = computeDistance(snapshot_indices(i), generator_indices(j)) end do ! Assign state to the snapshot index of the closet generator. stateassignments(snapshot_indices(i)) = generator_indices(minloc(generator_distances,1)) end do !$omp end parallel do if(debug) write(*,*) 'Done.' ! Sanity check. if(any(stateassignments(snapshot_indices) == 0)) then write(*,*) 'assignSnapshotsToGenerators: some stateassignments are zero!' stop end if end subroutine assignSnapshotsToGenerators !============================================================================================= ! Compute the intrastate variance of a set of snapshots from a given generator. !============================================================================================= function computeStateVariance(nindices, indices, generator_index) result(variance) ! Parameters. integer, intent(in) :: nindices ! number of snapshots in the state integer, dimension(:), intent(in) :: indices ! indices of snapshots belonging to the state integer, intent(in) :: generator_index ! index of generator ! Return value. real(dp) :: variance ! Local variables. integer :: i integer :: snapshot_index ! Compute variance. 
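! The quantity accumulated below is the intrastate variance of the state about its generator,
!
!   variance = 1/(N-1) * sum_{i=1..N} d(generator, snapshot_i)**2
!
! where d(.,.) is the RMSD returned by computeDistance and N = nindices.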
variance = 0.0_dp
    !$omp parallel do default(shared) reduction(+:variance)
    do i = 1,nindices
       variance = variance + computeDistance(generator_index, indices(i))**2
    end do
    !$omp end parallel do
    variance = variance / real(nindices-1,dp)

  end function computeStateVariance

  !=============================================================================================
  ! Use a stochastic procedure to attempt to find a better generator for a given state.
  ! A number of trials are attempted with randomly-chosen generators, attempting to reduce the
  ! intrastate variance.
  ! If ntries >= nindices, all data points in the state are considered to find the one that
  ! minimizes the intrastate variance.
  !=============================================================================================
  subroutine updateGenerator(nindices, indices, generator_index, ntries)

    use utilities, only : generateUniformInteger

    ! Parameters.
    integer, intent(in) :: nindices                     ! the number of indices in the state
    integer, dimension(nindices), intent(in) :: indices ! indices of the snapshots
    integer, intent(inout) :: generator_index           ! the initial generator index, replaced by the new one if a snapshot with smaller variance is found
    integer, intent(in) :: ntries                       ! number of tries to improve generator

    ! Local variables.
    integer :: i
    integer :: try
    integer :: trial_generator_index
    real(sp) :: variance, trial_variance

    ! Compute variance about the initial generator_index, so trial generators are always
    ! compared against the current best.
    variance = computeStateVariance(nindices, indices, generator_index)

    if(ntries < nindices) then
       ! Use stochastic algorithm to try to update generator.
       do try = 1,ntries
          ! Select a snapshot at random from this state.
          trial_generator_index = indices(generateUniformInteger(nindices))

          ! Compute variance about this proposed generator.
          trial_variance = computeStateVariance(nindices, indices, trial_generator_index)

          ! Assign this to be the new generator if variance has decreased.
          if(trial_variance < variance) then
             variance = trial_variance
             generator_index = trial_generator_index
          end if
       end do
    else
       ! Try all members of the state.
       do i = 1,nindices
          trial_generator_index = indices(i)

          ! Compute variance about this proposed generator.
          trial_variance = computeStateVariance(nindices, indices, trial_generator_index)

          ! Assign this to be the new generator if variance has decreased.
          if(trial_variance < variance) then
             variance = trial_variance
             generator_index = trial_generator_index
          end if
       end do
    end if

  end subroutine updateGenerator

  !=============================================================================================
  ! Update generator positions using the stochastic update scheme.
  !=============================================================================================
  subroutine updateGenerators(nsnapshot_indices, snapshot_indices, ngenerators, generator_indices, stateassignments)

    use utilities, only : find, generateUniformInteger

    ! Parameters.
    integer, intent(in) :: nsnapshot_indices
    integer, dimension(nsnapshot_indices), intent(in) :: snapshot_indices
    integer, intent(in) :: ngenerators
    integer, dimension(ngenerators), intent(inout) :: generator_indices
    integer, dimension(nsnapshots), intent(in) :: stateassignments

    ! Local variables.
    integer :: i                              ! Loop variable.
    integer, dimension(:), pointer :: indices ! indices of snapshots belonging to current generator
    integer :: nindices                       ! number of indices

    do i = 1,ngenerators
       !
Find snapshots belonging to this microstate. indices => find(stateassignments(snapshot_indices) == generator_indices(i)) indices = snapshot_indices(indices) nindices = size(indices,1) write(*,*) 'calling updateGenerator for generator ', i, ' (', nindices, ' members)' ! Try to find another member of this state that minimizes the intrastate variance about this generator. if(full_kmedoid_update) then ! Use full update, considering all snapshots in state. call updateGenerator(nindices, indices, generator_indices(i), nindices) else ! Use stochastic update with ntries = ngenerators. call updateGenerator(nindices, indices, generator_indices(i), ngenerators) end if ! Clean up. deallocate(indices) end do end subroutine updateGenerators !============================================================================================= ! Split a set of snapshots into a specified number of subsets by spatial decomposition of the ! region into a set of convex polytopes defined by proximity to particular 'generators'. ! In effect, a Vornoi decomposition of conformation space (in some space in which RMSD is the metric) is produced. ! ! How the generators are arrived at is, in principle, arbitrary. ! This implementation uses a few iterations of an iterative k-medoid algorithm. !============================================================================================= subroutine splitState(nsnapshot_indices, snapshot_indices, ngenerators, stateassignments, generators) use utilities, only : generateUniqueRandomIndices, histogram, find ! Parameters. integer :: nsnapshot_indices ! Length of snapshot_indices integer, dimension(nsnapshot_indices), intent(in) :: snapshot_indices ! The list of indices of snapshots to split. integer, intent(in) :: ngenerators ! The number of microstates to split this state into. integer, dimension(nsnapshots), intent(out) :: stateassignments ! The state assignments (indices of generator snapshtots) for each snapshot in the list. integer, dimension(ngenerators), intent(out), optional :: generators ! Snapshot indices of the generators of the convex polytopes. ! NOTE that generators = unique(stateassignments), though not necessarily in that order. ! Local variables. integer :: i, j ! Loop variables. integer, dimension(ngenerators) :: generator_indices ! snapshot indices of generators integer :: iteration ! iteration number of generator refinement integer :: nselected_snapshots ! number of selected snapshots (for fast generator update) integer, dimension(:), allocatable :: selected_snapshot_indices ! selected snapshots (for fast generator update) ! Sanity check on input parameters. Ensure nsnapshots > ngenerators. if(ngenerators > nsnapshot_indices) then write(*,*) 'split: error: ngenerators > nsnapshot_indices' stop end if ! Choose how many snapshots to select for rapid determination of generators. nselected_snapshots = ceiling(nsnapshot_indices * kmedoid_fraction) if(nselected_snapshots < ngenerators) nselected_snapshots = nsnapshot_indices ! Determine snapshots to use in generator refinement. allocate(selected_snapshot_indices(nselected_snapshots)) if(nselected_snapshots < nsnapshot_indices) then ! Select snapshots. if(debug) write(*,*) 'Choosing ', nselected_snapshots, ' of ', nsnapshot_indices, ' snapshots to refine generators...' call generateUniqueRandomIndices(nsnapshot_indices, nselected_snapshots, selected_snapshot_indices) selected_snapshot_indices = snapshot_indices(selected_snapshot_indices) else ! Use all snapshots. No need to select subset. 
selected_snapshot_indices = snapshot_indices end if ! Initiate algorithm by choosing a random set of ngenerators snapshots from the state, without replacement. if(debug) write(*,*) 'Choosing a random set of ', ngenerators, ' snapshots for generators...' call generateUniqueRandomIndices(nselected_snapshots, ngenerators, generator_indices) generator_indices = selected_snapshot_indices(generator_indices) ! Iterate to refine generators. do iteration = 1,kmedoid_iterations ! Assign all snapshots to closest generator. write(*,*) 'Assigning snapshots to generators...' call assignSnapshotsToGenerators(nselected_snapshots, selected_snapshot_indices, & ngenerators, generator_indices, stateassignments) ! Update generators. write(*,*) 'Updating generators...' call updateGenerators(nselected_snapshots, selected_snapshot_indices, & ngenerators, generator_indices, stateassignments) end do ! Assign all snapshots to closest generator. call assignSnapshotsToGenerators(nsnapshot_indices, snapshot_indices, ngenerators, generator_indices, stateassignments) call checkstates(stateassignments(snapshot_indices)) ! Provide generator indices if desired. if(present(generators)) generators = generator_indices ! Clean up. deallocate(selected_snapshot_indices) end subroutine splitState !============================================================================================= ! The transition matrix evaluation criteria to be maximized when trying to determine ! the optimal lumping of microstates into macrostates. !============================================================================================= function evaluationCriteria(Tji) use utilities, only : trace ! Parameters. real(dp), dimension(:,:) :: Tji ! Lumped transition matrix to be evaluated. ! Tji(j,i) is the transition probability of finding the system in state j a time tau later, ! given that it initially started in state i. ! Return value. real(sp) :: evaluationCriteria ! The criteria to be maximized over various possible lumpings of microstates into macrostates. ! Local variables. integer :: i ! In this implementation, we use the trace (which equals the sum of the eigenvalues) as the ! criteria to be maximized. This is the sum of the self-transition probabilities, and tries ! to find the most metastable decomposition possible. evaluationCriteria = trace(Tji) ! Sanity check. if(isnan(evaluationCriteria)) then write(*,*) 'evaluationCritera: trace is Nan' write(*,*) 'Tji = ' do i = 1,nmacrostates write(*,'(6F8.5)') Tji(i,:) end do stop end if end function evaluationCriteria !============================================================================================= ! Optimize a lumping of microstates into macrostates by using an MC/SA algorithm to maximize an objective. !============================================================================================= subroutine lump_mcsa(nmacrostates, best_criteria, best_micro_to_macro, N_ji) use utilities, only : generateUniformInteger, generateUniform01, histogram, generateUniqueRandomIndices use transitionmatrix, only : computeTransitionMatrix, computeLumpedTransitionMatrix ! Parameters. integer, intent(in) :: nmacrostates ! The number of macrostates to lump to. real(sp), intent(out) :: best_criteria ! The largest value of the objective function found. integer, dimension(nmicrostates), intent(inout) :: best_micro_to_macro ! The best microstate-to-macrostate lumping found. real(dp), dimension(nmicrostates,nmicrostates), intent(in) :: N_ji ! microstate transition count matrix ! Local variables. 
real(dp), dimension(nmacrostates,nmacrostates) :: T_ba ! Lumped transition matrix.
    ! T_ba(b,a) is the transition probability of finding the system in macrostate b a time tau later,
    ! given that it initially started in state a.
    integer, dimension(nmicrostates) :: micro_to_macro, micro_to_macro_proposed ! translation from microstate to macrostate index
    real(sp) :: criteria, criteria_proposed                  ! objective function values of current and proposed lumpings
    integer :: step                                          ! current step in MC/SA trial (max is nsteps)
    integer, dimension(nmacrostates) :: macrostate_histogram ! histogram counts for macrostates, to make sure they don't end up empty
    integer :: microstate, macrostate
    logical :: accept
    real(sp) :: beta
    real(sp) :: r

    ! Take initial lumping from input best_micro_to_macro.
    micro_to_macro = best_micro_to_macro

    ! Determine how many microstates are lumped into each macrostate.
    macrostate_histogram = histogram(micro_to_macro, nmacrostates)

    ! Evaluate criteria function for initial lumping and store.
    ! T_ba = computeTransitionMatrix(ntrajectories, trajectories, nmacrostates, micro_to_macro(compacted_microstate_assignments), tau)
    call computeLumpedTransitionMatrix(nmicrostates, N_ji, nmacrostates, T_ba, micro_to_macro)
    criteria = evaluationCriteria(T_ba)

    ! Store as best.
    best_criteria = criteria
    best_micro_to_macro = micro_to_macro
    if(debug) write(*,*) 'initial criteria: ', best_criteria

    ! Perform MC/SA run.
    do step = 1,mcsa_nsteps
       if(debug .and. mod(step,1000)==0) write(*,*) 'step ', step, ' of ', mcsa_nsteps, ': ', criteria, best_criteria

       ! Propose a move by reassigning a random microstate to a random macrostate.
       micro_to_macro_proposed = micro_to_macro
       microstate = generateUniformInteger(nmicrostates)
       macrostate = generateUniformInteger(nmacrostates)
       micro_to_macro_proposed(microstate) = macrostate

       ! Reject immediately if no change in lumping.
       if(micro_to_macro_proposed(microstate) == micro_to_macro(microstate)) cycle

       ! Reject immediately if one of the macrostates contains no microstates.
       if(any(histogram(micro_to_macro_proposed, nmacrostates) == 0)) cycle

       ! DEBUG
       !write(*,*) 'histogram = ', histogram(micro_to_macro_proposed(compacted_microstate_assignments), nmacrostates)

       ! Evaluate transition matrix and criteria.
       !T_ba = computeTransitionMatrix(ntrajectories, trajectories, nmacrostates, micro_to_macro_proposed(compacted_microstate_assignments), tau)
       call computeLumpedTransitionMatrix(nmicrostates, N_ji, nmacrostates, T_ba, micro_to_macro_proposed)
       criteria_proposed = evaluationCriteria(T_ba)

       ! DEBUG
       !write(*,*) ' criteria: ', criteria_proposed

       ! Accept or reject based on the Metropolis criterion.
       accept = .false.
       if(criteria_proposed >= criteria) then
          ! Always accept if the criteria is not decreased.
          accept = .true.

          ! Check whether this is the best lumping seen so far.
          if(criteria_proposed > best_criteria) then
             ! Store as best.
             best_criteria = criteria_proposed
             best_micro_to_macro = micro_to_macro_proposed
          end if
       else
          ! Set annealing (inverse) temperature.
          beta = real(step,sp)

          ! Generate uniform variate.
          r = generateUniform01()

          ! Apply Metropolis test.
          if(r < exp(beta * (criteria_proposed - criteria))) accept = .true.
       end if

       ! Process acceptance.
       if(accept) then
          criteria = criteria_proposed
          micro_to_macro = micro_to_macro_proposed
       end if
    end do

  end subroutine lump_mcsa

  !=============================================================================================
  !
Construct an initial random partitioning. !============================================================================================= subroutine random_lumping(nmicrostates, nmacrostates, micro_to_macro) use utilities, only : generateUniqueRandomIndices ! Arguments. integer, intent(in) :: nmicrostates ! Number of microstates. integer, intent(in) :: nmacrostates ! Number of macrostates. integer, dimension(nmicrostates), intent(out) :: micro_to_macro ! micro_to_macro(microstate) = macrostate which microstate belongs to. ! Local variables. integer :: i ! Loop variable. integer, dimension(nmicrostates) :: indices ! indices storage ! Pick an initial lumping of microstates to form nmacrostates macrostates, ensuring that each ! macrostate contains at least one microstate. call generateUniqueRandomIndices(nmicrostates, nmicrostates, indices) do i = 1,nmicrostates micro_to_macro(indices(i)) = mod(i, nmacrostates) + 1 end do end subroutine random_lumping !============================================================================================= ! Construct an initial guess at the partitioning using eigenvector-based PCCA-like algorithm. !============================================================================================= subroutine ev_lumping(nmicrostates, nmacrostates, mu_k, u_ik, micro_to_macro) use utilities, only : mean, find, unique, histogram ! Arguments. integer, intent(in) :: nmicrostates ! Number of microstates. integer, intent(in) :: nmacrostates ! Number of macrostates. integer, dimension(nmicrostates), intent(out) :: micro_to_macro ! micro_to_macro(microstate) = macrostate which microstate belongs to. real(dp), dimension(nmacrostates), intent(in) :: mu_k ! mu_k(k) is the kth largest eigenvalue of the transition matrix real(dp), dimension(nmicrostates,nmacrostates), intent(in) :: u_ik ! u_ik(i,k) is component i of eigenvector k. ! Local variables. integer :: i, j, k ! Loop variables. integer, dimension(:), pointer :: indices, indices2 ! indices storage integer :: current_nmacrostates ! Current number of macrostates. real(dp), dimension(nmicrostates) :: ev real(sp), dimension(nmacrostates) :: deviation ! deviation of ev from mean over this macrostate real(dp) :: mean_ev integer :: macrostate_to_split ! Start with all microstates in one macrostate. micro_to_macro(1:nmicrostates) = 1 current_nmacrostates = 1 ! Split using other eigenvectors. do k = 2,nmacrostates ! Get current eigenvector. ev = u_ik(:,k) ! Find the macrostate with the largest sum of differences between the mean eigenvector component and all of its members (L1 norm?). do i = 1,current_nmacrostates ! Get indices of microstates in macrostate i. indices => find(micro_to_macro .eq. i) ! Compute L1 norm of difference. mean_ev = mean(ev(indices)) deviation(i) = sum(abs(ev(indices) - mean_ev)) ! Clean up deallocate(indices) end do macrostate_to_split = maxloc(deviation(1:current_nmacrostates),1) ! Split the state at the mean. indices => find(micro_to_macro .eq. macrostate_to_split) mean_ev = mean(ev(indices)) indices2 => find(ev(indices) > mean_ev) micro_to_macro(indices(indices2)) = current_nmacrostates + 1 current_nmacrostates = current_nmacrostates + 1 deallocate(indices, indices2) end do ! Check to make sure no macrostates are empty. if(any(histogram(micro_to_macro,nmacrostates) == 0)) then write(*,*) 'ev_lumping has generated an empty macrostate!' 
!=============================================================================================
! Lump microstates into macrostates by kinetic clustering using a MC/SA algorithm.
!=============================================================================================
subroutine lump()

  use utilities, only : generateUniformInteger, generateUniform01, histogram, generateUniqueRandomIndices, unique
  use transitionmatrix, only : computeTransitionMatrix, transitionMatrixEigs, computeTransitionCountMatrix

  ! Parameters.

  ! Constants.

  ! Local variables.
  integer :: i, j, n
  integer :: trial ! current MC/SA trial (max is ntrials)
  integer :: best_trial
  real(sp) :: criteria
  integer, dimension(nmicrostates) :: micro_to_macro
  integer, dimension(nmicrostates) :: ev_micro_to_macro ! eigenvector-based lumping initial guess
  real(sp), dimension(:), allocatable :: trial_criteria
  integer, dimension(:,:), allocatable :: trial_micro_to_macro
  real(sp) :: criteria_upper_bound
  real(dp), dimension(nmacrostates) :: mu_k ! Dominant eigenvalues of the transition matrix.
  real(dp), dimension(nmicrostates,nmacrostates) :: u_ik ! Dominant eigenvectors of the transition matrix.
  integer :: status
  real(dp), dimension(nmicrostates,nmicrostates) :: N_ji ! symmetrized transition count matrix to use for efficiently computing lumped transition matrix

  write(*,*) 'Histogram of microstate populations:'
  write(*,'(100I6)') histogram(compacted_microstate_assignments, nmicrostates)
  write(*,*) 'total = ', sum(histogram(compacted_microstate_assignments, nmicrostates))

  if(debug) write(*,*) 'Lumping ', nmicrostates, ' microstates into ', nmacrostates, ' macrostates...'

  ! Compute eigenvalues and eigenvectors of transition matrix.
  if(debug) write(*,*) 'Computing eigenvalues and eigenvectors of transition matrix...'
  call transitionMatrixEigs(ntrajectories, trajectories, nmicrostates, compacted_microstate_assignments, &
       tau, nmacrostates, mu_k, ev_left = u_ik)
  if(debug) write(*,*) 'mu_k = ', mu_k

  ! Determine upper bound on metastability from eigenvalues.
  criteria_upper_bound = sum(mu_k)

  if(use_ev_initial_lumping) then
    ! Compute eigenvector-based initial lumping.
    write(*,*) 'Computing eigenvector-based lumping to use as initial guess...'
    call ev_lumping(nmicrostates, nmacrostates, mu_k, u_ik, ev_micro_to_macro)
  end if

  ! Compute symmetrized transition count matrix to use for efficiently computing the lumped transition matrix.
  call computeTransitionCountMatrix(ntrajectories, trajectories, nmicrostates, compacted_microstate_assignments, tau, N_ji, &
       use_timereversed)

  ! Perform trials.
  allocate(trial_criteria(mcsa_ntrials), trial_micro_to_macro(mcsa_ntrials, nmicrostates))
  !$omp parallel do default(shared) private(criteria,micro_to_macro)
  do trial = 1, mcsa_ntrials
    if(debug) write(*,*) 'mcsa trial ', trial, ' of ', mcsa_ntrials

    ! Select initial lumping.
    if(use_ev_initial_lumping) then
      ! Use ev-based lumping.
      micro_to_macro = ev_micro_to_macro
    else
      call random_lumping(nmicrostates, nmacrostates, micro_to_macro)
    end if

    ! Perform one pass of MC/SA to obtain a guess at the best criteria and lumping.
    call lump_mcsa(nmacrostates, criteria, micro_to_macro, N_ji)

    ! Store this lumping.
    ! TODO: Compact this into above call.
    trial_criteria(trial) = criteria
    trial_micro_to_macro(trial,:) = micro_to_macro
  end do
  !$omp end parallel do
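  ! NOTE: each OpenMP thread runs an independent MC/SA trial from its own initial lumping;
  ! 'criteria' and 'micro_to_macro' are private to each thread, so only trial_criteria(trial)
  ! and trial_micro_to_macro(trial,:) are shared results. The best trial is selected below and
  ! its criteria compared against criteria_upper_bound = sum(mu_k), since the metastability of
  ! any lumping into nmacrostates states should not exceed the sum of the nmacrostates largest
  ! transition matrix eigenvalues.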
  ! Determine best and convergence.
  write(*,*) 'trial_criteria'
  write(*,*) trial_criteria
  best_trial = maxloc(trial_criteria,1)
  criteria = trial_criteria(best_trial)
  micro_to_macro = trial_micro_to_macro(best_trial,:)
  ! write(*,*) 'optimal lumping = '
  ! write(*,'(I4)') micro_to_macro
  write(*,*) count(trial_criteria == criteria), ' / ', mcsa_ntrials, ' trials found the optimum.'
  write(*,*) 'criteria is ', criteria, ' / ', criteria_upper_bound

  ! Use this lumping to generate compacted macrostate assignments.
  compacted_macrostate_assignments = micro_to_macro(compacted_microstate_assignments)

  ! Determine microstate ids that go with macrostates.
  deallocate(macrostates, stat=status)
  allocate(macrostates(nmacrostates))
  do i = 1,nmicrostates
    macrostates(micro_to_macro(i)) = microstates(i)
  end do

  ! Map compacted to expanded macrostate ids.
  macrostate_assignments = macrostates(compacted_macrostate_assignments)

  deallocate(trial_criteria, trial_micro_to_macro)

end subroutine lump

!=============================================================================================
! Write state assignments.
!=============================================================================================
subroutine writeStateassignments(filename, nstates, stateassignments)

  use pdbio, only : new_unit

  ! Parameters.
  character(len=*), intent(in) :: filename ! File to write state assignments to.
  integer, intent(in) :: nstates ! Number of states.
  integer, dimension(:), intent(in) :: stateassignments ! Individual state assignments.

  ! Local variables.
  integer :: nsnapshots
  integer :: snapshot_index
  integer :: iunit ! unit to use for file management
  character(len=MAX_LINE_LENGTH) :: line, text

  ! Determine list size.
  nsnapshots = size(stateassignments,1)

  ! DEBUG
  if(debug) write(*,*) 'Writing state assignments to ', trim(filename), '...'

  ! Open file for writing.
  call new_unit(iunit)
  open(unit=iunit, file=filename, status='REPLACE', err=10, position='REWIND', &
       form='FORMATTED', action='WRITE')

  ! Write.
  do snapshot_index = 1,nsnapshots
    ! Write line.
    write(iunit,'(I8,1X,I8)') snapshot_index, stateassignments(snapshot_index)
  end do

  ! Close file.
  close(unit=iunit)

  if(debug) write(*,*) 'Done.'

  return

  ! Report error if file could not be opened.
10 continue
  write(*,*) 'decompose: writeStateassignments: Error opening file ', trim(filename)
  stop

end subroutine writeStateassignments

!=============================================================================================
! Read state assignments.
!=============================================================================================
subroutine readStateassignments(filename, stateassignments)

  use pdbio, only : new_unit

  ! Parameters.
  character(len=*), intent(in) :: filename ! File to read state assignments from.
  integer, dimension(:), intent(out) :: stateassignments ! Individual state assignments.

  ! Local variables.
  integer :: nsnapshots
  integer :: snapshot_index
  integer :: iunit ! unit to use for file management
  character(len=MAX_LINE_LENGTH) :: line, text

  ! Determine list size.
  nsnapshots = size(stateassignments,1)

  ! DEBUG
  if(debug) write(*,*) 'Reading state assignments from ', trim(filename), '...'

  ! Open file for reading.
  call new_unit(iunit)
  open(unit=iunit, file=trim(filename), status='OLD', err=10, position='REWIND', &
       form='FORMATTED', action='READ')

  ! Read.
  read(iunit,'(9X,I8)') stateassignments

  ! Close file.
  close(unit=iunit)

  if(debug) write(*,*) 'Done.'

  return

  ! Report error if file could not be opened.
10 continue
  write(*,*) 'decompose: readStateassignments: Error opening file ', trim(filename)
  stop

end subroutine readStateassignments
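! NOTE (hypothetical values): writeStateassignments and readStateassignments use fixed-width
! '(I8,1X,I8)' records, one per snapshot, e.g.
!
!           1    12837
!           2    12837
!           3     4521
!
! where the first column is the snapshot index and the second the state id;
! readStateassignments skips the first nine characters of each record and reads back only the
! state id column.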
!=============================================================================================
! Write eigenvectors.
!=============================================================================================
subroutine writeEigenvectors(prefix, nstates, compacted_stateassignments, tau, neigs)

  use pdbio, only : new_unit
  use transitionmatrix, only : transitionMatrixEigs
  use timer

  ! Parameters.
  character(len=*), intent(in) :: prefix ! Prefix of files to write eigenvectors to.
  integer, intent(in) :: nstates ! Number of states.
  integer, dimension(:), intent(in) :: compacted_stateassignments ! Individual state assignments.
  integer, intent(in) :: tau ! Lag time to use.
  integer, intent(in) :: neigs ! Number of eigenvectors to write.

  ! Local variables.
  integer :: nsnapshots
  integer :: snapshot_index
  integer :: iunit ! unit to use for file management
  character(len=MAX_LINE_LENGTH) :: line, text
  integer :: ntau ! Number of lag times.
  integer, dimension(:), allocatable :: taus
  real(sp), dimension(:,:), allocatable :: timescales, timescales_lower, timescales_upper
    ! timescales(timescale_index, tau_index); lower and upper are confidence bounds.
  integer :: i, k ! Loop variables.
  character(len=MAX_FILENAME_LENGTH) :: filename ! Filename to write to.
  integer :: tau_step_ ! tau increment to use
  integer :: ntimescales ! number of timescales to use
  real(dp), dimension(nstates, neigs) :: ev_left, ev_right
  real(dp), dimension(neigs) :: mu_k

  if(debug) write(*,*) 'Computing eigenvalues and eigenvectors of transition matrix...'
  call transitionMatrixEigs(ntrajectories, trajectories, nstates, compacted_stateassignments, &
       tau, neigs, mu_k, ev_left = ev_left, ev_right = ev_right)
  if(debug) write(*,*) 'mu_k = ', mu_k

  ! Get a new file unit to use.
  call new_unit(iunit)

  ! Write right eigenvectors.
  filename = trim(prefix)//'.ev_right'
  open(unit=iunit, file=filename, status='REPLACE', err=11, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do k = 1,neigs
    line = ''
    do i = 1,nstates
      write(text,'(E16.8)') ev_right(i,k)
      line = trim(line)//' '//adjustl(text)
    end do
    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''
  end do
  close(unit=iunit)

  ! Write left eigenvectors.
  filename = trim(prefix)//'.ev_left'
  open(unit=iunit, file=filename, status='REPLACE', err=11, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do k = 1,neigs
    line = ''
    do i = 1,nstates
      write(text,'(E16.8)') ev_left(i,k)
      line = trim(line)//' '//adjustl(text)
    end do
    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''
  end do
  close(unit=iunit)

  if(debug) write(*,*) 'Done.'

  return

  ! Report error if file could not be opened.
11 continue
  write(*,*) 'decompose: writeEigenvectors: Error opening file ', trim(filename)
  stop

end subroutine writeEigenvectors
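! NOTE: the eigenvalues mu_k written above and the timescales written by writeTimescales below
! are conventionally related, for a lag time tau, by the implied-timescale formula
!
!    t_k = - tau * tau_unit / ln( mu_k ),   k = 2, 3, ...
!
! assuming computeTimescalesBootstrap follows this standard definition (its implementation is
! not part of this file).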
!=============================================================================================
! Write timescales.
!=============================================================================================
subroutine writeTimescales(prefix, nstates, stateassignments, max_tau, tau_step)

  use pdbio, only : new_unit
  use transitionmatrix, only : computeTimescalesBootstrap
  use timer

  ! Parameters.
  character(len=*), intent(in) :: prefix ! Prefix of files to write timescales to.
  integer, intent(in) :: nstates ! Number of states.
  integer, dimension(:), intent(in) :: stateassignments ! Individual state assignments.
  integer, intent(in) :: max_tau ! Largest tau to use. Must be shorter than the longest trajectory.
  integer, intent(in), optional :: tau_step ! Increment to use in choosing taus.

  ! Local variables.
  integer :: nsnapshots
  integer :: snapshot_index
  integer :: iunit ! unit to use for file management
  character(len=MAX_LINE_LENGTH) :: line, text
  integer :: ntau ! Number of lag times.
  integer, dimension(:), allocatable :: taus
  real(sp), dimension(:,:), allocatable :: timescales, timescales_lower, timescales_upper
    ! timescales(timescale_index, tau_index); lower and upper are confidence bounds.
  integer :: i, tau_index ! Loop variables.
  character(len=MAX_FILENAME_LENGTH) :: filename ! Filename to write to.
  integer :: tau_step_ ! tau increment to use
  integer :: ntimescales ! number of timescales to use

  if(debug) write(*,*) 'Computing timescales...'
  call resetTimer()

  ! Determine number of snapshots from stateassignments list.
  nsnapshots = size(stateassignments,1)

  ! Choose taus to evaluate at.
  tau_step_ = 1
  if(present(tau_step)) tau_step_ = tau_step
  ntau = int(max_tau / tau_step_)
  allocate(taus(ntau))
  taus = (/ (i, i = tau_step_,max_tau,tau_step_) /)

  ! Choose number of timescales based on number of states.
  ntimescales = min(nstates - 2, max_ntimescales)

  ! Allocate storage.
  allocate(timescales(ntimescales,ntau), timescales_lower(ntimescales,ntau), timescales_upper(ntimescales, ntau))

  ! Compute timescales for all taus.
  call computeTimescalesBootstrap(ntrajectories, trajectories, nstates, stateassignments, ntau, ntimescales, &
       tau_unit, taus, timescales, &
       timescales_confidence_interval, timescales_lower, timescales_upper, timescales_bootstrap_ntrials)

  ! Get a new file unit to use.
  call new_unit(iunit)

  ! Write lag times.
  filename = trim(prefix)//'.lag_times'
  open(unit=iunit, file=filename, status='REPLACE', err=10, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do tau_index = 1,ntau
    write(iunit,*) taus(tau_index) * tau_unit
  end do
  close(unit=iunit)

  ! Write timescales.
  filename = trim(prefix)//'.timescales'
  open(unit=iunit, file=filename, status='REPLACE', err=10, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do tau_index = 1,ntau
    line = ''
    write(text,'(F12.3)') taus(tau_index) * tau_unit
    line = adjustl(text)//' '
    do i = 1,ntimescales
      write(text,'(E18.8)') timescales(i,tau_index)
      line = trim(line)//' '//adjustl(text)
    end do
    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''
  end do
  close(unit=iunit)

  ! Write lower confidence bounds.
  filename = trim(prefix)//'.timescales_lower'
  open(unit=iunit, file=filename, status='REPLACE', err=10, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do tau_index = 1,ntau
    line = ''
    write(text,'(F12.3)') taus(tau_index) * tau_unit
    line = adjustl(text)//' '
    do i = 1,ntimescales
      write(text,'(E18.8)') timescales_lower(i,tau_index)
      line = trim(line)//' '//adjustl(text)
    end do
    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''
  end do
  close(unit=iunit)

  ! Write upper confidence bounds.
  filename = trim(prefix)//'.timescales_upper'
  open(unit=iunit, file=filename, status='REPLACE', err=10, position='REWIND', &
       form='FORMATTED', action='WRITE')
  do tau_index = 1,ntau
    line = ''
    write(text,'(F12.3)') taus(tau_index) * tau_unit
    line = adjustl(text)//' '
    do i = 1,ntimescales
      write(text,'(E18.8)') timescales_upper(i,tau_index)
      line = trim(line)//' '//adjustl(text)
    end do
    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''
  end do
  close(unit=iunit)
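  ! NOTE: summarizing the output written above, writeTimescales produces four files:
  !   <prefix>.lag_times          one lag time (taus * tau_unit) per line
  !   <prefix>.timescales         lag time followed by ntimescales implied timescales
  !   <prefix>.timescales_lower   lower confidence bounds, same layout
  !   <prefix>.timescales_upper   upper confidence bounds, same layout
  ! with the confidence interval controlled by timescales_confidence_interval and the number of
  ! bootstrap resamples by timescales_bootstrap_ntrials.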
  ! Clean up.
  deallocate(taus,timescales,timescales_lower,timescales_upper)

  if(debug) write(*,*) readTimer(), ' seconds elapsed'

  return

  ! Report error if file could not be opened.
10 continue
  write(*,*) 'decompose: writeTimescales: Error opening file ', trim(filename)
  stop

end subroutine writeTimescales

!=============================================================================================
! Write transformation from initial states to final states to file.
!
! For both splitting and lumping, the format of each line is:
!
! [lumped state] : [splitstate1] [splitstate2] ... [splitstateN]
!
! Note that, for splitting, the initial state before splitting is on the left and the states it
! was subsequently split into are on the right. For lumping, it is the opposite: the initial
! states that were merged to form the lumped state are on the right, and the lumped state is on
! the left.
!
! There is always a one-to-many relation.
!
! NOTE: We require ninitialstates > nfinalstates.
!=============================================================================================
subroutine writeStateTransformation(filename, lumped_stateassignments, split_stateassignments)

  use pdbio, only : new_unit
  use utilities, only : unique, find

  ! Parameters.
  character(len=*), intent(in) :: filename ! File to write the transformation table to.
  integer, dimension(:), intent(in) :: lumped_stateassignments, split_stateassignments ! Individual state assignments.

  ! Local variables.
  integer :: iunit ! unit to use for file management
  integer :: nsnapshots
  integer :: snapshot_index, state_index
  character(len=MAX_LINE_LENGTH) :: line, text
  integer :: last_state
  integer, dimension(:), pointer :: snapshot_indices ! indices of snapshots matching the current state
  integer, dimension(:), pointer :: unique_lumped_states, unique_split_states
  integer :: state ! current state id
  integer :: nunique_split_states
  integer :: nlumpedstates
  integer :: i

  if(debug) write(*,*) 'Writing state transformation table...'

  ! Get number of snapshots.
  nsnapshots = size(lumped_stateassignments,1)
  write(*,*) 'nsnapshots = ', nsnapshots

  ! Get a new file unit to use.
  call new_unit(iunit)

  ! Open file for formatted writing.
  open(unit=iunit, file=filename, status='REPLACE', err=11, position='REWIND', &
       form='FORMATTED', action='WRITE')

  ! Make a list of unique lumped states.
  unique_lumped_states => unique(lumped_stateassignments)
  nlumpedstates = size(unique_lumped_states,1)

  ! List all final states corresponding to each unique initial state.
  do state_index = 1,nlumpedstates
    ! Get current state.
    state = unique_lumped_states(state_index)

    ! Find all snapshots in this state.
    snapshot_indices => find(lumped_stateassignments == state)

    ! Make a list of all unique final states.
    unique_split_states => unique(split_stateassignments(snapshot_indices))
    nunique_split_states = size(unique_split_states,1)

    ! Construct a line of text to write to file.
    line = ''
    write(text,*) state
    line = trim(line)//adjustl(text)//' :'
    do i = 1,nunique_split_states
      write(text,*) unique_split_states(i)
      line = trim(line)//' '//adjustl(text)
    end do

    ! Write to file.
    write(iunit,'(A)',advance='no') trim(line)
    write(iunit,'(A)') ''

    ! Clean up.
    deallocate(snapshot_indices,unique_split_states)
  end do

  ! Close file.
  close(unit=iunit)

  ! Clean up.
  deallocate(unique_lumped_states)

  if(debug) write(*,*) 'Done.'

  return

  ! Report error if file could not be opened.
11 continue
  write(*,*) 'decompose: writeStateTransformation: Error opening file ', trim(filename)
  stop

end subroutine writeStateTransformation
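! NOTE (hypothetical state ids): a line of the transformation table written above might look like
!
!    1523 : 1781 2047 3981 5530
!
! meaning that, for the split table, the state with id 1523 was split into four states with ids
! 1781, 2047, 3981 and 5530 (state ids are generator snapshot indices), or, for the lump table,
! that those four states were merged into state 1523.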
!=============================================================================================
! Write PDB files containing an ensemble of configurations chosen at random from each state.
!=============================================================================================
subroutine writeStatePDBs(prefix, nstates, state_representatives, stateassignments, nmodels_to_write)

  use utilities, only : find, generateUniqueRandomIndices
  use pdbio, only : pdb_write
  use timer
  use kabsch_rmsd, only : ls_align

  ! Arguments.
  character(len=*), intent(in) :: prefix ! Prefix of the PDB filenames to write.
  integer, intent(in) :: nstates ! Number of states.
  integer, dimension(nstates), intent(in) :: state_representatives ! Indices of state representatives.
  integer, dimension(:), intent(in) :: stateassignments ! Individual state assignments.
  integer, intent(in) :: nmodels_to_write ! Number of models to write to each PDB file.

  ! Local variables.
  integer :: state_index ! State index, from 1...nstates.
  integer :: state_representative ! Index of representative snapshot.
  integer, dimension(:), pointer :: members ! Indices of snapshots that belong to the current state under consideration.
  integer :: nmembers ! Length of index list.
  integer :: nmodels ! Number of models to actually write for this PDB file (may be less than nmodels_to_write
    ! if there are fewer members than desired models).
  integer, dimension(:), allocatable :: indices ! Unique list of snapshot indices of models to write.
  character(len=MAX_FILENAME_LENGTH) :: filename ! Filename to write to.
  character(len=59), dimension(1) :: remarks ! optional remarks to put in header
  real(dp) :: elapsed_seconds ! Number of seconds elapsed.
  real(sp), dimension(:,:,:), allocatable :: models ! Models that have been aligned to the state representative.
  real(sp), dimension(natoms,3) :: representative_snapshot ! The representative snapshot, aligned to the reference snapshot.
  integer :: model_index ! Loop index.

  if(debug) write(*,*) 'Writing states to PDB files...'

  ! Loop over all states.
  call resetTimer()
  do state_index = 1,nstates
    ! Get the index of the state representative snapshot.
    state_representative = state_representatives(state_index)

    ! Skip this state if the representative is set to 0.
    if (state_representative .eq. 0) cycle

    ! Store the state representative as the first model.
    representative_snapshot = snapshots(state_representative,:,:)

    ! Align the representative to the reference snapshot using the alignment atom indices (ls_align_atomindices).
    call ls_align(natoms, representative_snapshot, reference_snapshot, ls_align_atomindices)

    ! Build a list of all snapshots in this state.
    members => find(stateassignments == state_representative)
    nmembers = size(members,1)

    ! Determine how many models to write.
    ! If more models are requested than there are members, write only nmembers models.
    nmodels = nmodels_to_write
    if(nmembers < nmodels) nmodels = nmembers

    ! Choose models at random from the list of snapshots belonging to this state.
    allocate(indices(nmodels))
    call generateUniqueRandomIndices(nmembers, nmodels, indices)
    indices = members(indices)

    ! Replace the first model with the state representative.
    indices(1) = state_representative

    ! Store and align models.
    allocate(models(nmodels,natoms,3))
    models = snapshots(indices,:,:)
    do model_index = 1,nmodels
      call ls_align(natoms, models(model_index,:,:), representative_snapshot, ls_align_atomindices)
    end do

    ! Construct filename as '${prefix}-state_id.pdb'.
    write(filename,*) state_representative
    filename = trim(prefix)//'-'//trim(adjustl(filename))//'.pdb'

    ! Construct remarks text, indicating state id and number of members.
    write(remarks,*) 'State', state_representative, ', ', nmembers, ' members'

    ! Write models to PDB file.
    call pdb_write(filename, pdbatoms, models, remarks=remarks)

    ! Deallocate members list and model storage.
    deallocate(members, indices, models)
  end do
  if(debug) write(*,*) readTimer(), ' seconds elapsed'

end subroutine writeStatePDBs

!=============================================================================================
! Split all macrostates into microstates.
!
! Each macrostate, if above a certain threshold size, is split into a number of microstates
! determined by its size. These microstates are assigned as state identities the indices of
! their generators.
!=============================================================================================
subroutine split()

  use utilities, only : find, unique

  ! Local variables.
  integer :: macrostate, compacted_macrostate_index ! macrostate index
  integer, dimension(:), pointer :: indices ! snapshot indices of macrostate members
  integer :: nindices ! number of members of the macrostate
  integer :: nmembers ! Number of members of this state.
  integer :: nmaxmicrostates ! Maximum possible number of microstates.
  integer :: target_nmicrostates ! Target number of microstates to split to.
  integer :: status
  integer :: counter

  counter = 0

  ! Attempt to split each macrostate.
  do compacted_macrostate_index = 1,nmacrostates
    ! Get the unique macrostate id for this state.
    macrostate = macrostates(compacted_macrostate_index)

    ! Get snapshot indices of all members of this macrostate.
    indices => find(compacted_macrostate_assignments == compacted_macrostate_index)

    ! Determine number of snapshots in this macrostate.
    nindices = size(indices,1)
    counter = counter + nindices

    ! Determine how many microstates to split into using the given target microstate size.
    target_nmicrostates = (nindices / target_microstate_size)

    ! Limit the number of microstates an individual macrostate may be split into, if this option has been specified.
    if(iteration == 1) then
      if(max_microstates_per_macrostate_first_iteration >= 0) &
        target_nmicrostates = min(target_nmicrostates, max_microstates_per_macrostate_first_iteration)
    else
      if(max_microstates_per_macrostate >= 0) target_nmicrostates = min(target_nmicrostates, max_microstates_per_macrostate)
    end if

    ! Don't do anything if the state is too small to be split.
    if(target_nmicrostates <= 1) then
      ! Assign microstates from macrostate.
      microstate_assignments(indices) = macrostate

      ! Free list of macrostate members and cycle.
      deallocate(indices)
      cycle
    end if

    if(debug) write(*,*) 'Splitting macrostate ', compacted_macrostate_index, ' of ', nmacrostates, ', id ', macrostate, &
      ' (', nindices, ' members) into ', target_nmicrostates, ' microstates'

    ! Split the macrostate into microstates, assigning them unique ids by their generators.
    call splitState(nindices, indices, target_nmicrostates, microstate_assignments)

    ! Free list of macrostate members.
    deallocate(indices)
  end do
  write(*,*) 'counter = ', counter

  ! DEBUG: report any snapshots left unassigned.
  indices => find(microstate_assignments == 0)
  write(*,*) 'find found ', size(indices,1), ' zero indices:'
  write(*,*) indices
  deallocate(indices)

  ! Generate compacted microstate assignments, where indices go from 1...nmicrostates.
  deallocate(microstates, stat=status)
  microstates => unique(microstate_assignments, compacted=compacted_microstate_assignments)
  nmicrostates = size(microstates,1)

  call checkstates(microstate_assignments)

end subroutine split
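! NOTE (worked example with hypothetical parameter values): in split(), a macrostate with
! nindices = 5230 member snapshots and target_microstate_size = 100 would be divided into
! target_nmicrostates = 5230 / 100 = 52 microstates (integer division); if
! max_microstates_per_macrostate were set to 30, the count would be capped at 30; and a
! macrostate with fewer than 2 * target_microstate_size members (target_nmicrostates <= 1)
! would be left intact.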
!=============================================================================================
! Generate and refine a state decomposition by iterative splitting and lumping.
!=============================================================================================
subroutine iterate()

  use timer ! for timing info

  ! Local variables.
  real(dp) :: elapsed_seconds ! Time elapsed during each call.
  character(len=MAX_FILENAME_LENGTH) :: filename ! Storage for constructing filenames for writing out progress files.

  ! Iterate to refine state decomposition.
  do iteration = iteration, niterations
    if(debug) write(*,*) 'State decomposition iteration ', iteration, ' of ', niterations, '...'

    !=========================================================================================
    ! Split to microstates.
    !=========================================================================================

    ! Determine the distance metric to use for this iteration.
    if(iteration == 1) then
      rmsd_mode = rmsd_mode_first_iteration
    else
      rmsd_mode = rmsd_mode_subsequent_iterations
    end if
    if(debug) write(*,*) 'Using rmsd_mode = ', rmsd_mode

    ! Split all states.
    call resetTimer()
    call split()
    write(*,*) 'checking microstate assignments...'
    call checkstates(microstate_assignments)
    call checkstates(compacted_microstate_assignments)
    if(debug) write(*,*) readTimer(), ' seconds elapsed'

    ! Write split state assignments.
    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.stateassignments-split'
    call writeStateassignments(filename, nmicrostates, microstate_assignments)

    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.stateassignments-split-compacted'
    call writeStateassignments(filename, nmicrostates, compacted_microstate_assignments)

    ! Write microstates.
    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.microstates'
    call writeStateassignments(filename, nmicrostates, microstates)

    if(write_split_table) then
      ! TODO: Write translation from lumped -> split state ids.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.split'
      call writeStateTransformation(filename, macrostate_assignments, microstate_assignments)
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

    if(write_split_timescales) then
      ! Write timescales.
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.timescales-split'
      call writeTimescales(filename, nmicrostates, compacted_microstate_assignments, max_tau)
    end if

    if(write_split_pdbs) then
      ! Write state PDBs.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.ensemble-split'
      call writeStatePDBs(filename, nmicrostates, microstates, microstate_assignments, split_pdb_nmodels)
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

    !=========================================================================================
    ! Lump microstates to macrostates.
    !=========================================================================================

    ! Lump to desired number of macrostates.
    call resetTimer()
    nmacrostates = target_nmacrostates
    call lump()
    call checkstates(macrostate_assignments)
    call checkstates(compacted_microstate_assignments)
    if(debug) write(*,*) readTimer(), ' seconds elapsed'

    ! Write lumped state assignments.
    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.stateassignments-merged'
    call writeStateassignments(filename, nmacrostates, macrostate_assignments)

    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.stateassignments-merged-compacted'
    call writeStateassignments(filename, nmacrostates, compacted_macrostate_assignments)

    ! Write macrostates.
    write(filename,*) iteration
    filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.macrostates'
    call writeStateassignments(filename, nmacrostates, macrostates)

    if(write_lump_table) then
      ! TODO: Write translation from split -> lumped state ids.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.lump'
      call writeStateTransformation(filename, macrostate_assignments, microstate_assignments)
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

    if(write_merged_timescales) then
      ! Write timescales.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.timescales-merged'
      call writeTimescales(filename, nmacrostates, compacted_macrostate_assignments, max_tau)
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

    if(write_merged_eigenvectors) then
      ! Write eigenvectors.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.merged'
      call writeEigenvectors(filename, nmacrostates, compacted_macrostate_assignments, tau, min(max_ntimescales+1,nmacrostates))
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

    if(write_merged_pdbs) then
      ! Write state PDBs.
      call resetTimer()
      write(filename,*) iteration
      filename = trim(output_directory)//'/'//trim(adjustl(filename))//'.ensemble-merged'
      call writeStatePDBs(filename, nmacrostates, macrostates, macrostate_assignments, merged_pdb_nmodels)
      if(debug) write(*,*) readTimer(), ' seconds elapsed'
    end if

  end do

end subroutine iterate

end module decompose