<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#!/usr/bin/env python

import sys, os
import numpy as np
# Append every directory listed in the shell's PATH to the module search
# path before the project imports below run.
# NOTE(review): presumably this lets ArgLib (and similar helper modules) be
# imported from wherever the scripts are installed -- confirm. The split on
# ':' assumes a POSIX-style PATH separator.
sys.path += os.environ['PATH'].split(':')

import ArgLib
from msmbuilder import Serializer
from msmbuilder.Project import Project
from euclid.clustering import deterministic_subsample, KCenters, HybridKMedoids
from euclid.metrics import RMSD

def main():
    """Cluster a project's trajectories with hybrid k-medoids and save output.

    Command line options (parsed by ``ArgLib``) supply the project file, the
    number of clusters, the subsampling stride, the atom indices file, the
    number of global/local k-medoids iterations, the RMSD cutoff and the
    output directory.

    Files written into the output directory:
      * ``Gens.lh5``            -- the cluster generators (always).
      * ``Assignments.h5``      -- per-frame assignments (only if stride == 1).
      * ``Assignments.h5.RMSD`` -- per-frame distances (only if stride == 1).

    The script exits with status 1, before loading any trajectory data, if
    any of the files it is about to write already exists.
    """
    # get command line arguments
    arglist = ["projectfn", "clusters", "stride", "atomindices",
               "globalkmediods", "localkmediods", "rmsdcutoff", "outdir"]
    options = ArgLib.parse(arglist)

    # process command line arguments into the right type
    gens_path = os.path.join(options.outdir, 'Gens.lh5')
    assignments_path = os.path.join(options.outdir, 'Assignments.h5')
    distances_path = os.path.join(options.outdir, 'Assignments.h5.RMSD')
    stride = int(options.stride)
    k = int(options.clusters)
    rmsd_cutoff = float(options.rmsdcutoff)
    local_num_iters = int(options.localkmediods)
    global_num_iters = int(options.globalkmediods)

    # check paths *before* the expensive loading/clustering so that we fail
    # fast. Only the files this run will actually write are checked; an
    # output directory that merely exists is fine.
    output_paths = [gens_path]
    if stride == 1:
        output_paths.extend([assignments_path, distances_path])
    for path in output_paths:
        if os.path.exists(path):
            print('Error: %s already exists' % path)
            sys.exit(1)
    if not os.path.isdir(options.outdir):
        # makedirs so that nested output directories work too
        os.makedirs(options.outdir)

    # load stuff up
    project = Project.LoadFromHDF(options.projectfn)
    atom_indices = np.loadtxt(options.atomindices, int)
    trajs = [project.LoadTraj(i) for i in range(project['NumTrajs'])]

    # Set up metric and print
    metric = RMSD(atom_indices)
    print('\n\n' + '=' * 80 + '\n' + '-' * 80)
    print('Computing distances with "%s"' % metric)
    print('=' * 80 + '\n' + '-' * 80)

    if stride != 1:
        # run subsampling, clustering. The stride must be handed to the
        # subsampler, otherwise the user's --stride setting is ignored.
        strided = deterministic_subsample(trajs, stride)
        clust = HybridKMedoids(metric, strided, k=k,
                               distance_cutoff=rmsd_cutoff,
                               local_num_iters=local_num_iters,
                               global_num_iters=global_num_iters)
        gens = clust.get_generators_as_traj()

    else:
        # since there's no striding, we get assignments for free
        clust = HybridKMedoids(metric, trajs, k=k,
                               distance_cutoff=rmsd_cutoff,
                               local_num_iters=local_num_iters,
                               global_num_iters=global_num_iters)
        gens = clust.get_generators_as_traj()
        assignments = clust.get_assignments()
        distances = clust.get_distances()

        # save assignments, distances
        print('Saving assignments to %s' % assignments_path)
        Serializer.SaveData(assignments_path, assignments)
        Serializer.SaveData(distances_path, distances)

    # save generators, tagged with the metric that produced them
    print('Saving generators to %s' % gens_path)
    gens['DistanceMetricUsed'] = repr(metric)
    gens.SaveToLHDF(gens_path)
    
# Run the clustering driver only when executed as a script, not on import.
if __name__ == "__main__":
    main()
    </pre></body></html>