```
def rosetta_output_succeeded( potential_struct_dir ):
    path_to_rosetta_output = os.path.join( potential_struct_dir, rosetta_output_file_name )
    if not os.path.isfile(path_to_rosetta_output):
        return False

    db3_file = os.path.join( potential_struct_dir, output_database_name )
    if not os.path.isfile( db3_file ):
        return False

    success_line_found = False
    no_more_batches_line_found = False
    with open( path_to_rosetta_output, 'r' ) as f:
        for line in f:
            if line.startswith( 'protocols.jd2.JobDistributor' ) and 'reported success in' in line:
                success_line_found = True
            if line.startswith( 'protocols.jd2.JobDistributor' ) and 'no more batches to process' in line:
                no_more_batches_line_found = True

    return no_more_batches_line_found and success_line_found

def find_finished_jobs( output_folder ):
    return_dict = {}
    job_dirs = [ os.path.abspath(os.path.join(output_folder, d)) for d in os.listdir(output_folder) if os.path.isdir( os.path.join(output_folder, d) )]
    for job_dir in job_dirs:
        completed_struct_dirs = []
        for potential_struct_dir in sorted([ os.path.abspath(os.path.join(job_dir, d)) for d in os.listdir(job_dir) if os.path.isdir( os.path.join(job_dir, d) )]):
            if rosetta_output_succeeded( potential_struct_dir ):
                completed_struct_dirs.append( potential_struct_dir )
        return_dict[job_dir] = completed_struct_dirs
    return return_dict

def calc_ddg( scores ):
    total_structs = np.max( scores['struct_num'] )

    nstructs_to_analyze = set([total_structs])
    for x in range(10, total_structs):
        if x % 10 == 0:
            nstructs_to_analyze.add(x)
    nstructs_to_analyze = sorted(nstructs_to_analyze)

    all_ddg_scores = []
    for nstructs in nstructs_to_analyze:
        ddg_scores = scores.loc[ ((scores['state'] == 'unbound_mut') | (scores['state'] == 'bound_wt')) & (scores['struct_num'] <= nstructs) ].copy()
        for column in ddg_scores.columns:
            if column not in ['state', 'case_name', 'backrub_steps', 'struct_num', 'score_function_name']:
                ddg_scores.loc[:,column] *= -1.0
        ddg_scores = ddg_scores._append( scores.loc[ ((scores['state'] == 'unbound_wt') | (scores['state'] == 'bound_mut')) & (scores['struct_num'] <= nstructs) ].copy() )
        ddg_scores = ddg_scores.groupby( ['case_name', 'backrub_steps', 'struct_num', 'score_function_name'] ).sum().reset_index()

        if nstructs == total_structs:
            struct_scores = ddg_scores.copy()

        ddg_scores = ddg_scores.groupby( ['case_name', 'backrub_steps', 'score_function_name'] ).mean().round(decimals=5).reset_index()
        new_columns = list(ddg_scores.columns.values)
        new_columns.remove( 'struct_num' )
        ddg_scores = ddg_scores[new_columns]
        ddg_scores[ 'scored_state' ] = 'ddG'
        ddg_scores[ 'nstruct' ] = nstructs
        all_ddg_scores._append(ddg_scores)

    return (pd.concat(all_ddg_scores), struct_scores)

def calc_dgs( scores ):
    l = []
    total_structs = np.max( scores['struct_num'] )

    nstructs_to_analyze = set([total_structs])
    for x in range(10, total_structs):
        if x % 10 == 0:
            nstructs_to_analyze.add(x)
    nstructs_to_analyze = sorted(nstructs_to_analyze)

    for state in ['mut', 'wt']:
        for nstructs in nstructs_to_analyze:
            dg_scores = scores.loc[ (scores['state'].str.endswith(state)) & (scores['state'].str.startswith('unbound')) & (scores['struct_num'] <= nstructs) ].copy()
            for column in dg_scores.columns:
                if column not in ['state', 'case_name', 'backrub_steps', 'struct_num', 'score_function_name']:
                    dg_scores.loc[:,column] *= -1.0
            dg_scores = dg_scores._append( scores.loc[ (scores['state'].str.endswith(state)) & (scores['state'].str.startswith('bound')) & (scores['struct_num'] <= nstructs) ].copy() )
            dg_scores = dg_scores.groupby( ['case_name', 'backrub_steps', 'struct_num', 'score_function_name'] ).sum().reset_index()
            dg_scores = dg_scores.groupby( ['case_name', 'backrub_steps', 'score_function_name'] ).mean().round(decimals=5).reset_index()
            new_columns = list(dg_scores.columns.values)
            new_columns.remove( 'struct_num' )
            dg_scores = dg_scores[new_columns]
            dg_scores[ 'scored_state' ] = state + '_dG'
            dg_scores[ 'nstruct' ] = nstructs
            l.append( dg_scores )

    return l

def analyze_output_folder( output_folder ):
    # Pass in an outer output folder. Subdirectories are considered different mutation cases, with subdirectories of different structures.
    finished_jobs = find_finished_jobs( output_folder )
    if len(finished_jobs) == 0:
        print( 'No finished jobs found' )
        return

if __name__ == '__main__':
    for folder_to_analyze in sys.argv[1:]:
        if os.path.isdir( folder_to_analyze ):
            analyze_output_folder( folder_to_analyze )
```
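For orientation, the directory layout these helper functions appear to expect (inferred from the code above; the case and structure folder names here are hypothetical) is roughly:

```
output_folder/
    some_mutation_case/    # one sub-folder per mutation case
        01/                # one sub-folder per output structure (struct_num)
            rosetta.out    # checked by rosetta_output_succeeded
            ddG.db3        # the Rosetta score database read during analysis
        02/
            ...
```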
The error happened as follows:
Unique values in 'state' column: ['bound_wt' 'unbound_mut']
Traceback (most recent call last):
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1874, in _agg_py_fallback
res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/ops.py", line 849, in agg_series
result = self._aggregate_series_pure_python(obj, func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/ops.py", line 877, in _aggregate_series_pure_python
res = func(group)
^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 2380, in
alt=lambda x: Series(x).mean(numeric_only=numeric_only),
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/series.py", line 6225, in mean
return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py", line 11992, in mean
return self._stat_function(
^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py", line 11949, in _stat_function
return self._reduce(
^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/series.py", line 6133, in _reduce
return op(delegate, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py", line 147, in f
result = alt(values, axis=axis, skipna=skipna, **kwds)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py", line 404, in new_func
result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py", line 720, in nanmean
the_sum = _ensure_numeric(the_sum)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/nanops.py", line 1693, in _ensure_numeric
raise TypeError(f"Could not convert string '{x}' to numeric")
TypeError: Could not convert string 'bound_wtunbound_mutbound_mutunbound_wtbound_wtunbound_mutbound_mutunbound_wtbound_wtunbound_mutbound_mutunbound_wt' to numeric
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 214, in
analyze_output_folder( folder_to_analyze )
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 190, in analyze_output_folder
ddg_scores, struct_scores = calc_ddg( scores )
^^^^^^^^^^^^^^^^^^
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 137, in calc_ddg
ddg_scores = ddg_scores.groupby( ['case_name', 'backrub_steps', 'score_function_name'] ).mean().round(decimals=5).reset_index()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 2378, in mean
result = self._cython_agg_general(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1929, in _cython_agg_general
new_mgr = data.grouped_reduce(array_func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py", line 1428, in grouped_reduce
applied = sb.apply(func)
^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py", line 366, in apply
result = func(self.values, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1926, in array_func
result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1878, in _agg_py_fallback
raise type(err)(msg) from err
TypeError: agg function failed [how->mean,dtype->object]
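For reference, the immediate cause of this TypeError is that the string 'state' column is still present when the grouped frames are aggregated: the first .sum() concatenates the state strings (hence the 'bound_wtunbound_mut…' value in the message), and the following .mean() then fails because pandas 2.0 and later no longer silently drop non-numeric columns. A minimal sketch of the failure and two possible workarounds, using a toy frame rather than the tutorial's real data:

```
import pandas as pd

# Toy frame with the same kind of mix of string and numeric columns (hypothetical values).
scores = pd.DataFrame({
    'case_name': ['case1'] * 4,
    'state': ['bound_wt', 'unbound_mut', 'bound_mut', 'unbound_wt'],
    'struct_num': [1, 1, 1, 1],
    'total_score': [-100.0, -90.0, -95.0, -85.0],
})

# In pandas >= 2.0 this raises "TypeError: agg function failed [how->mean,dtype->object]",
# because the object-dtype 'state' column cannot be averaged:
# scores.groupby('case_name').mean()

# Workaround 1: restrict the aggregation to numeric columns.
print(scores.groupby('case_name').mean(numeric_only=True))

# Workaround 2: drop the string columns that are not group keys before aggregating.
print(scores.drop(columns=['state']).groupby('case_name').mean())
```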
Then the code below was added to populate the state column with the appropriate values:

```
state = os.path.basename(os.path.dirname(output_path))
if state.startswith("bound"):
    state = "bound_" + state.split("_")[1]
elif state.startswith("unbound"):
    state = "unbound_" + state.split("_")[1]
```
After that, I got an empty dataframe in the output file.
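One possible explanation for the empty output, assuming the snippet above is what sets 'state' for each structure: os.path.dirname(output_path) points at the mutation-case folder rather than at anything named bound_*/unbound_*, so the derived labels may not exactly match the four values ('bound_wt', 'unbound_mut', 'bound_mut', 'unbound_wt') that calc_ddg filters on, and every .loc selection then comes back empty. A small illustration with hypothetical, mis-derived labels:

```
import pandas as pd

# If 'state' does not contain exactly the four expected labels,
# the boolean filters in calc_ddg select nothing.
scores = pd.DataFrame({
    'state': ['bound_1', 'unbound_1'],   # hypothetical, mis-derived labels
    'struct_num': [1, 1],
    'total_score': [-100.0, -90.0],
})

subset = scores.loc[
    ((scores['state'] == 'unbound_mut') | (scores['state'] == 'bound_wt'))
    & (scores['struct_num'] <= 1)
]
print(subset.empty)  # True -> all downstream groupby/mean results are empty as well
```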
Because the pandas .append() method is now deprecated, I modified the code as shown below:
```
#!/usr/bin/python3

import sys
import os
import sqlite3
import shutil
import tempfile
from pprint import pprint
import pandas as pd
import numpy as np
import re
import datetime
import sys
import collections
import threading

rosetta_output_file_name = 'rosetta.out'
output_database_name = 'ddG.db3'
trajectory_stride = 5
script_output_folder = 'analysis_output'

zemu_gam_params = {
    'fa_sol' : (6.940, -6.722),
    'hbond_sc' : (1.902, -1.999),
    'hbond_bb_sc' : (0.063, 0.452),
    'fa_rep' : (1.659, -0.836),
    'fa_elec' : (0.697, -0.122),
    'hbond_lr_bb' : (2.738, -1.179),
    'fa_atr' : (2.313, -1.649),
}

def gam_function(x, score_term = None ):
    return -1.0 * np.exp( zemu_gam_params[score_term][0] ) + 2.0 * np.exp( zemu_gam_params[score_term][0] ) / ( 1.0 + np.exp( -1.0 * x * np.exp( zemu_gam_params[score_term][1] ) ) )

def apply_zemu_gam(scores):
    new_columns = list(scores.columns)
    new_columns.remove('total_score')
    scores = scores.copy()[ new_columns ]
    for score_term in zemu_gam_params:
        assert( score_term in scores.columns )
        scores[score_term] = scores[score_term].apply( gam_function, score_term = score_term )
    scores[ 'total_score' ] = scores[ list(zemu_gam_params.keys()) ].sum( axis = 1 )
    scores[ 'score_function_name' ] = scores[ 'score_function_name' ] + '-gam'
    return scores

def rosetta_output_succeeded( potential_struct_dir ):
    path_to_rosetta_output = os.path.join( potential_struct_dir, rosetta_output_file_name )
    if not os.path.isfile(path_to_rosetta_output):
        return False

def find_finished_jobs( output_folder ):
    return_dict = {}
    job_dirs = [ os.path.abspath(os.path.join(output_folder, d)) for d in os.listdir(output_folder) if os.path.isdir( os.path.join(output_folder, d) )]
    for job_dir in job_dirs:
        completed_struct_dirs = []
        for potential_struct_dir in sorted([ os.path.abspath(os.path.join(job_dir, d)) for d in os.listdir(job_dir) if os.path.isdir( os.path.join(job_dir, d) )]):
            if rosetta_output_succeeded( potential_struct_dir ):
                completed_struct_dirs.append( potential_struct_dir )
        return_dict[job_dir] = completed_struct_dirs

def get_scores_from_db3_file(db3_file, struct_number, case_name):
    conn = sqlite3.connect(db3_file)
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

def process_finished_struct( output_path, case_name ):
    db3_file = os.path.join( output_path, output_database_name )
    assert( os.path.isfile( db3_file ) )
    struct_number = int( os.path.basename(output_path) )
    scores_df = get_scores_from_db3_file( db3_file, struct_number, case_name )

def calc_ddg( scores ):
    total_structs = np.max( scores['struct_num'] )

def calc_dgs( scores ):
    l = []

def analyze_output_folder( output_folder ):
    # Pass in an outer output folder. Subdirectories are considered different mutation cases, with subdirectories of different structures.
    finished_jobs = find_finished_jobs( output_folder )
    if len(finished_jobs) == 0:
        print( 'No finished jobs found' )
        return

if __name__ == '__main__':
    for folder_to_analyze in sys.argv[1:]:
        if os.path.isdir( folder_to_analyze ):
            analyze_output_folder( folder_to_analyze )
```
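As an aside, DataFrame.append() was removed outright in pandas 2.0 (DataFrame._append() is only a private leftover), so a commonly suggested replacement pattern, sketched here under the assumption that the goal is simply to stack the bound/unbound selections, is to collect the pieces in a plain Python list and call pd.concat once:

```
import pandas as pd

def combine_scores(frames):
    # Concatenate once at the end instead of repeatedly appending DataFrames.
    return pd.concat(frames, ignore_index=True)

# Hypothetical stand-ins for the bound/unbound selections made in calc_ddg.
bound = pd.DataFrame({'state': ['bound_wt'], 'struct_num': [1], 'total_score': [-100.0]})
unbound = pd.DataFrame({'state': ['unbound_mut'], 'struct_num': [1], 'total_score': [-90.0]})
print(combine_scores([bound, unbound]))
```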
The error happened as follows:
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 214, in
analyze_output_folder( folder_to_analyze )
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 190, in analyze_output_folder
ddg_scores, struct_scores = calc_ddg( scores )
^^^^^^^^^^^^^^^^^^
File "/ai/data/Software/flexddg/flex_ddG_tutorial/analyze_flex_ddG2.py", line 137, in calc_ddg
ddg_scores = ddg_scores.groupby( ['case_name', 'backrub_steps', 'score_function_name'] ).mean().round(decimals=5).reset_index()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 2378, in mean
result = self._cython_agg_general(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1929, in _cython_agg_general
new_mgr = data.grouped_reduce(array_func)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/internals/managers.py", line 1428, in grouped_reduce
applied = sb.apply(func)
^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/internals/blocks.py", line 366, in apply
result = func(self.values, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1926, in array_func
result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/lib/python3.11/site-packages/pandas/core/groupby/groupby.py", line 1878, in _agg_py_fallback
raise type(err)(msg) from err
TypeError: agg function failed [how->mean,dtype->object]
Then the code below was added to populate the state column with the appropriate values:

```
state = os.path.basename(os.path.dirname(output_path))
if state.startswith("bound"):
    state = "bound_" + state.split("_")[1]
elif state.startswith("unbound"):
    state = "unbound_" + state.split("_")[1]
```
After that, I got an empty dataframe in the output file.