
Commit 2af3a89: Updated workflow

kcleal committed Nov 20, 2024
1 parent 654245f commit 2af3a89
Showing 3 changed files with 79 additions and 28 deletions.
.github/workflows/main.yml (7 changes: 4 additions & 3 deletions)
@@ -23,8 +23,9 @@ jobs:
           export PATH="$BREW_PREFIX/bin:$PATH"
           export LIBRARY_PATH="$BREW_PREFIX/lib:$LIBRARY_PATH"
           export PKG_CONFIG_PATH="$BREW_PREFIX/lib/pkgconfig:$PKG_CONFIG_PATH"
-          export MACOSX_DEPLOYMENT_TARGET=13.0
-          echo "MACOSX_DEPLOYMENT_TARGET=13.0" >> $GITHUB_ENV
+          export MACOSX_DEPLOYMENT_TARGET=14.0
+          echo "MACOSX_DEPLOYMENT_TARGET=14.0" >> $GITHUB_ENV
+          brew install libomp
       - name: Install project dependencies
         run: which python; python -m pip install -r requirements.txt
       - name: Build wheels
@@ -44,7 +45,7 @@ jobs:
             python3 -m pip install -r requirements.txt
           CIBW_BEFORE_BUILD_LINUX: pip install -r requirements.txt
           CIBW_REPAIR_WHEEL_COMMAND_MACOS: |
-            delocate-wheel -w {dest_dir} -v {wheel} --require-target-macos-version 13.0
+            delocate-wheel -w {dest_dir} -v {wheel} --require-target-macos-version 14.0
           CIBW_TEST_SKIP: "*-macosx_arm64"
           CIBW_TEST_REQUIRES: |
             cython click>=8.0 numpy scipy pandas pysam>=0.22.0 networkx>=2.4 scikit-learn>=0.22 sortedcontainers lightgbm
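
The wheel-repair step above now checks bundled libraries against macOS 14.0 via delocate-wheel --require-target-macos-version 14.0, matching the MACOSX_DEPLOYMENT_TARGET exported earlier in the job; cibuildwheel also derives the wheel's macosx platform tag from that variable. A minimal local sanity check of the tag on a finished wheel, sketched with the packaging library (the helper name and the example filename are hypothetical and not part of this repository):

# Hypothetical check, not part of the dysgu workflow: read the macOS major
# version encoded in a wheel filename's platform tags (e.g. macosx_14_0_arm64).
from pathlib import Path
from packaging.utils import parse_wheel_filename

def min_macos_major(wheel_path: str) -> int:
    _name, _version, _build, tags = parse_wheel_filename(Path(wheel_path).name)
    majors = [int(t.platform.split("_")[1]) for t in tags if t.platform.startswith("macosx_")]
    if not majors:
        raise ValueError(f"{wheel_path} is not a macOS wheel")
    return min(majors)

# Example filename only; the real wheels are produced by cibuildwheel in CI.
assert min_macos_major("dysgu-1.0.0-cp311-cp311-macosx_14_0_arm64.whl") >= 14
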
ci/osx-deps (13 changes: 8 additions & 5 deletions)
@@ -1,13 +1,17 @@
 #!/bin/bash
 
-export MACOSX_DEPLOYMENT_TARGET=13.0
 
 BREW_PREFIX="$(brew --prefix)"
 
+export MACOSX_DEPLOYMENT_TARGET=14.0
+export CFLAGS="${CFLAGS} -mmacosx-version-min=14.0"
+export CPPFLAGS="${CXXFLAGS} -mmacosx-version-min=14.0 -I${BREW_PREFIX}/include "
+export LDFLAGS="${LDFLAGS} -mmacosx-version-min=14.0 -L${BREW_PREFIX}/lib"
+
 git clone --depth 1 https://github.com/ebiggers/libdeflate.git
 cd libdeflate
-CFLAGS+=' -fPIC -O3 -mmacosx-version-min=13.0 ' cmake -B build
-CFLAGS+=' -fPIC -O3 -mmacosx-version-min=13.0 ' cmake --build build
+CFLAGS+=" -fPIC -O3 " cmake -B build
+CFLAGS+=" -fPIC -O3 " cmake --build build
 cp build/libdeflate.a ${BREW_PREFIX}/lib
 cp libdeflate.h ${BREW_PREFIX}/include
 cd ../
@@ -19,8 +23,7 @@ tar -xvf htslib.tar.bz2
 mv htslib-1.21 htslib && rm htslib.tar.bz2
 cd htslib
 
-CFLAGS+=" -mmacosx-version-min=13.0 " CPPFLAGS+=" -I${BREW_PREFIX}/include " LDFLAGS+=" -L${BREW_PREFIX}/lib" \
-./configure \
+./configure \
     --enable-libcurl \
     --enable-s3 \
     --enable-lzma \
dysgu/call_component.pyx (87 changes: 67 additions & 20 deletions)
@@ -26,9 +26,9 @@ warnings.filterwarnings("ignore", category=DeprecationWarning)
 warnings.simplefilter(action='ignore', category=FutureWarning)
 
 
-from sklearn.cluster import KMeans, DBSCAN
-from sklearn.exceptions import ConvergenceWarning
-warnings.filterwarnings('ignore', category=ConvergenceWarning)
+# from sklearn.cluster import KMeans, DBSCAN
+# from sklearn.exceptions import ConvergenceWarning
+# warnings.filterwarnings('ignore', category=ConvergenceWarning)
 
 
 np.random.seed(1)
@@ -760,32 +760,77 @@ cdef group_read_subsets(rds, insert_ppf, insert_size, insert_stdev):
     return spanning_alignments, informative, generic_insertions
 
 
-def dbscan_spanning(spanning, informative):
+def linear_scan_clustering(spanning, informative):
+    # This is essentially a 1D-DBSCAN
     if len(spanning) <= 3:
         return [(spanning, informative)]
 
-    X = np.array([s.len for s in spanning]).reshape(-1, 1)
-    X_max = X.max()
-    if X.min() == X_max:
+    lengths = [s.len for s in spanning]
+    cdef float X_max = max(lengths)
+    if <float>min(lengths) == X_max:
         return [(spanning, informative)]
 
-    eps = min(int(X_max * 0.03), int(math.pow(X_max, 0.45)))
+    # Eps is the clustering distance to use
+    cdef int eps = min(int(X_max * 0.03), int(math.pow(X_max, 0.45)))
     eps = max(1, eps)
 
-    cl = DBSCAN(eps=eps, min_samples=2)
-    labels = cl.fit_predict(X)
-
-    m = int(max(labels))
-    if len(labels) == 0 or m == -1:
+    indices = sorted(range(len(lengths)), key=lambda k: lengths[k])
+
+    clusters = []
+    current_cluster = [spanning[indices[0]]]
+    cdef int last_length = spanning[indices[0]].len
+    cdef int i, idx, current_length
+    for i in range(1, len(indices)):
+        idx = indices[i]
+        current_length = spanning[idx].len
+        if current_length - last_length <= eps:
+            current_cluster.append(spanning[idx])
+        else:
+            if len(current_cluster) >= 2:
+                clusters.append([current_cluster, []])
+            current_cluster = [spanning[idx]]
+        last_length = current_length
+    if len(current_cluster) >= 2:
+        clusters.append([current_cluster, []])
+
+    if not clusters:
         return [(spanning, informative)]
 
-    result = [[[], []] for i in range(m + 1)]
-    for idx, l in enumerate(labels):
-        if l == -1:
-            continue
-        result[l][0].append(spanning[idx])
+    return clusters
 
-    return result
+
+# def dbscan_spanning(spanning, informative):
+#     if len(spanning) <= 3:
+#         return [(spanning, informative)]
+#
+#     X = np.array([s.len for s in spanning]).reshape(-1, 1)
+#     X_max = X.max()
+#     if X.min() == X_max:
+#         return [(spanning, informative)]
+#
+#     eps = min(int(X_max * 0.03), int(math.pow(X_max, 0.45)))
+#     eps = max(1, eps)
+#
+#     cl = DBSCAN(eps=eps, min_samples=2)
+#     labels = cl.fit_predict(X)
+#
+#     m = int(max(labels))
+#     if len(labels) == 0 or m == -1:
+#         return [(spanning, informative)]
+#
+#     result = [[[], []] for i in range(m + 1)]
+#     for idx, l in enumerate(labels):
+#         if l == -1:
+#             continue
+#         result[l][0].append(spanning[idx])
+#
+#     # result2 = linear_scan_clustering(spanning, informative)
+#     #
+#     # echo([[i.len for i in clst[0] if i] for clst in result])
+#     # echo([[i.len for i in clst[0] if i] for clst in result2])
+#     # echo()
+#
+#     return result
 
 
 def process_spanning(paired_end, spanning_alignments, divergence, length_extend, informative,
Expand Down Expand Up @@ -963,7 +1008,9 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l
if len(spanning_alignments) > 0:
# if not paired_end:
candidates = []
for spanning_alignments, informative in dbscan_spanning(spanning_alignments, informative):

# for spanning_alignments, informative in dbscan_spanning(spanning_alignments, informative):
for spanning_alignments, informative in linear_scan_clustering(spanning_alignments, informative):
candidates.append(process_spanning(paired_end, spanning_alignments, divergence, length_extend, informative,
generic_insertions, insert_ppf, to_assemble))
return candidates
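
The linear_scan_clustering function added above replaces the earlier DBSCAN call with a sort-and-sweep over the lengths of spanning alignments: sort the lengths, merge each value into the current cluster when it lies within eps of the previous value, and keep only clusters with at least two members, which is what DBSCAN with min_samples=2 amounts to in one dimension. A small self-contained sketch of the same idea on plain integers (the function name and example values below are illustrative only, not taken from dysgu):

# Illustrative 1D linear-scan clustering, mirroring the approach of
# linear_scan_clustering in dysgu/call_component.pyx; names here are hypothetical.
import math

def cluster_lengths(lengths, min_samples=2):
    if len(lengths) <= 3 or min(lengths) == max(lengths):
        return [sorted(lengths)]
    x_max = max(lengths)
    # eps: merge distance, the smaller of 3% of the largest length and x_max ** 0.45
    eps = max(1, min(int(x_max * 0.03), int(math.pow(x_max, 0.45))))
    ordered = sorted(lengths)
    clusters, current = [], [ordered[0]]
    for value in ordered[1:]:
        if value - current[-1] <= eps:      # close to the previous length: same cluster
            current.append(value)
        else:                               # gap larger than eps: close the cluster
            if len(current) >= min_samples:
                clusters.append(current)
            current = [value]
    if len(current) >= min_samples:
        clusters.append(current)
    return clusters if clusters else [sorted(lengths)]

print(cluster_lengths([50, 52, 53, 300, 301, 1200]))
# [[50, 52, 53], [300, 301]]  (the lone 1200 is dropped as noise)

In the commit itself the function carries the alignment objects and an informative list alongside each cluster and falls back to a single group when nothing clusters, so the sketch only illustrates the grouping step.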
