Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions regression/run_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
CLUDB="${RESULTS}/clu"
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}" --shuffle 0

"${MMSEQS}" cluster "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3
"${MMSEQS}" cluster "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 1
"${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv"

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")"
TARGET="15698"
TARGET="15695"
awk -v actual="$ACTUAL" -v target="$TARGET" \
'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
> "${RESULTS}.report"
14 changes: 14 additions & 0 deletions regression/run_cluster2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/sh -e
# Regression test: runs the `cluster` workflow on clu.fasta with
# --linclust-version 2 and --cluster-version 2, then compares the resulting
# group count against a fixed expected value.
#
# Reads from the environment:
#   MMSEQS  - executable invoked for every step
#   DATADIR - directory that contains clu.fasta
#   RESULTS - output directory; the verdict is written to "${RESULTS}.report"
#
# Report contents: GOOD or BAD, followed by the expected and actual counts.

# Options on the shebang line are dropped when the script is started as
# `sh run_cluster2.sh`, so enable exit-on-error explicitly as well.
set -e

# Fail early with a clear message instead of executing an empty command name
# or writing to paths rooted at "/".
: "${MMSEQS:?MMSEQS must be set}"
: "${DATADIR:?DATADIR must be set}"
: "${RESULTS:?RESULTS must be set}"

CLUDB="${RESULTS}/clu"
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}" --shuffle 0

"${MMSEQS}" cluster "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 2 --cluster-version 2
"${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/results_cluster.tsv"

# Summary columns: number of runs of equal first-column values (presumably one
# run per cluster representative), number of rows, and rows per run.
# The counters start at 0 and the ratio is guarded, so an empty TSV produces
# "0 0 0" (and therefore a BAD report) rather than an awk division-by-zero
# abort that would leave no report behind.
awk 'BEGIN { l = ""; cnt = 0; t = 0 } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(cnt ? t/cnt : 0) }' "${RESULTS}/results_cluster.tsv" > "${RESULTS}/results_summary.tsv"
ACTUAL="$(cut -f1 "${RESULTS}/results_summary.tsv")"
TARGET="13619"
awk -v actual="$ACTUAL" -v target="$TARGET" \
  'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
  > "${RESULTS}.report"
4 changes: 2 additions & 2 deletions regression/run_cluster_update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ cat "$RESULTS/clu1.fasta" "$RESULTS/clu2.fasta" > "$RESULTS/cluCombined.fasta"
"${MMSEQS}" createdb "$RESULTS/clu1.fasta" "${SEQCLUDB1}"
"${MMSEQS}" createdb "$RESULTS/cluCombined.fasta" "${SEQCLUDB2}"

"${MMSEQS}" linclust "${SEQCLUDB1}" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 -a -c 0.50 --min-seq-id 0.50
"${MMSEQS}" clusterupdate "${SEQCLUDB1}" "${SEQCLUDB2}" "$RESULTS/results_clu" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/tmp" --cov-mode 1 -c 0.50 --min-seq-id 0.50
"${MMSEQS}" linclust "${SEQCLUDB1}" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 -a -c 0.50 --min-seq-id 0.50 --linclust-version 1
"${MMSEQS}" clusterupdate "${SEQCLUDB1}" "${SEQCLUDB2}" "$RESULTS/results_clu" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/tmp" --cov-mode 1 -c 0.50 --min-seq-id 0.50 --linclust-version 1
"${MMSEQS}" createtsv "$RESULTS/seqdb_update" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/clu_updated.tsv"

CLUSTERMEMEBER=$(wc -l "$RESULTS/clu_updated.tsv" | awk '{print $1}')
Expand Down
4 changes: 2 additions & 2 deletions regression/run_easy_cluster.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/sh -e
"${MMSEQS}" easy-cluster "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3
"${MMSEQS}" easy-cluster "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 1

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")"
TARGET="15698"
TARGET="15695"
awk -v actual="$ACTUAL" -v target="$TARGET" \
'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
> "${RESULTS}.report"
4 changes: 2 additions & 2 deletions regression/run_easy_cluster_reassign.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#!/bin/sh -e

cat "${DATADIR}/clu.fasta" | "${MMSEQS}" easy-cluster stdin "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 -c 0.8 --cov-mode 1 --cluster-reassign 1
cat "${DATADIR}/clu.fasta" | "${MMSEQS}" easy-cluster stdin "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 -c 0.8 --cov-mode 1 --cluster-reassign 1 --linclust-version 1

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")"
TARGET="17234"
TARGET="17229"
awk -v actual="$ACTUAL" -v target="$TARGET" \
'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
> "${RESULTS}.report"
4 changes: 2 additions & 2 deletions regression/run_easy_linclust.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/sh -e
"${MMSEQS}" easy-linclust "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50
"${MMSEQS}" easy-linclust "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 1 --alignment-mode 2

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")"
TARGET="26493"
TARGET="26477"
awk -v actual="$ACTUAL" -v target="$TARGET" \
'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
> "${RESULTS}.report"
2 changes: 1 addition & 1 deletion regression/run_easy_nuclcluster.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh -e
TARGET="${DATADIR}/genes.fasta"

"${MMSEQS}" easy-cluster "${TARGET}" "$RESULTS/results" "$RESULTS/tmp" -k 13 --min-seq-id 0.8 -c 0.5 --cov-mode 1
"${MMSEQS}" easy-cluster "${TARGET}" "$RESULTS/results" "$RESULTS/tmp" -k 13 --min-seq-id 0.8 -c 0.5 --cov-mode 1 --linclust-version 1

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")"
Expand Down
2 changes: 1 addition & 1 deletion regression/run_linclust.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
CLUDB=
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu"

"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50
"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 1 --alignment-mode 2
"${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv"

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
Expand Down
13 changes: 13 additions & 0 deletions regression/run_linclust2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh -e
# Regression test: runs `linclust` on clu.fasta with --linclust-version 2,
# then compares the resulting group count against a fixed expected value.
#
# Reads from the environment:
#   MMSEQS  - executable invoked for every step
#   DATADIR - directory that contains clu.fasta
#   RESULTS - output directory; the verdict is written to "${RESULTS}.report"
#
# Report contents: GOOD or BAD, followed by the expected and actual counts.

# Options on the shebang line are dropped when the script is started as
# `sh run_linclust2.sh`, so enable exit-on-error explicitly as well.
set -e

# Fail early with a clear message instead of executing an empty command name
# or writing to paths rooted at "/".
: "${MMSEQS:?MMSEQS must be set}"
: "${DATADIR:?DATADIR must be set}"
: "${RESULTS:?RESULTS must be set}"

# Sequence database path; previously assigned empty and never used while the
# same path was spelled out in every command below.
CLUDB="${RESULTS}/clu"
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}"

"${MMSEQS}" linclust "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 2
"${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/results_cluster.tsv"

# Summary columns: number of runs of equal first-column values (presumably one
# run per cluster representative), number of rows, and rows per run.
# The counters start at 0 and the ratio is guarded, so an empty TSV produces
# "0 0 0" (and therefore a BAD report) rather than an awk division-by-zero
# abort that would leave no report behind.
awk 'BEGIN { l = ""; cnt = 0; t = 0 } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(cnt ? t/cnt : 0) }' "${RESULTS}/results_cluster.tsv" > "${RESULTS}/results_summary.tsv"
ACTUAL="$(cut -f1 "${RESULTS}/results_summary.tsv")"
TARGET="25666"
awk -v actual="$ACTUAL" -v target="$TARGET" \
  'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
  > "${RESULTS}.report"
12 changes: 12 additions & 0 deletions regression/run_linclust2_split.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh -e
# Regression test: runs `linclust` on clu.fasta with --linclust-version 2 and
# --split-memory-limit 10M, then compares the resulting group count against a
# fixed expected value.
#
# Reads from the environment:
#   MMSEQS  - executable invoked for every step
#   DATADIR - directory that contains clu.fasta
#   RESULTS - output directory; the verdict is written to "${RESULTS}.report"
#
# Report contents: GOOD or BAD, followed by the expected and actual counts.

# Options on the shebang line are dropped when the script is started as
# `sh run_linclust2_split.sh`, so enable exit-on-error explicitly as well.
set -e

# Fail early with a clear message instead of executing an empty command name
# or writing to paths rooted at "/".
: "${MMSEQS:?MMSEQS must be set}"
: "${DATADIR:?DATADIR must be set}"
: "${RESULTS:?RESULTS must be set}"

# Sequence database path, named once instead of being repeated per command.
CLUDB="${RESULTS}/clu"
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}"

"${MMSEQS}" linclust "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M --linclust-version 2
"${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "${RESULTS}/results_clu" "${RESULTS}/results_cluster.tsv"

# Summary columns: number of runs of equal first-column values (presumably one
# run per cluster representative), number of rows, and rows per run.
# The counters start at 0 and the ratio is guarded, so an empty TSV produces
# "0 0 0" (and therefore a BAD report) rather than an awk division-by-zero
# abort that would leave no report behind.
awk 'BEGIN { l = ""; cnt = 0; t = 0 } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(cnt ? t/cnt : 0) }' "${RESULTS}/results_cluster.tsv" > "${RESULTS}/results_summary.tsv"
ACTUAL="$(cut -f1 "${RESULTS}/results_summary.tsv")"
TARGET="25666"
awk -v actual="$ACTUAL" -v target="$TARGET" \
  'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \
  > "${RESULTS}.report"
2 changes: 1 addition & 1 deletion regression/run_linclust_split.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/sh -e
"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu"

"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M
"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M --linclust-version 1 --alignment-mode 2
"${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv"

awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv"
Expand Down
3 changes: 3 additions & 0 deletions run_regression.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,14 @@ run_test NUCLPROT_SEARCH "run_nuclprot.sh"
run_test NUCLNUCL_SEARCH "run_nuclnucl.sh"
run_test NUCLNUCL_TRANS_SEARCH "run_nuclnucl_translated.sh"
run_test CLUSTER "run_cluster.sh"
run_test CLUSTER2 "run_cluster2.sh"
run_test EASY_CLUSTER "run_easy_cluster.sh"
run_test EASY_NUCL_CLUSTER "run_easy_nuclcluster.sh"
run_test CLUSTER_REASSIGN "run_easy_cluster_reassign.sh"
run_test LINCLUST "run_linclust.sh"
run_test LINCLUST2 "run_linclust2.sh"
run_test LINCLUST_SPLIT "run_linclust_split.sh"
run_test LINCLUST2_SPLIT "run_linclust2_split.sh"
run_test EASY_LINCLUST "run_easy_linclust.sh"
run_test CLUSTHASH "run_clusthash.sh"
run_test PROTNUCL_SEARCH "run_protnucl.sh"
Expand Down