diff --git a/regression/run_cluster.sh b/regression/run_cluster.sh index a88239b..017e25e 100755 --- a/regression/run_cluster.sh +++ b/regression/run_cluster.sh @@ -3,12 +3,12 @@ CLUDB="${RESULTS}/clu" "${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}" --shuffle 0 -"${MMSEQS}" cluster "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 +"${MMSEQS}" cluster "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 1 "${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="15698" +TARGET="15695" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_cluster2.sh b/regression/run_cluster2.sh new file mode 100755 index 0000000..b1935ab --- /dev/null +++ b/regression/run_cluster2.sh @@ -0,0 +1,14 @@ +#!/bin/sh -e + +CLUDB="${RESULTS}/clu" +"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${CLUDB}" --shuffle 0 + +"${MMSEQS}" cluster "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 2 --cluster-version 2 +"${MMSEQS}" createtsv "${CLUDB}" "${CLUDB}" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" + +awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" +ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" +TARGET="13619" +awk -v actual="$ACTUAL" -v target="$TARGET" \ + 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ + > "${RESULTS}.report" diff --git a/regression/run_cluster_update.sh b/regression/run_cluster_update.sh index 6af461b..037b16b 100755 --- a/regression/run_cluster_update.sh +++ b/regression/run_cluster_update.sh @@ -9,8 +9,8 @@ cat "$RESULTS/clu1.fasta" "$RESULTS/clu2.fasta" > "$RESULTS/cluCombined.fasta" "${MMSEQS}" createdb "$RESULTS/clu1.fasta" "${SEQCLUDB1}" "${MMSEQS}" createdb "$RESULTS/cluCombined.fasta" "${SEQCLUDB2}" -"${MMSEQS}" linclust "${SEQCLUDB1}" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 -a -c 0.50 --min-seq-id 0.50 -"${MMSEQS}" clusterupdate "${SEQCLUDB1}" "${SEQCLUDB2}" "$RESULTS/results_clu" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/tmp" --cov-mode 1 -c 0.50 --min-seq-id 0.50 +"${MMSEQS}" linclust "${SEQCLUDB1}" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 -a -c 0.50 --min-seq-id 0.50 --linclust-version 1 +"${MMSEQS}" clusterupdate "${SEQCLUDB1}" "${SEQCLUDB2}" "$RESULTS/results_clu" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/tmp" --cov-mode 1 -c 0.50 --min-seq-id 0.50 --linclust-version 1 "${MMSEQS}" createtsv "$RESULTS/seqdb_update" "$RESULTS/seqdb_update" "$RESULTS/clu_updated" "$RESULTS/clu_updated.tsv" CLUSTERMEMEBER=$(wc -l "$RESULTS/clu_updated.tsv" | awk '{print $1}') diff --git a/regression/run_easy_cluster.sh b/regression/run_easy_cluster.sh index 17ffbde..26bd669 100755 --- a/regression/run_easy_cluster.sh +++ b/regression/run_easy_cluster.sh @@ -1,9 +1,9 @@ #!/bin/sh -e -"${MMSEQS}" easy-cluster "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 +"${MMSEQS}" easy-cluster "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 --cluster-steps 3 --linclust-version 1 awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="15698" +TARGET="15695" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_easy_cluster_reassign.sh b/regression/run_easy_cluster_reassign.sh index da72cf7..bbc8d31 100755 --- a/regression/run_easy_cluster_reassign.sh +++ b/regression/run_easy_cluster_reassign.sh @@ -1,10 +1,10 @@ #!/bin/sh -e -cat "${DATADIR}/clu.fasta" | "${MMSEQS}" easy-cluster stdin "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 -c 0.8 --cov-mode 1 --cluster-reassign 1 +cat "${DATADIR}/clu.fasta" | "${MMSEQS}" easy-cluster stdin "$RESULTS/results" "$RESULTS/tmp" --min-seq-id 0.3 -s 2 -c 0.8 --cov-mode 1 --cluster-reassign 1 --linclust-version 1 awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="17234" +TARGET="17229" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_easy_linclust.sh b/regression/run_easy_linclust.sh index aba13fd..7485457 100755 --- a/regression/run_easy_linclust.sh +++ b/regression/run_easy_linclust.sh @@ -1,9 +1,9 @@ #!/bin/sh -e -"${MMSEQS}" easy-linclust "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 +"${MMSEQS}" easy-linclust "${DATADIR}/clu.fasta" "$RESULTS/results" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 1 --alignment-mode 2 awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" -TARGET="26493" +TARGET="26477" awk -v actual="$ACTUAL" -v target="$TARGET" \ 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ > "${RESULTS}.report" diff --git a/regression/run_easy_nuclcluster.sh b/regression/run_easy_nuclcluster.sh index b18de88..f4ed62b 100755 --- a/regression/run_easy_nuclcluster.sh +++ b/regression/run_easy_nuclcluster.sh @@ -1,7 +1,7 @@ #!/bin/sh -e TARGET="${DATADIR}/genes.fasta" -"${MMSEQS}" easy-cluster "${TARGET}" "$RESULTS/results" "$RESULTS/tmp" -k 13 --min-seq-id 0.8 -c 0.5 --cov-mode 1 +"${MMSEQS}" easy-cluster "${TARGET}" "$RESULTS/results" "$RESULTS/tmp" -k 13 --min-seq-id 0.8 -c 0.5 --cov-mode 1 --linclust-version 1 awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" diff --git a/regression/run_linclust.sh b/regression/run_linclust.sh index 7cfdd29..c66a1ee 100755 --- a/regression/run_linclust.sh +++ b/regression/run_linclust.sh @@ -2,7 +2,7 @@ CLUDB= "${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu" -"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 +"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 1 --alignment-mode 2 "${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" diff --git a/regression/run_linclust2.sh b/regression/run_linclust2.sh new file mode 100755 index 0000000..33a41d4 --- /dev/null +++ b/regression/run_linclust2.sh @@ -0,0 +1,13 @@ +#!/bin/sh -e +CLUDB= +"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu" + +"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --linclust-version 2 +"${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" + +awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" +ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" +TARGET="25666" +awk -v actual="$ACTUAL" -v target="$TARGET" \ + 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ + > "${RESULTS}.report" diff --git a/regression/run_linclust2_split.sh b/regression/run_linclust2_split.sh new file mode 100755 index 0000000..030d671 --- /dev/null +++ b/regression/run_linclust2_split.sh @@ -0,0 +1,12 @@ +#!/bin/sh -e +"${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu" + +"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M --linclust-version 2 +"${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" + +awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" +ACTUAL="$(cut -f1 "$RESULTS/results_summary.tsv")" +TARGET="25666" +awk -v actual="$ACTUAL" -v target="$TARGET" \ + 'BEGIN { print (actual == target) ? "GOOD" : "BAD"; print "Expected: ", target; print "Actual: ", actual; }' \ + > "${RESULTS}.report" diff --git a/regression/run_linclust_split.sh b/regression/run_linclust_split.sh index 3d8c340..f9c59a0 100755 --- a/regression/run_linclust_split.sh +++ b/regression/run_linclust_split.sh @@ -1,7 +1,7 @@ #!/bin/sh -e "${MMSEQS}" createdb "${DATADIR}/clu.fasta" "${RESULTS}/clu" -"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M +"${MMSEQS}" linclust "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/tmp" --cov-mode 1 --cluster-mode 0 -c 0.90 --min-seq-id 0.50 --split-memory-limit 10M --linclust-version 1 --alignment-mode 2 "${MMSEQS}" createtsv "${RESULTS}/clu" "${RESULTS}/clu" "$RESULTS/results_clu" "$RESULTS/results_cluster.tsv" awk 'BEGIN { l = "" } l != $1 { l = $1; cnt++; } { t++; } END { print cnt"\t"t"\t"(t/cnt) }' "$RESULTS/results_cluster.tsv" > "$RESULTS/results_summary.tsv" diff --git a/run_regression.sh b/run_regression.sh index 7b67cf0..eadc524 100755 --- a/run_regression.sh +++ b/run_regression.sh @@ -74,11 +74,14 @@ run_test NUCLPROT_SEARCH "run_nuclprot.sh" run_test NUCLNUCL_SEARCH "run_nuclnucl.sh" run_test NUCLNUCL_TRANS_SEARCH "run_nuclnucl_translated.sh" run_test CLUSTER "run_cluster.sh" +run_test CLUSTER2 "run_cluster2.sh" run_test EASY_CLUSTER "run_easy_cluster.sh" run_test EASY_NUCL_CLUSTER "run_easy_nuclcluster.sh" run_test CLUSTER_REASSIGN "run_easy_cluster_reassign.sh" run_test LINCLUST "run_linclust.sh" +run_test LINCLUST2 "run_linclust2.sh" run_test LINCLUST_SPLIT "run_linclust_split.sh" +run_test LINCLUST2_SPLIT "run_linclust2_split.sh" run_test EASY_LINCLUST "run_easy_linclust.sh" run_test CLUSTHASH "run_clusthash.sh" run_test PROTNUCL_SEARCH "run_protnucl.sh"