#!/bin/bash
#
# Contingency table orchestrator: phenotype x all genotypes
#
# Submits a single DNAnexus swiss-army-knife job that runs all 137 variant
# sources in parallel, merges results, and outputs one TSV file.
#
# Usage:
#   bash run_contingency.sh <PHENOTYPE> [--types <comma-separated>] [--nproc N] [--phenotype-type binary|continuous]
#
# Arguments:
#   PHENOTYPE        - Phenotype column name (e.g., zopiclone__M796)
#   --types          - Comma-separated variant types to process (default: all)
#                      Options: snv,hla,cypmicro,cypdosage,lof,mpc,cnv
#   --nproc          - Number of parallel tasks within the job (default: 36)
#   --phenotype-type - binary (default) or continuous
#
# Examples:
#   bash run_contingency.sh zopiclone__M796
#   bash run_contingency.sh zopiclone__M796 --types lof,mpc,cypmicro,cypdosage
#   bash run_contingency.sh p50_i0__standing_height --phenotype-type continuous
#

# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

# One usage string shared by every argument error so the messages cannot drift.
USAGE="Usage: bash run_contingency.sh <PHENOTYPE> [--types <types>] [--nproc N] [--phenotype-type binary|continuous]"

# First positional argument is mandatory; ${1:?...} aborts with the usage text
# when it is missing or empty.
PHENOTYPE="${1:?$USAGE}"
shift 1

# A leading dash means an option was given where the phenotype belongs
# (e.g. `run_contingency.sh --types lof`); refuse instead of submitting a job
# for a phenotype literally named "--types".
if [[ "$PHENOTYPE" == -* ]]; then
    echo "ERROR: first argument must be the phenotype name, got option '$PHENOTYPE'" >&2
    echo "$USAGE" >&2
    exit 1
fi

# Defaults: all variant types, 36 parallel tasks, binary phenotype.
TYPES="snv,hla,cypmicro,cypdosage,lof,mpc,cnv"
NPROC=36
PHENO_TYPE="binary"

# Parse the optional flags. Every flag takes exactly one value.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --types | --nproc | --phenotype-type)
            # Without this check a trailing flag would hit `set -u` on $2 and
            # die with an unhelpful "unbound variable" message.
            if [[ $# -lt 2 ]]; then
                echo "ERROR: $1 requires a value" >&2
                echo "$USAGE" >&2
                exit 1
            fi
            case "$1" in
                --types) TYPES="$2" ;;
                --nproc) NPROC="$2" ;;
                --phenotype-type) PHENO_TYPE="$2" ;;
            esac
            shift 2
            ;;
        *)
            echo "Unknown argument: $1" >&2
            echo "$USAGE" >&2
            exit 1
            ;;
    esac
done

# Validate before anything is uploaded or submitted: a bad value would
# otherwise only surface inside the remote job.
if [[ ! "$NPROC" =~ ^[1-9][0-9]*$ ]]; then
    echo "ERROR: --nproc must be a positive integer, got '$NPROC'" >&2
    exit 1
fi

case "$PHENO_TYPE" in
    binary | continuous) ;;
    *)
        echo "ERROR: --phenotype-type must be 'binary' or 'continuous', got '$PHENO_TYPE'" >&2
        exit 1
        ;;
esac

# Project-side locations. TOOLS_DX is a path inside the DNAnexus project;
# TOOLS_MNT is the same folder prefixed with /mnt/project, which is the form
# used in the command string sent to the job.
TOOLS_DX="/results/tools"
TOOLS_MNT="/mnt/project${TOOLS_DX}"
CONTINGENCY_RESULTS_DX="${TOOLS_DX}/contingency_results/${PHENOTYPE}"

# Local locations: the directory holding this script, and the TSV written
# next to it.
SCRIPT_DIR="$(
    cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
OUTPUT_FILE="${SCRIPT_DIR}/contingency_${PHENOTYPE}.tsv"

# Run banner: one line per setting, followed by a blank separator line.
printf '%s\n' \
    "=== Contingency Table Pipeline (single-job mode) ===" \
    "Phenotype:  $PHENOTYPE" \
    "Types:      $TYPES" \
    "Parallel:   $NPROC" \
    "Pheno type: $PHENO_TYPE" \
    "Output:     $OUTPUT_FILE" \
    ""

# --- Step 0: Upload scripts to DNAnexus ---
echo "--- Step 0: Uploading scripts ---"

# Helper scripts the job needs; each must sit next to this script.
PIPELINE_SCRIPTS=(extract_phenotype.py contingency_all.py contingency_runner.sh)

# Pre-flight: confirm every local file exists BEFORE touching the project.
# The upload loop deletes the remote copies first, so a missing local file
# would otherwise leave the project without that script.
for script in "${PIPELINE_SCRIPTS[@]}"; do
    if [[ ! -f "${SCRIPT_DIR}/${script}" ]]; then
        echo "ERROR: Required script not found: ${SCRIPT_DIR}/${script}" >&2
        exit 1
    fi
done

# Best-effort folder creation: failures (e.g. folder already exists) are
# deliberately ignored.
dx mkdir -p "$TOOLS_DX/" 2>/dev/null || true
dx mkdir -p "$CONTINGENCY_RESULTS_DX/" 2>/dev/null || true

for script in "${PIPELINE_SCRIPTS[@]}"; do
    echo "Uploading ${script}..."
    # Remove ALL copies (dx rm by name only removes one).
    # Word-splitting of the listing is intentional: one ID per word. A failed
    # listing (no existing copy) just yields an empty loop.
    # shellcheck disable=SC2046
    for fid in $(dx ls "${TOOLS_DX}/${script}" --brief 2>/dev/null); do
        dx rm "$fid" 2>/dev/null || true
    done
    # Not guarded with `|| true`: a failed upload must abort the run.
    dx upload "${SCRIPT_DIR}/${script}" --path "${TOOLS_DX}/" --brief
done

# --- Step 1: Submit single job ---
echo ""
echo "--- Step 1: Submitting single contingency job ---"

# Build the command string that the job will execute. Each value goes through
# printf %q so that quotes, spaces, or shell metacharacters in an argument
# reach contingency_runner.sh as literal text instead of being interpreted by
# the remote shell.
printf -v REMOTE_CMD 'bash %q %q %q %q %q' \
    "${TOOLS_MNT}/contingency_runner.sh" \
    "$PHENOTYPE" "$TYPES" "$NPROC" "$PHENO_TYPE"

# Trailing shell comment carrying the current epoch time; it does not affect
# what the command does.
# NOTE(review): presumably here to make every submitted command string
# unique — confirm the intent before removing.
REMOTE_CMD+=" # v$(date +%s)"

# --brief makes dx print only the job ID; -y skips the confirmation prompt.
JOB_ID=$(dx run app-swiss-army-knife \
    --instance-type mem3_ssd1_v2_x48 \
    -icmd="$REMOTE_CMD" \
    --destination "${CONTINGENCY_RESULTS_DX}/" \
    --brief -y)

echo "Job: $JOB_ID"
echo "Monitor: dx watch $JOB_ID"
echo ""

# --- Step 2: Wait for job ---
echo "--- Step 2: Waiting for job ---"

# `dx wait` exits non-zero when the job fails or is terminated. Under
# `set -e` that would abort the script right here, before the state check
# and its error message below. Capture the status instead and let the
# state check decide.
WAIT_RC=0
dx wait "$JOB_ID" || WAIT_RC=$?
if [[ $WAIT_RC -ne 0 ]]; then
    echo "WARNING: dx wait exited with status $WAIT_RC; checking job state" >&2
fi

# Check job state. Any failure to query or parse the description is reported
# as "unknown", which is treated as a failure below.
STATE=$(dx describe "$JOB_ID" --json 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin).get('state','unknown'))" 2>/dev/null || echo "unknown")
if [[ "$STATE" != "done" ]]; then
    echo "ERROR: Job failed (state: $STATE). Check: dx watch $JOB_ID" >&2
    exit 1
fi

# --- Step 3: Download result ---
printf '\n'
printf '%s\n' "--- Step 3: Downloading result ---"

# Merged table as stored in the job's destination folder.
RESULT_DX="${CONTINGENCY_RESULTS_DX}/contingency_${PHENOTYPE}.tsv"

# Fetch it, overwriting any previous local copy (-f); bail out if that fails.
if ! dx download "$RESULT_DX" -o "$OUTPUT_FILE" -f; then
    printf '%s\n' "ERROR: Could not download result. Check: dx watch $JOB_ID"
    exit 1
fi

# Summary: data rows (everything after the header line) and the number of
# distinct values in the second tab-separated column.
N_VARIANTS=$(tail -n +2 "$OUTPUT_FILE" | wc -l)
N_SOURCES=$(awk -F'\t' 'NR>1{print $2}' "$OUTPUT_FILE" | sort -u | wc -l)

printf '\n'
printf '%s\n' \
    "=== Results ===" \
    "Total variants: $N_VARIANTS" \
    "Total sources:  $N_SOURCES" \
    "Output file:    $OUTPUT_FILE"

# Show the header plus the first ten rows as an aligned table, but only when
# there is at least one data row.
if (( N_VARIANTS > 0 )); then
    printf '\n'
    printf '%s\n' "Preview (first 10 rows):"
    head -n 11 "$OUTPUT_FILE" | column -t -s $'\t'
fi
