diff --git a/.gitignore b/.gitignore index 9ea93d7..5a22307 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ solr_auth* postgresql.jar log/ cre.env +.env diff --git a/.local/README.md b/.local/README.md new file mode 100644 index 0000000..e4f7eb8 --- /dev/null +++ b/.local/README.md @@ -0,0 +1,103 @@ +# Cohort360-QueryExecutor Local Helper Repository + +This directory contains a refactored and cleaner set of scripts for local development, integrated with the root project's existing Docker configuration. + +## Features +- **Integrated**: Uses the root `Dockerfile` and `docker-compose.yml`. +- **Colored Output**: Scripts use a utility logger for better readability. +- **Flexible**: Supports both direct Server Mode (running JAR) and Docker Mode. +- **Isolated**: Keeps local configurations and temporary files out of the main repository's git history. + +## Table of Contents +1. [Prerequisites](#prerequisites) +2. [Configuration](#configuration) +3. [Setup](#setup) +4. [Running Server Mode (Direct JAR)](#running-server-mode-direct-jar) +5. [Running Docker Mode](#running-docker-mode) +6. [API Usage](#api-usage) + +--- + +## Prerequisites +- **Java 11 or 17** (for Server Mode) +- **Docker and Docker Compose** (for Docker Mode) +- **Maven** (handled via `./mvnw` wrapper) + +--- + +## Configuration +Before running anything, initialize your local environment: + +```bash +cp .local/env.example .local/.env +``` + +Edit `.local/.env` to configure your local environment (FHIR URL, PostgreSQL, Solr, etc.). + +--- + +## Setup +To build the project and download required dependencies (like the PostgreSQL driver): + +```bash +./.local/scripts/setup.sh +``` + +This script: +1. Builds the project using Maven. +2. Downloads the PostgreSQL driver JAR. +3. Automatically initializes `.local/.env` if it doesn't exist. + +--- + +## Running Server Mode (Direct JAR) +Runs the application directly on your host machine. This is faster for iterative development. 
+ +```bash +./.local/scripts/run-server.sh +``` + +This script: +- Loads environment variables from `.local/.env`. +- Generates `solr_auth.txt` for Solr authentication. +- Applies the necessary JVM `--add-opens` flags (matching the production `entrypoint.sh`). + +--- + +## Running Docker Mode +Runs everything in containers. + +```bash +./.local/docker/run-docker.sh +``` + +This script: +- Builds the `sjs:latest` image using the root `Dockerfile`. +- Starts services using the root `docker-compose.yml` with local overrides (`.local/docker/docker-compose.local.yml`). +- Includes a local PostgreSQL container for testing if needed. + +--- + +## API Usage + +Once the server is running (port `8091` by default): + +### Example Query (Count) +```bash +curl -X POST http://localhost:8091/jobs -H "Content-Type: application/json" -d '{ + "input": { + "cohortDefinitionSyntax": "{\"sourcePopulation\":{\"caresiteCohortList\":[118]},\"_type\":\"request\",\"request\":{\"_type\":\"andGroup\",\"_id\":0,\"isInclusive\":true,\"criteria\":[{\"_type\":\"basicResource\",\"_id\":1,\"isInclusive\":true,\"resourceType\":\"Patient\",\"filterFhir\":\"active=true&gender=female\",\"criteria\":[],\"dateRangeList\":[],\"temporalConstraints\":[]}],\"dateRangeList\":[],\"temporalConstraints\":[]},\"temporalConstraints\":[]}", + "mode": "count" + } +}' +``` + +### Check Job Status +```bash +curl http://localhost:8091/jobs +``` + +### Cancel Job +```bash +curl -X DELETE http://localhost:8091/jobs/ +``` diff --git a/.local/docker/docker-compose.local.yml b/.local/docker/docker-compose.local.yml new file mode 100644 index 0000000..bffacf0 --- /dev/null +++ b/.local/docker/docker-compose.local.yml @@ -0,0 +1,32 @@ +services: + app: + build: + dockerfile: Dockerfile + container_name: sjs-app-local + environment: + - PG_HOST=db + - SPARK_MASTER=spark://spark-master:7077 + ports: + - 8091:8091 + env_file: + - .env + + spark-master: + container_name: spark-master-local + + worker-1: + container_name: 
spark-worker-1-local + + worker-2: + container_name: spark-worker-2-local + + # Optional: PostgreSQL for local development + db: + image: postgres:15 + container_name: sjs-db-local + environment: + POSTGRES_DB: omop + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + ports: + - 5555:5432 diff --git a/.local/docker/run-docker.sh b/.local/docker/run-docker.sh new file mode 100755 index 0000000..65dfc38 --- /dev/null +++ b/.local/docker/run-docker.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Change directory to project root +SCRIPT_DIR="$(dirname "$0")" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +cd "$PROJECT_ROOT" + +# Load utils +source .local/scripts/utils.sh + +log_step "Checking environment configuration..." +ENV_FILE=".env" + +if [ ! -f "$ENV_FILE" ]; then + log_warn "$ENV_FILE file not found. Creating it from .local/env.example..." + cp .local/env.example "$ENV_FILE" +fi + +log_step "Starting Docker Compose services..." +sudo docker compose -f docker-compose.yml -f .local/docker/docker-compose.local.yml up --build diff --git a/.local/env.example b/.local/env.example new file mode 100644 index 0000000..d9dc31d --- /dev/null +++ b/.local/env.example @@ -0,0 +1,38 @@ +# FHIR Server Configuration +FHIR_URL=http://localhost:8080 +FHIR_URL_COHORT=http://localhost:8080 +# FHIR_ACCESS_TOKEN= + +# Django API Configuration (for callbacks) +DJANGO_CALLBACK_URL=http://localhost:8085 +SJS_TOKEN={GENERATED_TOKEN_USE_IN_DJANGO} + +# PostgreSQL Configuration +PG_HOST=localhost +PG_PORT=5432 +PG_DB=omop +PG_SCHEMA=public +PG_USER=postgres +DB_OMOP_PASSWORD=postgres + +# Solr Configuration +SOLR_ZK=localhost:9983 +SOLR_USER= +SOLR_PASSWORD= + +# Spark Configuration +SPARK_MASTER="local[*]" +SPARK_DRIVER_HOST=localhost +SPARK_DRIVER_PORT=4000 +# SPARK_EXECUTOR_MEMORY=2g + +# Application Configuration +SJS_APP_PORT=8091 +JOBS_THREADS=20 +JOBS_AUTO_RETRY=0 +USE_SOURCE_POPULATION=true +USE_SOURCE_POPULATION_ON_PATIENT=true +USE_ACTIVE_FILTER=true +COHORT_CREATION_LIMIT=500 
+DEFAULT_RESOLVER=solr +DEFAULT_COHORT_CREATION_SERVICE=pg diff --git a/.local/scripts/run-server.sh b/.local/scripts/run-server.sh new file mode 100755 index 0000000..bedcd60 --- /dev/null +++ b/.local/scripts/run-server.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Change directory to project root +SCRIPT_DIR="$(dirname "$0")" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +cd "$PROJECT_ROOT" + +# Load utils +source .local/scripts/utils.sh + +log_step "Loading environment configuration..." +ENV_FILE=".local/.env" +load_env "$ENV_FILE" + +# Generate solr_auth.txt if SOLR_USER and SOLR_PASSWORD are set +if [ ! -z "$SOLR_USER" ] && [ ! -z "$SOLR_PASSWORD" ]; then + log_info "Generating solr_auth.txt..." + echo "httpBasicAuthUser=$SOLR_USER" > solr_auth.txt + echo "httpBasicAuthPassword=$SOLR_PASSWORD" >> solr_auth.txt +fi + +export JAVA_SOLR_OPT="-Dsolr.httpclient.builder.factory=org.apache.solr.client.solrj.impl.PreemptiveBasicAuthClientBuilderFactory -Dsolr.httpclient.config=solr_auth.txt" + +# JVM options for Spark 3.4+ and Java 11/17 (mirrors entrypoint.sh) +export JAVA_OPTS="--add-opens=java.base/java.lang=ALL-UNNAMED \ + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ + --add-opens=java.base/java.io=ALL-UNNAMED \ + --add-opens=java.base/java.net=ALL-UNNAMED \ + --add-opens=java.base/java.nio=ALL-UNNAMED \ + --add-opens=java.base/java.util=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ + --add-opens=java.base/sun.security.action=ALL-UNNAMED \ + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ + --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED" + +log_success "Starting Cohort Requester Server Mode..." 
+java $JAVA_OPTS $JAVA_SOLR_OPT -jar target/cohort-requester.jar diff --git a/.local/scripts/setup.sh b/.local/scripts/setup.sh new file mode 100755 index 0000000..92218ac --- /dev/null +++ b/.local/scripts/setup.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -e + +# Change directory to project root +SCRIPT_DIR="$(dirname "$0")" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +cd "$PROJECT_ROOT" + +# Load utils +source .local/scripts/utils.sh + +log_step "Starting project setup..." + +# Check for .env file +if [ ! -f ".local/.env" ]; then + log_warn ".local/.env file not found. Creating it from .local/env.example..." + cp .local/env.example .local/.env + log_info "Please review and edit .local/.env as needed." +fi + +log_step "Building the project..." +if ./mvnw clean package -DskipTests; then + log_success "Project built successfully." +else + log_error "Build failed." + exit 1 +fi + +log_step "Preparing dependencies..." +mkdir -p target + +# Fetch PostgreSQL JAR if not present +POSTGRES_VERSION=$(./mvnw help:evaluate -Dexpression=postgres.version -q -DforceStdout) +if [ ! -f "postgresql.jar" ]; then + log_info "Downloading PostgreSQL driver (version $POSTGRES_VERSION)..." + ./mvnw org.apache.maven.plugins:maven-dependency-plugin:3.1.1:get -Dartifact=org.postgresql:postgresql:$POSTGRES_VERSION + ./mvnw org.apache.maven.plugins:maven-dependency-plugin:3.1.1:copy -Dartifact=org.postgresql:postgresql:$POSTGRES_VERSION -DoutputDirectory=./ + mv postgresql-$POSTGRES_VERSION.jar postgresql.jar + log_success "PostgreSQL driver downloaded." +else + log_info "PostgreSQL driver already exists." +fi + +log_success "Setup complete!" 
diff --git a/.local/scripts/utils.sh b/.local/scripts/utils.sh new file mode 100644 index 0000000..b1012fb --- /dev/null +++ b/.local/scripts/utils.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +function log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +function log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +function log_warn() { + echo -e "${YELLOW}[WARN]${NC} $1" +} + +function log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +function log_step() { + echo -e "${BLUE}==>${NC} $1" +} + +# Function to load environment variables from a file if it exists +function load_env() { + local env_file="$1" + if [ -f "$env_file" ]; then + log_info "Loading environment variables from $env_file..." + set -a + source "$env_file" + set +a + else + log_warn "Environment file $env_file not found." + fi +} diff --git a/README.md b/README.md index 95b5998..0b308cb 100755 --- a/README.md +++ b/README.md @@ -10,21 +10,30 @@ The Cohort Requester is a spark application server for querying FHIR data with a ## Quick Start +### 1. Configuration + Fill in the configuration file `src/main/resources/application.conf` with the appropriate values. -Or use the following minimal needed environment variables: +Or use environment variables. For local development, you can create a `.env` file at the root of the project: + ```bash -# The URL of the FHIR server -export FHIR_URL=http://localhost:8080/fhir +# The URL of the FHIR server +export FHIR_URL=http://localhost:XXXX # "if FHIR local otherwise put the URL of your FHIR server (for a test with a public FHIR #http://hapi.fhir.org/baseR4)" +# URL of the associated Django API for callbacks +export DJANGO_CALLBACK_URL=http://localhost:8000 ``` +### 2. Build + Build the project with maven: ```bash -mvn clean package +./mvnw clean package -DskipTests ``` +### 3. 
Run + Run the application server: ```bash -java \ +java \ --add-opens=java.base/java.lang=ALL-UNNAMED \ --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ @@ -52,6 +61,23 @@ curl -X POST http://localhost:8090/jobs -H "Content-Type: application/json" -d ' }' ``` +### Check Job Status + +You can list all jobs: +```bash +curl http://localhost:8090/jobs +``` + +Or check the status of a specific job using its `jobId` (returned in the POST response): +```bash +curl http://localhost:8090/jobs/ +``` + +To cancel a job: +```bash +curl -X DELETE http://localhost:8090/jobs/ +``` + ## Job Queries The job query format is as follows : @@ -60,16 +86,12 @@ The job query format is as follows : "input": { "cohortDefinitionSyntax": "", "mode": "", - "modeOptions": { // optional mode options - // optional list of criteria ids separated by commas or "all", this will activate a detailed count of the patients per criteria - // or "ratio", this will activate a detailed count of final matched patients per criteria + "modeOptions": { "details": "
", - // optional sampling ratio value between 0.0 and 1.0 to limit the number of patients of the cohort to create (it can be used to sample an existing cohort) "sampling": "", - // optional cohort id to use as a base for the "createDiff" mode "baseCohortId": "" }, - "callbackUrl": "" // optional callback url to retrieve the result + "callbackUrl": "" } } ```