#!/bin/bash
# DX_APP_WIZARD_NAME DX_APP_WIZARD_VERSION
# Generated by dx-app-wizard.
#
# Scatter-process-gather execution pattern: Your app will split its
# input into multiple pieces, each of which will be processed in
# parallel, after which they are gathered together in some final
# output.
#
# This pattern is very similar to the "parallelized" template.  What
# it does differently is that it formally breaks out the "scatter"
# phase as a separate black-box entry point in the app.  (As a side
# effect, this requires a "map" entry point to call "process" on each
# of the results from the "scatter" phase.)
#
# Note that you can also replace any entry point in this execution
# pattern with an API call to run a separate app or applet.
#
# The following is a Unicode art picture of the flow of execution.
# Each box is an entry point, and vertical lines indicate that the
# entry point connected at the top of the line calls the entry point
# connected at the bottom of the line.  The letters represent the
# different stages in which the input is transformed, e.g. the output
# of the "scatter" entry point ("array:B") is given to the "map" entry
# point as input.  The "map" entry point calls as many "process" entry
# points as there are elements in its array input and gathers the
# results in its array output.
#
#          ┌──────┐
#       A->│ main │->D (output from "postprocess")
#          └┬─┬─┬─┘
#           │ │ │
#          ┌┴──────┐
#       A->│scatter│->array:B
#          └───────┘
#             │ │
#            ┌┴──────────────┐
#   array:B->│      map      │->array:C
#            └─────────┬─┬─┬─┘
#               │      │ . .
#               │     ┌┴──────┐
#               │  B->│process│->C
#               │     └───────┘
#            ┌──┴────────┐
#   array:C->│postprocess│->D
#            └───────────┘
#
# A = original app input, split up by "scatter" into pieces of type B
# B = an input that will be provided to a "process" entry point
# C = the output of a "process" entry point
# D = app output aggregated from the outputs of the "process" entry points
#
# See https://documentation.dnanexus.com/developer for documentation and
# tutorials on how to modify this file.

# Entry point "main": launches the "scatter", "map" and "postprocess"
# subjobs in that order, feeding each job's output to the next job as
# a job-based reference.  It does not process any data itself.
main() {
DX_APP_WIZARD_INITIALIZE_INPUTDX_APP_WIZARD_DOWNLOAD_ANY_FILES
    # Bookkeeping variables are kept local to this entry point.  They
    # are declared separately from the command substitutions below so
    # that a failing dx-jobutil-new-job call is not masked by the exit
    # status of "local".
    local input_to_scatter scatter_job
    local process_inputs map_job
    local postprocess_additional_input postprocess_job

    # To report any recognized errors in the correct format in
    # $HOME/job_error.json and exit this script, you can use the
    # dx-jobutil-report-error utility as follows:
    #
    #   dx-jobutil-report-error "My error message"
    #
    # Note however that this entire bash script is executed with -e
    # when running in the cloud, so any line which returns a nonzero
    # exit code will prematurely exit the script; if no error was
    # reported in the job_error.json file, then the failure reason
    # will be AppInternalError with a generic error message.
    #
    # We first create the "scatter" job which will scatter some input
    # (replace with your own input as necessary).  The utility
    # dx-jobutil-new-job uses the same syntax as dx run for specifying
    # input, and you can explicitly specify the class to enable proper
    # parsing.  If you leave out the class, the utility will attempt
    # to parse it intelligently (e.g. if it looks like a number, it
    # will interpret it as such).

    input_to_scatter='placeholder value'
    scatter_job=$(dx-jobutil-new-job scatter \
        -iinput_to_scatter:string="$input_to_scatter")

    # We will want to call "process" on each output of "scatter", so
    # we call the "map" entry point to do so.  We can also provide
    # here additional input that we want each "process" entry point to
    # receive, e.g. a file ID (given as a STRING) to which the
    # "process" function should add rows of data and a number to use
    # as a parameter.  We will use the "-i" syntax so that it can just
    # be slotted into the bash command to run the "process" job.
    #
    # WARNING: If you provide a data object ID without specifying that
    # it is a "string", then dx-jobutil-new-job will package it in a
    # DNAnexus link, and that will cause the system to make the subjob
    # wait until the object is CLOSED before starting to run the job.
    # If your intention is for the subjob to run on an OPEN object,
    # then the input ID MUST be given as a string.
    #
    # NOTE: "map" re-parses this string with the shell (eval), which is
    # why the embedded double quotes are part of the value.  Only put
    # text here that this app builds itself.
    process_inputs='-iadditional_input:string="file ID, for example" -ianother_input=32'
    map_job=$(dx-jobutil-new-job map \
        -iarray_of_scattered_input="$scatter_job":array_of_scattered_input \
        -iprocess_inputs="$process_inputs")

    # Finally, we want the "postprocess" job to run after "map" is
    # done calling "process" on each of its inputs.  Note that a job
    # is marked as "done" only after all of its child jobs are also
    # marked "done".
    postprocess_additional_input='file ID, for example'
    postprocess_job=$(dx-jobutil-new-job postprocess \
        -iprocess_outputs="$map_job":process_outputs \
        -iadditional_input:string="$postprocess_additional_input" \
        --depends-on "$map_job")
DX_APP_WIZARD_UPLOAD_ANY_FILES
    # If you would like to include any of the output fields from the
    # postprocess job as the output of your app, you should return it
    # here using a reference.
    #
    #   dx-jobutil-add-output app_output_field "$postprocess_job":final_output --class=jobref
    #
    # Tip: you can include in your output at this point any open
    # objects (such as files) which are closed by another entry
    # point that finishes later.  The system will check to make sure
    # that the output object is closed and will attempt to clone it
    # out as output into the parent container only after all subjobs
    # have finished.
DX_APP_WIZARD_OUTPUT
}

# Entry point "scatter": splits the input into pieces and emits each
# piece as one element of the "array_of_scattered_input" output.
scatter() {
    printf '%s\n' "Value of input_to_scatter: '${input_to_scatter}'"

    # Replace the placeholder list below with whatever code is needed
    # to break the input up into independent pieces.
    local -a scattered_input=(placeholder1 placeholder2)

    # Append the pieces one by one; --array makes every call add
    # another element to the same output field.
    local idx
    for ((idx = 0; idx < ${#scattered_input[@]}; idx++)); do
        dx-jobutil-add-output array_of_scattered_input \
            "${scattered_input[idx]}" --array
    done
}

# Entry point "map": starts one "process" subjob per element of
# *array_of_scattered_input* and collects a reference to each subjob's
# "process_output" field in the "process_outputs" array output.
#
# Inputs (set by the platform before this function runs):
#   array_of_scattered_input - array of pieces produced by "scatter"
#   process_inputs           - shell-quoted string of extra "-i..."
#                              arguments to hand to every "process" job
map() {
    local -a process_args
    local scattered_input process_job

    # [*] joins the array into a single word for display; [@] inside a
    # string would expand to several separate words.
    echo "Value of array_of_scattered_input: '${array_of_scattered_input[*]}'"
    echo "Value of process_inputs: '${process_inputs}'"

    # The following calls "process" for each of the items in
    # *array_of_scattered_input*, using as input the item in the
    # array, as well as the rest of the input parameters given in
    # *process_inputs*.
    #
    # *process_inputs* is re-parsed by the shell so that the quoting
    # embedded in the string (e.g. -ifoo:string="a b") produces exactly
    # one array element per argument.
    #
    # SECURITY NOTE: eval executes any command substitution or other
    # shell syntax contained in *process_inputs*.  Only pass strings
    # built by this app itself (see "main"); never untrusted input.
    eval "process_args=($process_inputs)"

    for scattered_input in "${array_of_scattered_input[@]}"; do
        process_job=$(dx-jobutil-new-job process \
            -iscattered_input="$scattered_input" \
            "${process_args[@]}")
        dx-jobutil-add-output process_outputs --array \
            "$process_job":process_output
    done
}

# Entry point "process": handles a single scattered piece and reports
# its result in the "process_output" field.
process() {
    printf '%s\n' "Value of scattered_input: '${scattered_input}'"
    printf '%s\n' "Value of additional_input: '${additional_input}'"
    printf '%s\n' "Value of another_input: '${another_input}'"

    # Put the code that turns this piece of input into output here.

    # Returning output is optional: if "postprocess" needs nothing
    # from this job (for example because rows were appended to a file
    # created ahead of time), the call below can be dropped.  In that
    # case "postprocess" must still wait for every "process" job; the
    # invocation in "main" already arranges this by passing
    # --depends-on with the "map" job.

    dx-jobutil-add-output process_output "process placeholder output"
}

# Entry point "postprocess": the "gather" phase.  Receives the array of
# "process" results in *process_outputs* (plus *additional_input*) and
# reports the aggregated result in the "final_output" field.
postprocess() {
    # [*] joins the array into a single word for display; [@] inside a
    # string would expand to several separate words.
    echo "Value of process_outputs: '${process_outputs[*]}'"
    echo "Value of additional_input: '${additional_input}'"

    # This is the "gather" phase which aggregates and performs any
    # additional computation after the "map" (and therefore after all
    # the "process") jobs are done.

    dx-jobutil-add-output final_output "postprocess placeholder output"
}
