
    5i                     x    d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d Zd Zd Zd Zd Zd	 Zdd
Zy)    )print_functionN)file_load_utilsc                     t        j                  |        |D ]5  }t        j                  t        j                  j	                  | |             7 y)a=  
    Create a set of directories, so we could store the input files.
    For example, seq1 could be stored under:
        /in/seq1/NC_001122.fasta

    TODO: this call could fail, we need to report a reasonable error code

    Note that we create a directory for every file array, even if
    it has zero inputs.
    N)r   
ensure_dirospathjoin)idirdirsds      /home/marpiech/ifpan-abm-pgxpred/analysis/marpiech-gwas-test/venv/lib/python3.12/site-packages/dxpy/bindings/download_all_inputs.py_create_dirsr      s>     t$ :""277<<a#89:    c                     | d   }t         j                  j                  || d         }t        d|z   dz   |z          t        j
                  j                          t        j                  ||       | S )Nsrc_file_id	trg_fnamezdownloading file: z to filesystem: )	r   r   r	   printsysstdoutflushdxpydownload_dxfile)file_recr
   src_filetrg_files       r   _download_one_filer   ,   sc    &Hww||D(;"78H	

),>
>
IJJJ8,Or   c                 *    | D ]  }t        ||        y N)r   )to_downloadr
   r   s      r   _sequential_file_downloadr    6   s     +8T*+r   c           	      F   	 t         j                  j                  |      5 }| D ci c]  }|j                  t        ||      | }}t         j                  j                  |      D ]  }||   }	 |j                           	 d d d        y c c}w # t        $ r- t        j                  j                  |d   d|d   d        w xY w# 1 sw Y   y xY w# t        $ rA t        dt        j                         t        j                  t        j                         Y y w xY w)N)max_workersr   z -> r   z generated an exception )file)
concurrentfuturesThreadPoolExecutorsubmitr   as_completedresult	Exceptionr   stderrwriteKeyboardInterruptr   r   _exitEX_IOERR)r   r
   max_num_parallel_downloadsexecutorr   future_filesfutures          r   _parallel_file_downloadr5   <   s   226 3 8 	;C,79 ( %OO,>$OQYY 9L 9$,,99,G '/MMO		 	9 ! JJ$$&.}&=x?T&V W	 	  
 	bszz"
sX    C C
B)C
0B C
C C
6CC

CC C AD D c                     t        j                  t        j                         d      \  }}i }d }| D ]<  }||vr||   } ||dz   |d   |        ||dz   |d   |        ||dz   |d   |       > |S )	z
    Create a dict of values for the downloaded files. This is similar to the variables created
    when running a bash app.
    Nc                     | |vr||| <   y y r    )keyvaluedict_s      r   add_if_no_collisionz-_gen_helper_dict.<locals>.add_if_no_collision]   s    eE#J r   _pathr   _namebasename_prefixprefix)r   analyze_bash_varsget_input_json_file)filtered_inputsfile_key_descs_ignoreflattened_dictr<   input_input_var_dicts          r   _gen_helper_dictrJ   R   s     .??++-t5NG N " Z''/FW,nV.DnUFW,nZ.H.YFY.x0H.YZ r   c           
      H    t        | |t        t        |dz        d            S )zC
    Ensure at least ~1.2 GB memory per thread, see PTFM-18767
    i     )minmaxint)max_threads	num_coresmem_available_mbs      r   _get_num_parallel_threadsrS   l   s%     {Is3/?/D+Eq'IJJr   c                 $   t        j                         }	 t        j                         }t        j                  |      \  }}}g }	|D ]4  }
d}| |
|
j                  d      d   | v rd}|s$|	j                  |
       6 t        ||	       | rt        j                  ||       }g }|j                         D ]  }|j                  |        |r~t        j                         j                  dz	  }t!        j"                         }t%        |||      }t&        j(                  j+                  dj                  |             t-        |||       nt/        ||       t1        |      }|S # t        $ r# dj                        }|dz  }t        |        w xY w)	aS  
    :param exclude: List of input variables that should not be downloaded.
    :type exclude: Array of strings
    :param parallel: Should we download multiple files in parallel? (default: False)
    :type filename: boolean
    :param max_threads: If parallel is True, how many threads should be used
        to download files? (default: 8)
    :type append: int
    :returns: dict of lists of strings where each key is the input variable
                and each list element is the full path to the file that has
                been downloaded.

    This function downloads all files that were supplied as inputs to the app.
    By convention, if an input parameter "FOO" has value

        {"$dnanexus_link": "file-xxxx"}

    and filename INPUT.TXT, then the linked file will be downloaded into the
    path:

        $HOME/in/FOO/INPUT.TXT

    If an input is an array of files, then all files will be placed into
    numbered subdirectories under a parent directory named for the
    input. For example, if the input key is FOO, and the inputs are {A, B,
    C}.vcf then, the directory structure will be:

        $HOME/in/FOO/0/A.vcf
                     1/B.vcf
                     2/C.vcf

    Zero padding is used to ensure argument order. For example, if there are
    12 input files {A, B, C, D, E, F, G, H, I, J, K, L}.txt, the directory
    structure will be:

        $HOME/in/FOO/00/A.vcf
                     ...
                     11/L.vcf

    This allows using shell globbing (FOO/*/*.vcf) to get all the files in the input
    order and prevents issues with files which have the same filename.z0Error: Could not find the input json file: {0}.
zE       This function should only be called from within a running job.T/r   F   z"Downloading files using {} threads)r   get_input_dirrC   get_job_input_filenamesIOErrorformatr   splitappendr   filter_dictvaluesextendpsutilvirtual_memorytotalmultiprocessing	cpu_countrS   r   r,   r-   r5   r    rJ   )excludeparallelrP   r
   job_input_filer   inputsrestmsgdirs_to_creater   keepr   	ival_list	total_memrQ   r1   helper_varss                     r   download_all_inputsrp   s   s   X ((*D(<<>,DD^Tfd N %amQ7*!!!$% ~&  ,,VW= K]]_ &	9%& ))+11R7	#--/	%>{IW`%a"

=DDE_`aT3MN!+t4"6*KU  AHHXVVc
	s   -E# #,F)NF   )
__future__r   concurrent.futuresr%   r   r   rc   r`   r   
dxpy.utilsr   r   r   r    r5   rJ   rS   rp   r8   r   r   <module>ru      sD   " &  	 
    &:"+,4KZr   