
    5io                   x   d dl mZmZmZmZmZ d dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlZd dlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZmZmZmZ dd
l m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZAmBZB ddlCmDZD ddlEmFZF ddlGmHZH ddlImJZJ ddlKmLZL ddlMmNZNmOZOmPZP ddlQmRZR ddlSmTZT ddlUmVZV ddlWmXZX ddlYmZZZm[Z[ ddl\m]Z]m^Z^  e
j                  d       Z` e
j                  d!      Zad" Zbd# Zcd$ Zdd% ZedFd&Zfd' Zgd( Zhd) Zid* Zjd+ Zkd, ZldGd-ZmdHd.Znd/ Zod0 Zpd1 Zqd2d3d4g g  erd5      ej                  fd6Ztd7 Zud8 Zvd9 Zw G d: d;ex      Zyd< Zzd= Z{dId>Z|d? Z} G d@ dAe      Z~ G dB dC      Z G dD dE      Zy)J    )print_functionunicode_literalsdivisionabsolute_importannotationsN)reduce   )fill)DXRecord)	is_dxlinkdescribe)DXFile)resolve_existing_path	is_hashidResolutionErrorresolve_pathcheck_folder_exists)	as_handle)
print_desc)err_exitPermissionDeniedInvalidInputInvalidStatedefault_expected_exceptions)validate_JSONfinal_payload)get_genotype_only_typesadd_germline_base_sqlsort_germline_variantharmonize_germline_sqlharmonize_germline_resultsget_germline_ref_payloadget_germline_loci_payloadupdate_genotype_only_refget_genotype_typesinfer_genotype_type%get_types_to_filter_out_when_inferingfilter_results)inference_validation)validate_somatic_filter)somatic_final_payload)cohort_filter_payloadcohort_final_payload)Dataset)ArgsValidator)JSONValidator)*EXTRACT_ASSAY_EXPRESSION_INPUT_ARGS_SCHEMA)$EXTRACT_ASSAY_EXPRESSION_JSON_SCHEMA)1EXTRACT_ASSAY_EXPRESSION_FILTERING_CONDITIONS_1_01EXTRACT_ASSAY_EXPRESSION_FILTERING_CONDITIONS_1_1?EXTRACT_ASSAY_EXPRESSION_FILTERING_CONDITIONS_1_1_non_optimized)JSONFiltersValidator)VizPayloadBuilder)	VizClient)transform_to_expression_matrix   )write_expression_outputpretty_print_json)"EXTRACT_ASSAY_EXPRESSION_JSON_HELP/EXTRACT_ASSAY_EXPRESSION_ADDITIONAL_FIELDS_HELPz^database_\w{24}__\w+$z^database-\w{24}$c                   t        |       \  }}}|t        d| z   dz         ||d   d   k7  rt        d| z   dz   |z   dz         |d   d   dk7  rt        d	j                  |              	 t	        j
                  d
|d   z   dz   |dd      }d   dk7  rt        d|d   z         d|d   v sd|d   v r|d   }nt        d|d   z         |||fS # t        $ r t        dt        f       Y at        t        f$ r  t        d|d   z  t        t        f       Y t        $ r}t        t        |             Y d }~d }~ww xY w)NzUnable to resolve "z." to a data object or folder name in a projectr   projectz&" to a data object or folder name in ''classrecordzK{}: Invalid path. The path must point to a record type of cohort or dataset/idz
/visualizeF)r@   cohortBrowserzInsufficient permissionsexpected_exceptions%s : Invalid cohort or datasetdatasetVersionz3.0z>%s : Invalid version of cohort or dataset. Version must be 3.0r.   recordTypesCohortBrowserdatasetRecordProjectzL%s : Invalid path. The path must point to a record type of cohort or dataset)r   r   r   formatdxpyDXHTTPRequestr   r   r   	Exceptionstr)pathr@   folder_pathentity_resultrespdetailsdataset_projects          |/home/marpiech/ifpan-abm-pgxpred/analysis/marpiech-gwas-test/venv/lib/python3.12/site-packages/dxpy/cli/dataset_utilities.pyresolve_validate_record_pathrZ   Y   s   *?*E'G[-!>?
 	

 
M*-i8	8!78  	
 	
 Z )X5Y``aef	

!!-%%4%8
" &L#$%	

 	T-((omAT.T56Z=!"	

 M4887  V+BRATU,' 
,}T/BB!	
  Ws$   +"C E.+EE#D<<Ec                    | d   dz   | d   z   dz   |z   }	 t        j                  ||d      }d   dz   }|S # t        $ r}t        t	        |             Y d }~,d }~ww xY w)	Nurlz/viz-query/3.0/datasetrD   Fresourcedataprepend_srvsql;)rO   rP   rQ   r   rR   )rV   payloadrouteresource_val
resp_queryrW   sql_resultss          rY   viz_query_api_callri      sx    ;!22T)_DsJURL''!U

 U#c)K  Ws   8 	AAAc                    t        | |d      S )Nz	raw-queryri   rV   rd   s     rY   raw_query_api_callrm      s    dG[99    c                    t        | |d      S )Nzraw-cohort-queryrk   rl   s     rY   raw_cohort_query_api_callrp      s    dG-?@@rn   c                   | d   dz   | d   z   dz   }	 t        j                  ||d      }d|j                         v r]|d   d   d	k(  rd
|d   d   z   }n2|r|d   d   dk(  rd|d   d   z   }n|d   d   dk(  r	|d   d   }n|d   }t        t	        |             |S # t
        $ r}t        t	        |             Y d }~S d }~ww xY w)Nr\   z
/data/3.0/r]   z/rawFr^   errortyper   z4Insufficient permissions due to the project policy.
messageQueryTimeOutzhPlease consider using `--sql` option to generate the SQL query and query via a private compute cluster.

DxApiError)rO   rP   keysr   rR   rQ   )rV   rd   sql_messagerf   resp_rawerr_messagerW   s          rY   raw_api_callr{      s   ;-Y?&HL%%!U
 hmmo% (N:UX`ahXijsXtt'!26!:n!L J  MU  V]  M^  _h  Mi  i'"6*l:&w/	:&w/S%& O  WOs   BB 	C%B??Cc           
        | j                   s;| j                  s/| j                  s#| j                  | j                  t        d       | j                  | j                  t        d       ddddddd}d }| j                  r*d|d<    || |      }|rt        d	j                  |             | j                  r/d|d
<   d|d<    || |      }|rt        dj                  |             t        t        j                  | j                  d            }t        |      dk(  r|dk7  r|dk(  rd}n|dk(  rd}nd}nt        d       t        | j                        \  }}}}	|d   }
d}d}d}g }g }| j                   rt        j                  j                   dk(  rt        j                  j"                  dk(  rdnd}	 ddlat)        t+        t,        t&        j.                  j1                  d      dd             }|dk  r*t3        dj                  |t&        j.                  |             | j6                  t9        j:                         }n| j6                  d!k(  rd"}nvt8        j                  j=                  | j6                        rBt8        j                  j?                  | j6                        r| j6                  }nt        d#       nt        d#       |r1t        j@                  }t        j@                  }t        j@                  }nt8        j                  jC                  ||d$   d%z   z         }t8        j                  jC                  ||d$   d&z   |z         }t8        j                  jC                  ||d$   d'z   |z         }|||g}| j                  s| j                  r| jD                  rd(}n}| j6                  Mt9        j:                         }t8        j                  jC                  ||d$   |z         }|jG                  |       n| j6                  d!k(  rd"}npt8        j                  j=                  | j6                        rt8        j                  j?                  | j6                        rD| j6                  }t8        j                  jC                  ||d$   |z         }|jG                  |       n|jG                  | j6                         nt8        j                  j=                  t8        j                  jI                  | j6                              s)t8        j                  jI                  | j6                        s| j6                  }nBt        d)j                  t8        j                  jI                  | j6                        *             |D ]3  }t8        j                  j=                  |      s#|jG                  |       5 |rt        d+j                  |*             tK        |
|	,      jM                         }g }| j                  7| j                  j1                  d      D cg c]  }|jO                          }}n| j                  t8        j                  jQ                  | j                        rHtS        | j                  d-      5 }|D ]"  }|jG                  |jO                  d.             $ 	 ddd       n%t        d/j                  | j                  0             |ryg }|D ]  }|j1                  d      }t        |      dk  r|jG                  |       4|d   |jT                  d   jW                         vs,|d   |jT                  d   |d      d1   jW                         vs|jG                  |        |rt        d2|z         ||D cg c]$  }|d3jC                  |j1                  d            i& c}d4} d5|d6   v r-|jY                  d7      r|jY                  d7      | d8<   |d9   | d9<   | jD                  r=t[        ||       }!|rt3        |!       nZtS        |d:      5 }"t3        |!|";       ddd       n7t]        ||       }#t_        ||||#d<   |=       n| jD                  rt        d>       | j                   r%|ja                         }$|$jc                  |?      }%| j                  s| j                  rcte        |jT                        \  }&}'| j                  rt3        d.jC                  |&             | j                  rt        |jT                  |'|        yyy# t4        $ r&}t        d j                  ||             Y d}~#d}~ww xY wc c}w # 1 sw Y   xY wc c}w # 1 sw Y   xY w)@z
    Retrieves the data or generates SQL to retrieve the data from a dataset or cohort for a set of entity.fields. Additionally, the dataset's dictionary can be extracted independently or in conjunction with data.
    NzMust provide at least one of the following options: --fields, --fields-file, --dump-dataset-dictionary, --list-fields, --list-entitieszpdx extract_dataset: error: only one of the arguments, --fields or --fields-file, may be supplied at a given timeF,)dump_dataset_dictionaryrb   fieldsfields_fileoutputdelimc           	         g }|j                         D ]H  \  }}| j                  |   |k7  s|j                  dj                  |j	                  dd                   J |S )Nz--{}_-)items__dict__appendrN   replace)args
restricted
error_listoptionvalues        rY   check_optionsz&extract_dataset.<locals>.check_options   s`    
'--/ 	KMFE}}V$-!!&--sC0H"IJ	K rn   list_entitiesz*--list-fields cannot be specified with: {}list_fieldsentitiesz,--list-entities cannot be specified with: {}unicode_escaper:   "z.csv	.tsvz.txtzInvalid delimiter specifiedr]          z2.0.3z2.2.3r   .r	   )r	   r   zWarning: For '-ddd' usage, the recommended pandas version is {}. The installed version of pandas is {}. It is recommended to update pandas. For example, 'python3 -m pip install -I pandas=={}'.z'-ddd' requires the use of pandas, which is not currently installed. Please install pandas to a version {}. For example, 'python3 -m pip install -I pandas==X.X.X' where X.X.X is {}.r   Tz>Error: When using -ddd, --output must be an existing directory
recordNamez.data_dictionaryz.codingsz.entity_dictionary	.data.sql Error: {path} could not be foundrS   z!Error: path already exists {path}r@   r
zNThe file, {input_fields_file}, supplied using --fields-file could not be found)input_fields_filer   z(The following fields cannot be found: %s$)project_contextr   rL   rK   baseSqlbase_sqlfilterswfileresultsout_file_nameprint_to_stdoutsepraw_resultscolumn_namesz3`--sql` passed without `--fields` or `--fields-file)output_file_dataoutput_file_entityoutput_file_codingr   )3r~   r   r   r   r   r   rN   rR   codecsdecoder   lenrZ   rS   sysversion_infomajorminorpandaspdtuplemapint__version__splitprintImportErrorr   osgetcwdexistsisdirstdoutjoinrb   r   dirname	DXDatasetget_descriptorstripisfileopenmodelrw   getrm   r{   csv_from_jsonget_dictionarywriteretrieve_entities)(r   listing_restrictedr   r   	delimiterout_extensionr@   rU   rV   rX   
dataset_idout_directoryout_file_fieldr   files_to_checkfile_already_existrecommended_pandas_versionsystem_pandas_versioner   r   r   file_name_suffixr   rec_descriptorfields_listfieldinfilelineentryentity_fielditemrd   rh   fry   rec_dictwrite_otentity_names_and_titles_main_entitys(                                           rY   extract_datasetr      sw   
 ((  ""KK$ U	

 	(~	

 $) .3?+"4);<
AHHTU,1=))-:&"4);<
CJJ:VWFMM$**.>?@I
9~yC/"M$"M"M./4PQUQZQZ4[1G]D/iJMNON## 140@0@0F0F!0KPSP`P`PfPfjkPkWqx"	$)#c2>>3G3G3LRa3P*Q$R! %v-KKQ62BNND^L  ;;IIKM[[C"OWW^^DKK(ww}}T[[) $T UV"zz!$!$!ww||tL14FFV  "$tL1J>N" "$tL14HH=X" /0BDVWN{{d&&88*,;;IIKMWW\\tL14DDN !!.1[[C"OWW^^DKK(ww}}T[[) $!#!4#58H#H" %%n5"))$++6WW^^BGGOODKK89KKB
 "[[N2995 :   ,77>>$%%d+, 4;;AS;TUz?CRRTNK{{26++2C2CC2HIu{{}II				%77>>$**+d&&, 9" 9D&&tzz$'7899 9 `gg&*&6&6 h  
  	)E ;;s+L< 1$!!%(Q~';';J'G'L'L'NN?%++J7QH$&
 !!%(	) ?*LM  'EPQTchhtzz#78Q
 d=11xx	"&*hhy&9
#!%iGI88,T7;Kk".#. /!+A./ / $D'2H, /$Y/( 
FG##!002>>-11	 " 
 T--0A.BVBV0W-$))345,,lDA  .u  	C DJ  DJ..D 	h J9 9< R/ /s=   7A,d  e(e)e !e%	e%eee%e.c                    | j                   }g }g }|st        d       ||fS |D ]-  }|d   |k(  r|j                  |       |j                  |       / ||fS )NzNo valid assays in the dataset.generalized_assay_model)assaysr   r   )r   
assay_type
assay_listselected_type_assaysother_assaysas         rY   get_assay_infor     sy    &&JL23 !,//  	'A*+z9$++A.##A&		'
 !,//rn   c                   d| z   }t        t        |      d| z            }i }|j                  d      rt        j                  j                  |      rjt        j                  |      j                  dk(  rt        dj                  |              nt        |d      5 }	 t        j                  |      }d d d        nSt        d
j                  |             n7|dk(  rt        dj                  |              n	 t        j                  |      }| dv rt!        ||        |S | dv rt#        ||        |S # t        $ r}t        dt        	       Y d }~d }~ww xY w# 1 sw Y   TxY w# t        $ r}t        dt        	       Y d }~vd }~ww xY w)Nzargs.retrieve_	retrieve_z.jsonr   z~No filter given for --retrieve-{filter_type} or JSON for "--retrieve-{filter_type}" does not contain valid filter information.)filter_typer   z)JSON for variant filters is malformatted.rG   z/JSON file {filter_json} provided does not exist)filter_jsonz{})allele
annotationgenotype)variant)rR   varsendswithr   rS   r   statst_sizer   rN   r   jsonloadrQ   r   loadsr   r*   )r   r   
filter_argfilter_valuefilter	json_file
json_errors          rY   json_validation_functionr    s   !K/JtDz+";<=LFW%77>>,'ww|$,,1 U  \  \$/ \  ,, 	!%9!5  AHH , I  4 Q  X  X + X L1 ::fk* M 
	#4MA %  G0K  0  ?(C sH   ED3:E& 3	E<EEEEE#&	F
/FF
c           	         d}g d}||D cg c]  }|dj                  ||f      i c}d||dd}	t        | |	d      }
t        ||d|
d	   |
       y c c}w )Nvcf_meta_information_unique)FieldIDTypeNumberDescriptionr   FnamerE   )r   r   	is_cohortvariant_browser)rx   r   r   r   )r   r{   r   )rV   
project_idassay_id
assay_namer   r   tablecolumnscolumnrd   ry   s              rY   retrieve_meta_infor     s|    )E>G%>E
4:VSXXufo./
 

G D'u=H#'Y'
s   Ac                0   | j                   rd}n|dk(  r| j                  rd}nd}g }g }d}d}| j                  Jt        j                         }t        j
                  j                  |||z         }|j                  |       nK| j                  dk(  rd}n8t        j
                  j                  | j                        rQt        j
                  j                  | j                        rt        d	       n|j                  | j                         nt        j
                  j                  t        j
                  j                  | j                              s)t        j
                  j                  | j                        s| j                  }nBt        d
j                  t        j
                  j                  | j                                     |D ]3  }	t        j
                  j                  |	      s#|j                  |	       5 |rt        d       ||fS )Nr   somaticz.vcf_meta_info.txtr   r   Fr   Tz+--output should be a file, not a directory.r   r   z1Cannot specify the output to be an existing file.)rb   r   r   r   r   rS   r   r   r   r   r   r   rN   )
r   record_namefriendly_assay_typer   r   r   out_filer   r   r   s
             rY   assign_output_methodr&    s   xx&			)d.E.E/!NHO{{		77<<{=M/MNh'				$77==%BC%%dkk2	4	5RWW__> ;;.55277??4;;;W5X	
  ,77>>$%%d+, DE_$$rn   c                   |dk(  rd}n|dk(  rd}| rWt        |      \  }}|st        dj                  |             n*|D ]  }t        |d           t	        j
                  d       t        |      \  }}|D 	cg c]  }	|	d   	 }
}	|D 	cg c]  }	|	d	   	 }}	|D cg c]  }|d   	 }}|
r|r|
d   }|d   }nt        dj                  |             |ri|t        |
      vrE|t        |      v rt        d
j                  |             n4t        dj                  ||             n|}|D ]  }	|	d   |k(  s|	d	   } i }|dk(  r^d}|D ]V  }|d   k(  s|d   r|d   d   j                  dd      d   }|d   d   d   d   d   d   |d<   dD ]  }||v s||   ||<    X n1|dk(  r,d}|D ]%  }|d   k(  s|d   s|d   d   dz   |d   d   z   }' |fS c c}	w c c}	w c c}w )zN
    Generalized function for determining assay name and reference genome
    germlinegenetic_variantr"  somatic_variant)r   z,There's no {} assay in the dataset provided.r  r   uuidz|This is not a valid assay. For valid assays accepted by the function, `extract_assay {}`, please use the --list-assays flag.z0Assay {assay_name} does not exist in the {path}.)r  rS   z	GRCh38.92reference_genomer   r:   r   r   r   rs   mappingr  genotype_type_table)exclude_refdataexclude_halfrefexclude_nocallr   	referenceannotation_source_version)r   r   rN   r   r   exitlistr   )list_assaysr  rS   r$  r   r   target_assaysr   r   gatarget_assay_namestarget_assay_idsoaother_assay_namesselected_assay_nameselected_assay_idadditional_descriptor_infoselected_ref_genomeexclude_genotypes                      rY   get_assay_name_inforB  C  s    j(&
			)&
 (6z)
% CJJK^_`" !ai !HHQK %3:%!]L 0=="V*==-:;r6
;;.:;F;; .03,Q/?FFGZ[\T"455T"344 S  Z  Z+ FMM#-D N  #-# 3f:+(*6
%3 "$j() 	[Ay//'(*+,>*?*G*M*McST*UVW*X'DEjMR\D]^fDghnDopyDz  |C  ED*+@A(` [$'1,GHIYGZ23CD[	[ 
		)  	qAy//AkN&'nV&<s&BQ{^ToEp&p#	q  13FHb
cca >;;s   =GG$!G)c           	         |j                  dd      t        |      z
  }t        j                  d|t	        | fd|i|t        j
                        S )Nwidth_adjustmentr   ^)flags)popr   resubr
   	MULTILINE)stringcomment_stringkwargsrD  s       rY   comment_fillrN    sK    zz"4a83~;NN66#~tF'`EU'`Y_'`hjhthtuurn   c                n    | d   ddigd||did}t        | |      d   D cg c]  }|d   	 c}S c c}w )zR
    Get the list of sample_ids from the sample table for the selected assay.
    rM   	sample_idzsample$sample_idassay_filtersr  )r   r   raw_filtersr   )r{   )rV   r  r  sample_payloadr   s        rY   retrieve_samplesrT    sS    
   67!345'*H)MNN
 %1~$Fy$QRqAkNRRRs   2c                "   d}| j                   s| j                  s| j                  rd}| j                  rI|st	        d       n;| j
                  s$| j                  s| j                  s| j                  rt	        d       | j
                  r=| j                  rt	        d       n%| j                  rt	        d       n|rt	        d       | j                  s| j                  r;| j                   s$| j                  s| j                  s| j
                  rt	        d       | j                   r| j                  rt        t        d	d
      dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        dd
      z   dz   dz          t        j                  d       n9| j                  r| j                  r t        t        d	d
      dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        d      z   dz   t        dd
      z   dz   dz          t        j                  d       nd| j                  rW| j                  rJt        t        d	d
      dz   t        d       z   dz   t        d!      z   dz   t        d"      z   dz   t        d#      z   dz   t        d$      z   dz   t        d%      z   dz   t        d&      z   dz   t        d'      z   dz   t        d(      z   dz   t        d)      z   dz   t        d*      z   dz   t        d+      z   dz   t        d,d
      z   dz   t        d-d
      z   dz   t        d.d/gd0d1d2d3d4d2gg d5d6      z   dz   t        d7d
      z   dz   t        d.d/gd8d9gg d:d;      z          t        j                  d       | j                   rt!        d<|       }n1| j                  rt!        d=|       }n| j                  rt!        d>|       }| j                  r|st	        d?       | j                  r|st	        d@       t#        | j$                        \  }}}}dA|dB   v r,t'        | j
                  | j                  g      rt	        dC       |dD   }t)        ||E      j+                         }t-        | j
                  | j                  | j$                  dF|      \  }	}
}}t/        | |dG   dF      \  }}dH}| j                   rd<}n| j                  rd=}|r|rt1        |	|
|||I      \  }}t3        ||       | j                  r=t5        ||      }|rt        |       n]t7        |dJ      5 }t        ||K       dHdHdH       n:t9        ||      }t;        |dL   t<        M      }t?        ||dN||tA        dO      P       | j                  r|r|jC                  dQ      }|jC                  dR      }|jC                  dS      }tE        | j                  | j                  |||       | j                  s| j                  r tG        |jC                  dTg             }g |dT<   dU|v rg }ntI        ||||      }t1        ||	|
||d>| V      \  }}t3        ||       g }|rtK        |      D ]  \  }}|jM                         }|dWk(  rd|dX<   n|dYk(  rd|dZ<   n
|d[k(  rd|d\<   t1        ||	|
||d]|dWk7  |dYk7  |d[k7  |tO        |      d^z
  k(  _
      \  } }!t3        ||        |jQ                  |         tS        |      }"| j                  rg }#|"rPt5        ||      dHd` }$	 da|db   z   dcz   }%tU        jV                  |%|$      jY                          |#jQ                  |$       |D ]A  } | df   jQ                  dWdgi       t5        ||       dHd` }&|#jQ                  ta        |&             C dhjc                  |#      diz   }|rt        |       yHt7        |dJ      5 }t        ||K       dHdHdH       yHg }|"r t9        ||      }'|je                  |'dL          |D ],  } t9        ||       }(|je                  tg        |(dL   |             . |r&ti        ||      })|)rt9        ||)      }*tk        ||*       | j                  s| j                  rtm        ||	|
      }+to        |jC                  djg             },|,rtq        |,js                  |+            }+tu        |dk   |      }-t9        ||-      dL   D .cg c]  }.|. }/}.| j                  rdWnd[}0tw        |+|/||0      }tO              dkD  rty        |dT|      }|j{                  t<        M       t?        ||dN||tA        dO      P       yHyHyH# 1 sw Y   xY w# tZ        $ r( t	        ddj]                  |db         t^        fe       Y Iw xY w# 1 sw Y   yHxY wc c}.w )lz
    Retrieve the selected data or generate SQL to retrieve the data from an genetic variant assay in a dataset or cohort based on provided rules.
    FTzPlease specify one of the following: --retrieve-allele, --retrieve-genotype or --retrieve-annotation" for details on the corresponding JSON template and filter definition.zPlease check to make sure the parameters are set properly. --json-help cannot be specified with options other than --retrieve-annotation/--retrieve-allele/--retrieve-genotype.z3The flag, --sql, cannot be used with --list-assays.zUWhen --list-assays is specified, output is to STDOUT. "--output" may not be supplied.5--list-assays cannot be presented with other options.zUThe flags, --infer-ref and --infer-nocall, can only be used with --retrieve-genotype."Filters and respective definitions# rL  
#
a?  rsid: rsID associated with an allele or set of alleles. If multiple values are provided, the conditional search will be, "OR." For example, ["rs1111", "rs2222"], will search for alleles which match either "rs1111" or "rs2222". String match is case sensitive. Duplicate values are permitted and will be handled silently.a  type: Type of allele. Accepted values are "SNP", "Ins", "Del", "Mixed". If multiple values are provided, the conditional search will be, "OR." For example, ["SNP", "Ins"], will search for variants which match either "SNP" or "Ins". String match is case sensitive.aU  dataset_alt_af: Dataset alternate allele frequency, a json object with empty content or two sets of key/value pair: {min: 0.1, max:0.5}. Accepted numeric value for each key is between and including 0 and 1.  If a user does not want to apply this filter but still wants this information in the output, an empty json object should be provided.a<  gnomad_alt_af: gnomAD alternate allele frequency. a json object with empty content or two sets of key/value pair: {min: 0.1, max:0.5}. Accepted value for each key is between 0 and 1. If a user does not want to apply this filter but still wants this information in the output, an empty json object should be provided.zlocation: Genomic range in the reference genome where the starting position of alleles fall into. If multiple values are provided in the list, the conditional search will be, "OR." String match is case sensitive.z*JSON filter template for --retrieve-alleler   a  {
  "rsid": ["rs11111", "rs22222"],
  "type": ["SNP", "Del", "Ins"],
  "dataset_alt_af": {"min": 0.001, "max": 0.05},
  "gnomad_alt_af": {"min": 0.001, "max": 0.05},
  "location": [
    {
      "chromosome": "1",
      "starting_position": "10000",
      "ending_position": "20000"
    },
    {
      "chromosome": "X",
      "starting_position": "500",
      "ending_position": "1700"
    }
  ]
}r   a^  allele_id: ID of an allele for which annotations should be returned. If multiple values are provided, annotations for any alleles that match one of the values specified will be listed. For example, ["1_1000_A_T", "1_1010_C_T"], will search for annotations of alleles which match either "1_1000_A_T" or ""1_1010_C_T". String match is case insensitive.a+  gene_name: Gene name of the annotation. A list of gene names whose annotations will be returned. If multiple values are provided, the conditional search will be, "OR." For example, ["BRCA2", "ASPM"], will search for annotations which match either "BRCA2" or "ASPM". String match is case insensitive.a'  gene_id: Ensembl gene ID (ENSG) of the annotation. If multiple values are provided, the conditional search will be, "OR." For example, ["ENSG00000302118", "ENSG00004000504"], will search for annotations which match either "ENSG00000302118" or "ENSG00004000504". String match is case insensitive.a  feature_id: Ensembl feature id (ENST) where the range overlaps with the variant. Currently, only  coding transcript IDs are searched. If multiple values are provided, the conditional search will be, "OR." For example, ["ENST00000302118.5", "ENST00004000504.1"], will search for annotations which match either "ENST00000302118.5" or "ENST00004000504.1". String match is case insensitive.aw  consequences: Consequence as recorded in the annotation. If multiple values are provided, the conditional search will be, "OR." For example, ["5_prime_UTR_variant", "3_prime_UTR_variant"], will search for annotations which match either "5 prime UTR variant" or "3 prime UTR variant". String match is case sensitive. For all supported consequences terms, please refer to snpeff: http://pcingola.github.io/SnpEff/se_inputoutput/#effect-prediction-details (Effect Seq. Ontology column). This filter cannot be specified by itself, and must be included with at least one of the following filters: "gene_id", "gene_name",or "feature_id".a[  putative_impact: Putative impact as recorded in the annotation. Possible values are [ "HIGH", "MODERATE", "LOW", "MODIFIER"]. If multiple values are provided, the conditional search will be, "OR." For example, ["MODIFIER", "HIGH"], will search for annotations which match either "MODIFIER" or "HIGH". String match is case insensitive. For all supported terms, please refer to snpeff: http://pcingola.github.io/SnpEff/se_inputoutput/#impact-prediction. This filter cannot be specified by itself, and must be included with at least one of the following filters: "gene_id", "gene_name", or "transcript_id".a  hgvs_c: HGVS (DNA) code of the annotation. If multiple values are provided, the conditional search will be, "OR." For example, ["c.-49A>G", "c.-20T>G"], will search for annotations which match either "c.-49A>G" or "c.-20T>G". String match is case sensitive.a	  hgvs_p: HGVS (Protein) code of the annotation. If multiple values are provided, the conditional search will be, "OR." For example, ["p.Gly2Asp", "p.Aps2Gly"], will search for annotations which match either "p.Gly2Asp" or "p.Aps2Gly". String match is case sensitive.z.JSON filter template for --retrieve-annotationa  {
  "allele_id":["1_1000_A_T","2_1000_G_C"],
  "gene_name": ["BRCA2"],
  "gene_id": ["ENST00000302118"],
  "feature_id": ["ENST00000302118.5"],
  "consequences": ["5 prime UTR variant"],
  "putative_impact": ["MODIFIER"],
  "hgvs_c": ["c.-49A>G"],
  "hgvs_p": ["p.Gly2Asp"]
}a  allele_id: ID(s) of one or more alleles for which sample genotypes will be returned. If multiple values are provided, any samples having at least one allele that match any of the values specified will be listed. For example, ["1_1000_A_T", "1_1010_C_T"], will search for samples with at least one allele matching either "1_1000_A_T" or "1_1010_C_T". String match is case insensitive.zlocation: Genomic position in the reference genome of the starting position of the alleles.  If multiple values are provided in the list, the conditional search will be, "OR." String match is case sensitive.z6allele_id and location are mutually exclusive filters.a  sample_id: Optional, one or more sample IDs for which sample genotypes will be returned. If the provided object is a cohort, this further intersects the sample ids. If a user has a list of samples more than 1,000, it is recommended to use a cohort id containing all the samples.zegenotype_type: Optional, one or more genotype types for which sample genotype types will be returned.zOne of:z5	ref	(homozygous for the reference allele			e.g. 0/0)zC	het-ref	(heterozygous for the ref allele and alt allele		e.g. 0/1)z3	hom	(homozygous for the non-ref allele			e.g. 1/1)z?	het-alt	(heterozygous with two distinct alt alleles		e.g. 1/2)zC	half	(only one allele is known, second allele is unknown	e.g. ./1)z/	no-call	(both alleles are unknown				e.g. ./.)z-JSON filter templates for --retrieve-genotypezExample using location:s1s2110000)
chromosomestarting_positionX500)refhet-refhomhet-althalfno-call)rP  locationgenotype_typezExample using allele_id:
1_1000_A_T
2_1000_G_C)rd  re  rf  rg  )rP  	allele_idrj  r   r   r   zc--assay-name must be used with one of --retrieve-allele,--retrieve-annotation, --retrieve-genotype.zWhen --sql provided, must also provide at least one of the three options: --retrieve-allele <JSON>; --retrieve-genotype <JSON>; --retrieve-annotation <JSON>.rL   rK   Currently --assay-name and --list-assays may not be used with a CohortBrowser record (Cohort Object) as input. To select a specific assay or to list assays, please use a Dataset Object as input.r]   r   r(  r   N)full_input_dictr  rE   r   genome_referencer   r   r   r   )keyr   |)r   r   r   r   r   
quote_charr/  r0  r1  rj  rm  )ro  r  rE   r   rp  r   orderrc  ref_ynrg  
halfref_ynrh  	nocall_yngenotype_onlyr:   )
ro  r  rE   r   rp  r   r/  r0  r1  rt  z\br.  z\w+z2Failed to find the table, {}, in the generated SQLrG   r   z
allele$refz UNION rc   rP  ri  )>retrieve_alleleretrieve_annotationretrieve_genotype	json_helpr   r6  r  rb   r   	infer_refinfer_nocallr   rN  r   r4  r<   r  rZ   rS   anyr   r   rB  r&  r   r   rm   r   r{   sortedr   r   rR   r   r)   r'   r   	enumeratecopyr   r   r%   rH  searchgrouprQ   rN   AttributeErrorr    r   extendr!   r"   r$   rT  setr5  intersectionr#   r&   r(   sort)1r   filter_givenfilter_dictr@   rU   rV   rX   r   r   r=  r>  r@  r?  r%  r   r   rd   r   rh   sql_filery   ordered_resultsr/  r0  r1  types_to_filter_outgenotype_only_typesgenotype_payloadgenotype_only_payloadsigenotype_only_typegenotype_only_filter_dictgenotype_only_payloadr   genotype_typessql_queriesgenotype_sql_querygeno_table_regexgenotype_only_sql_querygenotype_resp_rawgenotype_only_resp_rawref_payloadlocus_id_refssamplesselected_samplesloci_payloadlocuslocitype_to_infers1                                                    rY   extract_assay_germliner    s   
 Lt774;Q;Q~~ ~ DHH B 88JK[[g LM ~~**4#;#;txx4K[K[g
 >>ARVWZaa  _  `` cjj  g  hh krr   u  vv y@@   \  ]	] `g	g
   t  uu
 x IZ^_` cgg aa	 HHQK		!	!>>ARVWZaa  ~   BII  K  LL OVV   G  HH KRR   b  c	c fm	m
   W
  X
X

 [
b
b
   {	  |	|	 	F
F
   a  bb ell   i  jj mtt M^bc	d gk	k o
o HHQK			>>ARVWZaa  _  `` cjj  o  pp szz UVW Zaa   v  w	w zA	A
   E  FF
 IMM Y'( +// YZ[ ^bb fgh koo WX	Y \`	` bc
d gk
k efg jnn TUV Y`` L]abc fmm 6tLM PTT "&*D\+.WM+.UK% *a	0 12 7M3N2 QU3U4 ""&".!=%J# 5!D HHQK.x>		!	!.|TB			.z4@ u
 xx p
 5QQUQZQZ4[1G]D/$}--#			4??+3 	 Q	
 iJz?CRRTN^q$//499j._[*,?A[ !5T4;Mz ZHoK		!	!"|,'$ #0# 
 	dG,88,T7;Kk"(C( 6H+H56 6 $D'2H$Xi%8>STO& /+(s8 , : > >?P Q : > >?P Q9==>NONN	 >>T.."GXgikHl"m+-K( +%"$"9+:I?\j#l )6'$ #0")))
%+ 	d$45!# *33F)G E%%,7,<,<,>)%.:>-h7'61>B-l;'94=A-k:+8$=,($+%8 /$6%$?$6&$@#5#Bs#67!;;,(%q &d,AB&--.CD1E6 ,K888K%7>N%OPSQS%T"s',/IJ_/`'`ci'i$II.0BCIIK ""#56 *@ T%%h/66|7LM*<TCX*YZ][]*^'""#9:Q#RST $..5;Kk"(C( 6H+H56 6 !O$07G$H!&&'8'CD *@ s%)5d<Q)R&&&'ABXYbBcep'qrs
 #
 7HXY$0{$CM,_mL~~!2!2*41DFWX#&{{B'G#H #"#3#@#@#IJG8Z9PRbc+7l+KI+VW%WW)-Y"5gt_Vc"d*+a/&4_oWj&kO  %: ;& /+(s8U #/6 6z ! sQXX23HIKao`qs ss(6 6> Xs0   f?/g 8h 	h?g	-g=<g= h	c                    g }t        | d   j                               D ]<  }|j                  dj                  || d   |   d                | d   |   d   du s;|}> |fS )zc
    Retrieves the entities in form of <entity_name>	<entity_title> and identifies main entity
    r   z{}	{}entity_titleis_main_entityT)r  rw   r   rN   )r   r   entitymain_entitys       rY   r   r     s     !z*//12 !&&OOFE*$5f$=n$MN	
 V$%56$> K! #K//rn   c           
        | d   j                         }| d   |   g}|j                  rjg }g }t        |j                  j                  d            D ]/  }||v r|j	                  | d   |          |j	                  |       1 |rt        d|z         g }|D ]Q  }t        |d   j                               D ]0  }|j	                  dj                  |d   ||d   |   d                2 S t        dj                  |             y	)
zs
    Listing fileds in the model in form at <entity>.<field_name>	<field_title> for specified list of entities
    r   r}   z1The following entity/entities cannot be found: %sr   z{}.{}	{}r  titler   N)	rw   r   r  r   r   r   rN   r   r   )	r   r  r   present_entitiesentities_to_list_fieldsr   r  r   r   s	            rY   r   r     s    Z(--/$Z0=>}}"$
T]]0056 	*F))'..uZ/@/HI!!&)		*
 H:UVF) F8,1134 	EMM""6NE6(+;E+B7+K	 
$))F
rn   r   Fr}   r   c                   |rt         j                  }nt        | d      }t        j                  |t        |      dd d||dd|
      }|j                          |D ]  }	|j                  |	        |s|j                          y y )Nr   Tr   F)	r   doublequote
escapecharlineterminator	quotecharquotingskipinitialspacestrict
fieldnames)	r   r   r   csv
DictWriterrR   writeheaderwriterowclose)
r   r   r   r   r   rs  r  fields_output
csv_writerr   s
             rY   r   r     s     

]C0c(J  #E"#  rn   c                b
   t        | j                  | j                  | j                  | j                  g      }| j
                  r|rt        d       | j                  r-t        | j                  | j                  |g      rt        d       | j                  r>t        | j                  | j                  | j                  | j                  | j                  g      rt        d       n| j                  t        d       nt        t        dd      d	z   t        d
      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        d      z   d	z   t        dd      z   dz   dz          t        j                  d       | j                  rt        | j                  | j                  |g      rt        d       nd }t        t!        d      dz          g dg dg dg dg dg d g d!g} ||       t        dt!        d"      z   dz          g dg d#g d$g d%g d&g d'g d(g d)g d*g d+g d,g d-g d.g d/g d0g d1g d2g d3g d4g d5g d6g d7g d8g} ||       t        j                  d       | j                  Q| j                  j#                  d9      D cg c]  }|j%                          }}g d:}|D ]  }||vst        d;        t'        | j(                        \  }	}
}}d<|d=   v r,t        | j                  | j                  g      rt        d>       |d?   }t+        ||@      j-                         }t/        | j                  | j                  | j(                  dA|      \  }}}}t1        | |dB   dA      \  }}| j
                  r%t        ||	||||       t        j                  d       | j                  rt3        dC|       }| j                  r t5        ||||	|| j                  D      \  }}nt5        ||||	|| j                  E      \  }}d<|d=   v r-|j7                  dF      r|j7                  dF      |dG<   |dH   |dH<   | j                  r=t9        ||      }|rt        |       yt;        |dI      5 }t        ||J       ddd       yt=        ||      }t?        ||dK|dL   |tA        dK      tB        jD                  M       yyc c}w # 1 sw Y   yxY w)Nz
    Retrieve the selected data or generate SQL to retrieve the data from an somatic variant assay in a dataset or cohort based on provided rules.
    z_The flag, --retrieve-meta-info cannot be used with arguments other than --assay-name, --output.rV  zv--json-help cannot be passed with any of --assay-name, --sql, --additional-fields, --additional-fields-help, --output.Nz8--json-help cannot be passed without --retrieve-variant.rW  rX  rY  rZ  a  location: "location" filters variants based on having an allele_id which has a corresponding annotation row which matches the supplied "chromosome" with CHROM and where the start position (POS) of the allele_id is between and including the supplied "starting_position" and "ending_position". If multiple values are provided in the list, the conditional search will be, "OR". String match is case sensitive.aV  symbol: "symbol" filters variants based on having an allele_id which has a corresponding annotation row which has a matching symbol (gene) name. If multiple values are provided, the conditional search will be, "OR". For example, ["BRCA2", "ASPM"], will search for variants which match either "BRCA2" or "ASPM". String match is case sensitive.a  gene: "gene" filters variants based on having an allele_id which has a corresponding annotation row which has a matching gene ID of the variant. If multiple values are provided, the conditional search will be, "OR". For example, ["ENSG00000302118", "ENSG00004000504"], will search for variants which match either "ENSG00000302118" or "ENSG00004000504". String match is case insensitive.a  feature: "feature" filters variants based on having an allele_id which has a corresponding annotation row which has a matching feature ID. The most common Feature ID is a transcript_id. If multiple values are provided, the conditional search will be, "OR". For example, ["ENST00000302118", "ENST00004000504"], will search for variants which match either "ENST00000302118" or "ENST00004000504". String match is case insensitive.aT  hgvsc: "hgvsc" filters variants based on having an allele_id which has a corresponding annotation row which has a matching HGVSc. If multiple values are provided, the conditional search will be, "OR". For example, ["c.-49A>G", "c.-20T>G"], will search for alleles which match either "c.-49A>G" or "c.-20T>G". String match is case sensitive.aY  hgvsp: "hgvsp" filters variants based on having an allele_id which has a corresponding annotation row which has a matching HGVSp. If multiple values are provided, the conditional search will be, "OR". For example, ["p.Gly2Asp", "p.Aps2Gly"], will search for variants which match either "p.Gly2Asp" or "p.Aps2Gly". String match is case sensitive.a!  allele_id: "allele_id" filters variants based on allele_id match. If multiple values are provided, anymatch will be returned. For example, ["1_1000_A_T", "1_1010_C_T"], will search for allele_ids which match either "1_1000_A_T" or ""1_1010_C_T". String match is case sensitive/exact match.a  variant_type: Type of allele. Accepted values are "SNP", "INS", "DEL", "DUP", "INV", "CNV", "CNV:TR", "BND", "DUP:TANDEM", "DEL:ME", "INS:ME", "MISSING", "MISSING:DEL", "UNSPECIFIED", "REF" or "OTHER". If multiple values are provided, the conditional search will be, "OR". For example, ["SNP", "INS"], will search for variants which match either "SNP" or ""INS". String match is case insensitive.aV  sample_id: "sample_id" filters either a pair of tumor-normal samples based on having sample_id which has a corresponding sample row which has a matching sample_id. If a user has more than 500 IDs, it is recommended to either retrieve multiple times, or use a cohort id containing all desired individuals, providing the full set of sample_ids.a}  assay_sample_id: "assay_sample_id" filters either a tumor or normal sample based on having an assay_sample_id which has a corresponding sample row which has a matching assay_sample_id. If a user has a list of more than 1,000 IDs, it is recommended to either retrieve multiple times, or use a cohort id containing all desired individuals, providing the full set of assay_sample_ids.z+JSON filter template for --retrieve-variantr   ao  {
  "location": [
    {
      "chromosome": "1",
      "starting_position": "10000",
      "ending_position": "20000"
    },
    {
      "chromosome": "X",
      "starting_position": "500",
      "ending_position": "1700"
    }
  ],
  "annotation": {
    "symbol": ["BRCA2"],
    "gene": ["ENST00000302118"],
    "feature": ["ENST00000302118.5"],
    "hgvsc": ["c.-49A>G"],
    "hgvsp": ["p.Gly2Asp"]
  },
  "allele" : {
    "allele_id":["1_1000_A_T","2_1000_G_C"],
    "variant_type" : ["SNP", "INS"]
  },
  "sample": {
    "sample_id": ["Sample1", "Sample2"],
    "assay_sample_id" : ["Sample1_tumt", "Sample1_nor"]
  }
}r   z@--additional-fields-help cannot be presented with other options.c           
         | D ]Y  } dj                   |d d  }t        |      }|t        j                  ddd|z  z   t	        |d   |             z  }t        |       [ y )Nz{: <22} {: <22} r	   r    )rD  )rN   r   rH  rI  r
   r   )r   rowfields_stringwidths       rY   print_fieldsz+extract_assay_somatic.<locals>.print_fieldsi  sk    ! )C$=$6$=$=s2Aw$GM.E!RVVD$u2Dd3q6ejdjFk%llM-(	)rn   z8The following fields will always be returned by default:)NAMETITLEDESCRIPTION)assay_sample_idzAssay Sample IDzhA unique identifier for the tumor or normal sample. Populated from the sample columns of the VCF header.)rm  z	Allele IDz&An unique identification of the allele)CHROM
Chromosomez1Chromosome of variant, verbatim from original VCF)POSPositionz8Starting position of variant, verbatim from original VCF)REFzReference Allelez5Reference allele of locus, verbatim from original VCF)r   AllelezSequence of the allelezThe following fields may be added to the output by using option --additional-fields. If multiple fields are specified, use a comma to separate each entry. For example, "sample_id,tumor_normal")rP  z	Sample IDz-Unique ID of the pair of tumor-normal samples)tumor_normalzTumor-Normalz9One of ["tumor", "normal"] to describe source sample type)r  r  zLComma separated list of associated IDs for the variant from the original VCF)QUALr  z,Quality of locus, verbatim from original VCF)FILTERr  z?Comma separated list of filters for locus from the original VCF)reference_sourcezReference SourcezSOne of ["GRCh37", "GRCh38"] or the allele_sample_id of the respective normal sample)variant_typezVariant Typez-The type of allele, with respect to reference)symbolic_typezSymbolic TypezJOne of ["precise", "imprecise"]. Non-symbolic alleles are always "precise")file_idzSource File IDz1DNAnexus platform file-id of original source file)INFOr  z(INFO section, verbatim from original VCF)FORMATr  z*FORMAT section, verbatim from original VCF)SYMBOLSymbolz/A list of gene name associated with the variant)GENOTYPEr  zLGENOTYPE section, as described by FORMAT section, verbatim from original VCF)normal_assay_sample_idzNormal Assay Sample IDz8Assay Sample ID of respective "normal" sample, if exists)normal_allele_idszNormal Allele IDsz5Allele ID(s) of respective "normal" sample, if exists)GenezGene IDz/A list of gene IDs, associated with the variant)Featurez
Feature IDz2A list of feature IDs, associated with the variant)HGVScr  z9A list of sequence variants in HGVS nomenclature, for DNA)HGVSpr  z=A list of sequence variants in HGVS nomenclature, for protein)CLIN_SIGzClinical Significancez5A list of allele specific clinical significance terms)ALTr  zZAlternate allele(s) at locus, comma separated if more than one, verbatim from original VCF)	alt_indexzALT allele_indexzOrder of the allele, as represented in the ALT field. If the allele is missing (i.e, "./0",  "0/." or "./.") then the alt_index will be emptyr}   )rP  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  One or more of the supplied fields using --additional-fields are invalid. Please run --additional-fields-help for a list of valid fieldsrL   rK   rn  r]   r   r"  r   r  )ro  r  rE   r   rp  additional_fieldsinclude_normal)ro  r  rE   r   rp  r  r   r   r   r   r   r   r   )r   r   r   r   r   rs  r  )#r  include_normal_sampler  r}  rb   r   r   r6  r  r   retrieve_variantr   rN  r   r4  additional_fields_helpr
   r   r   rZ   rS   r   r   rB  r&  r  r+   r   rm   r   r{   r   rR   r  
QUOTE_NONE)r   invalid_combo_argsr  fixed_fieldsr  additional_fieldadditional_fields_inputaccepted_additional_fieldsr   r@   rU   rV   rX   r   r   r=  r>  r@  r?  r%  r   r  rd   r   rh   r  ry   s                              rY   extract_assay_somaticr  8  s6    d88$:P:PRVR`R`bfbjbjkl#5m	
 C$++?Q RSC	
 ~~d.H.H$J`J`bfbjbjkl I ""*OPARVWZaa  v  ww zAA  u  vv y@@   a  bb ell   J  K	K NU	U
   s  tt
 w~~   x  yy |CC   @  AA DKK   k  ll ovv   u  v	v y@	@   \  ]
] `g
g IZ^_` cgg  m
m
 HHQK"".@ABR) $QRUYYZ< o`hkpJLL &$  a  b  b  ei  i  j!A!l!~!}!a!x "a!r "R!s!]!c!h "J "R "E!g!p!p!t! "N "T-!U. *+HHQK )TXTjTjTpTpqtTu"v@P#3#9#9#;"v"v &w", 	eE66  d  e	e
 5QQUQZQZ4[1G]D/$}--#t7G7G6X2Y Q	
 iJz?CRRTN^q$//499i_[*,?A[ !5T4;My YHo4*;=PRackl.y$?!!#8 +($ '!4"9#99$ G[ $9 +($ '!4#99$ G[ d=11xx	"&*hhy&9
#!%iGI 88,T7;Kk"(C( 6H+H56 6 $D'2H& /$Y/(t9Q 3 #wx6 6s   /T T%%T.c           
     \  1 t        |       }t        |t        t              }|j	                          | j
                  r$t        t               t        j                  d       | j                  r$t        t               t        j                  d       | j                  rt        | j                        \  }}}|&t        dj                  | j                  |             n|j                  d      }||d   d   k7  r%t        dj                  | j                  |             dd	g}|d   d
   1|d   d   dk7  st!        1fd|D              rt        dj                  |d   1             t#        d         }t%        j&                  |      \  }	}
|
r| j(                  s| j*                  rt        d       t-        |
d   d         dk  rt        dj                  |
d                |
j                  d      j                  d      }|
j                  d      j                  d      }d}nBd}d}d}t-        |	j.                        dk  r$t        dj                  |	j0                               | j*                  r/t        	j3                  d      ddi t        j                  d       | j(                  rL	j5                  | j(                  d      st        dj                  | j(                  |	j0                               nY| j(                  sM	j6                  j                  d      't9        |	j6                  j                  d            dk(  rt        d       t        j:                  j<                  dk(  rt>        t@        _!        | jD                  r!	 tA        jF                  | jD                        }n@| jL                  r4	 tO        | jL                        5 }tA        jP                  |      }ddd       | jV                  r| jD                  s| jL                  sd}i }nd}s|st        d#       | jX                  r&d$|v rt        d%       | jZ                  rt        d&       t        j:                  j<                  dk(  rKt\        t^        d'   d(   d)   d*   d+<   t\        t^        d'   d(   d)   d,   d+<   t\        t^        d'   d(   d)   d-   d+<   |r'ta        t^        t        .      }|jc                  |/       | j(                  r.	j6                  d   D ]  }|d0   | j(                  k(  s|d1   } n n	j6                  d   d   d1   }td        tf        d2}|	ji                        j                  d3      }|j                  |      }|d4k(  rd'|v rtj        }ntf        }d'|v s|r:|d4k(  rtj        }| jV                  rd|d5   d'   d6<   njm                  |d'd,d-d7d8       to        ||t        9      }|jq                         }|d:   j                  d;      }| jZ                  rg }| jZ                  D ]P  }|js                  d<      D cg c]#  }|ju                         s|ju                         % }}|jw                  |       R |d:   j                  d=      } ty        |      ty        | D !"ch c]  }!|!j{                         D ]  }"|"  c}"}!      z
  }#t9        |#      dk7  rt        d>       | D $cg c]*  }$ty        |$j{                               ty        |      z  s)|$, }%}$|jw                  |%       t}        |rdind|d?   d|t        @      }&|	j~                  }'| j(                  r| j(                  n|	j3                  d      d   }(|&j                  |(||A       |&j                         })|	j                  d   }*|	j                  }+t        |+|t              },| jV                  r|,j                  |)|*      }-n|,j                  |)|*      }-d}.| jV                  r|-dB   n|-dC   }/| jX                  r|/rt        |-dC         \  }0}.|0}/|/si g}/t        | j                  | j                  | jV                  |/d|	j                  d0   |.D       y# t@        jB                  $ r$}d tI        |      z   }t        |       Y d}~d}~wtJ        $ r}t        tI        |             Y d}~d}~ww xY w# 1 sw Y   xY w# t@        jB                  $ r$}d tI        |      z   }t        |       Y d}~'d}~wtR        $ rQ}d!tI        |      v r%t        d"j                  |jT                               nt        tI        |             Y d}~d}~wtJ        $ r}t        tI        |             Y d}~d}~ww xY wc c}w c c}"}!w c c}$w )Ez
    `dx extract_assay expression`

    Retrieve the selected data or generate SQL to retrieve the data from an expression assay in a dataset or cohort based on provided rules.
    )parser_dictschemaerror_handlerr   Nz.Unable to resolve "{}" to a data object in {}.r   r@   zuUnable to resolve "{}" to a data object or folder name in {}. Please make sure the object is in the selected project.r.   rL   typesrB   rC   c              3  &   K   | ]  }|v 
 y wN ).0x_record_typess     rY   	<genexpr>z+extract_assay_expression.<locals>.<genexpr>  s      A
'(A]"A
s   z_{} Invalid path. The path must point to a record type of cohort or dataset and not a {} object.rE   zCurrently "--assay-name" and "--list-assays" may not be used with a CohortBrowser record (Cohort Object) as input. To select a specific assay or to list assays, please use a Dataset object as input.rW   versiong      @zC{}: Version of the cohort is too old. Version must be at least 3.0.r   r   TFzD{}: Version of the dataset is too old. Version must be at least 3.0.molecular_expressionr   r   zAssay {} does not exist in {}, or the assay name provided cannot be recognized as a molecular expression assay. For valid assays accepted by the function, `extract_assay expression`, please use the --list-assays flag.z3No molecular expression assays found in the datasetr	   z9JSON provided for --retrieve-expression is malformatted.
zNo such file or directoryz=JSON file {} provided to --retrieve-expression does not existzNo filter JSON is passed with --retrieve-expression or input JSON for --retrieve-expression does not contain valid filter information.
expressionzvExpression filters are not compatible with --expression-matrix argument. Please remove "expression" from filters JSON.zE--additional-fields cannot be used with --expression-matrix argument.ri  r   
propertiesr_  rs   r`  ending_position)r  r  )
input_jsonr  r+  )z1.01.1generalized_assay_model_versionr  filtering_conditionsmax_item_limiti)r  rq  start_subkey
end_subkeywindow_widthcheck_each_separately)r  r  r  output_fields_mappingdefaultr}   
additionalr  order_by)r   r  r   r  limitr   r  r  )r  r  r   rb   r   )save_uncommon_delim_to_txtoutput_file_namecolnames)Kr  r/   r1   r   validate_input_combinationr}  r   r=   r   r4  r  r>   rS   r   rN   r   allr   r.   resolve_cohort_to_datasetr  r6  floatr  rE   assay_names_listis_assay_name_validassays_info_dictr   r   r   
ValueErrorr  JSONDecodeErrorr   r  rR   rQ   filter_json_filer   r  OSErrorfilenamerb   expression_matrixr  unicoder2   r0   validater3   r4   assay_info_dictr5   are_list_items_within_ranger6   parser   r   r  r  rw   r7   descriptor_file_dictassemble_assay_raw_filtersbuilddetail_describevizserver_urlr8   get_raw_sqlget_datar9   r;   r   r   )2r   r  input_validatorr@   rT   rU   entity_describeEXPECTED_TYPESrC   r]   cohort_infoBASE_SQLCOHORT_FILTERS	IS_COHORTuser_filters_jsonr   json_reading_errorr   return_all_datainput_json_validatormolecular_assayASSAY_IDconditions_mappingr  filter_schemainput_json_parservizserver_raw_filters_db_columns_listr  r   r  r   all_additional_colsdkincorrect_colsr  user_additional_colsvizDATASET_DESCRIPTOR
ASSAY_NAMEvizserver_payload	record_idr\   clientvizserver_responser  output_datatransformed_responser  s2                                                    @rY   extract_assay_expressionrN    s	    t*K#9O
 ..0~~01""=> yy.CDII.N+m @GGIIw ,//
;O mJ/	:: H  O  OIIw $_5%j1':$W-9S A
,:A
 >
 qxx!$' /$/0&@@H$"2"2 ] [+I67#=Y``#D) #y155i@H(__Y7;;IFNIH!NIW__%+Zaa

 w''(>?JTJ **4??<RS l  s  sOOWZZ
 __##''(>?G3wOgOgOkOk  mC  PD  LE  IJ  LJJK ")
	 $

4+;+; < 
			d++, 1$(IIaL!1, xx((1F1F_ U	
 ,, I !!W
 "  	-Z8A,O	

	
  	-Z8A,O	

	
  	-Z8A,O	

	
 ,7x 
 	%%1B%C   '778NO 	Ov&$//9*62	
 ++,BCAFvN ED
 '.&=&=h&G&K&KLm&n#&**+JKM '%/**[MMM &&/*e3[M88 #' 01 
 !<<,0,&&+ =  -$
 .335$	c)n 
  ** 	,D(,

3E11779QWWYEEE$$U+	, ,#

#l
 	 ./#+>1QVVX>Q>Q>3
 
 ~!# [ + 
c!&&(mcBS>T.TA 
  
 	 45
./8N+dz*	C !55??0H0HI_0`ab0c  "":O #  		 ''-I


C sGX.Fxx#//0A9M#__->	JH &*XX5!3Ei3P 
 +)Gy)*
&h +d

#' 008o ## 	)a& 
 '(( 	SV	
1 1## 	)a& 
 '(( 	!*c!f4SZZ

 Q  	SV	f F ? 
s   a9 >c& c)c& 8fff#"*f)f)9cb++c7ccc#c& &f9df$Ae00f<ffc                P   t        |       \  }}}d\  }}|dk7  r`t        j                  j                  |      }t        j                  j	                  |      }	 t        |||      }|sdj                  ||      }||||fS # t        $ r}dt        |      v rd}n|Y d}~:d}~ww xY w)zc
    Resolves dx path into project, folder and name. Fails if non existing folder is provided.
    NNrD   zfolder could not be foundFNz4The folder: {} could not be found in the project: {})	r   r   rS   basenamer   r   r   rR   rN   )	rS   r@   folderr  err_msgfolder_existsfolder_namerT   r   s	            rY   resolve_validate_dx_pathrV  5  s     ).GVT'G]}gg&&v.ggoof-	/kRM LSSG FD'))  	*c!f4 % 	s   B   	B%	B  B%c                      e Zd Zy)VizserverErrorN)__name__
__module____qualname__r  rn   rY   rX  rX  L  s    rn   rX  c                   | j                   d   d   }| j                   d   d   }| j                   d   |   d   |   d   d   }|dv rd	 }n-|d
v rd }n%|dv rd }ndj                  |      }t        |      t        ||g       }	dj                  ||      }
||
ig}||dd|
d|	dgiiid}	 t	        ||      }t               }|d   D ]  }|j                  ||           |t        |	      k7  r;t        |	      j                  |      }dj                  |d   |      }t        |      |	|fS # t
        $ r(}t        d      j                  t        |            d }~ww xY w)Nglobal_primary_keyr  r   r   r   r-  column_sql_type)integerbigintc                     | t        |      gz   S r  )r   r   bs     rY   <lambda>z%validate_cohort_ids.<locals>.<lambda>Y      As1vhJ rn   )r  doublec                     | t        |      gz   S r  )r  rb  s     rY   rd  z%validate_cohort_ids.<locals>.<lambda>[  s    AuQxjL rn   )rK  c                     | t        |      gz   S r  )rR   rb  s     rY   rd  z%validate_cohort_ids.<locals>.<lambda>]  re  rn   zInvalid input record. Cohort ID field in the input dataset or cohortbrowser record is of type, {type}. Support is currently only available for Cohort ID fields having one of the following types; string, integer and float)rs   z{}${}pheno_filtersr   in)	conditionvalues)r   r   r   zeException caught while validating cohort ids.  Bad response from Vizserver.Original Error message:
{}r   z`The following supplied IDs do not match IDs in the main entity of dataset, {dataset_name}: {ids}r]   )dataset_nameids)r   rN   r  r   r{   rQ   rX  rR   r  add
difference)
descriptorr@   rV   rn  entity_name
field_namegpk_typelambda_for_list_convrS  id_listentity_field_namer   rd   ry   excdiscovered_idsresultmissing_idss                     rY   validate_cohort_idsr|  O  s   ""#78BK!!"67@J 
+K8B:NyYZklH((6	(	(8	Z	6 q  x  x  @H  x  I!!)33G{J? 123K #%&*g>(
Gg. UN9% /6*-./ W%'l--n=t{{  LP  QZ  L[  cn{  o!!(((%  *
 &S
	s   -D. .	E7#EEc                    g d}|j                  |      }	 t        |       }|j                  |d         |k  ryy# t        $ r Y yw xY w)z
    Validates that issuing user has required access level.
    Args:
        project: str: tasked project_id
        access_level: str: minimum requested level
    Retuns: boolean
    )VIEWUPLOAD
CONTRIBUTE
ADMINISTERFlevelT)indexr   r   )r@   access_level
level_rankaccess_level_idxproject_describes        rY   has_access_levelr    sa     @J!''5#G, (125EE	  s   ; 	AAc                >    t        | |      sdj                  |      S y)z
    Validates that project has requested access.
    Args:
        project: str: tasked project_id
        access_level: str: minimum requested level Default at least UPLOAD
    Returns:
        Error message

    zBAt least {} permission is required to create a record in a projectN)r  rN   )r@   r  s     rY   validate_project_accessr    s)     G\2SZZ
 	
 3rn   c                   t         j                  }t         j                  j                  dd      }d}| j                  r?t        | j                        \  }}}}|rt        |       t        |      }|rt        |       | j                  j                  d      }t        |      \  }}}}	g }
| j                  rGt        | j                  d      5 }|D ]"  }|
j                  |j                  d             $ 	 ddd       | j                  r6| j                  j                  d      D cg c]  }|j                          }
}t!        |d   |		      j#                         }	 t%        ||	||
      \  }}|j                  d|j                  d            }	 t/        |j0                  d   d   |j0                  d   d   |j                  di       ||      }t5        |      }t7        ||||d   |d   |d   |d   |||j                  d      
      }t        j8                  j:                  j<                  di |}| j>                  rtA        |jC                                y	 tE        |jG                  dd      | jH                         y# 1 sw Y   xY wc c}w # t&        $ r&}t        t)        |      t&        f
       Y d}~Rd}~wt*        $ r&}t        t)        |      t*        f
       Y d}~d}~wt,        $ r}t        t)        |             Y d}~d}~ww xY w# t,        $ r)}t        dj3                  |d   |             Y d}~wd}~ww xY w# t,        $ r}t        t)        |             Y d}~yd}~ww xY w)zL
    Create a cohort from dataset/cohort and specified list of samples.
    	DX_CLI_WDrD   Nfromr   r   r}   r]   r   rG   r   r   r]  r  r   r   z{}: {}rE   	databasesr  combinedT)incl_propertiesincl_detailsr  )%rO   WORKSPACE_IDconfigr   PATHrV  r   r  r   rZ   cohort_ids_filer   r   r   
cohort_idsr   r   r   r|  r  rR   rX  rQ   r,   r   rN   rp   r-   bindingsdxrecordnew_dxrecordbriefr   get_idr   r   verbose)r   path_projectpath_folder	path_namerS  FROMfrom_projectrU   rV   rX   r  r   r   rE   r   list_of_idsru  errr   raw_cohort_query_payloadr   rb   cohort_payload	dx_records                           rY   create_cohortr    s!    $$L++//+s3KIyy8PII9
5k9g W),7W==V$D9UVZ9[6L- G$&&, 	1 1tzz$/01	1 (,(=(=c(BC"288:CC tIHWWYN,?P_aegn,o)) xx	488J#78H:#8  !56x@  !56w?HHY# $
  $D*B
CC)[YX +N &&33EnEIzzi !	y))$T)RTXT`T`aq	1 	1
 D  >S
}== BS/@AA S"  :t!4a899:.  	SV	ss   (J'JJ AL 'M J	L!KLK//L;LL	M&M

M	M9M44M9c                  4     e Zd ZdZdZd fd	Zd Zd Z xZS )r   a9  
    A class to handle record objects of type Dataset.
    Inherits from DXRecord, but automatically populates default fields, details and properties.

    Attributes:
        All the same as DXRecord
        name - from record details
        description - from record details
        schema - from record details
        version - from record details
        descriptor - DXDatasetDescriptor object
    Functions
        get_descriptor - calls DXDatasetDescriptor(descriptor_dxfile) if descriptor is None
        get_dictionary - calls descriptor.get_dictionary

    r.   c                b   t         t        |   ||       | j                  dddh       | j                  | j
                  v sJ d| j                  v sJ t        | j                  d         r!t        | j                  d   d|      | _	        nt        d| j                  d   z         d | _        | j                  j                  d	      | _        | j                  j                  d
      | _        | j                  j                  d      | _        | j                  j                  d      | _        y )NTr  rW   )default_fieldsr   rq  rb)moder@   rI   r  descriptionr  r  )superr   __init__r   _record_typer  rW   r   r   descriptor_dxfiler   rq  r   r  r  r  r  )selfdxidr@   	__class__s      rY   r  zDXDataset.__init__  s    i'g6T<2KL  DJJ...t||+++T\\,/0%+DLL,FT[b%cD"5\8RRSLL$$V,	<<++M:ll&&x0||''	2rn   c                ~    | j                   &t        | j                  | j                        | _         | j                   S )N)r  )rq  DXDatasetDescriptorr  r  r  s    rY   r   zDXDataset.get_descriptor,  s3    ??"1&&t{{DO rn   c                n    | j                   | j                          | j                   j                         S r  )rq  r   r   r  s    rY   r   zDXDataset.get_dictionary3  s+    ??"!--//rn   rP  )	rY  rZ  r[  __doc__r  r  r   r   __classcell__)r  s   @rY   r   r   	  s    " L30rn   r   c                      e Zd ZdZd Zd Zy)r  z
    A class to represent a parsed descriptor of a Dataset record object.

    Attributes
        Representation of JSON object stored in descriptor file
    Functions
        get_dictionary - calls DXDatasetDictionary(descriptor)

    c                   t         j                  j                  dk(  xr t         j                  j                  dk(  }t	        |fddi|5 }|rY|j                         }t        |      t        k7  r|j                  d      }t        j                  |t        j                        }n%t        j                  |t        j                        }d d d        D ]  }t        | |||           |j                  d      | _        y # 1 sw Y   7xY w)Nr      is_gzipTzutf-8)object_pairs_hookr  )r   r   r   r   r   readrs   rR   r   r  r  collectionsOrderedDictr  setattrr   r  )r  dxfilerM  python3_5_xr   jsonstrobjrq  s           rY   r  zDXDatasetDescriptor.__init__D  s    &&,,1Qc6F6F6L6LPQ6Qv6t6v6 	N!&&(=C'%nnW5GjjK<S<STii[5L5LM	N  	)CD#s3x(	)jj*	N 	Ns   BDDc                    t        |       S r  )DXDatasetDictionaryr  s    rY   r   z"DXDatasetDescriptor.get_dictionaryU  s    "4((rn   N)rY  rZ  r[  r  r  r   r  rn   rY   r  r  9  s    +")rn   r  c                  J    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Z	 dd
Zy)r  a3  
    A class to represent data, coding and entity dictionaries based on the descriptor.
    All 3 dictionaries will have the same internal representation as dictionaries of string to pandas dataframe.
    Attributes
        data - dictionary of entity name to pandas dataframe representing entity with fields, relationships, etc.
        entity - dictionary of entity name to pandas dataframe representing entity title, etc.
        coding - dictionary of coding name to pandas dataframe representing codes, their hierarchy (if applicable) and their meanings
    c                    | j                  |      | _        | j                  |      | _        | j	                  |      | _        y r  )load_data_dictionarydata_dictionaryload_coding_dictionarycoding_dictionaryload_entity_dictionaryentity_dictionary)r  rq  s     rY   r  zDXDatasetDictionary.__init__c  s;    #88D!%!<!<Z!H!%!<!<Z!Hrn   c                   t        j                         }t        j                         }|j                  d   D ]y  }| j                  |j                  d   |   ||j                  d   d   k(  |j                  d         ||<   |j	                  | j                  |j                  d   |                { g }|j                  D ]K  }d}|d   d   d   |d   d   d	   fD ]
  }||vsd
} n |r+|j                  | j                  ||             M |D ]  }	|j                  |	d         }
|
j                  r$|
d   |	d   k(  }|j                         dk7  rt        dt        |	      z         |
d   j                  }|
d   j                  }dj                  |	d   |	d         ||<   |	d   ||<   |
j!                  ||      }
 |S )z;
        Processes data dictionary from descriptor
        r   r]  r  )is_primary_entityr]  Fjoinsr   tor  Tsource_entityr  source_fieldr:   zInvalid edge: referenced_entity_fieldrelationshipz{}:{}destination_entitydestination_field)r  r  )r  r  r   create_entity_dframeupdate!get_join_path_to_entity_field_map	join_infor   create_edger   emptysumr  rR   rl  rN   assign)r  rq  eblocksjoin_path_to_entity_fieldrr  edgesji	skip_edgerS   edgesource_eblock
eb_row_idxrc  rels                 rY   r  z(DXDatasetDictionary.load_data_dictionaryh  s    ))+$/$;$;$=!%++J7 	K#'#<#<  ,[9:#3#34H#I(#SS$.$4$45I$J $= $GK  &,,66$$Z0=	 && 		NBIGQ-r'{1~f/EF 88 $I
 T--b2KLM		N  	D#KK_(=>M &&*62d>6JJ
>>#q($%5D	%ABB#$=>EE#N3::")..-.5H0I#J #'~"6J - 4 4!$c !5 !	$ rn   c                R   g d}g d}dddddd}||z   D ci c]  }|g  }}|d	   gt        |d
         z  |d<   dgt        |d
         z  |d<   dgt        |d
         z  |d<   |d
   D ]  }	|d
   |	   }
|d	   j                  |
d	          |d   j                  ||
d             |d   j                  |d   r|
d	   |d   k(  r|rdndnd       |d   j                  |
d   r|
d   nd       |d   j                  |
d          |d   j                  |
d          |d   j                  d|
j                         v r|
d   rdj                  |
d         nd       |d   j                  |
d   rdnd       |d   j                  |
d   rdnd       |d   j                  |
d          |d   j                  |
d   r|
d   nd       |d   j                  |
d          |d   j                  |
d           	 t        j                  |      }|S c c}w # t        $ r}|d }~ww xY w)!zH
        Returns DataDictionary pandas DataFrame for an entity.
        r  r  rs   primary_key_type)coding_nameconceptr  rT   is_multi_selectis_sparse_codinglinkoutlongitudinal_axis_typer  r  r  unitsr_  r  datedatetimerK  )r_  rf  r  r  rK  r  r   r  r   r  r  rs   r  primary_keygloballocalr  r  r  rT   z > r  yesr  r  r  r  r  N)r   r   rw   r   r   	DataFramer  )r  r  r  r]  required_columns
extra_colsdataset_datatype_dictcoldcolsr   
field_dictdframerx  s                rY   r  z(DXDatasetDictionary.create_entity_dframe  s    J

 !"!
 %5z$ABSbBB!&>*S1A-BBh,.4#fX6F2G+G'(!#s6(+;'< <nH% %	7E)%0J&M  F!34&M  !6z&7I!JK$%,, =)"6*f].CC /G
  - ''-7-F
=)B )##Jy$9:- ''
=(AB- ''!Z__%66:m;T 

:m45
 #$++#$56B $%,,#$67R )##Jy$9:*+2267 34
 'N!!*W"56'N!!*W"56K%	7N	\\%(F c C\  	I	s   
H:H 	H&H!!H&c                   t        j                         }|d   D ]  }|d   |   d   }dj                  |d   |d   |d         }|d   |f||<   |d   r?t        j	                  |d         r'dj                  |d   |d   |d         }|d   |f||<   x|d   s~|d	   st
        j	                  |d         sd
j                  |d	   j                  dd      j                         |d         }|d   |f||<    |S )z
        Returns map with "database$table$column", "unique_database$table$column",
        as keys and values are (entity, field)
        r   r-  z{}${}${}database_namer  r  r  database_unique_namedatabase_idz{}__{}r   r   )r  r  rN   database_unique_name_regexmatchdatabase_id_regexr   lower)r  r  r  r   field_valuedb_tb_col_pathunique_db_tb_col_pathunique_db_names           rY   r  z5DXDatasetDictionary.get_join_path_to_entity_field_map  s[   
 %0$;$;$=!H% 	TE *51)<K'..O,G$H%N
 :@8O%n5127Q7W7W238 )3(9(9 67())% 6ND)*?@
 O,.%++K,HI!).66sC@FFH0" >DF^U<S).9=	T> )(rn   c                    t        j                         }|d   d   d   }|d   d   d   }||   \  |d<   |d<   ||   \  |d<   |d<   |d	   |d	<   |S )
zL
        Convert an item join_info to an edge. Returns ordereddict.
        r  r   r  r  r  r  r  r  r  )r  r  )r  join_info_joinsr  r  	column_tocolumn_froms         rY   r  zDXDatasetDictionary.create_edge  s     &&(#G,Q/5	%g.q1&96O7
3_tN3 &k2	
%&$%.~>^rn   c                   t        j                         }|j                  d   D ]`  }|j                  d   |   d   D ]F  }|j                  d   |   d   |   d   }|s!||vs&| j                  |j                  |||      ||<   H b |S )z=
        Processes coding dictionary from descriptor
        r   r   r  )r  r  r   create_coding_name_dframe)r  rq  cblocksr  r   coding_name_values         rY   r  z*DXDatasetDictionary.load_coding_dictionary#  s     ))+ &&z2 	F#))*5f=hG $.$4$4Z$@$H$R%%!! %):')I151O1O"((&%9J2G-.	 rn   c                H   i }|d   |   d   |   d   rd}fdt         |d   |   d   d|       \  }}}|j                  |||D 	cg c]  }	|d   |   d	   |	    c}	|D 	cg c]8  }	|d   |   d
   r)|	|d   |   d
   j                         v r|d   |   d
   |	   nd: c}	|d       nt        |d   |   d	   j                          \  }
}|d   |   d
   r#t        |d   |   d
   j                          \  }
}ndgt	        |
      z  }|
D 	cg c](  }	t        |d   |   d   j                  |	      dz         * }}	|j                  |
|||d       |gt	        |d         z  |d<   	 t        j                  |      }|S c c}	w c c}	w c c}	w # t        $ r}|d}~ww xY w)zN
        Returns CodingDictionary pandas DataFrame for a coding_name.
        r   r   is_hierarchicalr   c              3     K   | D ]j  }t        |t              rLt        t        |j	                                     \  }}|dz  }|||f  |||      D ]  \  }}}|||f  _|dz  }|||f l yw)z~Serialize the node hierarchy by depth-first traversal.

                Yields: tuples of (code, parent_code)
                r:   N)
isinstancedictnextiterr   )	nodesparent_code	displ_ordnodenext_parent_codechild_nodes	deep_nodedeep_parentunpack_hierarchys	           rY   r(  zGDXDatasetDictionary.create_coding_name_dframe.<locals>.unpack_hierarchy;  s     
 " =D!$-8<T$**,=O8P5(+!Q	.YFFAQ')99B F=I{I $-k9"EEF "Q	#[)<<=s   A1A4codingsdisplayr   codes_to_meaningscodes_to_conceptsN)coder!  meaningr  display_orderr:   )r-  r.  r  r/  r-  r  )
zipr  rw   r   r   r   r  r   r  r  )r  r   r  r   r  r  r"  	all_codesparentsccodesmeaningsconceptsr/  r  rx  r(  s                   @rY   r  z-DXDatasetDictionary.create_coding_name_dframe3  sg    V$X.u56GHI=( -0!)$%67	BB	-)Iw	
 LL%#* "+  i():;<OPQRS  "+   "),->?@ST !$Y/0AB 3 "df!% i():;<OPQRS ""  &/'4 "y!"345HIOOQOE8 Y 123FG"%9%&789LMSSU#x !6CJ.  E)$%67	BHHKaOPM  LL!''%2	 !22Sv5GGm	\\%(F g  6"  	I	s*   
F%=F-F+F 	F!FF!c           
     D   t        j                         }|j                  d   D ]y  }|j                  d   |   }t        j                  j                  ||j                  d      |j                  d      |j                  d      |j                  d      dg      ||<   { |S )z=
        Processes entity dictionary from descriptor
        r   r  entity_label_singularentity_label_pluralentity_description)r  r  r8  r9  r:  )r  r  r   r   r  	from_dictr   )r  rq  r  rr  r  s        rY   r  z*DXDatasetDictionary.load_entity_dictionary  s     (335%++J7 	K%%j1+>F-/\\-C-C #.(.

>(B17<S1T/5zz:O/P.4jj9M.N
.k*	 ! rn   c                t  
 t        |ddd      }d 

fd}| j                  r) || j                  g d      } |j                  |fi | | j                  r) || j                  g d	      } |j                  |fi | | j                  r* || j                  d
dg      }	 |	j                  |fi | yy)zI
        Create CSV files with the contents of the dictionaries.
        TFr   )r   headerr  na_repc                    | j                   j                  |      }t        |      |j                         j	                         z   }| j
                  dd|f   S )zHSort dataframe columns alphabetically but with `required_columns` first.N)r  rp  r5  sort_valuestolistloc)r  r  r  sorted_colss       rY   sort_dataframe_columnsz9DXDatasetDictionary.write.<locals>.sort_dataframe_columns  sL    223CDJ/0:3I3I3K3R3R3TTK::an--rn   c                    t         j                  | j                         D cg c]  }| c}d      } ||      S c c}w )z(Join all blocks into a pandas DataFrame.F)r  )r   concatrl  )ord_dict_of_dfr  rc  dfrD  s       rY   as_dataframez/DXDatasetDictionary.write.<locals>.as_dataframe  s=    ~'<'<'>?!A?eLB)".>?? @s   	>r  )r  )r  r-  r.  r  r  N)r  r  to_csvr  r  )r  r   r   r   r   csv_optsrI  data_dframecoding_dframeentity_dframerD  s             @rY   r   zDXDatasetDictionary.write  s     	
	.	@
 &$$!OK K/<8<!!(&&!CM !M  !3@x@!!(&&(N9SM !M  !3@x@	 "rn   N)r   r   r   r}   )rY  rZ  r[  r  r  r  r  r  r  r  r  r  r   r  rn   rY   r  r  Y  sC    I
3jL\%)N" Xt!* VY*Arn   r  )T)z#  )rV   r  r  rR   r  rR   returnr5  )r  )
__future__r   r   r   r   r   r   r  r  r   rH  r  rO   r   
subprocess	functoolsr   utils.printingr
   r  r   bindings.dxdataobject_functionsr   r   bindings.dxfiler   utils.resolverr   r   r   r   r   utils.file_handler   utils.describer   
exceptionsr   r   r   r   r   "dx_extract_utils.filter_to_payloadr   r   dx_extract_utils.germline_utilsr   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   !dx_extract_utils.input_validationr)   )dx_extract_utils.input_validation_somaticr*   'dx_extract_utils.somatic_filter_payloadr+   &dx_extract_utils.cohort_filter_payloadr,   r-   bindings.apollo.datasetr.   *bindings.apollo.cmd_line_options_validatorr/   )bindings.apollo.json_validation_by_schemar0   :bindings.apollo.schemas.input_arguments_validation_schemasr1   4bindings.apollo.schemas.assay_filtering_json_schemasr2   2bindings.apollo.schemas.assay_filtering_conditionsr3   r4   r5   2bindings.apollo.vizserver_filters_from_json_parserr6   )bindings.apollo.vizserver_payload_builderr7   bindings.apollo.vizclientr8   $bindings.apollo.data_transformationsr9   output_handlingr;   r<   help_messagesr=   r>   compiler	  r  rZ   ri   rm   rp   r{   r   r   r  r   r&  rB  rN  rT  r  r   r   rR   QUOTE_MINIMALr   r  rN  rV  rQ   rX  r|  r  r  r  r   r  r  r  rn   rY   <module>rn     s  ( ` _ 
   	 	 
     !  A $ q q ) '  N    E O K ` - F E s g g  g U I 1 Q G n'RZZ(@A BJJ34 99x
:A*|B~0 0f2'%RLd^v
	SGT
08 3xDgROh
*.	Y 	;)|&
 Vr-0 -0`) )@rA rArn   