APA QTL analysis

APA QTL analysis#

This notebook provides a single entry point for running multiple APA QTL calling tasks across different projects.

Aim#

*on cluster only (calling in the batch format)

This notebook is designed to make it easier to submit apa_calling tasks to the cluster. Because the workflow calls qsub, please run it only when you are on the cluster.

Prerequisite#

This notebook assumes you have already run the UTR_reference and bam2tools sections in apa_calling.ipynb.

You should have a folder structure like /tissue_name/batch_name/wig_files + flagstat_files

Input#

  • tissue_dir: the directory where your generated wig files and flagstat files are stored

  • batch: the name of batches

Output#

This notebook generates shell scripts and submits them for each batch in each tissue.
It creates a directory named calling_scripts under each batch directory.
The shell scripts and the log files are stored in that location.

# Generate the per-batch submission scripts.
# For every batch under the tissue directory this step creates
# <tissue_dir>/<batch>/calling_scripts/ and writes two SGE job scripts there:
#   config.sh - runs the APAconfig workflow of apa_calling.ipynb
#   dapars.sh - runs the APAmain workflow of apa_calling.ipynb
# Nothing is submitted here; the scripts are only written to disk.
# The step stops with a non-zero exit status when the tissue directory or a
# batch directory does not exist.
[apa_1]
task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = ncore
python3: expand = True
    import os
    import sys

    # NOTE: this script body is brace-interpolated by SoS, so no literal
    # braces may be used in the Python code or comments below.

    # Pieces of the sos command line shared by both generated job scripts.
    NOTEBOOK = "/mnt/mfs/statgen/ls3751/github/xqtl-protocol/code/molecular_phenotypes/calling/apa_calling.ipynb"
    CONTAINER_ARGS = " --container  /mnt/mfs/statgen/ls3751/container/dapars2_final.sif"
    CLUSTER_ARGS = " -c /home/ls3751/project/ls3751/csgg.yml"
    # chr1 ... chr22 followed by chrX and chrY, separated by single spaces.
    CHROMOSOMES = " ".join(["chr" + str(n) for n in range(1, 23)] + ["chrX", "chrY"])

    def write_job_script(path, vmem, workflow, workflow_args, log_path):
        # Write one SGE job script. The two scripts differ only in the memory
        # request, the workflow name, the workflow arguments and the log file.
        content = "".join([
            "#!/bin/sh\n",
            "#$ -l h_rt=400:00:00\n",
            "#$ -l h_vmem=" + vmem + "\n",
            "#$ -N job_submitter\n",
            "#$ -cwd\n",
            "#$ -S /bin/bash\n",
            "#$ -q csg.q\n",
            "\n",
            "export PATH=$HOME/miniconda3/bin:$PATH\n",
            "sos run " + NOTEBOOK + " " + workflow + workflow_args,
            CONTAINER_ARGS + CLUSTER_ARGS + " &> " + log_path,
        ])
        with open(path, "w") as handle:
            handle.write(content)

    tissue_dir = "{tissue_dir}"
    if not os.path.exists(tissue_dir):
        sys.exit("please check the tissue directory: " + tissue_dir)

    for batch_name in {batch}:
        batch_dir = tissue_dir.rstrip("/") + "/" + batch_name
        if not os.path.exists(batch_dir):
            sys.exit("please check the batch folder " + batch_name
                     + " under tissue directory " + tissue_dir)

        script_dir = batch_dir + "/calling_scripts"
        # exist_ok avoids failing when the directory is already there.
        os.makedirs(script_dir, exist_ok=True)

        # The jobs run with "-cwd", i.e. in whatever directory qsub is called
        # from. A bare "config.log" would therefore be shared (and overwritten)
        # by all batches, so each log is pinned to its own calling_scripts
        # directory instead.
        write_job_script(
            script_dir + "/config.sh",
            "4G",
            "APAconfig",
            " --cwd " + batch_dir + " --bfile " + batch_dir + " --annotation " + "{annotation}",
            script_dir + "/config.log",
        )
        write_job_script(
            script_dir + "/dapars.sh",
            "64G",
            "APAmain",
            " --cwd " + batch_dir + " --chrlist " + CHROMOSOMES,
            script_dir + "/calling.log",
        )
# Submit the task files to the cluster - config part.
# Calls qsub on <tissue_dir>/<batch>/calling_scripts/config.sh for every batch.
# qsub must be available, so this step only works on the cluster.
# Every batch is attempted; if any submission fails the step ends with a
# non-zero exit status and lists the scripts that could not be submitted.
[apa_2]
task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = ncore
python3: expand = True
    import subprocess
    import sys

    # NOTE: this script body is brace-interpolated by SoS, so no literal
    # braces may be used in the Python code or comments below.

    # Drop a trailing slash so the joined path has no doubled separator.
    tissue_dir = "{tissue_dir}".rstrip("/")

    failed = []
    for batch_name in {batch}:
        script = tissue_dir + "/" + batch_name + "/calling_scripts/config.sh"
        # An argument list (no shell) passes the path to qsub verbatim, and
        # the return code tells us whether the submission was accepted.
        result = subprocess.run(["qsub", script])
        if result.returncode != 0:
            failed.append(script)

    if failed:
        sys.exit("qsub failed for: " + ", ".join(failed))
# Submit the task files to the cluster - dapars part.
# Calls qsub on <tissue_dir>/<batch>/calling_scripts/dapars.sh for every batch.
# qsub must be available, so this step only works on the cluster.
# Every batch is attempted; if any submission fails the step ends with a
# non-zero exit status and lists the scripts that could not be submitted.
[apa_3]
task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = ncore
python3: expand = True
    import subprocess
    import sys

    # NOTE: this script body is brace-interpolated by SoS, so no literal
    # braces may be used in the Python code or comments below.

    # Drop a trailing slash so the joined path has no doubled separator.
    tissue_dir = "{tissue_dir}".rstrip("/")

    failed = []
    for batch_name in {batch}:
        script = tissue_dir + "/" + batch_name + "/calling_scripts/dapars.sh"
        # An argument list (no shell) passes the path to qsub verbatim, and
        # the return code tells us whether the submission was accepted.
        result = subprocess.run(["qsub", script])
        if result.returncode != 0:
            failed.append(script)

    if failed:
        sys.exit("qsub failed for: " + ", ".join(failed))