From 96e5becccb520b684d17ec717411b4850ecb6b8f Mon Sep 17 00:00:00 2001 From: Julien Cornut <julien.cornut@hesge.ch> Date: Thu, 25 Feb 2016 08:15:01 +0100 Subject: [PATCH] Automatic Update --- ProcessFASTQ.ipynb | 196 +++++++++------------------------------------ source | 2 +- 2 files changed, 39 insertions(+), 159 deletions(-) diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb index c28f999..a9e3ecb 100644 --- a/ProcessFASTQ.ipynb +++ b/ProcessFASTQ.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 89, "metadata": { "collapsed": false }, @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 90, "metadata": { "collapsed": false }, @@ -145,10 +145,10 @@ "# fname = \"flowcell362_lane4_pair1_ACAGTG\"\n", "# fname = \"flowcell362_lane4_pair1_ACTTGA\"\n", "# fname = \"flowcell362_lane4_pair1_CAGATC\"\n", - "fname = \"flowcell362_lane4_pair1_TGACCA\"\n", + "# fname = \"flowcell362_lane4_pair1_TGACCA\"\n", "# fname = \"flowcell362_lane4_pair1_Undetermined\"\n", "# fname = \"flowcell384_lane7_pair1_ACAGTG\"\n", - "# fname = \"flowcell384_lane7_pair1_ACTTGA\"\n", + "fname = \"flowcell384_lane7_pair1_ACTTGA\"\n", "# fname = \"flowcell384_lane7_pair1_CAGATC\"\n", "# fname = \"flowcell384_lane7_pair1_GATCAG\"\n", "# fname = \"flowcell384_lane7_pair1_TGACCA\"\n", @@ -157,12 +157,12 @@ "\n", "print(\"\\nAvailable files :\\n\")\n", "\n", - "fn = {n.split('.')[0] for n in listdir(\"0-Raws/\")}\n", - "wn = {n for n in listdir(\"7-WIGs/\")}\n", + "fn = {n.split('.')[0] for n in listdir(\"0-Raws/\")} # Basename\n", + "wn = {n for n in listdir(\"7-WIGs/\")} # P and M files\n", "\n", - "lst = (n+\" \\t\\t Processsed\" \\\n", + "lst = (n+\" \\t\\t Processsed\" \\\n", " if (n+\".P.wig\" and n+\".M.wig\") in wn \\\n", - " else n \\\n", + " else n \\\n", " for n in sorted(fn))\n", "\n", "for l in lst:\n", @@ -208,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "collapsed": false }, @@ -231,22 +231,11 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "'flowcell362_lane4_pair1_TGACCA.fastq.gz'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Copy file from 0-Raws to root dir of notebook to work safer\n", "copyfile(\"0-Raws/\"+fname+\".fastq.gz\", fname+\".fastq.gz\")" @@ -266,25 +255,11 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "flowcell362_lane4_pair1_TGACCA\n", - "\n", - "@SN279:493:C84L3ACXX:4:2309:1453:2230 1:N:0:TGACCA\n", - "AACGGATAAAAGCTACCCCGGGGATAACTGTAGGCACCATCAGTAGATCGG\n", - "+\n", - "CC@FFFFDHHHBHGGHIIIFHHGGGGGGIGFGIJJJJEHIJIJFFHIGGHH\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "source ./source\n", @@ -303,19 +278,11 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processing will take approximatively 0 hour(s) 17 minute(s) and 30 second(s)\n" - ] - } - ], + "outputs": [], "source": [ "# Get file size in bytes\n", "fsize = path.getsize(fname+\".fastq.gz\")\n", @@ -358,7 +325,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "collapsed": false }, @@ -370,21 +337,11 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This is cutadapt 1.9.1 with Python 3.5.1\n", - "Command line parameters: -a CTGTAGGCACCATCAATAGATCGGAA -o 1-Cutadapted/flowcell362_lane4_pair1_TGACCA.fastq.gz --quiet flowcell362_lane4_pair1_TGACCA.fastq.gz\n", - "Trimming 1 adapter with at most 10.0% errors in single-end mode ...\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "source ./source\n", @@ -402,19 +359,11 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Cutadapt run time : 0:17:45.886909\n" - ] - } - ], + "outputs": [], "source": [ "# Store current time\n", "after = datetime.datetime.now()\n", @@ -427,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": { "collapsed": true }, @@ -448,7 +397,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": { "collapsed": false }, @@ -460,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": { "collapsed": false }, @@ -475,19 +424,11 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Zcat run time : 0:00:28.624817\n" - ] - } - ], + "outputs": [], "source": [ "# Store current time\n", "after = datetime.datetime.now()\n", @@ -513,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": { "collapsed": false }, @@ -525,7 +466,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": { "collapsed": true }, @@ -550,19 +491,11 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Filtering run time : 0:15:06.447127\n" - ] - } - ], + "outputs": [], "source": [ "# Store current time\n", "after = datetime.datetime.now()\n", @@ -598,28 +531,12 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Time loading forward index: 00:00:00\n", - "Time loading mirror index: 00:00:00\n", - "End-to-end 2/3-mismatch full-index search: 00:03:08\n", - "# reads processed: 20265615\n", - "# reads with at least one reported alignment: 6973259 (34.41%)\n", - "# reads that failed to align: 13292356 (65.59%)\n", - "Reported 6973259 alignments to 1 output stream(s)\n", - "Time searching: 00:03:08\n", - "Overall time: 00:03:08\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "source ./source\n", @@ -668,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": { "collapsed": false }, @@ -681,24 +598,11 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "@SQ\tSN:tW(UCA)Q\tLN:74\n", - "\n", - "@SQ\tSN:tY(GUA)Q\tLN:84\n", - "\n", - "@PG\tID:Bowtie\tVN:1.1.2\tCL:\"bowtie --wrapper basic-0 -S -v 3 -p 8 --time --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/flowcell362_lane4_pair1_TGACCA.fastq 4-Bowtied/flowcell362_lane4_pair1_TGACCA.sam\"\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "with open(\"3-Filtered/\" +fname+\".fastq\",\"r\") as filtered, \\\n", " open(\"4-Bowtied/\" +fname+\".sam\",\"r\") as matches, \\\n", @@ -726,19 +630,11 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Filtering non-codant tRNA run time : 0:13:33.672128\n" - ] - } - ], + "outputs": [], "source": [ "# Store current time\n", "after = datetime.datetime.now()\n", @@ -760,27 +656,11 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Time loading forward index: 00:00:00\n", - "Time loading mirror index: 00:00:00\n", - "End-to-end 2/3-mismatch full-index search: 00:02:28\n", - "# reads processed: 13292356\n", - "# reads with at least one reported alignment: 4749170 (35.73%)\n", - "# reads that failed to align: 8543186 (64.27%)\n", - "Reported 4749170 alignments to 1 output stream(s)\n", - "Time searching: 00:02:28\n", - "Overall time: 00:02:28\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "source ./source\n", diff --git a/source b/source index bfa3916..f2ac438 100644 --- a/source +++ b/source @@ -1 +1 @@ -export FILENAME=flowcell384_lane7_pair1_ACAGTG \ No newline at end of file +export FILENAME=flowcell384_lane7_pair1_ACTTGA \ No newline at end of file -- GitLab