From 96e5becccb520b684d17ec717411b4850ecb6b8f Mon Sep 17 00:00:00 2001
From: Julien Cornut <julien.cornut@hesge.ch>
Date: Thu, 25 Feb 2016 08:15:01 +0100
Subject: [PATCH] Automatic Update

---
 ProcessFASTQ.ipynb | 196 +++++++++------------------------------------
 source             |   2 +-
 2 files changed, 39 insertions(+), 159 deletions(-)

diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb
index c28f999..a9e3ecb 100644
--- a/ProcessFASTQ.ipynb
+++ b/ProcessFASTQ.ipynb
@@ -41,7 +41,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 89,
    "metadata": {
     "collapsed": false
    },
@@ -95,7 +95,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 90,
    "metadata": {
     "collapsed": false
    },
@@ -145,10 +145,10 @@
     "# fname = \"flowcell362_lane4_pair1_ACAGTG\"\n",
     "# fname = \"flowcell362_lane4_pair1_ACTTGA\"\n",
     "# fname = \"flowcell362_lane4_pair1_CAGATC\"\n",
-    "fname = \"flowcell362_lane4_pair1_TGACCA\"\n",
+    "# fname = \"flowcell362_lane4_pair1_TGACCA\"\n",
     "# fname = \"flowcell362_lane4_pair1_Undetermined\"\n",
     "# fname = \"flowcell384_lane7_pair1_ACAGTG\"\n",
-    "# fname = \"flowcell384_lane7_pair1_ACTTGA\"\n",
+    "fname = \"flowcell384_lane7_pair1_ACTTGA\"\n",
     "# fname = \"flowcell384_lane7_pair1_CAGATC\"\n",
     "# fname = \"flowcell384_lane7_pair1_GATCAG\"\n",
     "# fname = \"flowcell384_lane7_pair1_TGACCA\"\n",
@@ -157,12 +157,12 @@
     "\n",
     "print(\"\\nAvailable files :\\n\")\n",
     "\n",
-    "fn = {n.split('.')[0] for n in listdir(\"0-Raws/\")}\n",
-    "wn = {n for n in listdir(\"7-WIGs/\")}\n",
+    "fn = {n.split('.')[0] for n in listdir(\"0-Raws/\")} # Basename\n",
+    "wn = {n               for n in listdir(\"7-WIGs/\")} # P and M files\n",
     "\n",
-    "lst = (n+\" \\t\\t Processsed\"              \\\n",
+    "lst = (n+\" \\t\\t Processsed\"                 \\\n",
     "       if (n+\".P.wig\" and n+\".M.wig\") in wn \\\n",
-    "       else n \\\n",
+    "       else n                               \\\n",
     "       for n in sorted(fn))\n",
     "\n",
     "for l in lst:\n",
@@ -208,7 +208,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -231,22 +231,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'flowcell362_lane4_pair1_TGACCA.fastq.gz'"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Copy file from 0-Raws to root dir of notebook to work safer\n",
     "copyfile(\"0-Raws/\"+fname+\".fastq.gz\", fname+\".fastq.gz\")"
@@ -266,25 +255,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "flowcell362_lane4_pair1_TGACCA\n",
-      "\n",
-      "@SN279:493:C84L3ACXX:4:2309:1453:2230 1:N:0:TGACCA\n",
-      "AACGGATAAAAGCTACCCCGGGGATAACTGTAGGCACCATCAGTAGATCGG\n",
-      "+\n",
-      "CC@FFFFDHHHBHGGHIIIFHHGGGGGGIGFGIJJJJEHIJIJFFHIGGHH\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%bash\n",
     "source ./source\n",
@@ -303,19 +278,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing will take approximatively 0 hour(s) 17 minute(s) and 30 second(s)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Get file size in bytes\n",
     "fsize = path.getsize(fname+\".fastq.gz\")\n",
@@ -358,7 +325,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -370,21 +337,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "This is cutadapt 1.9.1 with Python 3.5.1\n",
-      "Command line parameters: -a CTGTAGGCACCATCAATAGATCGGAA -o 1-Cutadapted/flowcell362_lane4_pair1_TGACCA.fastq.gz --quiet flowcell362_lane4_pair1_TGACCA.fastq.gz\n",
-      "Trimming 1 adapter with at most 10.0% errors in single-end mode ...\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%bash\n",
     "source ./source\n",
@@ -402,19 +359,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Cutadapt run time : 0:17:45.886909\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Store current time\n",
     "after = datetime.datetime.now()\n",
@@ -427,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -448,7 +397,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -460,7 +409,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -475,19 +424,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Zcat run time : 0:00:28.624817\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Store current time\n",
     "after = datetime.datetime.now()\n",
@@ -513,7 +454,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -525,7 +466,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": null,
    "metadata": {
     "collapsed": true
    },
@@ -550,19 +491,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Filtering run time : 0:15:06.447127\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Store current time\n",
     "after = datetime.datetime.now()\n",
@@ -598,28 +531,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": null,
    "metadata": {
     "collapsed": false,
     "scrolled": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Time loading forward index: 00:00:00\n",
-      "Time loading mirror index: 00:00:00\n",
-      "End-to-end 2/3-mismatch full-index search: 00:03:08\n",
-      "# reads processed: 20265615\n",
-      "# reads with at least one reported alignment: 6973259 (34.41%)\n",
-      "# reads that failed to align: 13292356 (65.59%)\n",
-      "Reported 6973259 alignments to 1 output stream(s)\n",
-      "Time searching: 00:03:08\n",
-      "Overall time: 00:03:08\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%bash\n",
     "source ./source\n",
@@ -668,7 +585,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
@@ -681,24 +598,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "@SQ\tSN:tW(UCA)Q\tLN:74\n",
-      "\n",
-      "@SQ\tSN:tY(GUA)Q\tLN:84\n",
-      "\n",
-      "@PG\tID:Bowtie\tVN:1.1.2\tCL:\"bowtie --wrapper basic-0 -S -v 3 -p 8 --time --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/flowcell362_lane4_pair1_TGACCA.fastq 4-Bowtied/flowcell362_lane4_pair1_TGACCA.sam\"\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "with open(\"3-Filtered/\"     +fname+\".fastq\",\"r\") as filtered, \\\n",
     "     open(\"4-Bowtied/\"      +fname+\".sam\",\"r\")   as matches,  \\\n",
@@ -726,19 +630,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Filtering non-codant tRNA run time : 0:13:33.672128\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Store current time\n",
     "after = datetime.datetime.now()\n",
@@ -760,27 +656,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": null,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Time loading forward index: 00:00:00\n",
-      "Time loading mirror index: 00:00:00\n",
-      "End-to-end 2/3-mismatch full-index search: 00:02:28\n",
-      "# reads processed: 13292356\n",
-      "# reads with at least one reported alignment: 4749170 (35.73%)\n",
-      "# reads that failed to align: 8543186 (64.27%)\n",
-      "Reported 4749170 alignments to 1 output stream(s)\n",
-      "Time searching: 00:02:28\n",
-      "Overall time: 00:02:28\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%bash\n",
     "source ./source\n",
diff --git a/source b/source
index bfa3916..f2ac438 100644
--- a/source
+++ b/source
@@ -1 +1 @@
-export FILENAME=flowcell384_lane7_pair1_ACAGTG
\ No newline at end of file
+export FILENAME=flowcell384_lane7_pair1_ACTTGA
\ No newline at end of file
-- 
GitLab