diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb index 0ac866ad58a0bc8aaaeaa8c3984910dde79db73a..7fb7a8b39a791c5f06022bb2f3c8f883f826135c 100644 --- a/ProcessFASTQ.ipynb +++ b/ProcessFASTQ.ipynb @@ -671,54 +671,6 @@ " " ] }, - { - "cell_type": "code", - "execution_count": 279, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "@PG\tID:Bowtie\tVN:1.1.2\tCL:\"bowtie --wrapper basic-0 -S -v 3 -p 8 --time --best ref/2-Indexes/Yeast-Noncoding/Yeast-Noncoding 3-Filtered/testing.fastq 4-Bowtied/testing.sam\"\n", - "SN279:493:C84L3ACXX:4:2309:1272:2215\t4\t*\t0\t0\t*\t*\t0\t0\tNGAGGACCGAACCGACTTACGTT\t#1=DDFFFGGHHHIIJIJIJJFH\tXM:i:0\n", - "SN279:493:C84L3ACXX:4:2309:1275:2239\t4\t*\t0\t0\t*\t*\t0\t0\tTCAGGTTACCCGCGACAGGACGGAA\tCC@DFDEDHHDFHEEFIIIIGIE:@\tXM:i:0\n", - "SN279:493:C84L3ACXX:4:2309:2182:2231\t4\t*\t0\t0\t*\t*\t0\t0\tAAGATCCCTCGAAGATGACGAGGT\tCCCFFD>DDCDDFH@AFHIJIGE?\tXM:i:0\n", - "SN279:493:C84L3ACXX:4:2309:1652:2216\t0\tRDN25-1\t2545\t255\t24M\t*\t0\t0\tNCACGTTCTAGCATTCAAGGTCCC\t#1=BDDDDFFDBBGHGEEIEHIII\tXA:i:1\tMD:Z:0C23\tNM:i:1\n", - "SN279:493:C84L3ACXX:4:2309:2260:2244\t0\tRDN58-1\t131\t255\t28M\t*\t0\t0\tAGGGGGCATGCCTGTTTGAGCGTCATTT\tCCCFFFFFHHHHHIGIJJIJJJJJJIJJ\tXA:i:0\tMD:Z:28\tNM:i:0\n", - "\n", - "\n", - "\n", - "\n", - "@SN279:493:C84L3ACXX:4:2309:1272:2215 1:N:0:CAGATC\n", - "NGAGGACCGAACCGACTTACGTT\n", - "+\n", - "#1=DDFFFGGHHHIIJIJIJJFH\n", - "@SN279:493:C84L3ACXX:4:2309:1309:2236 1:N:0:CAGATC\n", - "CTTTCTTTACTTATTCAATGAAGCGGA\n", - "+\n", - "CCCFFDFFHHHHHJJJJJJJJJJJJJJ\n", - "@SN279:493:C84L3ACXX:4:2309:1275:2239 1:N:0:CAGATC\n", - "TCAGGTTACCCGCGACAGGACGGAA\n" - ] - } - ], - "source": [ - "%%bash \n", - "source ./source\n", - "\n", - "sed -n 415,420p 4-Bowtied/$FILENAME.sam\n", - "\n", - "echo\n", - "echo\n", - "echo\n", - "echo\n", - "\n", - "head 3-Filtered/$FILENAME.fastq" - ] - }, { "cell_type": "code", "execution_count": 239, @@ -734,7 +686,7 @@ }, { "cell_type": "code", - "execution_count": 268, + "execution_count": 281, "metadata": { "collapsed": false }, @@ -791,7 +743,39 @@ " '0',\n", " 'TCAGGTTACCCGCGACAGGACGGAA',\n", " 'CC@DFDEDHHDFHEEFIIIIGIE:@',\n", - " 'XM:i:0\\n']\n" + " 'XM:i:0\\n']\n", + "\n", + "PRINT : \n", + "\n", + "SN279:493:C84L3ACXX:4:2309:1275:2239\n", + "SN279:493:C84L3ACXX:4:2309:1309:2236:\t4\n", + "\n", + "\n", + "ID: SN279:493:C84L3ACXX:4:2309:1275:2239\n", + "Name: SN279:493:C84L3ACXX:4:2309:1275:2239\n", + "Description: SN279:493:C84L3ACXX:4:2309:1275:2239 1:N:0:CAGATC\n", + "Number of features: 0\n", + "Per letter annotation for: phred_quality\n", + "Seq('TCAGGTTACCCGCGACAGGACGGAA', SingleLetterAlphabet())\n", + "['SN279:493:C84L3ACXX:4:2309:2182:2231',\n", + " '4',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " '*',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " 'AAGATCCCTCGAAGATGACGAGGT',\n", + " 'CCCFFD>DDCDDFH@AFHIJIGE?',\n", + " 'XM:i:0\\n']\n", + "\n", + "PRINT : \n", + "\n", + "SN279:493:C84L3ACXX:4:2309:2182:2231\n", + "SN279:493:C84L3ACXX:4:2309:1275:2239:\t4\n", + "\n", + "\n" ] } ], @@ -824,12 +808,13 @@ " arr = matches.readline().split('\\t')\n", " pprint(arr)\n", " i+=1\n", - " if(i>3): break\n", + " \n", " seq_id = arr[0]\n", " flag = arr[1]\n", " if(flag=='4'):\n", " print(\"\\nPRINT : \\n\\n{0}\\n{1}:\\t{2}\\n\\n\".format(seq_id,record.id,flag))\n", " seqiter.append(record)\n", + " if(i>4): break\n", " \n", " #print(\"{0}\\n{1}:\\t{2}\".format(seq_id,record.id,flag,))\n", " \n",