diff --git a/ProcessFASTQ.ipynb b/ProcessFASTQ.ipynb index 00480dfdc2bbd81f7ace527108509c519ce0af3e..9a9f23274feef42fdc90ccf94c10c45a283e34fb 100644 --- a/ProcessFASTQ.ipynb +++ b/ProcessFASTQ.ipynb @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 216, + "execution_count": 263, "metadata": { "collapsed": false }, @@ -120,7 +120,7 @@ "flowcell384_lane7_pair1_CAGATC \t\t Processsed\n", "flowcell384_lane7_pair1_GATCAG \t\t Processsed\n", "flowcell384_lane7_pair1_TGACCA \t\t Processsed\n", - "testing\n" + "testing \t\t Processsed\n" ] } ], @@ -686,7 +686,7 @@ }, { "cell_type": "code", - "execution_count": 262, + "execution_count": 266, "metadata": { "collapsed": false }, @@ -706,18 +706,39 @@ "Description: SN279:493:C84L3ACXX:4:2309:1272:2215 1:N:0:CAGATC\n", "Number of features: 0\n", "Per letter annotation for: phred_quality\n", - "Seq('NGAGGACCGAACCGACTTACGTT', SingleLetterAlphabet())\n" - ] - }, - { - "ename": "NameError", - "evalue": "name 'pprint' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m<ipython-input-262-737603416198>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 22\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrecord\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 23\u001b[0m \u001b[0marr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmatches\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'\\t'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 24\u001b[1;33m \u001b[0mpprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 25\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 26\u001b[0m \u001b[0mseq_id\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mNameError\u001b[0m: name 'pprint' is not defined" + "Seq('NGAGGACCGAACCGACTTACGTT', SingleLetterAlphabet())\n", + "['SN279:493:C84L3ACXX:4:2309:1272:2215',\n", + " '4',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " '*',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " 'NGAGGACCGAACCGACTTACGTT',\n", + " '#1=DDFFFGGHHHIIJIJIJJFH',\n", + " 'XM:i:0\\n']\n", + "PRINT : SN279:493:C84L3ACXX:4:2309:1272:2215\n", + "SN279:493:C84L3ACXX:4:2309:1272:2215:\t4\n", + "ID: SN279:493:C84L3ACXX:4:2309:1309:2236\n", + "Name: SN279:493:C84L3ACXX:4:2309:1309:2236\n", + "Description: SN279:493:C84L3ACXX:4:2309:1309:2236 1:N:0:CAGATC\n", + "Number of features: 0\n", + "Per letter annotation for: phred_quality\n", + "Seq('CTTTCTTTACTTATTCAATGAAGCGGA', SingleLetterAlphabet())\n", + "['SN279:493:C84L3ACXX:4:2309:1275:2239',\n", + " '4',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " '*',\n", + " '*',\n", + " '0',\n", + " '0',\n", + " 'TCAGGTTACCCGCGACAGGACGGAA',\n", + " 'CC@DFDEDHHDFHEEFIIIIGIE:@',\n", + " 'XM:i:0\\n']\n" ] } ], @@ -743,16 +764,18 @@ " # if matches.readline().split('\\t')[1] == '4')\n", " \n", " seqiter = []\n", + " i=2\n", " \n", " for record in filt_iter:\n", " print(record)\n", " arr = matches.readline().split('\\t')\n", " pprint(arr)\n", - " break\n", + " i+=1\n", + " if(i>3): break\n", " seq_id = arr[0]\n", " flag = arr[1]\n", " if(flag=='4'):\n", - " print(\"{0}\\n{1}:\\t{2}\".format(seq_id,record.id,flag))\n", + " print(\"PRINT : {0}\\n{1}:\\t{2}\".format(seq_id,record.id,flag))\n", " seqiter.append(record)\n", " \n", " #print(\"{0}\\n{1}:\\t{2}\".format(seq_id,record.id,flag,))\n",