From 23768372ce6a4c6c36035b084f3bf8760970b297 Mon Sep 17 00:00:00 2001 From: manu8996 Date: Wed, 21 Nov 2018 05:20:26 +0530 Subject: [PATCH] Sentence tfidf calculated --- Text Summarisation.ipynb | 135 ++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 38 deletions(-) diff --git a/Text Summarisation.ipynb b/Text Summarisation.ipynb index 347eabe..bfe51a0 100644 --- a/Text Summarisation.ipynb +++ b/Text Summarisation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -80,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -463,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -482,7 +482,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -500,7 +500,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -519,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -536,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": { "scrolled": true }, @@ -557,7 +557,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -571,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": { "scrolled": false }, @@ -662,7 +662,7 @@ " 'actually, it is immaterial to my argument whether all the\\ncomplements hang from the same node or whether they, too, are introduced\\nby binary branching, like the adjuncts.']" ] }, - "execution_count": 19, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -673,7 +673,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -695,7 +695,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": { "scrolled": false }, @@ -811,7 +811,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1171,7 +1171,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1188,7 +1188,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1205,7 +1205,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1222,7 +1222,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1243,31 +1243,90 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 139, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "<1x443 sparse matrix of type ''\n", - "\twith 19 stored elements in Compressed Sparse Row format>" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] } ], "source": [ - "X[0]" + "print(type(X[35,:]))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 140, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", + " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n" + ] + } + ], + "source": [ + "print((X[35,:].toarray()[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.2133141289945213, 0.19268349217056602, 0.23567236548378917, 0.20918963054521372, 0.20414163476852237, 0.2142031418888816, 0.2953940191257623, 0.3190722211068253, 0.24766883854585858, 0.24359008885419822, 0.2691392393632223, 0.21792781559904986, 0.4409236974794452, 0.2003291117020829, 0.141093056522983, 0.3013860982971811, 0.2648058357549551, 0.154810820419545, 0.22094264628922314, 0.19597742106773774, 0.12235705255435296, 0.3420746571230864, 0.7068189089897328, 0.18711131675460949, 0.17564167393574218, 0.1600690831192614, 0.14707137788350813, 0.24855083679987244, 0.31842509242552686, 0.3622285702824559, 0.20744839403144236, 0.2915297848121885, 0.25615439420276653, 0.32492545860541067, 0.3372416580862453, 0, 0.21810916095415211, 0, 0.25201721754566336, 0.1168623335134955, 0.1850542899139658, 0.4340132904839581, 0.2558776048492524, 0, 0.23871620079008612, 0, 0.21505914004381577, 0.188702608551362, 0.2473621280450456, 0.17103202855623842, 0.30282806549952346, 0.447213595499958, 0.7071067811865475, 0.3616038758957919, 0.44575716620707667, 0.24363129254497012, 0.4977539498337, 0.3950854623715471, 0.4977539498337, 0.31133778533740636, 0.4977539498337, 0.4015513269236906, 0.5768478040843482, 0.5773502691896257, 0.39184457982347703, 0.3697810962297591, 0.43673030234484533, 0.5503728292507438, 0.7071067811865475, 0.3936975574505491, 0.4996381244664003, 0.3656381806345497, 0.7071067811865475, 0.37744343499577615, 0.7071067811865475, 0.21557143081821167, 0.5603565195423051, 0.41760955902435687, 0.19846903596500826, 0.22973631970082772, 0.19687072052499346]\n" + ] + } + ], + "source": [ + "sent_tfid = []\n", + "for i in range(X.shape[0]):\n", + " count = 0\n", + " sum_row = 0\n", + " #k = X[i,:].toarray()\n", + " for j in range(X.shape[1]):\n", + " if X[i,:].toarray()[0][j] != 0:\n", + " sum_row += X[i,:].toarray()[0][j]\n", + " count += 1\n", + " try: \n", + " div_ans = sum_row / count\n", + " except:\n", + " div_ans = 0\n", + " #print(i)\n", + " sent_tfid.append(div_ans)\n", + "print(sent_tfid)\n", + " \n", + " \n", + " " + ] } ], "metadata": {