From 6162a8dd3eb4f1f2b0343f68d00d96b32900851e Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 5 Nov 2015 20:20:53 -0800
Subject: [PATCH 01/16] ga_multi: use spacing/variance fitness terms; simplify extended JSON output

---
 OptionSelect/ga_multi.py | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index d4e8493..31f82da 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 """
 Created on Thu Aug 13 13:01:21 2015
-
 @author: Calvin
 """
 
@@ -16,11 +15,11 @@
 import json
 import os
 
-os.chdir('/Users/Dalton/Documents/Projects/BundledOptionsExp/Collection/OptionSelect')
+os.chdir('/Users/Calvin/Desktop/')
 
 # Define the location of the csv file with modeled preferences, should make relative
 # Three col CSV (Item-Code, Option-Type, Value)
-csv_filepath='rank3306.csv'
+csv_filepath='rank9999.csv'
 
 
 #%% Magic Numbers
@@ -28,7 +27,7 @@
 #cxpb- probability of a cross over occuring in one chromosome of a mating pair
 #mutpb- probability of at each nucleotide of a mutation
 #number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,80,2000, 0.1, 0.05
+nepochs, ngen, npop, cxpb, mutpb =2,200,2000, 0.1, 0.05
     
 HOFsize=1
 
@@ -61,14 +60,17 @@ def evalFit(individual):
     #####similarityCost=np.sum(np.in1d(individual[0][0],[ bundleLookup[k] for k in individual[0][1] ]))
     similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
     similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    #similarityCost=   np.sum([np.sum(c)>1 for c in [np.in1d(k,x) for k in y]])
     #x is singelton, y is array of tuples of constituent items
     ######similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,individual[0][0]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    rangeCost=(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/125
-    uniformCost=1/(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
+    rangeCost=20*(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/50
+    #uniformCost=-(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
     #uniformCost=(ks_2samp(indiv[0], uni)[1]+ks_2samp(indiv[1], uni)[1]+ks_2samp(indiv[2], uni)[1])    
-    distanceCost=(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    cost=20*rangeCost+30*uniformCost+10*distanceCost+similarityCost+similarity2   
+    #distanceCost=10*(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
+    distanceCost = 0
+    #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),3).sum()
+    spacingCost = 4*(np.mean(np.diff(indiv[0]))+np.mean(np.diff(indiv[1]))+np.mean(np.diff(indiv[2])))
+    varCost = -(np.var(np.diff(indiv[0]))+np.var(np.diff(indiv[1]))+np.var(np.diff(indiv[2])))
+    cost=rangeCost+distanceCost+spacingCost+varCost+rangeCost+similarityCost+similarity2#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
     return (cost,)
 
 def getSims(individual):
@@ -275,21 +277,10 @@ def main_program(pop):
     with open('jsonOut.txt', 'w') as outfile:
         outfile.write(str(outputData))
 
-    extended = np.unique(np.hstack((np.ravel([bundleLookup[x] for x in bestIndividual[1]]), np.ravel([bundleLookup2[x] for x in bestIndividual[2]]), bestIndividual[0])))
-    outputDataFull = np.hstack((extended, bestIndividual[1],bestIndividual[2], medianUntransed))
-    outputDataFull = np.unique(outputDataFull)
-    outputDataFull = np.sort(outputDataFull)
-    transedFullData = []
-    for x in outputDataFull:
-        if x in singletonLookup.keys():
-            transedFullData.append(singletonLookup[x])
-        elif x in bundleLookup.keys():
-            transedFullData.append((bundleLookup[x],bundleLookup[x]))
-        elif x in bundleLookup2.keys():
-            transedFullData.append(bundleLookup2[x])
-        else:
-            raise ValueError('Custom error: item in outputData JSON was not in any value dictionary')
-    outputData = { 'options' : transedFullData}
+    extended = np.unique(np.hstack((np.ravel([bundleLookup[x] for x in bestIndividual[1]]), np.ravel([bundleLookup2[x] for x in bestIndividual[2]]), [singletonLookup[x] for x in bestIndividual[0]]))).tolist()
+    homoTransed = [(x,x) for x in homoTransed]
+    ouputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
+    outputData = { 'options' : ouputDataFull }
     outputData = json.dumps(outputData)
     with open('jsonOutExtended.txt', 'w') as outfile:
         outfile.write(str(outputData))

From 489162cdfa5f446596d4c3fb9243602389498050 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Tue, 10 Nov 2015 09:40:01 -0800
Subject: [PATCH 02/16] ga_multi: fix and invoke inputErrorCheck; take median at index 4

---
 OptionSelect/ga_multi.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 31f82da..acd7a6c 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -145,17 +145,15 @@ def custHallOfFame(population,maxaddsize):
 def inputErrorCheck(raw_data):
     if not raw_data[['item1', 'item2']].applymap(np.isreal).all().all():
         raise ValueError('Custom error, ask CL : Some item value is not a number')
-    if [raw_data['index']>60].any:
+    if (raw_data.index>=60).any():
         raise ValueError("Custom error, ask CL : An item index is > 60")
-    for bundleType in range(1,4):
-        if raw_data[raw_data['type']==bundleType].duplicated(subset=['item1', 'item2']).any():
-            print raw_data[raw_data['type']==bundleType].duplicated(subset=['item1', 'item2'])
-            raise ValueError('Custom error, ask CL : Some item value is duplicated')
-    
-    
+    if raw_data.duplicated(subset=['item1', 'item2']).any():
+        print raw_data[raw_data.duplicated(subset=['item1', 'item2'])]
+        raise ValueError('Custom error, ask CL : Some item value is duplicated')
 
 #%%==============import data from csv======================%%#
 raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=0)
+inputErrorCheck(raw_choice_dataset)
 
 valueDictionary={}
 for x in range(1,4):
@@ -266,9 +264,9 @@ def main_program(pop):
     bestIndividual = results[maxIndex]
     
     singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
-    median = singletonTransed[5]
-    medianUntransed = bestIndividual[0][5]
-    singletonTransed = np.delete(singletonTransed, 5).tolist()
+    median = singletonTransed[4]
+    medianUntransed = bestIndividual[0][4]
+    singletonTransed = np.delete(singletonTransed, 4).tolist()
     homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
     heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
     

From 070b646759f2121029a03e8a659dd6e646703bb7 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Tue, 10 Nov 2015 09:40:33 -0800
Subject: [PATCH 03/16] ga_multi: remove constant-zero distanceCost from evalFit

---
 OptionSelect/ga_multi.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index acd7a6c..47753c8 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -66,11 +66,10 @@ def evalFit(individual):
     #uniformCost=-(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
     #uniformCost=(ks_2samp(indiv[0], uni)[1]+ks_2samp(indiv[1], uni)[1]+ks_2samp(indiv[2], uni)[1])    
     #distanceCost=10*(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    distanceCost = 0
     #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),3).sum()
     spacingCost = 4*(np.mean(np.diff(indiv[0]))+np.mean(np.diff(indiv[1]))+np.mean(np.diff(indiv[2])))
     varCost = -(np.var(np.diff(indiv[0]))+np.var(np.diff(indiv[1]))+np.var(np.diff(indiv[2])))
-    cost=rangeCost+distanceCost+spacingCost+varCost+rangeCost+similarityCost+similarity2#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
+    cost=rangeCost+spacingCost+varCost+rangeCost+similarityCost+similarity2#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
     return (cost,)
 
 def getSims(individual):

From fb1e6b809659cf5ec72aacc239517b7c28e224a9 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Tue, 17 Nov 2015 08:22:45 -0800
Subject: [PATCH 04/16] ga_multi: pad spacing/variance with 0 and 60, sort output by rank, plot best individual

---
 OptionSelect/ga_multi.py | 68 +++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 47753c8..06c6613 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -6,20 +6,21 @@
 
 #%%==========imports and constants=================%%#
 import numpy as np
-import pandas as pd
+import pandas as pd  
 from deap import base, creator, tools
-import matplotlib.pyplot as plt
 from scipy.stats import kstest, ks_2samp
-import random, operator, seaborn
+import random, operator
 import multiprocessing as mp
 import json
 import os
+import matplotlib.pyplot as plt
+import seaborn as sns
 
 os.chdir('/Users/Calvin/Desktop/')
 
 # Define the location of the csv file with modeled preferences, should make relative
 # Three col CSV (Item-Code, Option-Type, Value)
-csv_filepath='rank9999.csv'
+csv_filepath=r'csvs\rank3301.csv'
 
 
 #%% Magic Numbers
@@ -27,13 +28,12 @@
 #cxpb- probability of a cross over occuring in one chromosome of a mating pair
 #mutpb- probability of at each nucleotide of a mutation
 #number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,200,2000, 0.1, 0.05
+nepochs, ngen, npop, cxpb, mutpb =1,100,2000, 0.1, 0.05
     
 HOFsize=1
 
 HallOfFame=[]
 
-SID='a03'
 n_single=20 #1 number of possibilities for singleton
 n_hetero=15 #2 number of possibilities for the heterogenous bundle
 n_homo=22 #3 number of possibilities for the homogeneous scaling
@@ -43,8 +43,8 @@
 chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
 
 #Define the seed for the random number generator for replication purposes
-random.seed(1)
-np.random.seed(1)
+#random.seed(1)
+#np.random.seed(1)
 
 #%%===========define fitness and functions=================%%#
 uni=np.random.uniform(0,60,500)
@@ -57,19 +57,14 @@ def evalFit(individual):
     DistanceCost- Uses KS divergence to indicate differences between distributions
     Cost currently is a simple weightable summation, might be changed to F score"""
     indiv=genoToPheno(individual)
-    #####similarityCost=np.sum(np.in1d(individual[0][0],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    #x is singelton, y is array of tuples of constituent items
-    ######similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,individual[0][0]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    rangeCost=20*(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/50
-    #uniformCost=-(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
-    #uniformCost=(ks_2samp(indiv[0], uni)[1]+ks_2samp(indiv[1], uni)[1]+ks_2samp(indiv[2], uni)[1])    
+    #similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
+    #similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
+    rangeCost=20*(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/50   
     #distanceCost=10*(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),3).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),3).sum()
-    spacingCost = 4*(np.mean(np.diff(indiv[0]))+np.mean(np.diff(indiv[1]))+np.mean(np.diff(indiv[2])))
-    varCost = -(np.var(np.diff(indiv[0]))+np.var(np.diff(indiv[1]))+np.var(np.diff(indiv[2])))
-    cost=rangeCost+spacingCost+varCost+rangeCost+similarityCost+similarity2#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
+    #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),2).sum()
+    spacingCost = 5*(np.mean(np.diff(np.hstack((0,indiv[0],60))))+np.mean(np.diff(np.hstack((0,indiv[1],60))))+np.mean(np.diff(np.hstack((0,indiv[2],60)))))
+    varCost = -3*(np.var(np.diff(np.hstack((0,indiv[0],60))))+np.var(np.diff(np.hstack((0,indiv[1],60))))+np.var(np.diff(np.hstack((0,indiv[2],60)))))
+    cost=rangeCost+spacingCost+varCost+rangeCost#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
     return (cost,)
 
 def getSims(individual):
@@ -149,9 +144,18 @@ def inputErrorCheck(raw_data):
     if raw_data.duplicated(subset=['item1', 'item2']).any():
         print raw_data[raw_data.duplicated(subset=['item1', 'item2'])]
         raise ValueError('Custom error, ask CL : Some item value is duplicated')
+    if raw_data[['item1', 'item2']].applymap(lambda x: x>30).any().any():
+        raise ValueError('Item number is greater than 30')
+        
+def getRank(item):
+    if type(item)==tuple:
+        return raw_choice_dataset.loc[(raw_choice_dataset['item1']==item[0]) & (raw_choice_dataset['item2']==item[1]),'rank'].values[0] 
+    else:
+        raise ValueError('Custom error: Some item is not a tuple in rank ordering')
 
 #%%==============import data from csv======================%%#
 raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=0)
+
 inputErrorCheck(raw_choice_dataset)
 
 valueDictionary={}
@@ -263,9 +267,9 @@ def main_program(pop):
     bestIndividual = results[maxIndex]
     
     singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
-    median = singletonTransed[4]
-    medianUntransed = bestIndividual[0][4]
-    singletonTransed = np.delete(singletonTransed, 4).tolist()
+    median = singletonTransed[5]
+    medianUntransed = bestIndividual[0][5]
+    singletonTransed = np.delete(singletonTransed, 5).tolist()
     homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
     heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
     
@@ -276,9 +280,21 @@ def main_program(pop):
 
     extended = np.unique(np.hstack((np.ravel([bundleLookup[x] for x in bestIndividual[1]]), np.ravel([bundleLookup2[x] for x in bestIndividual[2]]), [singletonLookup[x] for x in bestIndividual[0]]))).tolist()
     homoTransed = [(x,x) for x in homoTransed]
-    ouputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
-    outputData = { 'options' : ouputDataFull }
+    extended= [(x,0) for x in extended]
+    outputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
+    outputDataFull=sorted(outputDataFull, key = getRank)
+    outputData = { 'options' : outputDataFull }
     outputData = json.dumps(outputData)
     with open('jsonOutExtended.txt', 'w') as outfile:
         outfile.write(str(outputData))
-        
+    #outputDataFull=sorted(outputDataFull, key = getRank)
+    #outputDataFull=sorted(outputDataFull) 
+    plt.hold(True)
+    plt.title(csv_filepath)
+    sns.set_context(rc={"figure.figsize": (8, 4)})
+    plt.bar(np.asarray(bestIndividual[0]),np.ones((1,len(bestIndividual[0])))[0], color = 'blue')
+    plt.bar(np.asarray(bestIndividual[1]),np.ones((1,len(bestIndividual[1])))[0], color = 'red')
+    plt.bar(np.asarray(bestIndividual[2]),np.ones((1,len(bestIndividual[2])))[0], color = 'green')
+    individual=[bestIndividual]
+    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
+    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])

From 549a6def97ffe469e2274bfec7b2ff567447c150 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Tue, 17 Nov 2015 08:23:58 -0800
Subject: [PATCH 05/16] ga_multi: set nepochs to 2 and comment out the uni sample

---
 OptionSelect/ga_multi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 06c6613..61421fc 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -28,7 +28,7 @@
 #cxpb- probability of a cross over occuring in one chromosome of a mating pair
 #mutpb- probability of at each nucleotide of a mutation
 #number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =1,100,2000, 0.1, 0.05
+nepochs, ngen, npop, cxpb, mutpb =2,100,2000, 0.1, 0.05
     
 HOFsize=1
 
@@ -47,7 +47,7 @@
 #np.random.seed(1)
 
 #%%===========define fitness and functions=================%%#
-uni=np.random.uniform(0,60,500)
+#uni=np.random.uniform(0,60,500)
 
 def evalFit(individual): 
     """ A weighted total of fitness scores to be maximized

From b271417dd75a7eede7a7b7ceb7e129e1dd8d509c Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 20:38:24 -0800
Subject: [PATCH 06/16] ga_multi: build CSV path from SID read via input(); flatten singleton output entries

---
 OptionSelect/ga_multi.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 61421fc..1c7501c 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -16,11 +16,12 @@
 import matplotlib.pyplot as plt
 import seaborn as sns
 
-os.chdir('/Users/Calvin/Desktop/')
+#os.chdir('/Users/Calvin/Desktop/')
 
 # Define the location of the csv file with modeled preferences, should make relative
 # Three col CSV (Item-Code, Option-Type, Value)
-csv_filepath=r'csvs\rank3301.csv'
+inputSID = input('Please enter an SID')
+csv_filepath=r'rank' + inputSID+ '.csv'
 
 
 #%% Magic Numbers
@@ -283,6 +284,7 @@ def main_program(pop):
     extended= [(x,0) for x in extended]
     outputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
     outputDataFull=sorted(outputDataFull, key = getRank)
+    outputDataFull = [x[1] for x in outputDataFull if x[1]==0 else x]
     outputData = { 'options' : outputDataFull }
     outputData = json.dumps(outputData)
     with open('jsonOutExtended.txt', 'w') as outfile:

From a04763d45747612957a7c70253ec7024d8b4d617 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 20:43:34 -0800
Subject: [PATCH 07/16] ga_multi: fix syntax of singleton-flattening comprehension

---
 OptionSelect/ga_multi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 1c7501c..369e766 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -284,7 +284,7 @@ def main_program(pop):
     extended= [(x,0) for x in extended]
     outputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
     outputDataFull=sorted(outputDataFull, key = getRank)
-    outputDataFull = [x[1] for x in outputDataFull if x[1]==0 else x]
+    outputDataFull = [item[0] if item[1]==0 else item for item in outputDataFull]
     outputData = { 'options' : outputDataFull }
     outputData = json.dumps(outputData)
     with open('jsonOutExtended.txt', 'w') as outfile:

From fed197043b48ed066febee3759150a2c7b7523ac Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 21:20:45 -0800
Subject: [PATCH 08/16] ga_multi: hard-code SID 3301, set ngen=125 and npop=2500, print SID in settings banner

---
 OptionSelect/ga_multi.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
index 369e766..253360a 100644
--- a/OptionSelect/ga_multi.py
+++ b/OptionSelect/ga_multi.py
@@ -20,8 +20,8 @@
 
 # Define the location of the csv file with modeled preferences, should make relative
 # Three col CSV (Item-Code, Option-Type, Value)
-inputSID = input('Please enter an SID')
-csv_filepath=r'rank' + inputSID+ '.csv'
+inputSID = 3301
+csv_filepath=r'rank' + str(inputSID)+ '.csv'
 
 
 #%% Magic Numbers
@@ -29,7 +29,7 @@
 #cxpb- probability of a cross over occuring in one chromosome of a mating pair
 #mutpb- probability of at each nucleotide of a mutation
 #number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,100,2000, 0.1, 0.05
+nepochs, ngen, npop, cxpb, mutpb =2,125,2500, 0.1, 0.05
     
 HOFsize=1
 
@@ -65,7 +65,7 @@ def evalFit(individual):
     #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),2).sum()
     spacingCost = 5*(np.mean(np.diff(np.hstack((0,indiv[0],60))))+np.mean(np.diff(np.hstack((0,indiv[1],60))))+np.mean(np.diff(np.hstack((0,indiv[2],60)))))
     varCost = -3*(np.var(np.diff(np.hstack((0,indiv[0],60))))+np.var(np.diff(np.hstack((0,indiv[1],60))))+np.var(np.diff(np.hstack((0,indiv[2],60)))))
-    cost=rangeCost+spacingCost+varCost+rangeCost#+similarityCost+similarity2#80*uniformCost+10*distanceCost+similarityCost+similarity2   
+    cost=rangeCost+spacingCost+varCost+rangeCost
     return (cost,)
 
 def getSims(individual):
@@ -246,7 +246,7 @@ def main_program(pop):
 if __name__ == '__main__':  
     print 'GA algorithm starting with the following settings:'
     print 'nepochs = ' + str(nepochs) + ' ngen = ' + str(ngen) + ' npop = ' + str(npop)
-    print 'cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb)
+    print 'cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb) + ' SID = ' + str(inputSID)
     answer = input('Are the following settings okay? (0/1)  ')
     if answer == 0:
         raise ValueError('Custom Error: Please change settings in script file')    

From e53d29c963af834f84798e1e5d55c2a7637079f6 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 21:21:28 -0800
Subject: [PATCH 09/16] Delete ga_multi copy.py

---
 OptionSelect/ga_multi copy.py | 293 ----------------------------------
 1 file changed, 293 deletions(-)
 delete mode 100644 OptionSelect/ga_multi copy.py

diff --git a/OptionSelect/ga_multi copy.py b/OptionSelect/ga_multi copy.py
deleted file mode 100644
index 7cbe86a..0000000
--- a/OptionSelect/ga_multi copy.py	
+++ /dev/null
@@ -1,293 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Aug 13 13:01:21 2015
-@author: Calvin
-"""
-
-#%%==========imports and constants=================%%#
-import numpy as np
-import pandas as pd
-from deap import base, creator, tools
-import matplotlib.pyplot as plt
-from scipy.stats import kstest, ks_2samp
-import random, operator, seaborn
-import multiprocessing as mp
-import json
-import os
-
-#os.chdir('C:\Users\Calvin\Documents\GitHub\Nypype_Workflows\MVPA')
-
-# Define the location of the csv file with modeled preferences, should make relative
-# Three col CSV (Item-Code, Option-Type, Value)
-csv_filepath='rank000.csv'
-
-
-#%% Magic Numbers
-#nepochs-number of epochs, ngen-number of generations in an epoch
-#cxpb- probability of a cross over occuring in one chromosome of a mating pair
-#mutpb- probability of at each nucleotide of a mutation
-#number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,30,100, 0.1, 0.05
-    
-HOFsize=1
-
-HallOfFame=[]
-
-SID='a03'
-n_single=20 #1 number of possibilities for singleton
-n_hetero=15 #2 number of possibilities for the heterogenous bundle
-n_homo=22 #3 number of possibilities for the homogeneous scaling
-n_genome=n_single+n_hetero+n_homo #total number of possibilities for all cases
-n_target=10 #Desired number in each chromosome
-
-chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
-
-#Define the seed for the random number generator for replication purposes
-random.seed(1)
-np.random.seed(1)
-
-#%%===========define fitness and functions=================%%#
-uni=np.random.uniform(0,60,500)
-
-def evalFit(individual): 
-    """ A weighted total of fitness scores to be maximized
-    RangeCost-maximum to minimum
-    SimilarityCost - number of items in both singleton and homogenous scaling
-    UniformCost- Uses KS divergence to indicate distance of distribution of values from uniform distribution
-    DistanceCost- Uses KS divergence to indicate differences between distributions
-    Cost currently is a simple weightable summation, might be changed to F score"""
-    indiv=genoToPheno(individual)
-    #####similarityCost=np.sum(np.in1d(individual[0][0],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    #similarityCost=   np.sum([np.sum(c)>1 for c in [np.in1d(k,x) for k in y]])
-    #x is singelton, y is array of tuples of constituent items
-    ######similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,individual[0][0]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    rangeCost=(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/125
-    uniformCost=1/(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
-    #uniformCost=(ks_2samp(indiv[0], uni)[1]+ks_2samp(indiv[1], uni)[1]+ks_2samp(indiv[2], uni)[1])    
-    distanceCost=(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    cost=20*rangeCost+30*uniformCost+10*distanceCost+similarityCost+similarity2   
-    return (cost,)
-
-def getSims(individual):
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    print similarityCost
-    print similarity2
-    
-
-# Creates the initial generation      
-def createIndividual():
-    """Creates a random individual with 11 singleton, 10 het. bundle, 10 hom. scale"""    
-    return [random.sample(valueDictionary[1].keys(),n_target+1),
-               random.sample(valueDictionary[2].keys(),n_target),
-                random.sample(valueDictionary[3].keys(),n_target)]
-
-
-# Crossover algorithm          
-def nonReplicatingCross(ind1, ind2):
-    """Performs a crossover in-place"""
-    """Highly in need of new documentation"""
-    chromosomeNumber = random.randint(0,2)
-    indLength = len(ind1[chromosomeNumber])
-    cxpoint = random.randint(1,indLength-1)
-    child1 = np.zeros(indLength) #create a child array to use
-    child2 = np.zeros(indLength)
-    child1[0:cxpoint]=ind1[chromosomeNumber][0:cxpoint] #do the first half of the crossover
-    child2[0:cxpoint]=ind2[chromosomeNumber][0:cxpoint]
-    try:
-        child1[child1==0]=[x for x in ind2[chromosomeNumber] if x not in child1][0:len(child1[child1==0])]
-    except ValueError:
-        pass
-    if (child1[child1==0]!=[]) or (child1[child1==0]==[0]):
-        child1[child1==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child1], np.sum(np.where(child1==0, 1, 0)))
-    try:
-        child2[child2==0]=[x for x in ind1[chromosomeNumber] if x not in child2][0:len(child2[child2==0])]
-    except ValueError:
-        pass
-    if (child2[child2==0]!=[]) or (child2[child2==0]==[0]):
-        child2[child2==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child2], np.sum(np.where(child2==0, 1, 0)))
-    ind1[chromosomeNumber]=child1  #copy the child array onto the parent array (in place modification)
-    ind2[chromosomeNumber]=child2
-    
-    return ind1, ind2
-  
-#Mutation algorithm      
-def nonReplicatingMutate(ind,indpb):
-    """Mutates an individual in place"""
-    ind=np.asarray(ind) #copy indiviudal into numpy array
-    for chro in range(0,3):
-        for i in range(1,len(ind[chro])):
-                if random.random() < indpb: #for each nucleotide, use roulette to see if there is a mutation
-                            ind[chro][i]=(random.sample([x for x in valueDictionary[chro+1].keys() if x not in ind[chro]],1))[0]                                
-    return ind
-    del ind
-    
-#Maps genotype onto phenotype (item number onto value)    
-def genoToPheno(individual):
-    #print individual
-    indiv=[np.zeros(n_target+1), np.zeros(n_target), np.zeros(n_target)]
-    for chro in range(0,3):
-        for i in range(len(individual[0][chro])):
-            indiv[chro][i]=valueDictionary[chro+1][int(individual[0][chro][i])]
-    return indiv
-
-#stores top n individuals of an epoch in a list    
-def custHallOfFame(population,maxaddsize):
-    for i in tools.selBest(population, k=maxaddsize): 
-        HallOfFame.append(i)
-
-#checks for human error in value entry
-def inputErrorCheck(raw_data):
-    if not raw_data[['item1', 'item2']].applymap(np.isreal).all().all():
-        raise ValueError('Custom error, ask CL : Some item value is not a number')
-    if [raw_data['index']>60].any:
-        raise ValueError("Custom error, ask CL : An item index is > 60")
-    for bundleType in range(1,4):
-        if raw_data[raw_data['type']==bundleType].duplicated(subset=['item1', 'item2']).any():
-            print raw_data[raw_data['type']==bundleType].duplicated(subset=['item1', 'item2'])
-            raise ValueError('Custom error, ask CL : Some item value is duplicated')
-    
-    
-
-#%%==============import data from csv======================%%#
-raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=0)
-
-raw_choice_dataset=raw_choice_dataset[raw_choice_dataset['SID']==SID]
-
-valueDictionary={}
-for x in range(1,4):
-  #Create a dictionary/hashtable associating the unique ID assigned to each singleton or bundle to its modeled value
-    placeholderValueDictionary={}
-    for rows in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==x].iterrows():
-        #rows[1][6]=rows[1][2] # change this once modeling is done
-        placeholderValueDictionary[int(rows[1]['index'])] =float(rows[1]['index'])
-    valueDictionary[x]=placeholderValueDictionary
-    
-singletonLookup={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==1].iterrows():
-    singletonLookup[int(x[1]['index'])]=int(x[1]['item1'])
-
-bundleLookup={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==2].iterrows():
- #create a dictionary/hastable that gives constituent item in homogeneous bundles
-    bundleLookup[int(x[1]['index'])]=int(x[1]['item1'])
-    
-bundleLookup2={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==3].iterrows():
-    bundleLookup2[int(x[1]['index'])]=(int(x[1]['item1']),int(x[1]['item2']))
-#%%===============initialize toolbox=======================%%#
-creator.create("FitnessMax", base.Fitness, weights=(1.0,))
-creator.create("Individual", list, typecode="d", fitness=creator.FitnessMax)
-
-stats = tools.Statistics(key=operator.attrgetter("fitness.values"))
-stats.register("max", np.max)
-stats.register("mean", np.mean)
-stats.register("min", np.min)
-
-toolbox = base.Toolbox()
-
-toolbox.register("HOF", custHallOfFame, maxaddsize=HOFsize)
-toolbox.register("create_individual", createIndividual)
-toolbox.register("individuals", tools.initRepeat, creator.Individual,
-                 toolbox.create_individual, n=1) 
-toolbox.register("population", tools.initRepeat, list, toolbox.individuals)
-
-toolbox.register("evaluate", evalFit)
-
-toolbox.register("mate", nonReplicatingCross)
-toolbox.register("mutate", nonReplicatingMutate, indpb=.1)
-toolbox.register("select", tools.selTournament, tournsize=2)
-
-
-#toolbox.register('map', futures.map)
-
-s= tools.Statistics()
-s.register("max", np.max)
-s.register("mean", np.mean)
-
-log=tools.Logbook()
-
-def main_program(pop):    
-    fitnesses = toolbox.map(toolbox.evaluate, pop) # eval. fitness of pop
-    for ind, fit in zip(pop, fitnesses):
-        ind.fitness.values = fit
-    
-    for g in range(ngen):  
-        if g%5==0:
-            print str(g) + ' of ' + str(ngen)       
-        offspring = toolbox.select(pop, len(pop)) #select which individuals to mate
-        offspring = map(toolbox.clone, offspring)
-        
-        for child1, child2 in zip(offspring[::2], offspring[1::2]): #determine whether to have a cross over
-            if random.random() < cxpb:
-                child1[0], child2[0] = toolbox.mate(child1[0], child2[0])
-                del child1.fitness.values, child2.fitness.values
-    
-        for mutant in offspring: #determine whether to mutate
-            if random.random() < mutpb:
-                mutant[0]=toolbox.mutate(mutant[0])
-                del mutant.fitness.values      
-        
-        invalids = [ind for ind in offspring if not ind.fitness.valid] #assign fitness scores to new offspring
-        fitnesses = toolbox.map(toolbox.evaluate, invalids)
-        for ind, fit in zip(invalids, fitnesses):
-            ind.fitness.values = fit  
-        
-        log.record(gen=g,**stats.compile(pop))
-        pop[:] = offspring #update population with offspring    
-    return tools.selBest(pop,k=1)[0][0]
-
-#%%======================main==============================%%#
-if __name__ == '__main__':  
-    print 'GA algorithm starting with the following settings:'
-    print 'nepochs = ' + str(nepochs) + ' ngen = ' + str(ngen) + ' npop = ' + str(npop)
-    print 'cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb)
-    answer = input('Are the following settings okay? (0/1)  ')
-    if answer == 0:
-        raise ValueError('Custom Error: Please change settings in script file')    
-    
-    print 'initializing processing pool'
-    return_var= []
-    processes = []
-    pool = mp.Pool(processes = 8)
-    pop_pool = [toolbox.population(n=npop) for x in range(8)]
-    results = pool.map(main_program,pop_pool)
-    pool.close()
-    print 'pool finished, outputing to JSON'    
-    
-    results = [[np.sort(x[0]),np.sort(x[1]),np.sort(x[2])] for x in results]
-    
-    resultsFit = [evalFit([x]) for x in results]
-    maxIndex = np.argmax(resultsFit)
-    
-    bestIndividual = results[maxIndex]
-    
-    singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
-    median = singletonTransed[5]
-    singletonTransed = np.delete(singletonTransed, 5).tolist()
-    homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
-    heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
-    
-    outputData = { 'singleton' : singletonTransed, 'homo' : homoTransed, 'hetero' : heteroTransed, 'median' : median }
-    outputData = json.dumps(outputData)
-    with open('jsonOut.txt', 'w') as outfile:
-        outfile.write(str(outputData))
-        
-    outputDataFull = np.hstack((bestIndividual[0], bestIndividual[1],bestIndividual[2]))
-    outputDataFull = np.sort(outputDataFull)
-    transedFullData = []
-    for x in outputDataFull:
-        if x in singletonLookup.keys():
-            transedFullData.append(singletonLookup[x])
-        if x in bundleLookup.keys():
-            transedFullData.append(bundleLookup[x])
-        if x in bundleLookup2.keys():
-            transedFullData.append(bundleLookup2[x])
-        else:
-            raise ValueError('Custom error: item in outputData JSON was not in any value dictionary')
-    outputData = { 'options' : transedFullData}
-    outputData = json.dumps(outputData)
-    with open('jsonOutExtended.txt', 'w') as outfile:
-        outfile.write(str(outputData))
\ No newline at end of file

From f07d972903a10adeabe615d2428ea42b6f4d5f63 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 21:21:36 -0800
Subject: [PATCH 10/16] Delete stimulusSelection.py

---
 OptionSelect/stimulusSelection.py | 279 ------------------------------
 1 file changed, 279 deletions(-)
 delete mode 100644 OptionSelect/stimulusSelection.py

diff --git a/OptionSelect/stimulusSelection.py b/OptionSelect/stimulusSelection.py
deleted file mode 100644
index 88c7445..0000000
--- a/OptionSelect/stimulusSelection.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-GA based stimulus selector
- 
-Created on Sat Feb 21 13:10:50 2015
-@author: Calvin Leather
-To do-
-Look into uniformity metric in evalFit()
-Explore different types of means
-"""
-#%%==========imports and constants=================%%#
-import numpy as np
-import pandas as pd
-from deap import base, creator, tools
-import matplotlib.pyplot as plt
-from scipy.stats import kstest, ks_2samp
-import random, operator, seaborn
-import multiprocessing as mp
-import json
-import os
-
-#os.chdir('C:\Users\Calvin\Documents\GitHub\Nypype_Workflows\MVPA')
-
-#%% Magic Numbers
-#nepochs-number of epochs, ngen-number of generations in an epoch
-#cxpb- probability of a cross over occuring in one chromosome of a mating pair
-#mutpb- probability of at each nucleotide of a mutation
-#number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,50,1000, 0.1, 0.05
-    
-HOFsize=1
-
-HallOfFame=[]
-
-SID='a03'
-n_single=20 #1 number of possibilities for singleton
-n_hetero=15 #2 number of possibilities for the heterogenous bundle
-n_homo=22 #3 number of possibilities for the homogeneous scaling
-n_genome=n_single+n_hetero+n_homo #total number of possibilities for all cases
-n_target=10 #Desired number in each chromosome
-
-chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
-
-#Define the seed for the random number generator for replication purposes
-random.seed(1)
-np.random.seed(1)
-
-#%%===========define fitness and functions=================%%#
-uni=np.random.uniform(0,60,500)
-
-def evalFit(individual): 
-    """ A weighted total of fitness scores to be maximized
-    RangeCost-maximum to minimum
-    SimilarityCost - number of items in both singleton and homogenous scaling
-    UniformCost- Uses KS divergence to indicate distance of distribution of values from uniform distribution
-    DistanceCost- Uses KS divergence to indicate differences between distributions
-    Cost currently is a simple weightable summation, might be changed to F score"""
-    indiv=genoToPheno(individual)
-    #####similarityCost=np.sum(np.in1d(individual[0][0],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    #similarityCost=   np.sum([np.sum(c)>1 for c in [np.in1d(k,x) for k in y]])
-    #x is singelton, y is array of tuples of constituent items
-    ######similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,individual[0][0]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    rangeCost=(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/125
-    uniformCost=1/(kstest(indiv[0],'uniform')[0]+kstest(indiv[1],'uniform')[0]+kstest(indiv[2],'uniform')[0])
-    #uniformCost=(ks_2samp(indiv[0], uni)[1]+ks_2samp(indiv[1], uni)[1]+ks_2samp(indiv[2], uni)[1])    
-    distanceCost=(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    cost=20*rangeCost+30*uniformCost+10*distanceCost+similarityCost+similarity2   
-    return (cost,)
-
-def getSims(individual):
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    print similarityCost
-    print similarity2
-    
-
-# Creates the initial generation      
-def createIndividual():
-    """Creates a random individual with 11 singleton, 10 het. bundle, 10 hom. scale"""    
-    return [random.sample(valueDictionary[1].keys(),n_target+1),
-               random.sample(valueDictionary[2].keys(),n_target),
-                random.sample(valueDictionary[3].keys(),n_target)]
-
-
-# Crossover algorithm          
-def nonReplicatingCross(ind1, ind2):
-    """Performs a crossover in-place"""
-    """Highly in need of new documentation"""
-    chromosomeNumber = random.randint(0,2)
-    indLength = len(ind1[chromosomeNumber])
-    cxpoint = random.randint(1,indLength-1)
-    child1 = np.zeros(indLength) #create a child array to use
-    child2 = np.zeros(indLength)
-    child1[0:cxpoint]=ind1[chromosomeNumber][0:cxpoint] #do the first half of the crossover
-    child2[0:cxpoint]=ind2[chromosomeNumber][0:cxpoint]
-    try:
-        child1[child1==0]=[x for x in ind2[chromosomeNumber] if x not in child1][0:len(child1[child1==0])]
-    except ValueError:
-        pass
-    if (child1[child1==0]!=[]) or (child1[child1==0]==[0]):
-        child1[child1==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child1], np.sum(np.where(child1==0, 1, 0)))
-    try:
-        child2[child2==0]=[x for x in ind1[chromosomeNumber] if x not in child2][0:len(child2[child2==0])]
-    except ValueError:
-        pass
-    if (child2[child2==0]!=[]) or (child2[child2==0]==[0]):
-        child2[child2==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child2], np.sum(np.where(child2==0, 1, 0)))
-    ind1[chromosomeNumber]=child1  #copy the child array onto the parent array (in place modification)
-    ind2[chromosomeNumber]=child2
-    
-    return ind1, ind2
-  
-#Mutation algorithm      
-def nonReplicatingMutate(ind,indpb):
-    """Mutates an individual in place"""
-    ind=np.asarray(ind) #copy indiviudal into numpy array
-    for chro in range(0,3):
-        for i in range(1,len(ind[chro])):
-                if random.random() < indpb: #for each nucleotide, use roulette to see if there is a mutation
-                            ind[chro][i]=(random.sample([x for x in valueDictionary[chro+1].keys() if x not in ind[chro]],1))[0]                                
-    return ind
-    del ind
-    
-#Maps genotype onto phenotype (item number onto value)    
-def genoToPheno(individual):
-    #print individual
-    indiv=[np.zeros(n_target+1), np.zeros(n_target), np.zeros(n_target)]
-    for chro in range(0,3):
-        for i in range(len(individual[0][chro])):
-            indiv[chro][i]=valueDictionary[chro+1][int(individual[0][chro][i])]
-    return indiv
-
-#stores top n individuals of an epoch in a list    
-def custHallOfFame(population,maxaddsize):
-    for i in tools.selBest(population, k=maxaddsize): 
-        HallOfFame.append(i)
-
-#checks for human error in value entry
-def inputErrorCheck(raw_data):
-    if not raw_data[['item1', 'item2']].applymap(np.isreal).all().all():
-        raise ValueError('Custom error, ask CL : Some item value is not a number')
-    for bundleType in range(1,4):
-        if raw_data[raw_data['type']==bundleType].duplicated(subset=['item1', 'item2']).any():
-            raise ValueError('Custom error, ask CL : Some item value is duplicated')
-    
-
-
-#%%===============initialize toolbox=======================%%#
-creator.create("FitnessMax", base.Fitness, weights=(1.0,))
-creator.create("Individual", list, typecode="d", fitness=creator.FitnessMax)
-
-stats = tools.Statistics(key=operator.attrgetter("fitness.values"))
-stats.register("max", np.max)
-stats.register("mean", np.mean)
-stats.register("min", np.min)
-
-toolbox = base.Toolbox()
-
-toolbox.register("HOF", custHallOfFame, maxaddsize=HOFsize)
-toolbox.register("create_individual", createIndividual)
-toolbox.register("individuals", tools.initRepeat, creator.Individual,
-                 toolbox.create_individual, n=1) 
-toolbox.register("population", tools.initRepeat, list, toolbox.individuals)
-
-toolbox.register("evaluate", evalFit)
-
-toolbox.register("mate", nonReplicatingCross)
-toolbox.register("mutate", nonReplicatingMutate, indpb=.1)
-toolbox.register("select", tools.selTournament, tournsize=2)
-
-
-#toolbox.register('map', futures.map)
-
-s= tools.Statistics()
-s.register("max", np.max)
-s.register("mean", np.mean)
-
-log=tools.Logbook()
-
-def main_program(pop):
-
-    fitnesses = toolbox.map(toolbox.evaluate, pop) # eval. fitness of pop
-    for ind, fit in zip(pop, fitnesses):
-        ind.fitness.values = fit
-    
-    for g in range(ngen):  
-        if g%5==0:
-            print str(g) + ' of ' + str(ngen)       
-        offspring = toolbox.select(pop, len(pop)) #select which individuals to mate
-        offspring = map(toolbox.clone, offspring)
-        
-        for child1, child2 in zip(offspring[::2], offspring[1::2]): #determine whether to have a cross over
-            if random.random() < cxpb:
-                child1[0], child2[0] = toolbox.mate(child1[0], child2[0])
-                del child1.fitness.values, child2.fitness.values
-    
-        for mutant in offspring: #determine whether to mutate
-            if random.random() < mutpb:
-                mutant[0]=toolbox.mutate(mutant[0])
-                del mutant.fitness.values      
-        
-        invalids = [ind for ind in offspring if not ind.fitness.valid] #assign fitness scores to new offspring
-        fitnesses = toolbox.map(toolbox.evaluate, invalids)
-        for ind, fit in zip(invalids, fitnesses):
-            ind.fitness.values = fit  
-        
-        log.record(gen=g,**stats.compile(pop))
-        pop[:] = offspring #update population with offspring    
-    return tools.selBest(pop,k=1)[0][0]
-
-#%%======================main==============================%%#
-if __name__ == '__main__':  
-    #%%==============import data from csv======================%%#
-    # Define the location of the csv file with modeled preferences, should make relative
-    # Three col CSV (Item-Code, Option-Type, Value)
-    SID = input('enter subject ID')    
-    csv_filepath='rank'+str(SID)+'.csv'
-
-    raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=0)
-
-    valueDictionary={}
-    for x in range(1,4):
-      #Create a dictionary/hashtable associating the unique ID assigned to each singleton or bundle to its modeled value
-        placeholderValueDictionary={}
-        for rows in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==x].iterrows():
-            #rows[1][6]=rows[1][2] # change this once modeling is done
-            placeholderValueDictionary[int(rows[1]['rank'])] =float(rows[1]['rank'])
-        valueDictionary[x]=placeholderValueDictionary
-        
-    singletonLookup={}
-    for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==1].iterrows():
-        singletonLookup[int(x[1]['rank'])]=int(x[1]['item1'])
-
-    bundleLookup={}
-    for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==2].iterrows():
-     #create a dictionary/hastable that gives constituent item in homogeneous bundles
-        bundleLookup[int(x[1]['rank'])]=int(x[1]['item1'])
-        
-    bundleLookup2={}
-    for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==3].iterrows():
-        bundleLookup2[int(x[1]['rank'])]=(int(x[1]['item1']),int(x[1]['item2']))
-
-
-    print 'GA algorithm starting with the following settings:'
-    print 'nepochs = ' + str(nepochs) + ' ngen = ' + str(ngen) + ' npop = ' + str(npop)
-    print 'cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb)
-    answer = input('Are the following settings okay? (0/1)  ')
-    if answer == 0:
-        raise ValueError('Custom Error: Please change settings in script file')
-    
-    
-    print 'initializing processing pool'
-    return_var= []
-    processes = []
-    pool = mp.Pool(processes = 8)
-    pop_pool = [toolbox.population(n=npop) for x in range(8)]
-    results = pool.map(main_program,pop_pool)
-    pool.close()
-    print 'pool finished, outputing to JSON'    
-    
-    results = [[np.sort(x[0]),np.sort(x[1]),np.sort(x[2])] for x in results]
-    
-    resultsFit = [evalFit([x]) for x in results]
-    maxIndex = np.argmax(resultsFit)
-    
-    bestIndividual = results[maxIndex]
-    
-    singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
-    median = singletonTransed[5]
-    singletonTransed = np.delete(singletonTransed, 5).tolist()
-    homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
-    heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
-    
-    outputData = { 'singleton' : singletonTransed, 'homo' : homoTransed, 'hetero' : heteroTransed, 'median' : median }
-    outputData = json.dumps(outputData)
-    with open('jsonOut'+str(SID)+'.txt', 'w') as outfile:
-        outfile.write(str(outputData))

From 33ab5b5401b4111b9c70ced8e8b94a999a019c04 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Mon, 30 Nov 2015 21:22:04 -0800
Subject: [PATCH 11/16] Create graph.py

---
 OptionSelect/graph.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 OptionSelect/graph.py

diff --git a/OptionSelect/graph.py b/OptionSelect/graph.py
new file mode 100644
index 0000000..4a257be
--- /dev/null
+++ b/OptionSelect/graph.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Nov 05 18:42:47 2015
+
+@author: Calvin
+"""
+import numpy as np
+import pandas as pd
+from deap import base, creator, tools
+import matplotlib.pyplot as plt
+from scipy.stats import kstest, ks_2samp
+import random, operator, seaborn
+import multiprocessing as mp
+import json
+import os
+import seaborn as sns
+
+# Overlay the three sub-lists of bestIndividual as unit-height bars; pyplot adds to the current axes by default, so the plt.hold(True) call (removed in matplotlib 3.0) is dropped. NOTE(review): bestIndividual is not defined in this file - presumably left in the interactive session by ga_multi.py; confirm.
+sns.set_context(rc={"figure.figsize": (8, 4)})
+plt.bar(np.asarray(bestIndividual[0]), np.ones(len(bestIndividual[0])), color = 'blue')
+plt.bar(np.asarray(bestIndividual[1]), np.ones(len(bestIndividual[1])), color = 'red')
+plt.bar(np.asarray(bestIndividual[2]), np.ones(len(bestIndividual[2])), color = 'green')

From df31a5b9132e28ed7b7d15da18dfd76b2901f4e9 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 10 Mar 2016 19:32:44 -0800
Subject: [PATCH 12/16] Delete ga_multi.py

---
 OptionSelect/ga_multi.py | 302 ---------------------------------------
 1 file changed, 302 deletions(-)
 delete mode 100644 OptionSelect/ga_multi.py

diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
deleted file mode 100644
index 253360a..0000000
--- a/OptionSelect/ga_multi.py
+++ /dev/null
@@ -1,302 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Aug 13 13:01:21 2015
-@author: Calvin
-"""
-
-#%%==========imports and constants=================%%#
-import numpy as np
-import pandas as pd  
-from deap import base, creator, tools
-from scipy.stats import kstest, ks_2samp
-import random, operator
-import multiprocessing as mp
-import json
-import os
-import matplotlib.pyplot as plt
-import seaborn as sns
-
-#os.chdir('/Users/Calvin/Desktop/')
-
-# Define the location of the csv file with modeled preferences, should make relative
-# Three col CSV (Item-Code, Option-Type, Value)
-inputSID = 3301
-csv_filepath=r'rank' + str(inputSID)+ '.csv'
-
-
-#%% Magic Numbers
-#nepochs-number of epochs, ngen-number of generations in an epoch
-#cxpb- probability of a cross over occuring in one chromosome of a mating pair
-#mutpb- probability of at each nucleotide of a mutation
-#number of individuals to put in HOF in each epoc
-nepochs, ngen, npop, cxpb, mutpb =2,125,2500, 0.1, 0.05
-    
-HOFsize=1
-
-HallOfFame=[]
-
-n_single=20 #1 number of possibilities for singleton
-n_hetero=15 #2 number of possibilities for the heterogenous bundle
-n_homo=22 #3 number of possibilities for the homogeneous scaling
-n_genome=n_single+n_hetero+n_homo #total number of possibilities for all cases
-n_target=10 #Desired number in each chromosome
-
-chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
-
-#Define the seed for the random number generator for replication purposes
-#random.seed(1)
-#np.random.seed(1)
-
-#%%===========define fitness and functions=================%%#
-#uni=np.random.uniform(0,60,500)
-
-def evalFit(individual): 
-    """ A weighted total of fitness scores to be maximized
-    RangeCost-maximum to minimum
-    SimilarityCost - number of items in both singleton and homogenous scaling
-    UniformCost- Uses KS divergence to indicate distance of distribution of values from uniform distribution
-    DistanceCost- Uses KS divergence to indicate differences between distributions
-    Cost currently is a simple weightable summation, might be changed to F score"""
-    indiv=genoToPheno(individual)
-    #similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    #similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    rangeCost=20*(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))/50   
-    #distanceCost=10*(ks_2samp(indiv[0], indiv[1])[1]+ks_2samp(indiv[1], indiv[2])[1]+ks_2samp(indiv[2], indiv[0])[1])
-    #uniformityCost = -np.power(np.diff(np.hstack((0,indiv[0],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[1],60))),2).sum()-np.power(np.diff(np.hstack((0,indiv[2],60))),2).sum()
-    spacingCost = 5*(np.mean(np.diff(np.hstack((0,indiv[0],60))))+np.mean(np.diff(np.hstack((0,indiv[1],60))))+np.mean(np.diff(np.hstack((0,indiv[2],60)))))
-    varCost = -3*(np.var(np.diff(np.hstack((0,indiv[0],60))))+np.var(np.diff(np.hstack((0,indiv[1],60))))+np.var(np.diff(np.hstack((0,indiv[2],60)))))
-    cost=rangeCost+spacingCost+varCost+rangeCost
-    return (cost,)
-
-def getSims(individual):
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
-    print similarityCost
-    print similarity2
-    
-
-# Creates the initial generation      
-def createIndividual():
-    """Creates a random individual with 11 singleton, 10 het. bundle, 10 hom. scale"""    
-    return [random.sample(valueDictionary[1].keys(),n_target+1),
-               random.sample(valueDictionary[2].keys(),n_target),
-                random.sample(valueDictionary[3].keys(),n_target)]
-
-
-# Crossover algorithm          
-def nonReplicatingCross(ind1, ind2):
-    """Performs a crossover in-place"""
-    """Highly in need of new documentation"""
-    chromosomeNumber = random.randint(0,2)
-    indLength = len(ind1[chromosomeNumber])
-    cxpoint = random.randint(1,indLength-1)
-    child1 = np.zeros(indLength) #create a child array to use
-    child2 = np.zeros(indLength)
-    child1[0:cxpoint]=ind1[chromosomeNumber][0:cxpoint] #do the first half of the crossover
-    child2[0:cxpoint]=ind2[chromosomeNumber][0:cxpoint]
-    try:
-        child1[child1==0]=[x for x in ind2[chromosomeNumber] if x not in child1][0:len(child1[child1==0])]
-    except ValueError:
-        pass
-    if (child1[child1==0]!=[]) or (child1[child1==0]==[0]):
-        child1[child1==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child1], np.sum(np.where(child1==0, 1, 0)))
-    try:
-        child2[child2==0]=[x for x in ind1[chromosomeNumber] if x not in child2][0:len(child2[child2==0])]
-    except ValueError:
-        pass
-    if (child2[child2==0]!=[]) or (child2[child2==0]==[0]):
-        child2[child2==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child2], np.sum(np.where(child2==0, 1, 0)))
-    ind1[chromosomeNumber]=child1  #copy the child array onto the parent array (in place modification)
-    ind2[chromosomeNumber]=child2
-    
-    return ind1, ind2
-  
-#Mutation algorithm      
-def nonReplicatingMutate(ind,indpb):
-    """Mutates an individual in place"""
-    ind=np.asarray(ind) #copy indiviudal into numpy array
-    for chro in range(0,3):
-        for i in range(1,len(ind[chro])):
-                if random.random() < indpb: #for each nucleotide, use roulette to see if there is a mutation
-                            ind[chro][i]=(random.sample([x for x in valueDictionary[chro+1].keys() if x not in ind[chro]],1))[0]                                
-    return ind
-    del ind
-    
-#Maps genotype onto phenotype (item number onto value)    
-def genoToPheno(individual):
-    #print individual
-    indiv=[np.zeros(n_target+1), np.zeros(n_target), np.zeros(n_target)]
-    for chro in range(0,3):
-        for i in range(len(individual[0][chro])):
-            indiv[chro][i]=valueDictionary[chro+1][int(individual[0][chro][i])]
-    return indiv
-
-#stores top n individuals of an epoch in a list    
-def custHallOfFame(population,maxaddsize):
-    for i in tools.selBest(population, k=maxaddsize): 
-        HallOfFame.append(i)
-
-#checks for human error in value entry
-def inputErrorCheck(raw_data):
-    if not raw_data[['item1', 'item2']].applymap(np.isreal).all().all():
-        raise ValueError('Custom error, ask CL : Some item value is not a number')
-    if (raw_data.index>=60).any():
-        raise ValueError("Custom error, ask CL : An item index is > 60")
-    if raw_data.duplicated(subset=['item1', 'item2']).any():
-        print raw_data[raw_data.duplicated(subset=['item1', 'item2'])]
-        raise ValueError('Custom error, ask CL : Some item value is duplicated')
-    if raw_data[['item1', 'item2']].applymap(lambda x: x>30).any().any():
-        raise ValueError('Item number is greater than 30')
-        
-def getRank(item):
-    if type(item)==tuple:
-        return raw_choice_dataset.loc[(raw_choice_dataset['item1']==item[0]) & (raw_choice_dataset['item2']==item[1]),'rank'].values[0] 
-    else:
-        raise ValueError('Custom error: Some item is not a tuple in rank ordering')
-
-#%%==============import data from csv======================%%#
-raw_choice_dataset = pd.read_csv(csv_filepath, sep=',', header=0)
-
-inputErrorCheck(raw_choice_dataset)
-
-valueDictionary={}
-for x in range(1,4):
-  #Create a dictionary/hashtable associating the unique ID assigned to each singleton or bundle to its modeled value
-    placeholderValueDictionary={}
-    for rows in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==x].iterrows():
-        #rows[1][6]=rows[1][2] # change this once modeling is done
-        placeholderValueDictionary[int(rows[1]['rank'])] =float(rows[1]['rank'])
-    valueDictionary[x]=placeholderValueDictionary
-    
-singletonLookup={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==1].iterrows():
-    singletonLookup[int(x[1]['rank'])]=int(x[1]['item1'])
-
-bundleLookup={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==2].iterrows():
- #create a dictionary/hastable that gives constituent item in homogeneous bundles
-    bundleLookup[int(x[1]['rank'])]=int(x[1]['item1'])
-    
-bundleLookup2={}
-for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==3].iterrows():
-    bundleLookup2[int(x[1]['rank'])]=(int(x[1]['item1']),int(x[1]['item2']))
-#%%===============initialize toolbox=======================%%#
-creator.create("FitnessMax", base.Fitness, weights=(1.0,))
-creator.create("Individual", list, typecode="d", fitness=creator.FitnessMax)
-
-stats = tools.Statistics(key=operator.attrgetter("fitness.values"))
-stats.register("max", np.max)
-stats.register("mean", np.mean)
-stats.register("min", np.min)
-
-toolbox = base.Toolbox()
-
-toolbox.register("HOF", custHallOfFame, maxaddsize=HOFsize)
-toolbox.register("create_individual", createIndividual)
-toolbox.register("individuals", tools.initRepeat, creator.Individual,
-                 toolbox.create_individual, n=1) 
-toolbox.register("population", tools.initRepeat, list, toolbox.individuals)
-
-toolbox.register("evaluate", evalFit)
-
-toolbox.register("mate", nonReplicatingCross)
-toolbox.register("mutate", nonReplicatingMutate, indpb=.1)
-toolbox.register("select", tools.selTournament, tournsize=2)
-
-
-#toolbox.register('map', futures.map)
-
-s= tools.Statistics()
-s.register("max", np.max)
-s.register("mean", np.mean)
-
-log=tools.Logbook()
-
-def main_program(pop):    
-    fitnesses = toolbox.map(toolbox.evaluate, pop) # eval. fitness of pop
-    for ind, fit in zip(pop, fitnesses):
-        ind.fitness.values = fit
-    
-    for g in range(ngen):  
-        if g%5==0:
-            print str(g) + ' of ' + str(ngen)       
-        offspring = toolbox.select(pop, len(pop)) #select which individuals to mate
-        offspring = map(toolbox.clone, offspring)
-        
-        for child1, child2 in zip(offspring[::2], offspring[1::2]): #determine whether to have a cross over
-            if random.random() < cxpb:
-                child1[0], child2[0] = toolbox.mate(child1[0], child2[0])
-                del child1.fitness.values, child2.fitness.values
-    
-        for mutant in offspring: #determine whether to mutate
-            if random.random() < mutpb:
-                mutant[0]=toolbox.mutate(mutant[0])
-                del mutant.fitness.values      
-        
-        invalids = [ind for ind in offspring if not ind.fitness.valid] #assign fitness scores to new offspring
-        fitnesses = toolbox.map(toolbox.evaluate, invalids)
-        for ind, fit in zip(invalids, fitnesses):
-            ind.fitness.values = fit  
-        
-        log.record(gen=g,**stats.compile(pop))
-        pop[:] = offspring #update population with offspring    
-    return tools.selBest(pop,k=1)[0][0]
-
-#%%======================main==============================%%#
-if __name__ == '__main__':  
-    print 'GA algorithm starting with the following settings:'
-    print 'nepochs = ' + str(nepochs) + ' ngen = ' + str(ngen) + ' npop = ' + str(npop)
-    print 'cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb) + ' SID = ' + str(inputSID)
-    answer = input('Are the following settings okay? (0/1)  ')
-    if answer == 0:
-        raise ValueError('Custom Error: Please change settings in script file')    
-    
-    print 'initializing processing pool'
-    return_var= []
-    processes = []
-    pool = mp.Pool(processes = 8)
-    pop_pool = [toolbox.population(n=npop) for x in range(8)]
-    results = pool.map(main_program,pop_pool)
-    pool.close()
-    print 'pool finished, outputing to JSON'    
-    
-    results = [[np.sort(x[0]),np.sort(x[1]),np.sort(x[2])] for x in results]
-    
-    resultsFit = [evalFit([x]) for x in results]
-    maxIndex = np.argmax(resultsFit)
-    
-    bestIndividual = results[maxIndex]
-    
-    singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
-    median = singletonTransed[5]
-    medianUntransed = bestIndividual[0][5]
-    singletonTransed = np.delete(singletonTransed, 5).tolist()
-    homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
-    heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
-    
-    outputData = { 'singleton' : singletonTransed, 'homo' : homoTransed, 'hetero' : heteroTransed, 'median' : median }
-    outputData = json.dumps(outputData)
-    with open('jsonOut.txt', 'w') as outfile:
-        outfile.write(str(outputData))
-
-    extended = np.unique(np.hstack((np.ravel([bundleLookup[x] for x in bestIndividual[1]]), np.ravel([bundleLookup2[x] for x in bestIndividual[2]]), [singletonLookup[x] for x in bestIndividual[0]]))).tolist()
-    homoTransed = [(x,x) for x in homoTransed]
-    extended= [(x,0) for x in extended]
-    outputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
-    outputDataFull=sorted(outputDataFull, key = getRank)
-    outputDataFull = [item[0] if item[1]==0 else item for item in outputDataFull]
-    outputData = { 'options' : outputDataFull }
-    outputData = json.dumps(outputData)
-    with open('jsonOutExtended.txt', 'w') as outfile:
-        outfile.write(str(outputData))
-    #outputDataFull=sorted(outputDataFull, key = getRank)
-    #outputDataFull=sorted(outputDataFull) 
-    plt.hold(True)
-    plt.title(csv_filepath)
-    sns.set_context(rc={"figure.figsize": (8, 4)})
-    plt.bar(np.asarray(bestIndividual[0]),np.ones((1,len(bestIndividual[0])))[0], color = 'blue')
-    plt.bar(np.asarray(bestIndividual[1]),np.ones((1,len(bestIndividual[1])))[0], color = 'red')
-    plt.bar(np.asarray(bestIndividual[2]),np.ones((1,len(bestIndividual[2])))[0], color = 'green')
-    individual=[bestIndividual]
-    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
-    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])

From 307aa9f3c41d4ec143333aa9596f1b6602ad5d9d Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 10 Mar 2016 19:33:27 -0800
Subject: [PATCH 13/16] Added new user friendly format

---
 OptionSelect/create_excel.py     |  35 ++++
 OptionSelect/ga_multi.py         | 310 +++++++++++++++++++++++++++++++
 OptionSelect/options_to_edit.txt |   1 +
 OptionSelect/rank9999.xls        | Bin 0 -> 25600 bytes
 4 files changed, 346 insertions(+)
 create mode 100644 OptionSelect/create_excel.py
 create mode 100644 OptionSelect/ga_multi.py
 create mode 100644 OptionSelect/options_to_edit.txt
 create mode 100644 OptionSelect/rank9999.xls

diff --git a/OptionSelect/create_excel.py b/OptionSelect/create_excel.py
new file mode 100644
index 0000000..4c98bf8
--- /dev/null
+++ b/OptionSelect/create_excel.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Feb 12 15:14:24 2016
+
+@author: Calvin
+
+MRI prep script. This script should be run before anything else is done, preferably before the subject arrives
+It will create the excel file into which the elicited preference ranks are entered.
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import xlwt
+
+check1 = 'no' #initialize while loop variables
+check2 = 'no'
+
+print('Current directory is: ' + os.getcwd())
+while check1!='yes': #check if path is okay
+    check1 = input("Is this path correct? Enter yes if correct: \n")
+
+while check2 != 'yes': #request SID and prompt for okay
+    SID = input('Enter subject ID: ')
+    check2 = input('Is '+ SID+  ' correct?\nEnter yes if correct:\n')
+
+num_range = np.arange(2,62).astype('str') #this creates the excel formula for bundle type
+logic_vec = [xlwt.Formula('IF(B'+x+'=C'+x+',2,IF(C'+x+'=0,1,3))') for x in num_range]
+
+empty_frame = pd.DataFrame(columns = ['rank', 'item1', 'item2', 'type']) #creates data frame
+empty_frame['rank'] = range(60) #populates dataframe
+empty_frame['type']=logic_vec
+empty_frame.to_excel('rank'+SID+'.xls', index = False) #saves dataframe
+SID = [int(SID)]
+np.savetxt('options_to_edit.txt', SID, fmt='%1.0f')
\ No newline at end of file
diff --git a/OptionSelect/ga_multi.py b/OptionSelect/ga_multi.py
new file mode 100644
index 0000000..254ddb3
--- /dev/null
+++ b/OptionSelect/ga_multi.py
@@ -0,0 +1,310 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Aug 13 13:01:21 2015
+@author: Calvin Leather
+
+This script uses the item rankings given by participants to determine a well-balanced set of decision options.
+It takes as input the excel file (rank<SID>.xls) into which the elicited preference ranks were entered.
+"""
+
+#%%==========imports and constants=================%%#
+import numpy as np
+import pandas as pd
+from deap import base, creator, tools
+from scipy.stats import kstest, ks_2samp
+import random, operator
+import multiprocessing as mp
+import json
+import os
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+# Define the location of the excel file with the elicited preference ranks (path is relative to the working directory)
+# Columns read by this script: rank, item1, item2, type
+try:
+    option_text = np.loadtxt('options_to_edit.txt')
+    inputSID = int(option_text)
+except:
+    raise ValueError('Something is wrong w options_to_edit.txt. Please edit this file with the correct SID and rerun')
+csv_filepath=r'rank' + str(inputSID)+ '.xls'
+
+
+#%% Magic Numbers
+#nepochs-number of epochs, ngen-number of generations in an epoch
+#cxpb- probability of a crossover occurring in one chromosome of a mating pair
+#mutpb- probability that an individual is picked for mutation (the per-nucleotide rate is indpb, set where toolbox.mutate is registered)
+#HOFsize- number of individuals to put in HOF in each epoch
+nepochs, ngen, npop, cxpb, mutpb =2,50,250, 0.1, 0.05
+n_proc = 2
+    
+HOFsize=1
+
+HallOfFame=[]
+
+n_single=20 #1 number of possibilities for singleton
+n_hetero=15 #2 number of possibilities for the heterogenous bundle
+n_homo=22 #3 number of possibilities for the homogeneous scaling
+n_genome=n_single+n_hetero+n_homo #total number of possibilities for all cases
+n_target=10 #Desired number in each chromosome
+
+
+chromosomeDict={0:n_single, 1:n_hetero, 2:n_homo}
+
+#Define the seed for the random number generator for replication purposes
+#random.seed(1)
+#np.random.seed(1)
+
+#%%===========define fitness and functions=================%%#
+#uni=np.random.uniform(0,60,500)
+
+def evalFit(individual): 
+    """ A weighted total of fitness scores to be maximized
+    RangeCost-maximum to minimum
+    SimilarityCost - number of items in both singleton and homogenous scaling
+    UniformCost- Uses KS divergence to indicate distance of distribution of values from uniform distribution
+    DistanceCost- Uses KS divergence to indicate differences between distributions
+    Cost currently is a simple weightable summation, might be changed to F score"""
+    #indiv=genoToPheno(individual)
+    indiv = individual[0]
+    indiv = [np.sort(indiv[0]), np.sort(indiv[1]), np.sort(indiv[2])] 
+    rangeCost=3*(np.ptp(indiv[0])+np.ptp(indiv[1])+np.ptp(indiv[2]))
+    diffCost = -2*np.var((np.mean(indiv[0]),np.mean(indiv[1]), np.mean(indiv[2])))
+    spacingCost = 5*(np.mean(np.diff(np.hstack((0,indiv[0],60))))+np.mean(np.diff(np.hstack((0,indiv[1],60))))+np.mean(np.diff(np.hstack((0,indiv[2],60)))))
+    varCost = -10*(np.power(np.var(np.diff(np.hstack((0,indiv[0],60)))),3)+np.power(np.var(np.diff(np.hstack((0,indiv[1],60)))),3)+np.power(np.var(np.diff(np.hstack((0,indiv[2],60)))),3))
+    adjCost = -15*(np.sum(np.diff(indiv[0])==1)+ np.sum(np.diff(indiv[1])==1)+ np.sum(np.diff(indiv[2])==1))
+    cost=rangeCost+spacingCost+varCost+rangeCost+diffCost+adjCost
+    return (cost,)
+
+def getSims(individual):
+    similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
+    similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
+    print(similarityCost)
+    print(similarity2)
+    
+
+# Creates the initial generation      
+def createIndividual():
+    """Creates a random individual with 11 singleton, 10 het. bundle, 10 hom. scale"""    
+    return [random.sample(valueDictionary[1].keys(),n_target+1),
+               random.sample(valueDictionary[2].keys(),n_target),
+                random.sample(valueDictionary[3].keys(),n_target)]
+
+
+# Crossover algorithm          
+def nonReplicatingCross(ind1, ind2):
+    """Performs a crossover in-place"""
+    """Highly in need of new documentation"""
+    chromosomeNumber = random.randint(0,2)
+    indLength = len(ind1[chromosomeNumber])
+    cxpoint = random.randint(1,indLength-1)
+    child1 = np.zeros(indLength) #create a child array to use
+    child2 = np.zeros(indLength)
+    child1[0:cxpoint]=ind1[chromosomeNumber][0:cxpoint] #do the first half of the crossover
+    child2[0:cxpoint]=ind2[chromosomeNumber][0:cxpoint]
+    try:
+        child1[child1==0]=[x for x in ind2[chromosomeNumber] if x not in child1][0:len(child1[child1==0])]
+    except ValueError:
+        pass
+    if (child1[child1==0]!=[]) or (child1[child1==0]==[0]):
+        child1[child1==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child1], np.sum(np.where(child1==0, 1, 0)))
+    try:
+        child2[child2==0]=[x for x in ind1[chromosomeNumber] if x not in child2][0:len(child2[child2==0])]
+    except ValueError:
+        pass
+    if (child2[child2==0]!=[]) or (child2[child2==0]==[0]):
+        child2[child2==0]=random.sample([x for x in valueDictionary[chromosomeNumber+1].keys() if x not in child2], np.sum(np.where(child2==0, 1, 0)))
+    ind1[chromosomeNumber]=child1  #copy the child array onto the parent array (in place modification)
+    ind2[chromosomeNumber]=child2
+    
+    return ind1, ind2
+  
+#Mutation algorithm      
+def nonReplicatingMutate(ind,indpb):
+    """Mutates an individual in place"""
+    ind=np.asarray(ind) #copy indiviudal into numpy array
+    for chro in range(0,3):
+        for i in range(1,len(ind[chro])):
+                if random.random() < indpb: #for each nucleotide, use roulette to see if there is a mutation
+                            ind[chro][i]=(random.sample([x for x in valueDictionary[chro+1].keys() if x not in ind[chro]],1))[0]                                
+    return ind
+    del ind
+    
+#Maps genotype onto phenotype (item number onto value)    
+def genoToPheno(individual):
+    #print individual
+    indiv=[np.zeros(n_target+1), np.zeros(n_target), np.zeros(n_target)]
+    for chro in range(0,3):
+        for i in range(len(individual[0][chro])):
+            indiv[chro][i]=valueDictionary[chro+1][int(individual[0][chro][i])]
+    return indiv
+
+#checks for human error in value entry
+def inputErrorCheck(raw_data):
+    if not raw_data[['item1', 'item2']].applymap(np.isreal).all().all():
+        raise ValueError('Custom error, ask CL : Some item value is not a number')
+    if (raw_data.index>=60).any():
+        raise ValueError("Custom error, ask CL : An item index is > 60")
+    if raw_data.duplicated(subset=['item1', 'item2']).any():
+        print(raw_data[raw_data.duplicated(subset=['item1', 'item2'])])
+        raise ValueError('Custom error, ask CL : Some item value is duplicated')
+    if raw_data[['item1', 'item2']].applymap(lambda x: x>30).any().any():
+        raise ValueError('Item number is greater than 30')
+        
+def getRank(item):
+    if type(item)==tuple:
+        return raw_choice_dataset.loc[(raw_choice_dataset['item1']==item[0]) & (raw_choice_dataset['item2']==item[1]),'rank'].values[0] 
+    else:
+        raise ValueError('Custom error: Some item is not a tuple in rank ordering')
+
+#%%==============import data from csv======================%%#
+raw_choice_dataset = pd.read_excel(csv_filepath, sep=',', header=0)
+
+inputErrorCheck(raw_choice_dataset)
+
+valueDictionary={}
+for x in range(1,4):
+  #Create a dictionary/hashtable associating the unique ID assigned to each singleton or bundle to its modeled value
+    placeholderValueDictionary={}
+    for rows in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==x].iterrows():
+        #rows[1][6]=rows[1][2] # change this once modeling is done
+        placeholderValueDictionary[int(rows[1]['rank'])] =float(rows[1]['rank'])
+    valueDictionary[x]=placeholderValueDictionary
+    
+singletonLookup={}
+for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==1].iterrows():
+    singletonLookup[int(x[1]['rank'])]=int(x[1]['item1'])
+
+bundleLookup={}
+for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==2].iterrows():
+ #create a dictionary/hash table that gives the constituent item in homogeneous bundles
+    bundleLookup[int(x[1]['rank'])]=int(x[1]['item1'])
+    
+bundleLookup2={}
+for x in raw_choice_dataset[raw_choice_dataset['type'].astype(int)==3].iterrows():
+    bundleLookup2[int(x[1]['rank'])]=(int(x[1]['item1']),int(x[1]['item2']))
+#%%===============initialize toolbox=======================%%#
+creator.create("FitnessMax", base.Fitness, weights=(1.0,))
+creator.create("Individual", list, typecode="d", fitness=creator.FitnessMax)
+
+stats = tools.Statistics(key=operator.attrgetter("fitness.values"))
+stats.register("max", np.max)
+stats.register("mean", np.mean)
+stats.register("min", np.min)
+
+toolbox = base.Toolbox()
+
+toolbox.register("HOF", tools.HallOfFame, maxsize = 5)
+toolbox.register("create_individual", createIndividual)
+toolbox.register("individuals", tools.initRepeat, creator.Individual,
+                 toolbox.create_individual, n=1) 
+toolbox.register("population", tools.initRepeat, list, toolbox.individuals)
+
+toolbox.register("evaluate", evalFit)
+
+toolbox.register("mate", nonReplicatingCross)
+toolbox.register("mutate", nonReplicatingMutate, indpb=.1)
+toolbox.register("select", tools.selTournament, tournsize=3)
+
+
+#toolbox.register('map', futures.map)
+
+s= tools.Statistics()
+s.register("max", np.max)
+s.register("mean", np.mean)
+
+log=tools.Logbook()
+
+def main_program(pop):    
+    HOF = []
+    fitnesses = toolbox.map(toolbox.evaluate, pop) # eval. fitness of pop
+    for ind, fit in zip(pop, fitnesses):
+        ind.fitness.values = fit
+    
+    for g in range(ngen):  
+        if g%5==0:
+            print(str(g) + ' of ' + str(ngen))       
+        offspring = toolbox.select(pop, len(pop)) #select which individuals to mate
+        offspring = list(map(toolbox.clone, offspring))
+        
+        for child1, child2 in zip(offspring[::2], offspring[1::2]): #determine whether to have a cross over
+            if random.random() < cxpb:
+                toolbox.mate(child1[0], child2[0])
+                del child1.fitness.values, child2.fitness.values
+    
+        for mutant in offspring: #determine whether to mutate
+            if random.random() < mutpb:
+                toolbox.mutate(mutant[0])
+                del mutant.fitness.values      
+        
+        invalids = [ind for ind in offspring if not ind.fitness.valid] #assign fitness scores to new offspring
+        fitnesses = toolbox.map(toolbox.evaluate, invalids)
+        for ind, fit in zip(invalids, fitnesses):
+            ind.fitness.values = fit  
+        
+        pop[:] = offspring #update population with offspring
+        log.record(gen=g,**stats.compile(pop))
+    return tools.selBest(pop,k=1)[0][0], log, HOF
+
+#%%======================main==============================%%#
+if __name__ == '__main__':  
+    print('GA algorithm starting with the following settings:')
+    print('nepochs = ' + str(nepochs) + ' ngen = ' + str(ngen) + ' npop = ' + str(npop))
+    print('cxpb = ' + str(cxpb) + ' mutpb = ' + str(mutpb) + ' SID = ' + str(inputSID))
+    answer = input('Are the following settings okay? (0/1)  ')
+    if answer == 0:
+        raise ValueError('Custom Error: Please change settings in script file')    
+    
+    print('initializing processing pool')
+    return_var= []
+    processes = []
+    pool = mp.Pool(processes = n_proc)
+    pop_pool = [toolbox.population(n=npop) for x in range(n_proc)]
+    results = pool.map(main_program,pop_pool)
+    pool.close()
+    print('pool finished, outputing to JSON')    
+    
+    best_inds = [x[0] for x in results]
+    stats = [x[1] for x in results]
+    HOF_best = [x[2] for x in results]
+    #plt.plot([x['min'] for x in stats[0]])
+    results = best_inds
+    results = [[np.sort(x[0]),np.sort(x[1]),np.sort(x[2])] for x in results]
+    
+    resultsFit = [evalFit([x]) for x in results]
+    maxIndex = np.argmax(resultsFit)
+    
+    bestIndividual = results[maxIndex]
+    
+    singletonTransed = [singletonLookup[item] for item in bestIndividual[0]]
+    median = singletonTransed[5]
+    medianUntransed = bestIndividual[0][5]
+    singletonTransed = np.delete(singletonTransed, 5).tolist()
+    homoTransed = [bundleLookup[item] for item in bestIndividual[1]]
+    heteroTransed = [bundleLookup2[item] for item in bestIndividual[2]]
+    
+    outputData = { 'singleton' : singletonTransed, 'homo' : homoTransed, 'hetero' : heteroTransed, 'median' : median }
+    outputData = json.dumps(outputData)
+    with open('jsonOut.txt', 'w') as outfile:
+        outfile.write(str(outputData))
+
+    extended = np.unique(np.hstack((np.ravel([bundleLookup[x] for x in bestIndividual[1]]), np.ravel([bundleLookup2[x] for x in bestIndividual[2]]), [singletonLookup[x] for x in bestIndividual[0]]))).tolist()
+    homoTransed = [(x,x) for x in homoTransed]
+    extended= [(x,0) for x in extended]
+    outputDataFull  = extended+homoTransed+heteroTransed #median is in bestIndividual, so is included
+    outputDataFull=sorted(outputDataFull, key = getRank)
+    outputDataFull = [item[0] if item[1]==0 else item for item in outputDataFull]
+    outputData = { 'options' : outputDataFull }
+    outputData = json.dumps(outputData)
+    with open('..\\BehavioralValueMeasurements\\jsonOutExtended.txt', 'w') as outfile:
+        outfile.write(str(outputData))
+    #outputDataFull=sorted(outputDataFull, key = getRank)
+    #outputDataFull=sorted(outputDataFull) 
+    #plt.hold(True)
+    #plt.title(csv_filepath)
+    #sns.set_context(rc={"figure.figsize": (8, 4)})
+    #plt.bar(np.asarray(bestIndividual[0]),np.ones((1,len(bestIndividual[0])))[0], color = 'blue')
+    #plt.bar(np.asarray(bestIndividual[1]),np.ones((1,len(bestIndividual[1])))[0], color = 'red')
+    #plt.bar(np.asarray(bestIndividual[2]),np.ones((1,len(bestIndividual[2])))[0], color = 'green')
+    #individual=[bestIndividual]
+    #similarityCost=np.sum(np.in1d([singletonLookup[k] for k in individual[0][0]],[ bundleLookup[k] for k in individual[0][1] ]))
+    #similarity2=np.sum([np.sum(c)>1 for c in [np.in1d(p,[singletonLookup[k] for k in individual[0][0]]) for p in [ bundleLookup2[w] for w in individual[0][2] ]]])
\ No newline at end of file
diff --git a/OptionSelect/options_to_edit.txt b/OptionSelect/options_to_edit.txt
new file mode 100644
index 0000000..e83e892
--- /dev/null
+++ b/OptionSelect/options_to_edit.txt
@@ -0,0 +1 @@
+9999
\ No newline at end of file
diff --git a/OptionSelect/rank9999.xls b/OptionSelect/rank9999.xls
new file mode 100644
index 0000000000000000000000000000000000000000..44343313ac80aec0de56e3d27163ef9f85015b1e
GIT binary patch
literal 25600
zcmeHQeQ;FQbw9hSj}_uW5<+}hEJA<~0t*Rb@&K;@$JmW*Q=8PTON#}pKm|)w5iWK^
z!AYGzV#iH{i^o8dCaIfgk`jM4c3Q_KosQGV$Vr>prKU`3w{E5#cUqgaQzv74`}^Jd
z?%Q|ozI`mNGt<uO)4aRyojt#E&OP_sbMJlcF8t*W8eje5<MUn<=eSx@@?O49YC`k`
zuDi{BgW&sop10=nN?e0fIsPAcpe~Jutf`j0JulY2ax5V=Xz<r0DUVfs2c^hA;M^ex
zWxB|g!2@><k7NdNgX8;hW0{}rv02$Ln4k@35qKMMm5|e*Hp*OOS(7Sfs`49(?^#uD
zlqPvKal8B=dt>K1par<wlKqPBGpcM;<#dz@d0Ju4E6Qq_Cw~F{{L#-c)FrXGI<ZN{
zWLO5}fc#(nHUXQTrK>K}=2p1?Oj=ZZlk&aV#1>W8VcDhXy;?L!-6c7iFTs(JHKHsm
zsU;zGm*7as>`Q17L`Pg*x)KRl7ptY}k{lNhHSsd&)2n0cAjg_Za%{K+M?$i(b=enD
zmplEI9Yj2{Z4+qy_;wMQQqqprss&PwR*|;$w#8kG7kBSmzJ5oCx!JM2wQ3PYkOkKF
zzjSkM&oA3dZBi>&6qr_Z+j4MEQOjUzo6F@$^I5NwPU#WZNmDvA#xnnQM~5QpSiasT
zw5@D|-8(z_cS6f5S&lzI!??~&7}S}u5LxLG&sL^uVCz$+g;Zv<1*Ltm0_{;yYVd(x
z16HaF$XBHqktFh8`HaYas!J4SGo*9muFyX?KhV1g$?(HJw4dv6r~LesPp<lc@_#Y?
zk4n(LT7rJ21pQPA`Zr3@zg~j=yCvxNm!Oxjr<D9Bm7FO>{`q_fy0)j7U)xhm|NRnj
z9xp+Ess#PH67)}(pnt0b{c`R3V<o4)$UizxrW*P~jg1f2KU^<WClvp*fIc+z{KDrK
zO4ZYfju~2Hf1hFKL44|X!@N)Pza-mmxzOZ|Mw<@vbeuO8=owr>tV4J5&rHJ~*stTW
z+0b8-J8(JA$eCs6ft_Dd^{O`X(~XTS^)2wjNkwllbja6!(Eh<_6zPXKhW-+!IC6DR
z-?@hF*k22Jg7d<+8yovG{TX9tTG4U)HZIhm{lH6=A42qxr<>*D1O&$hx##rTUpbQA
zBu6YdTO%dYkhfE~;`Eb)9)^p~=@?;?RqBh}P!+z=BUL6-EQC#0h7fwRv=Fv)8A3R`
zlorCqFGC1@Tv`Ytr3@kTV`(7_t1^TzTbCBXSi7W<$$|LRVPKXaB))YR#bpSIZyknw
z8A9S)2OYN@A@Qv<zdRxFt%Lbx$P(N{^_`yH_}0Pvvm7Dut<zebkoeX?W-7;$_}0mk
zCnUag7L_L?zI8BVzjSp(;#+5Nc|zh_r@cHO@vXC@JR$L|v$Q-R@vXD0JR$L|(-8>a
z<Ac*ulP?-~_(lG^-_IA)<L8{gYdJV-)!;?mpB|BDAr~)R420BTKqK?fqet@@|42vB
z>OwTIt7A$8?MT`+suR50D2}K+B5U^OqmLFDmCa_0j0)3?QKV_nTqNrXg=t69D;-a*
z0DCg<6f18TRXn730iaVgy@i>J4dA?hU%c?|;r4MS7b^oHbOldKEB#Jld>33*j*xsd
zUI-r%7e<~Embj_fbz(P!XeUNir#KX_v<}5*HhWjJ6Z4ag6otYE7oTpU;Q2(TWs5qE
z=?wW}iNL7h@aVOKoX(X9kNg{_LqZNaLAeG(s?qv7l=At>z&)myKJmm8dGUo9!xYVT
zkLeI$n&}V$4O)tuW-X|N)WObUg{CpePaR6PG0N|eO1Ck}rzzdWsBn)oMulm`y|fQT
zl@74Pk@QC$4_*a8Rq5F~Ee>@sWh@IrL;h377F)6|u!OxF$;Nii@nn-UK(F+4FUhRT
zY}&m$H!{9@B7K8QaP07pL{)2k8lH!B%M!`RN}QWyH~w-mBIA7S1Va)sRcfnpLl=Jh
zR=%3oT!KK|60Sa|rqNZ?Q>>;fMvV@4rKUPJbn(L9MXPCY)$|stSsJ58cL=4XCO7oP
zTR)0cGt*VGrdUm9j2d<s-)XhEq1XTZU!&DDyK2@Jt63GJhP}gAlg<sD|F{2$Rx`_0
z(^ssfCq@n9-B(kW8+!L&UWr!ILN#mRBW)qntSKYXCI_CGd_7vtY*$T0q%Dq76OOdW
zfgeBfbhMf|u9}EQTNa}x9BGpSfBX8g(Q4+pY9b<SMU0woq)iS?o;@F}W}d4iBGRsm
zQ4@}|$$_VT^v}_1=DTVlBCR(@O*qmf2i|%4sc1C|sHQJI(lSufS4O1u^#0D1=d>Co
zG#o)ryTVly5ozr)YQmA$(|i8JL(ytlT{RJr))Av79BDnhe}3*%w3>yknutj2icu4e
zw4UC_fA71|YBH{xh)C;>Q4@}|p5BQUFGQ<Z<f@5?v^6nm!jaa~`_t!tJ4($|yimeQ
zEf_a9IpgLP`P!0WrN~Y9emh#|;(|~_lq+<RBQzXEc*wZzhG?Pf1)+!^SLhN)XgFp>
zPW|aGMhjh15Q=DVg)Vo5h9gAe*q$#%3td_eir8?4u5^Tk<3Z#jUw<K5=(2)PM1m`H
zwIej#`Xc{*;@T*oY3abQCz}WNOr$?#w|ruLJ|&y6+}aZijn$leVHwQX5wE#c_A^NX
z-WcLJIW|b_K6GGke0X$ZB7LtdFxir?7yWX5w;aNDf<a8ZhGi5}FiwCuVWCjYbBzKn
z36z2w?7rZaoNOn+9KK6icXig?@#GA=2gQ4nE&Fo2_h+``4jh<BA9d`Sou2}`_F*!(
zTlQmWxCK`S@E5j<Yh*Pk3@1=y6{pfQfmQm>Sf%fbRr>CDau(ip;(7On4<9@*I5Ids
zI(Bzvd+zY~M0%@Z+uD4Cd{_=+`w9IuqI@=rjR|+d&h4=CFnq|Fy73qt<TptjUdm!A
zx^;9Ejl15Ko0ylclCALlsMEY$MH^XAW@JsroEcA1uFDPX86Mf2!Tj1WG}VBeQrE#C
z#>+4a-3tTtyjr;pLALPvBU&7ruNIc+<Z(PXT~>vKBVV|}8zY44oKZq}KKqsDIq#+L
zE5pK(H(cRO5yExuC?Q-Ar%5HeJ1iXe!|6n+nGwR7Lv)MPhTU>JIRzfW^SkRu4jw`%
z9I~4%*^K&J56>P%=V7B4x(rwCrqQ^x|6g{Q1}v1&mj{MN_UHEaedZ&MF>}zD2UJ7v
zN2BktdQB>b#-RV29?PYC4IDRBdSSoZm^(B+Hi$0iB(uZ<B(obKm^B?kOtCZS>AA{5
zSj(IGp2mXT4|s&sG09J)+iY90GpGi_*lX_eMF~u(!z3Mu8%M`;6X{(xBsm4$gOG99
zz$XJHIq}JW>GfRI7jlulGreBlnO?8&jwkD7t>Dq>O^3!+1OK|CbQVP4q^exp#Nn*S
zBw{r&2?a~^ov}pU8B6pXBdw2-wtaZ~0BXP5HY7Cz-rbJI9f#jp|KeaxP{dVA#OOF1
z&{=M$<|+|%pl-Y>6%*Eyntn8CKwEF_7$J?=VTAX;w+@btFyrX#n6(W}*5{|nt<W%r
zZefy6R957;r9Adv`Zjqm^jzSxs9huewnNA+Roh!&^4;p{1{K)DXplS6A+|xmT`D39
z9P9vlF>vq+LZ95B_R-ykTJ`a1Fk1SSeYxED>XYd?ax$bd!7p(j_~gsjd-&zhccedn
zZxZ<XP9lv{<mGiqc}JEieqwNr4UX(b^9_&Z?p*DE_JBNo_rY8qqg`4a>kxWRcBfl#
zQmQ^${LiGEfaVjSZ#Mr^LQa8pDnz5D4}kVSh(-b2hoE;&HdwnR>yvypr;m;JxE&9m
zPa@mhf}3fGBKD#rlEn;l-H1{!3Z~_qKYZ(lH|*M$-Ki*@s$8MUPoPZ5G4PmU30?%F
zHRWD@Q`L;Hf|RA(X89M_eo`TRi&Xe5Exqda>nyiUnd=T*-}lDPKht{OKW3NX`pnkX
z?rA=AF8f8yh3jA+y`PXzN>`#Qaq<x!-^!X#?7&Pf#cAv*{(R^WL;EPK03*gA9`uNA
z^>mW&x_OPj<dK*mq|x6ljrDM8`k|SPrB_@&ghmmKuXbEMzNv9(te{KddlZ+(Sa4|=
zJc?+HMwbsO+#(u%<??a=hD)OdT^e8hyR@bV+RO-Aa|CTx1dTn&)xp==E{#3GrSYY+
zOXE9am&RAZE{$(^T^e85x-`Cjb!iJ7+PkU;i+l)l5q+1y9pg{mC2_~d(|0M{G3LH4
za}<xKzQcw;$@H`22vH;R@m-4B0M+qkHVta(J&c_sKU=jGUuPthlFUZT9OS*TDDr3P
z=O$~VH7Rq^sT2YBYgz)dDbkw4M@rgKyw#tyg0^`aOO?L$%8K><+6rh5tdRaR?ot?x
zlof=sBA#*pP1Vq}4+U!gO#@nGL)phX<afYuzL{m&=|jP4L91;jyP}6}*&5o}PpIcZ
z!D>NkY^bg!<BkTW`&oATP_SCiS{ura?QwsRGFWRh27@(#(oJww*iG#J9`?s-=tgDm
zMqev^N-NX>Xq^pZn(&ZHwZYoMCPcs*Kp7%aY$&sdhrF$|HY=?L3RVkRZ$p`MJS_IY
zb)DlI4Aub3cAaWNnVUQ;LbSmf`&qX)vpSl<YC)&jP^K^s(}S>^jKN?Hpsee38_GEH
zFmKS%4OU&jYC&h%P$ooAG0@b`e%%<_V6a-y1{=z}>0xiS*4nMKnm`0=0HvE6Z79>L
zherY$>M7__-@{<FpiMTEf$zz+s>Hb5PlMO_P_SCinKqQk+LNC{$v)Gev>GT_Ehye6
zg=3gG-NQUyLswY_^VO4f8~apRX4z1teot;zCA%A0O{W8E0A<f<v7x%B{(`TyPiZx|
z39JE>T4&o(4kVu3?nBpEbp>kxrB-CJa9ugxc=9W%#Dn908obK)Fq2-mu5)cDhb0fw
zU+pGsaGwtaYXGIzc{X&Of(}_w6aHWgpbY={Hgvv%{;DcP=JeCR4qq!+11R0Jz=m=x
z^f2WQ2cn5#um(^Xe1#3=km+H)5RN9J6|5Gt)rK+`dRPOn#B8pgWxH=MSOX{xUT8x(
z0DE$e57h&QNibjypfng`q-oc4a*cE#B_2qwlTXR|_x;DYWd_RKdBLIx8q>DRw>W~<
z9zk0YL1Qj=<t>Y#bwtp(jB>F|@Gfmd1g$HAwladotnbR>77dry9YN#sEtii&flK4l
zB$tL|Q4x*Lfn7e14=zo=Yq#hBd`<51aen907_}~qiPfcTaA=%5WkB=fH&w~(LY&Dl
z2wN+R7Zt*oP==Z(pFl~F*H1G{q5x|EWuj=)P?9-qTqGMfZCvmHOdA>89=ejYLR1Be
z70_x~vAAFbE5JVB$){Cmh9Lt5s|9Vhq3kN2Jm_oH6F4*D1Zx0g?_Xj=*`qv}@S(l^
zben;KHGnebEVZHRc%Gb4B|-GNF&L}?lpbGZL)kw)If;@WJ(l$PnGUQ0lv?rrfa!{j
zMK`e<d-8c7x<F|)nGUQLbh!;>Z};R&sua;sGth(8f_B<aCIwIaz}KqB5>r>O22j>@
zg$-ry@Z_sLRL_SD6s#7s%Z4(=cydOSoYaIp3|0%e(uOh<dGds>wOv6?PXcQIrH5D9
zP$n!-zUf1CNSI*=tQPc28_K-q$)Bjw^j}j~um(`NsoRD!{dw}7uT{@X3>2&ubhQm-
zmh|M$e5f|qL=#v8D8s+UhBBFY@`5T&NG$LJ5v&%p*M>6Zdh$JAtBxjPFjxa9>$=8<
zGBtbheIMH1&rWAN4AuZjgV)+nW^hk_s7ivuApzC^O09i1l!@Mxzd=b*uLT9G1-;6K
zay;<lybpadOM^{a!D>NkGV-q9U!TK@gu@0`66<lM(7w~L)F*|L4MU+g&0#3C&_;2%
zD4@<^f$?itL3StCU*^Q632S8jU5))#->hNh?8nK2R@{$=k@s_D8XO)bcv$i1rxkpL
zfve=+^laH1V#AX{ZhA;vqaR%cV14l(2GW{j(xK{KpuBqD7cTzfrhN@x`y3L-vL|0*
zH`sxbPXZD+neM2FbS?$2!^s89cAS*>Iv6=`Y`|&%cwATPSo<|cuKkU_O7NjX<*0a|
z;(>|>DjukKpyGjw2Pz(@c%b5eiU%qlsCb~_fuEfR;@1DkmtTDO^s4y{#~;S}zw0Mw
zxXynJC)fO3|8u?1mv-B5@&)v5IJwrp11FyY?8DiD^B_(>A2^JYZ=3JOX`T=0wf}U5
z2f5^DD+E&~U+U|fWik~cis2&$s=sw~Z2!l&>Fi^8n4`JzQ8_9esCb~_fr<wz9;kSr
z;(>|>DjukKpyGjw2Pz(@ctEYGxrpY9neX+vRlf!&*V$a>b36MKoLpdY?ap=lbe!CT
z&9y$aMK|H(dY?OvxpkOpem)nNgOl59=i!`>lRH<r1(geS{+|IEd~$1C8_vZz+i`Nw
z7dLP5`wo0^Ulcbut-#4GN8CHaO+ftqU7Y-^9Pi%)H{(}*{Gnn7KeXn*2E-p%U(V4Y
z)oObQEnpmF(kw;Js#0xw*H@vEZDww7h4wN0@O+oDW4|Gn{fYktAka@6$_2iqe~^ZF
zzeWAVkw1=Q-T52iA!QROg`en`lG_3Od@jZQQ?CAn6vobjqZ+%MNAc5p{?1bWdrX&S
O&${<ff9`NJ`u_(@n8FGG

literal 0
HcmV?d00001


From 4ecb31d3c18030ec6b1b53279e5fae73037ad288 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 10 Mar 2016 19:34:16 -0800
Subject: [PATCH 14/16] Delete jsonOut.txt

---
 OptionSelect/jsonOut.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 OptionSelect/jsonOut.txt

diff --git a/OptionSelect/jsonOut.txt b/OptionSelect/jsonOut.txt
deleted file mode 100644
index 92b8e79..0000000
--- a/OptionSelect/jsonOut.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"hetero": [[13, 24], [18, 26], [18, 30], [9, 16], [16, 30], [23, 21], [21, 26], [4, 8], [30, 21], [30, 12]], "singleton": [11, 18, 17, 16, 24, 13, 9, 26, 30, 21], "homo": [18, 24, 16, 23, 9, 26, 6, 4, 21, 5], "median": 23}
\ No newline at end of file

From e986d9cc4af1c71304d6d79106105c8bc33ddee5 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 10 Mar 2016 19:34:23 -0800
Subject: [PATCH 15/16] Delete jsonOutExtended.txt

---
 OptionSelect/jsonOutExtended.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 OptionSelect/jsonOutExtended.txt

diff --git a/OptionSelect/jsonOutExtended.txt b/OptionSelect/jsonOutExtended.txt
deleted file mode 100644
index a3fbdab..0000000
--- a/OptionSelect/jsonOutExtended.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"options": [[13, 24], [18, 18], [18, 26], [18, 30], 11, 18, [24, 24], 17, [9, 16], [13, 13], [16, 16], [23, 23], [8, 9], [16, 30], [10, 16], [9, 9], 16, 24, 23, 13, [23, 21], [21, 26], [26, 26], [8, 8], [4, 8], [6, 6], [4, 4], [30, 21], [30, 12], [21, 21], 9, 26, [5, 5], 30, 21]}
\ No newline at end of file

From 33710ac5429bb5d78836fd63fcae5e0b2029fa58 Mon Sep 17 00:00:00 2001
From: Calvin Leather <caleather@gmail.com>
Date: Thu, 10 Mar 2016 19:35:02 -0800
Subject: [PATCH 16/16] Added files via upload

---
 Procedure.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Procedure.txt

diff --git a/Procedure.txt b/Procedure.txt
new file mode 100644
index 0000000..a59173b
--- /dev/null
+++ b/Procedure.txt
@@ -0,0 +1 @@
+change options_to_edit.txt to correct SID
\ No newline at end of file