From cfd5182b757b25e28cee73ef7d50fa75d56afcdc Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Fri, 29 Sep 2023 12:53:14 +0100 Subject: [PATCH 1/7] Updates to fresh and fixes to tests --- fresh/feat.q | 4 +- fresh/tests/features.t | 759 ++++++++++++++++++++--------------------- fresh/tests/test.p | 110 +++--- fresh/utils.q | 4 +- ml.q | 5 +- 5 files changed, 442 insertions(+), 440 deletions(-) diff --git a/fresh/feat.q b/fresh/feat.q index d3fbe60..1bf4ea5 100644 --- a/fresh/feat.q +++ b/fresh/feat.q @@ -205,7 +205,7 @@ fresh.feat.firstMin:{[data] // @param data {number[]} Numerical data points // @return {dictionary} Spectral centroid, variance, skew and kurtosis fresh.feat.fftAggreg:{[data] - a:fresh.i.abso[fresh.i.rfft data]`; + a:fresh.i.abso[.p.toraw fresh.i.rfft data]`; l:"f"$til count a; mean:1.,(sum each a*/:3(l*)\l)%sum a; m1:mean 1;m2:mean 2;m3:mean 3;m4:mean 4; @@ -227,7 +227,7 @@ fresh.feat.fftCoeff:{[data;coeff] r:(fresh.i.angle[fx;`deg pykw 1b]`; fresh.i.real[fx]`; fresh.i.imag[fx]`; - fresh.i.abso[fx:fresh.i.rfft data]` + fresh.i.abso[fx:.p.toraw fresh.i.rfft data]` ); fftKeys:`$"_"sv'string raze(`coeff,/:til coeff),\:/:`angle`real`imag`abs; fftVals:raze coeff#'r,\:coeff#0n; diff --git a/fresh/tests/features.t b/fresh/tests/features.t index 7ec2b8b..69a8ef4 100644 --- a/fresh/tests/features.t +++ b/fresh/tests/features.t @@ -3,7 +3,6 @@ The following code is used to test the outputs from the functions written in q b that are present in the tsfresh documentation. 
It should be noted that for large lists of values some of the functions which include exponentials suffer from overflow namely skewness,kurtosis and absenergy \ -\l p.q \l ml.q \l fresh/init.q \l fresh/tests/test.p @@ -32,165 +31,165 @@ changequantkeys:`max`min`mean`var`median`std np:.p.import[`numpy] -.ml.fresh.feat.hasDup[xj] ~ hasduplicate[xj] -.ml.fresh.feat.hasDup[xf] ~ hasduplicate[xf] -.ml.fresh.feat.hasDup[xb] ~ hasduplicate[xb] -.ml.fresh.feat.hasDup[xi] ~ hasduplicate[xi] -.ml.fresh.feat.hasDup[x0] ~ hasduplicate[x0] -.ml.fresh.feat.hasDup[x1] ~ hasduplicate[x1] -.ml.fresh.feat.hasDup[x2] ~ hasduplicate[x2] -.ml.fresh.feat.hasDupMin[xj] ~ hasduplicatemin[xj] -.ml.fresh.feat.hasDupMin[xf] ~ hasduplicatemin[xf] -.ml.fresh.feat.hasDupMin[xi] ~ hasduplicatemin[xi] -.ml.fresh.feat.hasDupMin[xb] ~ hasduplicatemin[xb] +.ml.fresh.feat.hasDup[xj] ~ .p.get[`hasduplicate;<]xj +.ml.fresh.feat.hasDup[xf] ~ .p.get[`hasduplicate;<]xf +.ml.fresh.feat.hasDup[xb] ~ .p.get[`hasduplicate;<]xb +.ml.fresh.feat.hasDup[xi] ~ .p.get[`hasduplicate;<]xi +.ml.fresh.feat.hasDup[x0] ~ .p.get[`hasduplicate;<]x0 +.ml.fresh.feat.hasDup[x1] ~ .p.get[`hasduplicate;<]x1 +.ml.fresh.feat.hasDup[x2] ~ .p.get[`hasduplicate;<]x2 +.ml.fresh.feat.hasDupMin[xj] ~ .p.get[`hasduplicatemin;<]xj +.ml.fresh.feat.hasDupMin[xf] ~ .p.get[`hasduplicatemin;<]xf +.ml.fresh.feat.hasDupMin[xi] ~ .p.get[`hasduplicatemin;<]xi +.ml.fresh.feat.hasDupMin[xb] ~ .p.get[`hasduplicatemin;<]xb .ml.fresh.feat.hasDupMin[x0] ~ 0b -.ml.fresh.feat.hasDupMin[x1] ~ hasduplicatemin[x1] -.ml.fresh.feat.hasDupMin[x2] ~ hasduplicatemin[x2] -.ml.fresh.feat.hasDupMax[xj] ~ hasduplicatemax[xj] -.ml.fresh.feat.hasDupMax[xf] ~ hasduplicatemax[xf] -.ml.fresh.feat.hasDupMax[xi] ~ hasduplicatemax[xi] -.ml.fresh.feat.hasDupMax[xb] ~ hasduplicatemax[xb] +.ml.fresh.feat.hasDupMin[x1] ~ .p.get[`hasduplicatemin;<]x1 +.ml.fresh.feat.hasDupMin[x2] ~ .p.get[`hasduplicatemin;<]x2 +.ml.fresh.feat.hasDupMax[xj] ~ .p.get[`hasduplicatemax;<]xj 
+.ml.fresh.feat.hasDupMax[xf] ~ .p.get[`hasduplicatemax;<]xf +.ml.fresh.feat.hasDupMax[xi] ~ .p.get[`hasduplicatemax;<]xi +.ml.fresh.feat.hasDupMax[xb] ~ .p.get[`hasduplicatemax;<]xb .ml.fresh.feat.hasDupMax[x0] ~ 0b -.ml.fresh.feat.hasDupMax[x1] ~ hasduplicatemax[x1] -.ml.fresh.feat.hasDupMax[x2] ~ hasduplicatemax[x2] +.ml.fresh.feat.hasDupMax[x1] ~ .p.get[`hasduplicatemax;<]x1 +.ml.fresh.feat.hasDupMax[x2] ~ .p.get[`hasduplicatemax;<]x2 .ml.fresh.feat.hasDup[xmixf] ~ 1b -.ml.fresh.feat.hasDupMin[xmixf] ~ hasduplicatemin[xmixf] -.ml.fresh.feat.hasDupMax[xmixf] ~ hasduplicatemax[xmixf] +.ml.fresh.feat.hasDupMin[xmixf] ~ .p.get[`hasduplicatemin;<]xmixf +.ml.fresh.feat.hasDupMax[xmixf] ~ .p.get[`hasduplicatemax;<]xmixf .ml.fresh.feat.hasDup[xnull] ~ 1b .ml.fresh.feat.hasDupMin[xnull] ~ 0b .ml.fresh.feat.hasDupMax[xnull] ~ 0b -.ml.fresh.feat.absEnergy[xj] ~ "f"$abs_energy[xj] -.ml.fresh.feat.absEnergy[xf] ~ abs_energy[xf] -.ml.fresh.feat.absEnergy[xb] ~ "f"$abs_energy[xb] -.ml.fresh.feat.absEnergy[xi] = "f"$abs_energy[xi] -.ml.fresh.feat.absEnergy[x0] ~ "f"$abs_energy[x0] -.ml.fresh.feat.absEnergy[x1] ~ "f"$abs_energy[x1] -.ml.fresh.feat.absEnergy[x2] ~ "f"$abs_energy[x2] +.ml.fresh.feat.absEnergy[xj] ~ "f"$.p.get[`abs_energy;<][xj] +.ml.fresh.feat.absEnergy[xf] ~ .p.get[`abs_energy;<][xf] +.ml.fresh.feat.absEnergy[xb] ~ "f"$.p.get[`abs_energy;<][xb] +.ml.fresh.feat.absEnergy[xi] = "f"$.p.get[`abs_energy;<][xi] +.ml.fresh.feat.absEnergy[x0] ~ "f"$.p.get[`abs_energy;<][x0] +.ml.fresh.feat.absEnergy[x1] ~ "f"$.p.get[`abs_energy;<][x1] +.ml.fresh.feat.absEnergy[x2] ~ "f"$.p.get[`abs_energy;<][x2] .ml.fresh.feat.absEnergy[xmixf] ~ sum l*l:xmixf .ml.fresh.feat.absEnergy[xnull] ~ 0f -.ml.fresh.feat.meanChange[xj] ~ mean_change[xj] -.ml.fresh.feat.meanChange[xf] ~ mean_change[xf] -.ml.fresh.feat.meanChange[xi] ~ mean_change[xi] -.ml.fresh.feat.meanChange[x0] ~ mean_change[x0] -.ml.fresh.feat.meanChange[x1] ~ mean_change[x1] -.ml.fresh.feat.meanChange[x2] ~ mean_change[x2] 
-/.ml.fresh.feat.meanChange[xb] ~ mean_change[xb] - -.ml.fresh.feat.absSumChange[xj] ~ absolute_sum_of_changes[xj] -.ml.fresh.feat.absSumChange[xf] ~ absolute_sum_of_changes[xf] -.ml.fresh.feat.absSumChange[xi] ~ "i"$absolute_sum_of_changes[xi] -.ml.fresh.feat.absSumChange[xb] ~ "i"$absolute_sum_of_changes[xb] +.ml.fresh.feat.meanChange[xj] ~ .p.get[`mean_change;<][xj] +.ml.fresh.feat.meanChange[xf] ~ .p.get[`mean_change;<][xf] +.ml.fresh.feat.meanChange[xi] ~ .p.get[`mean_change;<][xi] +.ml.fresh.feat.meanChange[x0] ~ .p.get[`mean_change;<][x0] +.ml.fresh.feat.meanChange[x1] ~ .p.get[`mean_change;<][x1] +.ml.fresh.feat.meanChange[x2] ~ .p.get[`mean_change;<][x2] +/.ml.fresh.feat.meanChange[xb] ~ .p.get[`mean_change;<][xb] + +.ml.fresh.feat.absSumChange[xj] ~ .p.get[`absolute_sum_of_changes;<][xj] +.ml.fresh.feat.absSumChange[xf] ~ .p.get[`absolute_sum_of_changes;<][xf] +.ml.fresh.feat.absSumChange[xi] ~ "i"$.p.get[`absolute_sum_of_changes;<][xi] +.ml.fresh.feat.absSumChange[xb] ~ "i"$.p.get[`absolute_sum_of_changes;<][xb] .ml.fresh.feat.absSumChange[x0] ~ 0f -.ml.fresh.feat.absSumChange[x1] ~ absolute_sum_of_changes[x1] -.ml.fresh.feat.absSumChange[x2] ~ absolute_sum_of_changes[x2] +.ml.fresh.feat.absSumChange[x1] ~ .p.get[`absolute_sum_of_changes;<][x1] +.ml.fresh.feat.absSumChange[x2] ~ .p.get[`absolute_sum_of_changes;<][x2] .ml.fresh.feat.absSumChange[xnull] ~ 0f -.ml.fresh.feat.meanAbsChange[xj] ~ mean_abs_change[xj] -.ml.fresh.feat.meanAbsChange[xf] ~ mean_abs_change[xf] -.ml.fresh.feat.meanAbsChange[xb] ~ mean_abs_change[xb] -.ml.fresh.feat.meanAbsChange[xi] ~ mean_abs_change[xi] +.ml.fresh.feat.meanAbsChange[xj] ~ .p.get[`mean_abs_change][xj]` +.ml.fresh.feat.meanAbsChange[xf] ~ .p.get[`mean_abs_change][xf]` +.ml.fresh.feat.meanAbsChange[xb] ~ .p.get[`mean_abs_change][xb]` +.ml.fresh.feat.meanAbsChange[xi] ~ .p.get[`mean_abs_change][xi]` .ml.fresh.feat.meanAbsChange[x0] ~ 0n -.ml.fresh.feat.meanAbsChange[x1] ~ mean_abs_change[x1] 
-.ml.fresh.feat.meanAbsChange[x2] ~ mean_abs_change[x2] +.ml.fresh.feat.meanAbsChange[x1] ~ .p.get[`mean_abs_change][x1]` +.ml.fresh.feat.meanAbsChange[x2] ~ .p.get[`mean_abs_change][x2]` .ml.fresh.feat.meanAbsChange[xnull] ~ 0n -.ml.fresh.feat.countAboveMean[xj] ~ "i"$count_above_mean[xj] -.ml.fresh.feat.countAboveMean[xf] ~ "i"$count_above_mean[xf] -.ml.fresh.feat.countAboveMean[xb] ~ "i"$count_above_mean[xb] -.ml.fresh.feat.countAboveMean[xi] ~ "i"$count_above_mean[xi] -.ml.fresh.feat.countAboveMean[x0] ~ "i"$count_above_mean[x0] -.ml.fresh.feat.countAboveMean[x1] ~ "i"$count_above_mean[x1] -.ml.fresh.feat.countAboveMean[x2] ~ "i"$count_above_mean[x2] -.ml.fresh.feat.countAboveMean[xnull] ~ "i"$count_above_mean[xnull] - -.ml.fresh.feat.countBelowMean[xj] ~ "i"$count_below_mean[xj] -.ml.fresh.feat.countBelowMean[xf] ~ "i"$count_below_mean[xf] -.ml.fresh.feat.countBelowMean[xb] ~ "i"$count_below_mean[xb] -.ml.fresh.feat.countBelowMean[xi] ~ "i"$count_below_mean[xi] -.ml.fresh.feat.countBelowMean[x0] ~ "i"$count_below_mean[x0] -.ml.fresh.feat.countBelowMean[x1] ~ "i"$count_below_mean[x1] -.ml.fresh.feat.countBelowMean[x2] ~ "i"$count_below_mean[x2] -.ml.fresh.feat.countBelowMean[xnull] ~ "i"$count_below_mean[xnull] - -.ml.fresh.feat.firstMax[xj] ~ first_location_of_maximum[xj] -.ml.fresh.feat.firstMax[xf] ~ first_location_of_maximum[xf] -.ml.fresh.feat.firstMax[xb] ~ first_location_of_maximum[xb] -.ml.fresh.feat.firstMax[xi] ~ first_location_of_maximum[xi] +.ml.fresh.feat.countAboveMean[xj] ~ "i"$.p.get[`count_above_mean][xj]` +.ml.fresh.feat.countAboveMean[xf] ~ "i"$.p.get[`count_above_mean][xf]` +.ml.fresh.feat.countAboveMean[xb] ~ "i"$.p.get[`count_above_mean][xb]` +.ml.fresh.feat.countAboveMean[xi] ~ "i"$.p.get[`count_above_mean][xi]` +.ml.fresh.feat.countAboveMean[x0] ~ "i"$.p.get[`count_above_mean][x0]` +.ml.fresh.feat.countAboveMean[x1] ~ "i"$.p.get[`count_above_mean][x1]` +.ml.fresh.feat.countAboveMean[x2] ~ "i"$.p.get[`count_above_mean][x2]` 
+.ml.fresh.feat.countAboveMean[xnull] ~ "i"$.p.get[`count_above_mean][xnull]` + +.ml.fresh.feat.countBelowMean[xj] ~ "i"$.p.get[`count_below_mean][xj]` +.ml.fresh.feat.countBelowMean[xf] ~ "i"$.p.get[`count_below_mean][xf]` +.ml.fresh.feat.countBelowMean[xb] ~ "i"$.p.get[`count_below_mean][xb]` +.ml.fresh.feat.countBelowMean[xi] ~ "i"$.p.get[`count_below_mean][xi]` +.ml.fresh.feat.countBelowMean[x0] ~ "i"$.p.get[`count_below_mean][x0]` +.ml.fresh.feat.countBelowMean[x1] ~ "i"$.p.get[`count_below_mean][x1]` +.ml.fresh.feat.countBelowMean[x2] ~ "i"$.p.get[`count_below_mean][x2]` +.ml.fresh.feat.countBelowMean[xnull] ~ "i"$.p.get[`count_below_mean][xnull]` + +.ml.fresh.feat.firstMax[xj] ~ .p.get[`first_location_of_maximum][xj]` +.ml.fresh.feat.firstMax[xf] ~ .p.get[`first_location_of_maximum][xf]` +.ml.fresh.feat.firstMax[xb] ~ .p.get[`first_location_of_maximum][xb]` +.ml.fresh.feat.firstMax[xi] ~ .p.get[`first_location_of_maximum][xi]` .ml.fresh.feat.firstMax[x0] ~ 0n -.ml.fresh.feat.firstMax[x1] ~ first_location_of_maximum[x1] -.ml.fresh.feat.firstMax[x2] ~ first_location_of_maximum[x2] +.ml.fresh.feat.firstMax[x1] ~ .p.get[`first_location_of_maximum][x1]` +.ml.fresh.feat.firstMax[x2] ~ .p.get[`first_location_of_maximum][x2]` .ml.fresh.feat.firstMax[xnull] ~ 1f -.ml.fresh.feat.firstMin[xj] ~ first_location_of_minimum[xj] -.ml.fresh.feat.firstMin[xf] ~ first_location_of_minimum[xf] -.ml.fresh.feat.firstMin[xb] ~ first_location_of_minimum[xb] -.ml.fresh.feat.firstMin[xi] ~ first_location_of_minimum[xi] +.ml.fresh.feat.firstMin[xj] ~ .p.get[`first_location_of_minimum][xj]` +.ml.fresh.feat.firstMin[xf] ~ .p.get[`first_location_of_minimum][xf]` +.ml.fresh.feat.firstMin[xb] ~ .p.get[`first_location_of_minimum][xb]` +.ml.fresh.feat.firstMin[xi] ~ .p.get[`first_location_of_minimum][xi]` .ml.fresh.feat.firstMin[x0] ~ 0n -.ml.fresh.feat.firstMin[x1] ~ first_location_of_minimum[x1] -.ml.fresh.feat.firstMin[x2] ~ first_location_of_minimum[x2] +.ml.fresh.feat.firstMin[x1] ~
.p.get[`first_location_of_minimum][x1]` +.ml.fresh.feat.firstMin[x2] ~ .p.get[`first_location_of_minimum][x2]` .ml.fresh.feat.firstMin[xnull] ~ 1f -.ml.fresh.feat.ratioValNumToSeriesLength[xj] ~ ratio_val_num_to_t_series[xj] -.ml.fresh.feat.ratioValNumToSeriesLength[xf] ~ ratio_val_num_to_t_series[xf] -.ml.fresh.feat.ratioValNumToSeriesLength[xb] ~ ratio_val_num_to_t_series[xb] -.ml.fresh.feat.ratioValNumToSeriesLength[xi] ~ ratio_val_num_to_t_series[xi] +.ml.fresh.feat.ratioValNumToSeriesLength[xj] ~ .p.get[`ratio_val_num_to_t_series][xj]` +.ml.fresh.feat.ratioValNumToSeriesLength[xf] ~ .p.get[`ratio_val_num_to_t_series][xf]` +.ml.fresh.feat.ratioValNumToSeriesLength[xb] ~ .p.get[`ratio_val_num_to_t_series][xb]` +.ml.fresh.feat.ratioValNumToSeriesLength[xi] ~ .p.get[`ratio_val_num_to_t_series][xi]` .ml.fresh.feat.ratioValNumToSeriesLength[x0] ~ 0n -.ml.fresh.feat.ratioValNumToSeriesLength[x1] ~ ratio_val_num_to_t_series[x1] -.ml.fresh.feat.ratioValNumToSeriesLength[x2] ~ ratio_val_num_to_t_series[x2] +.ml.fresh.feat.ratioValNumToSeriesLength[x1] ~ .p.get[`ratio_val_num_to_t_series][x1]` +.ml.fresh.feat.ratioValNumToSeriesLength[x2] ~ .p.get[`ratio_val_num_to_t_series][x2]` .ml.fresh.feat.ratioValNumToSeriesLength[xnull] ~ 0.0001 -.ml.fresh.feat.ratioBeyondRSigma[xj;0.2] ~ ratio_beyond_r_sigma[xj;0.2] -.ml.fresh.feat.ratioBeyondRSigma[xj;2.0] ~ ratio_beyond_r_sigma[xj;2.0] -.ml.fresh.feat.ratioBeyondRSigma[xj;10] ~ ratio_beyond_r_sigma[xj;10] -.ml.fresh.feat.ratioBeyondRSigma[xf;0.2] ~ ratio_beyond_r_sigma[xf;0.2] -.ml.fresh.feat.ratioBeyondRSigma[xf;2.0] ~ ratio_beyond_r_sigma[xf;2.0] -.ml.fresh.feat.ratioBeyondRSigma[xf;10] ~ ratio_beyond_r_sigma[xf;10] -.ml.fresh.feat.ratioBeyondRSigma[xi;0.2] ~ ratio_beyond_r_sigma[xi;0.2] -.ml.fresh.feat.ratioBeyondRSigma[xi;2.0] ~ ratio_beyond_r_sigma[xi;2.0] -.ml.fresh.feat.ratioBeyondRSigma[xi;10] ~ ratio_beyond_r_sigma[xi;10] -.ml.fresh.feat.ratioBeyondRSigma[xb;0.2] ~ ratio_beyond_r_sigma[xb;0.2]
-.ml.fresh.feat.ratioBeyondRSigma[xb;2.0] ~ ratio_beyond_r_sigma[xb;2.0] -.ml.fresh.feat.ratioBeyondRSigma[xb;10] ~ ratio_beyond_r_sigma[xb;10] +.ml.fresh.feat.ratioBeyondRSigma[xj;0.2] ~ .p.get[`ratio_beyond_r_sigma][xj;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[xj;2.0] ~ .p.get[`ratio_beyond_r_sigma][xj;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[xj;10] ~ .p.get[`ratio_beyond_r_sigma][xj;10]` +.ml.fresh.feat.ratioBeyondRSigma[xf;0.2] ~ .p.get[`ratio_beyond_r_sigma][xf;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[xf;2.0] ~ .p.get[`ratio_beyond_r_sigma][xf;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[xf;10] ~ .p.get[`ratio_beyond_r_sigma][xf;10]` +.ml.fresh.feat.ratioBeyondRSigma[xi;0.2] ~ .p.get[`ratio_beyond_r_sigma][xi;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[xi;2.0] ~ .p.get[`ratio_beyond_r_sigma][xi;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[xi;10] ~ .p.get[`ratio_beyond_r_sigma][xi;10]` +.ml.fresh.feat.ratioBeyondRSigma[xb;0.2] ~ .p.get[`ratio_beyond_r_sigma][xb;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[xb;2.0] ~ .p.get[`ratio_beyond_r_sigma][xb;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[xb;10] ~ .p.get[`ratio_beyond_r_sigma][xb;10]` .ml.fresh.feat.ratioBeyondRSigma[x0;0.2] ~ 0n .ml.fresh.feat.ratioBeyondRSigma[x0;2.0] ~ 0n .ml.fresh.feat.ratioBeyondRSigma[x0;10] ~ 0n -.ml.fresh.feat.ratioBeyondRSigma[x1;0.2] ~ ratio_beyond_r_sigma[x1;0.2] -.ml.fresh.feat.ratioBeyondRSigma[x1;2.0] ~ ratio_beyond_r_sigma[x1;2.0] -.ml.fresh.feat.ratioBeyondRSigma[x1;10] ~ ratio_beyond_r_sigma[x1;10] -.ml.fresh.feat.ratioBeyondRSigma[x2;0.2] ~ ratio_beyond_r_sigma[x2;0.2] -.ml.fresh.feat.ratioBeyondRSigma[x2;2.0] ~ ratio_beyond_r_sigma[x2;2.0] -.ml.fresh.feat.ratioBeyondRSigma[x2;10] ~ ratio_beyond_r_sigma[x2;10] +.ml.fresh.feat.ratioBeyondRSigma[x1;0.2] ~ .p.get[`ratio_beyond_r_sigma][x1;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[x1;2.0] ~ .p.get[`ratio_beyond_r_sigma][x1;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[x1;10] ~ .p.get[`ratio_beyond_r_sigma][x1;10]` +.ml.fresh.feat.ratioBeyondRSigma[x2;0.2] ~ 
.p.get[`ratio_beyond_r_sigma][x2;0.2]` +.ml.fresh.feat.ratioBeyondRSigma[x2;2.0] ~ .p.get[`ratio_beyond_r_sigma][x2;2.0]` +.ml.fresh.feat.ratioBeyondRSigma[x2;10] ~ .p.get[`ratio_beyond_r_sigma][x2;10]` .ml.fresh.feat.ratioBeyondRSigma[xnull;0.2] ~ 0f .ml.fresh.feat.ratioBeyondRSigma[xnull;2.0] ~ 0f .ml.fresh.feat.ratioBeyondRSigma[xnull;10] ~ 0f -.ml.fresh.feat.perRecurToAllData[xj] ~ percentage_recurring_all_data[xj] -.ml.fresh.feat.perRecurToAllData[xf] ~ percentage_recurring_all_data[xf] -.ml.fresh.feat.perRecurToAllData[xb] ~ percentage_recurring_all_data[xb] -.ml.fresh.feat.perRecurToAllData[xi] ~ percentage_recurring_all_data[xi] -.ml.fresh.feat.perRecurToAllData[x1] ~ percentage_recurring_all_data[x1] -.ml.fresh.feat.perRecurToAllData[x2] ~ percentage_recurring_all_data[x2] +.ml.fresh.feat.perRecurToAllData[xj] ~ .p.get[`percentage_recurring_all_data][xj]` +.ml.fresh.feat.perRecurToAllData[xf] ~ .p.get[`percentage_recurring_all_data][xf]` +.ml.fresh.feat.perRecurToAllData[xb] ~ .p.get[`percentage_recurring_all_data][xb]` +.ml.fresh.feat.perRecurToAllData[xi] ~ .p.get[`percentage_recurring_all_data][xi]` +.ml.fresh.feat.perRecurToAllData[x1] ~ .p.get[`percentage_recurring_all_data][x1]` +.ml.fresh.feat.perRecurToAllData[x2] ~ .p.get[`percentage_recurring_all_data][x2]` .ml.fresh.feat.perRecurToAllData[xnull] ~ 1f -.ml.fresh.feat.perRecurToAllVal[xj] ~ percentage_recurring_all_val[xj] -.ml.fresh.feat.perRecurToAllVal[xf] ~ percentage_recurring_all_val[xf] -.ml.fresh.feat.perRecurToAllVal[xb] ~ percentage_recurring_all_val[xb] -.ml.fresh.feat.perRecurToAllVal[xi] ~ percentage_recurring_all_val[xi] -.ml.fresh.feat.perRecurToAllVal[x1] ~ percentage_recurring_all_val[x1] -.ml.fresh.feat.perRecurToAllVal[x2] ~ percentage_recurring_all_val[x2] +.ml.fresh.feat.perRecurToAllVal[xj] ~ .p.get[`percentage_recurring_all_val][xj]` +.ml.fresh.feat.perRecurToAllVal[xf] ~ .p.get[`percentage_recurring_all_val][xf]` +.ml.fresh.feat.perRecurToAllVal[xb] ~
.p.get[`percentage_recurring_all_val][xb]` +.ml.fresh.feat.perRecurToAllVal[xi] ~ .p.get[`percentage_recurring_all_val][xi]` +.ml.fresh.feat.perRecurToAllVal[x1] ~ .p.get[`percentage_recurring_all_val][x1]` +.ml.fresh.feat.perRecurToAllVal[x2] ~ .p.get[`percentage_recurring_all_val][x2]` .ml.fresh.feat.perRecurToAllVal[xnull] ~ 1f -.ml.fresh.feat.largestDev[xj;0.5] ~ large_standard_deviation[xj;0.5] -.ml.fresh.feat.largestDev[xj;5.0] ~ large_standard_deviation[xj;5.0] -.ml.fresh.feat.largestDev[xj;1] ~ large_standard_deviation[xj;1] -.ml.fresh.feat.largestDev[xf;0.5] ~ large_standard_deviation[xf;0.5] -.ml.fresh.feat.largestDev[xf;5.0] ~ large_standard_deviation[xf;5.0] -.ml.fresh.feat.largestDev[xf;1] ~ large_standard_deviation[xf;1] -.ml.fresh.feat.largestDev[xi;0.5] ~ large_standard_deviation[xi;0.5] -.ml.fresh.feat.largestDev[xi;5.0] ~ large_standard_deviation[xi;5.0] -.ml.fresh.feat.largestDev[xi;1] ~ large_standard_deviation[xi;1] +.ml.fresh.feat.largestDev[xj;0.5] ~ .p.get[`large_standard_deviation][xj;0.5]` +.ml.fresh.feat.largestDev[xj;5.0] ~ .p.get[`large_standard_deviation][xj;5.0]` +.ml.fresh.feat.largestDev[xj;1] ~ .p.get[`large_standard_deviation][xj;1]` +.ml.fresh.feat.largestDev[xf;0.5] ~ .p.get[`large_standard_deviation][xf;0.5]` +.ml.fresh.feat.largestDev[xf;5.0] ~ .p.get[`large_standard_deviation][xf;5.0]` +.ml.fresh.feat.largestDev[xf;1] ~ .p.get[`large_standard_deviation][xf;1]` +.ml.fresh.feat.largestDev[xi;0.5] ~ .p.get[`large_standard_deviation][xi;0.5]` +.ml.fresh.feat.largestDev[xi;5.0] ~ .p.get[`large_standard_deviation][xi;5.0]` +.ml.fresh.feat.largestDev[xi;1] ~ .p.get[`large_standard_deviation][xi;1]` .ml.fresh.feat.largestDev[x0;0.5] ~ 0b .ml.fresh.feat.largestDev[x0;5.0] ~ 0b .ml.fresh.feat.largestDev[x0;1] ~ 0b @@ -203,347 +202,347 @@ np:.p.import[`numpy] .ml.fresh.feat.largestDev[xb;0.5] ~ 0b .ml.fresh.feat.largestDev[xb;5.0] ~ 0b .ml.fresh.feat.largestDev[xb;1] ~ 0b -.ml.fresh.feat.largestDev[xnull;0.5] ~
large_standard_deviation[xnull;0.5] -.ml.fresh.feat.largestDev[xnull;5.0] ~ large_standard_deviation[xnull;5.0] -.ml.fresh.feat.largestDev[xnull;1] ~ large_standard_deviation[xnull;1] - -.ml.fresh.feat.valCount[xj;yint] ~ "i"$value_count[xj;yint] -.ml.fresh.feat.valCount[xf;yfloat] ~ "i"$value_count[xf;yfloat] -.ml.fresh.feat.valCount[xb;yint] ~ "i"$value_count[xb;yint] -.ml.fresh.feat.valCount[xb;yfloat] ~ "i"$value_count[xb;yfloat] -.ml.fresh.feat.valCount[xi;yint] ~ "i"$value_count[xi;yint] -.ml.fresh.feat.valCount[xi;yfloat] ~ "i"$value_count[xi;yfloat] -.ml.fresh.feat.valCount[x0;yint] ~ "i"$value_count[x0;yint] -.ml.fresh.feat.valCount[x0;yfloat] ~ "i"$value_count[x0;yfloat] -.ml.fresh.feat.valCount[x1;yint] ~ "i"$value_count[x1;yint] -.ml.fresh.feat.valCount[x1;yfloat] ~ "i"$value_count[x1;yfloat] -.ml.fresh.feat.valCount[x2;yint] ~ "i"$value_count[x2;yint] -.ml.fresh.feat.valCount[x2;yfloat] ~ "i"$value_count[x2;yfloat] -.ml.fresh.feat.valCount[xnull;yint] ~ "i"$value_count[xnull;yint] -.ml.fresh.feat.valCount[xnull;yfloat] ~ "i"$value_count[xnull;yfloat] - -.ml.fresh.feat.cidCe[xj;0b] ~ cid_ce[xj;0b] -.ml.fresh.feat.cidCe[xf;0b] ~ cid_ce[xf;0b] -.ml.fresh.feat.cidCe[xb;0b] ~ cid_ce[xb;0b] -.ml.fresh.feat.cidCe[xi;0b] ~ cid_ce[xi;0b] -.ml.fresh.feat.cidCe[x0;0b] ~ cid_ce[x0;0b] -.ml.fresh.feat.cidCe[x1;0b] ~ cid_ce[x1;0b] -.ml.fresh.feat.cidCe[x2;0b] ~ cid_ce[x2;0b] +.ml.fresh.feat.largestDev[xnull;0.5] ~ .p.get[`large_standard_deviation][xnull;0.5]` +.ml.fresh.feat.largestDev[xnull;5.0] ~ .p.get[`large_standard_deviation][xnull;5.0]` +.ml.fresh.feat.largestDev[xnull;1] ~ .p.get[`large_standard_deviation][xnull;1]` + +.ml.fresh.feat.valCount[xj;yint] ~ "i"$.p.get[`value_count][xj;yint]` +.ml.fresh.feat.valCount[xf;yfloat] ~ "i"$.p.get[`value_count][xf;yfloat]` +.ml.fresh.feat.valCount[xb;yint] ~ "i"$.p.get[`value_count][xb;yint]` +.ml.fresh.feat.valCount[xb;yfloat] ~ "i"$.p.get[`value_count][xb;yfloat]` +.ml.fresh.feat.valCount[xi;yint] ~ 
"i"$.p.get[`value_count][xi;yint]` +.ml.fresh.feat.valCount[xi;yfloat] ~ "i"$.p.get[`value_count][xi;yfloat]` +.ml.fresh.feat.valCount[x0;yint] ~ "i"$.p.get[`value_count][x0;yint]` +.ml.fresh.feat.valCount[x0;yfloat] ~ "i"$.p.get[`value_count][x0;yfloat]` +.ml.fresh.feat.valCount[x1;yint] ~ "i"$.p.get[`value_count][x1;yint]` +.ml.fresh.feat.valCount[x1;yfloat] ~ "i"$.p.get[`value_count][x1;yfloat]` +.ml.fresh.feat.valCount[x2;yint] ~ "i"$.p.get[`value_count][x2;yint]` +.ml.fresh.feat.valCount[x2;yfloat] ~ "i"$.p.get[`value_count][x2;yfloat]` +.ml.fresh.feat.valCount[xnull;yint] ~ "i"$.p.get[`value_count][xnull;yint]` +.ml.fresh.feat.valCount[xnull;yfloat] ~ "i"$.p.get[`value_count][xnull;yfloat]` + +.ml.fresh.feat.cidCe[xj;0b] ~ .p.get[`cid_ce][xj;0b]` +.ml.fresh.feat.cidCe[xf;0b] ~ .p.get[`cid_ce][xf;0b]` +.ml.fresh.feat.cidCe[xb;0b] ~ .p.get[`cid_ce][xb;0b]` +.ml.fresh.feat.cidCe[xi;0b] ~ .p.get[`cid_ce][xi;0b]` +.ml.fresh.feat.cidCe[x0;0b] ~ .p.get[`cid_ce][x0;0b]` +.ml.fresh.feat.cidCe[x1;0b] ~ .p.get[`cid_ce][x1;0b]` +.ml.fresh.feat.cidCe[x2;0b] ~ .p.get[`cid_ce][x2;0b]` .ml.fresh.feat.cidCe[xnull;0b] ~ 0n -.ml.fresh.feat.cidCe[xj;1b] ~ cid_ce[xj;1b] -.ml.fresh.feat.cidCe[xf;1b] ~ cid_ce[xf;1b] -.ml.fresh.feat.cidCe[xb;1b] ~ cid_ce[xb;1b] -.ml.fresh.feat.cidCe[xi;1b] ~ cid_ce[xi;1b] -.ml.fresh.feat.cidCe[x0;1b] ~ cid_ce[x0;1b] -.ml.fresh.feat.cidCe[x1;0b] ~ cid_ce[x1;0b] -.ml.fresh.feat.cidCe[x2;0b] ~ cid_ce[x2;0b] +.ml.fresh.feat.cidCe[xj;1b] ~ .p.get[`cid_ce][xj;1b]` +.ml.fresh.feat.cidCe[xf;1b] ~ .p.get[`cid_ce][xf;1b]` +.ml.fresh.feat.cidCe[xb;1b] ~ .p.get[`cid_ce][xb;1b]` +.ml.fresh.feat.cidCe[xi;1b] ~ .p.get[`cid_ce][xi;1b]` +.ml.fresh.feat.cidCe[x0;1b] ~ .p.get[`cid_ce][x0;1b]` +.ml.fresh.feat.cidCe[x1;0b] ~ .p.get[`cid_ce][x1;0b]` +.ml.fresh.feat.cidCe[x2;0b] ~ .p.get[`cid_ce][x2;0b]` .ml.fresh.feat.cidCe[xnull;1b] ~ 0n -.ml.fresh.feat.mean2DerCentral[xj] ~ mean_second_derivative_central[xj] -.ml.fresh.feat.mean2DerCentral[xf] ~ 
mean_second_derivative_central[xf] -.ml.fresh.feat.mean2DerCentral[xi] ~ mean_second_derivative_central[xi] +.ml.fresh.feat.mean2DerCentral[xj] ~ .p.get[`mean_second_derivative_central][xj]` +.ml.fresh.feat.mean2DerCentral[xf] ~ .p.get[`mean_second_derivative_central][xf]` +.ml.fresh.feat.mean2DerCentral[xi] ~ .p.get[`mean_second_derivative_central][xi]` .ml.fresh.feat.mean2DerCentral[xb] ~ 0f .ml.fresh.feat.mean2DerCentral[x0] ~ 0n .ml.fresh.feat.mean2DerCentral[x1] ~ 0n .ml.fresh.feat.mean2DerCentral[x2] ~ 0n .ml.fresh.feat.mean2DerCentral[xnull] ~ 0n -.ml.fresh.feat.skewness[xj] ~ skewness_py[xj] -(.ml.fresh.feat.skewness[xf] - skewness_py[xf])<1e-13 -.ml.fresh.feat.skewness[xb] ~ skewness_py[xb] -.ml.fresh.feat.skewness[xi] ~ skewness_py[xi] +.ml.fresh.feat.skewness[xj] ~ .p.get[`skewness_py;<][xj] +(.ml.fresh.feat.skewness[xf] - .p.get[`skewness_py;<][xf])<1e-13 +.ml.fresh.feat.skewness[xb] ~ .p.get[`skewness_py;<][xb] +.ml.fresh.feat.skewness[xi] ~ .p.get[`skewness_py;<][xi] .ml.fresh.feat.skewness[x0] ~ 0n .ml.fresh.feat.skewness[x1] ~ 0n .ml.fresh.feat.skewness[x2] ~ 0n .ml.fresh.feat.skewness[xnull] ~ 0n -.ml.fresh.feat.kurtosis[xj] ~ kurtosis_py[xj] -.ml.fresh.feat.kurtosis[xf] ~ kurtosis_py[xf] -.ml.fresh.feat.kurtosis[xb] ~ kurtosis_py[xb] -.ml.fresh.feat.kurtosis[xi] ~ kurtosis_py[xi] +.ml.fresh.feat.kurtosis[xj] ~ .p.get[`kurtosis_py][xj]` +.ml.fresh.feat.kurtosis[xf] ~ .p.get[`kurtosis_py][xf]` +.ml.fresh.feat.kurtosis[xb] ~ .p.get[`kurtosis_py][xb]` +.ml.fresh.feat.kurtosis[xi] ~ .p.get[`kurtosis_py][xi]` .ml.fresh.feat.kurtosis[x0] ~ 0n .ml.fresh.feat.kurtosis[x1] ~ 0n .ml.fresh.feat.kurtosis[x2] ~ 0n .ml.fresh.feat.kurtosis[xnull] ~ 0n -.ml.fresh.feat.longStrikeBelowMean[xj] ~ longest_strike_below_mean[xj] -.ml.fresh.feat.longStrikeBelowMean[xf] ~ longest_strike_below_mean[xf] -.ml.fresh.feat.longStrikeBelowMean[xb] ~ longest_strike_below_mean[xb] -.ml.fresh.feat.longStrikeBelowMean[xi] ~ longest_strike_below_mean[xi] 
-.ml.fresh.feat.longStrikeBelowMean[x0] ~ longest_strike_below_mean[x0] +.ml.fresh.feat.longStrikeBelowMean[xj] ~ .p.get[`longest_strike_below_mean;<][xj] +.ml.fresh.feat.longStrikeBelowMean[xf] ~ .p.get[`longest_strike_below_mean;<][xf] +.ml.fresh.feat.longStrikeBelowMean[xb] ~ .p.get[`longest_strike_below_mean;<][xb] +.ml.fresh.feat.longStrikeBelowMean[xi] ~ .p.get[`longest_strike_below_mean;<][xi] +.ml.fresh.feat.longStrikeBelowMean[x0] ~ .p.get[`longest_strike_below_mean;<][x0] ("f"$.ml.fresh.feat.longStrikeBelowMean[x1]) ~ 0f -.ml.fresh.feat.longStrikeBelowMean[x2] ~ longest_strike_below_mean[x2] -.ml.fresh.feat.longStrikeBelowMean[xnull] ~ longest_strike_below_mean[xnull] - -.ml.fresh.feat.longStrikeAboveMean[xj] ~ longest_strike_above_mean[xj] -.ml.fresh.feat.longStrikeAboveMean[xf] ~ longest_strike_above_mean[xf] -.ml.fresh.feat.longStrikeAboveMean[xb] ~ longest_strike_above_mean[xb] -.ml.fresh.feat.longStrikeAboveMean[xi] ~ longest_strike_above_mean[xi] -.ml.fresh.feat.longStrikeAboveMean[x0] ~ longest_strike_above_mean[x0] +.ml.fresh.feat.longStrikeBelowMean[x2] ~ .p.get[`longest_strike_below_mean;<][x2] +.ml.fresh.feat.longStrikeBelowMean[xnull] ~ .p.get[`longest_strike_below_mean;<][xnull] + +.ml.fresh.feat.longStrikeAboveMean[xj] ~ .p.get[`longest_strike_above_mean;<][xj] +.ml.fresh.feat.longStrikeAboveMean[xf] ~ .p.get[`longest_strike_above_mean;<][xf] +.ml.fresh.feat.longStrikeAboveMean[xb] ~ .p.get[`longest_strike_above_mean;<][xb] +.ml.fresh.feat.longStrikeAboveMean[xi] ~ .p.get[`longest_strike_above_mean;<][xi] +.ml.fresh.feat.longStrikeAboveMean[x0] ~ .p.get[`longest_strike_above_mean;<][x0] ("f"$.ml.fresh.feat.longStrikeAboveMean[x1]) ~ 0f -.ml.fresh.feat.longStrikeAboveMean[x2] ~ longest_strike_above_mean[x2] -.ml.fresh.feat.longStrikeAboveMean[xnull] ~ longest_strike_above_mean[xnull] - -.ml.fresh.feat.sumRecurringVal[xj] ~ sum_recurring_values[xj] -.ml.fresh.feat.sumRecurringVal[xf] ~ sum_recurring_values[xf] 
-.ml.fresh.feat.sumRecurringVal[xi] ~ "i"$sum_recurring_values[xi] -.ml.fresh.feat.sumRecurringVal[xb] ~ "i"$sum_recurring_values[xb] -.ml.fresh.feat.sumRecurringVal[x1] ~ sum_recurring_values[x1] -.ml.fresh.feat.sumRecurringVal[x2] ~ sum_recurring_values[x2] +.ml.fresh.feat.longStrikeAboveMean[x2] ~ .p.get[`longest_strike_above_mean;<][x2] +.ml.fresh.feat.longStrikeAboveMean[xnull] ~ .p.get[`longest_strike_above_mean;<][xnull] + +.ml.fresh.feat.sumRecurringVal[xj] ~ .p.get[`sum_recurring_values;<][xj] +.ml.fresh.feat.sumRecurringVal[xf] ~ .p.get[`sum_recurring_values;<][xf] +.ml.fresh.feat.sumRecurringVal[xi] ~ "i"$.p.get[`sum_recurring_values;<][xi] +.ml.fresh.feat.sumRecurringVal[xb] ~ "i"$.p.get[`sum_recurring_values;<][xb] +.ml.fresh.feat.sumRecurringVal[x1] ~ .p.get[`sum_recurring_values;<][x1] +.ml.fresh.feat.sumRecurringVal[x2] ~ .p.get[`sum_recurring_values;<][x2] .ml.fresh.feat.sumRecurringVal[x0] ~ 0f .ml.fresh.feat.sumRecurringVal[xnull] ~ 0f -.ml.fresh.feat.sumRecurringDataPoint[xj] ~ sum_recurring_data_points[xj] -.ml.fresh.feat.sumRecurringDataPoint[xf] ~ sum_recurring_data_points[xf] -.ml.fresh.feat.sumRecurringDataPoint[xb] ~ sum_recurring_data_points[xb] -.ml.fresh.feat.sumRecurringDataPoint[xi] ~ sum_recurring_data_points[xi] -.ml.fresh.feat.sumRecurringDataPoint[x1] ~ sum_recurring_data_points[x1] -.ml.fresh.feat.sumRecurringDataPoint[x2] ~ sum_recurring_data_points[x2] +.ml.fresh.feat.sumRecurringDataPoint[xj] ~ .p.get[`sum_recurring_data_points;<][xj] +.ml.fresh.feat.sumRecurringDataPoint[xf] ~ .p.get[`sum_recurring_data_points;<][xf] +.ml.fresh.feat.sumRecurringDataPoint[xb] ~ .p.get[`sum_recurring_data_points;<][xb] +.ml.fresh.feat.sumRecurringDataPoint[xi] ~ .p.get[`sum_recurring_data_points;<][xi] +.ml.fresh.feat.sumRecurringDataPoint[x1] ~ .p.get[`sum_recurring_data_points;<][x1] +.ml.fresh.feat.sumRecurringDataPoint[x2] ~ .p.get[`sum_recurring_data_points;<][x2] .ml.fresh.feat.sumRecurringDataPoint[xnull] ~ 0f -.ml.fresh.feat.c3[xj;2] ~ c3_py[xj;2]
-.ml.fresh.feat.c3[xf;4] ~ c3_py[xf;4] -.ml.fresh.feat.c3[xi;4] ~ c3_py[xi;4] -("i"$100*.ml.fresh.feat.c3[xb;4]) ~ "i"$100*c3_py[xb;4] +.ml.fresh.feat.c3[xj;2] ~ .p.get[`c3_py;<][xj;2] +.ml.fresh.feat.c3[xf;4] ~ .p.get[`c3_py;<][xf;4] +.ml.fresh.feat.c3[xi;4] ~ .p.get[`c3_py;<][xi;4] +("i"$100*.ml.fresh.feat.c3[xb;4]) ~ "i"$100*.p.get[`c3_py;<][xb;4] .ml.fresh.feat.c3[x0;4] ~ 0n .ml.fresh.feat.c3[x1;4] ~ 0n .ml.fresh.feat.c3[x2;4] ~ 0n .ml.fresh.feat.c3[xnull;4] ~ 0n -.ml.fresh.feat.varAboveStdDev[xj] ~ variance_larger_than_standard_deviation[xj] -.ml.fresh.feat.varAboveStdDev[xf] ~ variance_larger_than_standard_deviation[xf] -.ml.fresh.feat.varAboveStdDev[xb] ~ variance_larger_than_standard_deviation[xb] -.ml.fresh.feat.varAboveStdDev[xi] ~ variance_larger_than_standard_deviation[xi] +.ml.fresh.feat.varAboveStdDev[xj] ~ .p.get[`variance_larger_than_standard_deviation;<][xj] +.ml.fresh.feat.varAboveStdDev[xf] ~ .p.get[`variance_larger_than_standard_deviation;<][xf] +.ml.fresh.feat.varAboveStdDev[xb] ~ .p.get[`variance_larger_than_standard_deviation;<][xb] +.ml.fresh.feat.varAboveStdDev[xi] ~ .p.get[`variance_larger_than_standard_deviation;<][xi] .ml.fresh.feat.varAboveStdDev[x0] ~ 0b -.ml.fresh.feat.varAboveStdDev[x1] ~ variance_larger_than_standard_deviation[x1] -.ml.fresh.feat.varAboveStdDev[x2] ~ variance_larger_than_standard_deviation[x2] +.ml.fresh.feat.varAboveStdDev[x1] ~ .p.get[`variance_larger_than_standard_deviation;<][x1] +.ml.fresh.feat.varAboveStdDev[x2] ~ .p.get[`variance_larger_than_standard_deviation;<][x2] .ml.fresh.feat.varAboveStdDev[xnull] ~ 0b -.ml.fresh.feat.numCwtPeaks[xj;3] ~ number_cwt_peaks[xj;3] -.ml.fresh.feat.numCwtPeaks[xf;3] ~ number_cwt_peaks[xf;3] -.ml.fresh.feat.numCwtPeaks[xb;3] ~ number_cwt_peaks[xb;3] -.ml.fresh.feat.numCwtPeaks[xi;3] ~ number_cwt_peaks[xi;3] -.ml.fresh.feat.numCwtPeaks[x1;3] ~ number_cwt_peaks[x1;3] -.ml.fresh.feat.numCwtPeaks[x2;3] ~ number_cwt_peaks[x2;3] -.ml.fresh.feat.numCwtPeaks[xnull;3] ~ 
number_cwt_peaks[xnull;3] +.ml.fresh.feat.numCwtPeaks[xj;3] ~ .p.get[`number_cwt_peaks;<][xj;3] +.ml.fresh.feat.numCwtPeaks[xf;3] ~ .p.get[`number_cwt_peaks;<][xf;3] +.ml.fresh.feat.numCwtPeaks[xb;3] ~ .p.get[`number_cwt_peaks;<][xb;3] +.ml.fresh.feat.numCwtPeaks[xi;3] ~ .p.get[`number_cwt_peaks;<][xi;3] +.ml.fresh.feat.numCwtPeaks[x1;3] ~ .p.get[`number_cwt_peaks;<][x1;3] +.ml.fresh.feat.numCwtPeaks[x2;3] ~ .p.get[`number_cwt_peaks;<][x2;3] +.ml.fresh.feat.numCwtPeaks[xnull;3] ~ .p.get[`number_cwt_peaks;<][xnull;3] /For the testing of quantiles the 'y' argument must be in the range [0;1] by definition -.ml.fresh.feat.quantile[xj;0.5] ~ quantile_py[xj;0.5] -.ml.fresh.feat.quantile[xf;0.5] ~ quantile_py[xf;0.5] -.ml.fresh.feat.quantile[xb;0.5] ~ quantile_py["f"$xb;0.5] -.ml.fresh.feat.quantile[xi;0.5] ~ quantile_py[xi;0.5] +.ml.fresh.feat.quantile[xj;0.5] ~ .p.get[`quantile_py;<][xj;0.5] +.ml.fresh.feat.quantile[xf;0.5] ~ .p.get[`quantile_py;<][xf;0.5] +.ml.fresh.feat.quantile[xb;0.5] ~ .p.get[`quantile_py;<]["f"$xb;0.5] +.ml.fresh.feat.quantile[xi;0.5] ~ .p.get[`quantile_py;<][xi;0.5] .ml.fresh.feat.quantile[x0;0.5] ~ 0f -.ml.fresh.feat.quantile[x1;0.5] ~ quantile_py[x1;0.5] -.ml.fresh.feat.quantile[x2;0.5] ~ quantile_py[x2;0.5] +.ml.fresh.feat.quantile[x1;0.5] ~ .p.get[`quantile_py;<][x1;0.5] +.ml.fresh.feat.quantile[x2;0.5] ~ .p.get[`quantile_py;<][x2;0.5] .ml.fresh.feat.quantile[xnull;0.5] ~ 0f -.ml.fresh.feat.numCrossing[xj;350] ~ "i"$number_crossing_m[xj;350] -.ml.fresh.feat.numCrossing[xf;350] ~ "i"$number_crossing_m[xf;350] -.ml.fresh.feat.numCrossing[xb;350] ~ "i"$number_crossing_m[xb;350] -.ml.fresh.feat.numCrossing[xi;350] ~ "i"$number_crossing_m[xi;350] -.ml.fresh.feat.numCrossing[x0;350] ~ "i"$number_crossing_m[x0;350] -.ml.fresh.feat.numCrossing[x1;350] ~ "i"$number_crossing_m[x1;350] -.ml.fresh.feat.numCrossing[x2;350] ~ "i"$number_crossing_m[x2;350] -.ml.fresh.feat.numCrossing[xnull;350] ~ "i"$number_crossing_m[xnull;350] - 
-.ml.fresh.feat.binnedEntropy[xj;50] ~ binned_entropy[xj;50] -.ml.fresh.feat.binnedEntropy[xf;50] ~ binned_entropy[xf;50] -.ml.fresh.feat.binnedEntropy[xi;50] ~ binned_entropy[xi;50] -.ml.fresh.feat.binnedEntropy[x1;50] ~ binned_entropy[x1;50] -.ml.fresh.feat.binnedEntropy[x2;50] ~ binned_entropy[x2;50] +.ml.fresh.feat.numCrossing[xj;350] ~ "i"$.p.get[`number_crossing_m;<][xj;350] +.ml.fresh.feat.numCrossing[xf;350] ~ "i"$.p.get[`number_crossing_m;<][xf;350] +.ml.fresh.feat.numCrossing[xb;350] ~ "i"$.p.get[`number_crossing_m;<][xb;350] +.ml.fresh.feat.numCrossing[xi;350] ~ "i"$.p.get[`number_crossing_m;<][xi;350] +.ml.fresh.feat.numCrossing[x0;350] ~ "i"$.p.get[`number_crossing_m;<][x0;350] +.ml.fresh.feat.numCrossing[x1;350] ~ "i"$.p.get[`number_crossing_m;<][x1;350] +.ml.fresh.feat.numCrossing[x2;350] ~ "i"$.p.get[`number_crossing_m;<][x2;350] +.ml.fresh.feat.numCrossing[xnull;350] ~ "i"$.p.get[`number_crossing_m;<][xnull;350] + +.ml.fresh.feat.binnedEntropy[xj;50] ~ .p.get[`binned_entropy;<][xj;50] +.ml.fresh.feat.binnedEntropy[xf;50] ~ .p.get[`binned_entropy;<][xf;50] +.ml.fresh.feat.binnedEntropy[xi;50] ~ .p.get[`binned_entropy;<][xi;50] +.ml.fresh.feat.binnedEntropy[x1;50] ~ .p.get[`binned_entropy;<][x1;50] +.ml.fresh.feat.binnedEntropy[x2;50] ~ .p.get[`binned_entropy;<][x2;50] abs[.ml.fresh.feat.binnedEntropy[xnull;50]] ~ 0f -.ml.fresh.feat.autoCorr[xf;50] ~ autocorrelation[xf;50] -.ml.fresh.feat.autoCorr[xj;50] ~ autocorrelation[xj;50] -.ml.fresh.feat.autoCorr[xi;50] ~ autocorrelation[xi;50] +.ml.fresh.feat.autoCorr[xf;50] ~ .p.get[`autocorrelation][xf;50]` +.ml.fresh.feat.autoCorr[xj;50] ~ .p.get[`autocorrelation][xj;50]` +.ml.fresh.feat.autoCorr[xi;50] ~ .p.get[`autocorrelation][xi;50]` .ml.fresh.feat.autoCorr[x0;50] ~ 0n .ml.fresh.feat.autoCorr[x1;50] ~ 0n .ml.fresh.feat.autoCorr[x2;50] ~ 0n .ml.fresh.feat.autoCorr[xnull;50] ~ 0n -.ml.fresh.feat.numPeaks[xj;1] ~ "i"$number_peaks[xj;1] -.ml.fresh.feat.numPeaks[xj;4] ~ "i"$number_peaks[xj;4] 
-.ml.fresh.feat.numPeaks[xf;1] ~ "i"$number_peaks[xf;1] -.ml.fresh.feat.numPeaks[xf;4] ~ "i"$number_peaks[xf;4] -.ml.fresh.feat.numPeaks[xb;1] ~ "i"$number_peaks[xb;1] -.ml.fresh.feat.numPeaks[xb;4] ~ "i"$number_peaks[xb;4] -.ml.fresh.feat.numPeaks[xi;1] ~ "i"$number_peaks[xi;1] -.ml.fresh.feat.numPeaks[xi;4] ~ "i"$number_peaks[xi;4] -.ml.fresh.feat.numPeaks[x0;1] ~ "i"$number_peaks[x0;1] -.ml.fresh.feat.numPeaks[x0;4] ~ "i"$number_peaks[x0;4] -.ml.fresh.feat.numPeaks[x1;1] ~ "i"$number_peaks[x1;1] -.ml.fresh.feat.numPeaks[x1;4] ~ "i"$number_peaks[x1;4] -.ml.fresh.feat.numPeaks[x2;1] ~ "i"$number_peaks[x2;1] -.ml.fresh.feat.numPeaks[x2;4] ~ "i"$number_peaks[x2;4] -.ml.fresh.feat.numPeaks[xnull;1] ~ "i"$number_peaks[xnull;1] -.ml.fresh.feat.numPeaks[xnull;4] ~ "i"$number_peaks[xnull;4] - -.ml.fresh.feat.rangeCount[xj;20;100] ~ "i"$range_count[xj;20;100] -.ml.fresh.feat.rangeCount[xf;20.1;100.0] ~ "i"$range_count[xf;20.1;100.0] -.ml.fresh.feat.rangeCount[xi;20;100] ~ "i"$range_count[xi;20;100] -.ml.fresh.feat.rangeCount[xb;20;100] ~ "i"$range_count[xb;20;100] -.ml.fresh.feat.rangeCount[x0;20;100] ~ "i"$range_count[x0;20;100] -.ml.fresh.feat.rangeCount[x1;20;100] ~ "i"$range_count[x1;20;100] -.ml.fresh.feat.rangeCount[x2;20;100] ~ "i"$range_count[x2;20;100] -.ml.fresh.feat.rangeCount[xnull;20;100] ~ "i"$range_count[xnull;20;100] - -.ml.fresh.feat.treverseAsymStat[xj;2] ~ time_reversal_asymmetry_statistic[xj;2] -.ml.fresh.feat.treverseAsymStat[xf;2] ~ time_reversal_asymmetry_statistic[xf;2] -.ml.fresh.feat.treverseAsymStat[xi;2] ~ time_reversal_asymmetry_statistic[xi;2] +.ml.fresh.feat.numPeaks[xj;1] ~ "i"$.p.get[`number_peaks;<][xj;1] +.ml.fresh.feat.numPeaks[xj;4] ~ "i"$.p.get[`number_peaks;<][xj;4] +.ml.fresh.feat.numPeaks[xf;1] ~ "i"$.p.get[`number_peaks;<][xf;1] +.ml.fresh.feat.numPeaks[xf;4] ~ "i"$.p.get[`number_peaks;<][xf;4] +.ml.fresh.feat.numPeaks[xb;1] ~ "i"$.p.get[`number_peaks;<][xb;1] +.ml.fresh.feat.numPeaks[xb;4] ~ "i"$.p.get[`number_peaks;<][xb;4] 
+.ml.fresh.feat.numPeaks[xi;1] ~ "i"$.p.get[`number_peaks;<][xi;1] +.ml.fresh.feat.numPeaks[xi;4] ~ "i"$.p.get[`number_peaks;<][xi;4] +.ml.fresh.feat.numPeaks[x0;1] ~ "i"$.p.get[`number_peaks;<][x0;1] +.ml.fresh.feat.numPeaks[x0;4] ~ "i"$.p.get[`number_peaks;<][x0;4] +.ml.fresh.feat.numPeaks[x1;1] ~ "i"$.p.get[`number_peaks;<][x1;1] +.ml.fresh.feat.numPeaks[x1;4] ~ "i"$.p.get[`number_peaks;<][x1;4] +.ml.fresh.feat.numPeaks[x2;1] ~ "i"$.p.get[`number_peaks;<][x2;1] +.ml.fresh.feat.numPeaks[x2;4] ~ "i"$.p.get[`number_peaks;<][x2;4] +.ml.fresh.feat.numPeaks[xnull;1] ~ "i"$.p.get[`number_peaks;<][xnull;1] +.ml.fresh.feat.numPeaks[xnull;4] ~ "i"$.p.get[`number_peaks;<][xnull;4] + +.ml.fresh.feat.rangeCount[xj;20;100] ~ "i"$.p.get[`range_count;<][xj;20;100] +.ml.fresh.feat.rangeCount[xf;20.1;100.0] ~ "i"$.p.get[`range_count;<][xf;20.1;100.0] +.ml.fresh.feat.rangeCount[xi;20;100] ~ "i"$.p.get[`range_count;<][xi;20;100] +.ml.fresh.feat.rangeCount[xb;20;100] ~ "i"$.p.get[`range_count;<][xb;20;100] +.ml.fresh.feat.rangeCount[x0;20;100] ~ "i"$.p.get[`range_count;<][x0;20;100] +.ml.fresh.feat.rangeCount[x1;20;100] ~ "i"$.p.get[`range_count;<][x1;20;100] +.ml.fresh.feat.rangeCount[x2;20;100] ~ "i"$.p.get[`range_count;<][x2;20;100] +.ml.fresh.feat.rangeCount[xnull;20;100] ~ "i"$.p.get[`range_count;<][xnull;20;100] + +.ml.fresh.feat.treverseAsymStat[xj;2] ~ .p.get[`time_reversal_asymmetry_statistic;<][xj;2] +.ml.fresh.feat.treverseAsymStat[xf;2] ~ .p.get[`time_reversal_asymmetry_statistic;<][xf;2] +.ml.fresh.feat.treverseAsymStat[xi;2] ~ .p.get[`time_reversal_asymmetry_statistic;<][xi;2] .ml.fresh.feat.treverseAsymStat[xb;2] ~ 0.0001 .ml.fresh.feat.treverseAsymStat[x0;2] ~ 0f -.ml.fresh.feat.treverseAsymStat[x1;2] ~ "f"$time_reversal_asymmetry_statistic[x1;2] -.ml.fresh.feat.treverseAsymStat[x2;2] ~ "f"$time_reversal_asymmetry_statistic[x2;2] +.ml.fresh.feat.treverseAsymStat[x1;2] ~ "f"$.p.get[`time_reversal_asymmetry_statistic;<][x1;2] +.ml.fresh.feat.treverseAsymStat[x2;2] ~ 
"f"$.p.get[`time_reversal_asymmetry_statistic;<][x2;2] .ml.fresh.feat.treverseAsymStat[xnull;2] ~ 0f -.ml.fresh.feat.indexMassQuantile[xi;.6] ~ index_mass_quantile[xi;.6] -.ml.fresh.feat.indexMassQuantile[xj;1.] ~ index_mass_quantile[xj;1.] -.ml.fresh.feat.indexMassQuantile[xh;0.] ~ index_mass_quantile[xh;0.] +.ml.fresh.feat.indexMassQuantile[xi;.6] ~ .p.get[`index_mass_quantile;<][xi;.6] +.ml.fresh.feat.indexMassQuantile[xj;1.] ~ .p.get[`index_mass_quantile;<][xj;1.] +.ml.fresh.feat.indexMassQuantile[xh;0.] ~ .p.get[`index_mass_quantile;<][xh;0.] .ml.fresh.feat.indexMassQuantile[xi;x0] ~ x0 -.ml.fresh.feat.lastMax[xi] ~ last_location_of_maximum[xi] -.ml.fresh.feat.lastMax[xj] ~ last_location_of_maximum[xj] -.ml.fresh.feat.lastMax[xf] ~ last_location_of_maximum[xf] +.ml.fresh.feat.lastMax[xi] ~ .p.get[`last_location_of_maximum;<][xi] +.ml.fresh.feat.lastMax[xj] ~ .p.get[`last_location_of_maximum;<][xj] +.ml.fresh.feat.lastMax[xf] ~ .p.get[`last_location_of_maximum;<][xf] .ml.fresh.feat.lastMax[x0] ~ 0n .ml.fresh.feat.lastMax[xs] ~ 0f -.ml.fresh.feat.lastMin[xi] ~ last_location_of_minimum[xi] -.ml.fresh.feat.lastMin[xj] ~ last_location_of_minimum[xj] -.ml.fresh.feat.lastMin[xf] ~ last_location_of_minimum[xf] +.ml.fresh.feat.lastMin[xi] ~ .p.get[`last_location_of_minimum;<][xi] +.ml.fresh.feat.lastMin[xj] ~ .p.get[`last_location_of_minimum;<][xj] +.ml.fresh.feat.lastMin[xf] ~ .p.get[`last_location_of_minimum;<][xf] .ml.fresh.feat.lastMin[x0] ~ 0n .ml.fresh.feat.lastMin[xs] ~ 0f -(value .ml.fresh.feat.changeQuant[xf;0.2;0.8;1b]) ~ change_quantiles[xf;0.2;0.8;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.25;0.7;1b]) ~ change_quantiles[xf;0.25;0.7;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.2;0.65;1b]) ~ change_quantiles[xf;0.2;0.65;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.2;0.775;1b]) ~ change_quantiles[xf;0.2;0.775;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.2;0.8;0b]) ~ 
change_quantiles[xf;0.2;0.8;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.25;0.7;0b]) ~ change_quantiles[xf;0.25;0.7;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.2;0.65;0b]) ~ change_quantiles[xf;0.2;0.65;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xf;0.2;0.775;0b]) ~ change_quantiles[xf;0.2;0.775;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.8;1b]) ~ change_quantiles[xj;0.2;0.8;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.25;0.7;1b]) ~ change_quantiles[xj;0.25;0.7;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.65;1b]) ~ change_quantiles[xj;0.2;0.65;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.775;1b]) ~ change_quantiles[xj;0.2;0.775;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.8;0b]) ~ change_quantiles[xj;0.2;0.8;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.25;0.7;0b]) ~ change_quantiles[xj;0.25;0.7;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.65;0b]) ~ change_quantiles[xj;0.2;0.65;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xj;0.2;0.775;0b]) ~ change_quantiles[xj;0.2;0.775;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;1b]) ~ change_quantiles[xi;0.2;0.8;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;1b]) ~ change_quantiles[xi;0.25;0.7;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;1b]) ~ change_quantiles[xi;0.2;0.65;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;1b]) ~ change_quantiles[xi;0.2;0.775;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;0b]) ~ change_quantiles[xi;0.2;0.8;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;0b]) ~ change_quantiles[xi;0.25;0.7;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;0b]) ~ change_quantiles[xi;0.2;0.65;0b;]each changequantkeys 
-(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;0b]) ~ change_quantiles[xi;0.2;0.775;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.8;1b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.8;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.25;0.7;1b]) ~ .p.get[`change_quantiles;<][xf;0.25;0.7;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.65;1b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.65;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.775;1b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.775;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.8;0b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.8;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.25;0.7;0b]) ~ .p.get[`change_quantiles;<][xf;0.25;0.7;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.65;0b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.65;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xf;0.2;0.775;0b]) ~ .p.get[`change_quantiles;<][xf;0.2;0.775;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.8;1b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.8;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.25;0.7;1b]) ~ .p.get[`change_quantiles;<][xj;0.25;0.7;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.65;1b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.65;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.775;1b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.775;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.8;0b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.8;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.25;0.7;0b]) ~ .p.get[`change_quantiles;<][xj;0.25;0.7;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.65;0b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.65;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xj;0.2;0.775;0b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.775;0b;]each 
changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.8;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;1b]) ~ .p.get[`change_quantiles;<][xi;0.25;0.7;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.65;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.775;1b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.8;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;0b]) ~ .p.get[`change_quantiles;<][xi;0.25;0.7;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.65;0b;]each changequantkeys +(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.775;0b;]each changequantkeys (value .ml.fresh.feat.changeQuant[x0;0.2;0.775;1b]) ~ (-0w 0w,4#0n) (value .ml.fresh.feat.changeQuant[x1;0.2;0.775;1b]) ~ (-0w 0w,4#0n) (value .ml.fresh.feat.changeQuant[x2;0.2;0.775;1b]) ~ (-0w 0w,4#0n) (value .ml.fresh.feat.changeQuant[xnull;0.2;0.775;1b]) ~ (-0w 0w,4#0n) -(.ml.fresh.feat.linTrend[xj]`slope) ~ linear_trend[xj][0] -(.ml.fresh.feat.linTrend[xj]`intercept) ~ linear_trend[xj][1] -(.ml.fresh.feat.linTrend[xj]`rval) ~ linear_trend[xj][2] -(.ml.fresh.feat.linTrend[xf]`slope) ~ linear_trend[xf][0] -(.ml.fresh.feat.linTrend[xf]`intercept) ~ linear_trend[xf][1] -(.ml.fresh.feat.linTrend[xf]`rval) ~ linear_trend[xf][2] -(.ml.fresh.feat.linTrend[xb]`slope) ~ linear_trend[xb][0] -(.ml.fresh.feat.linTrend[xb]`intercept) ~ linear_trend[xb][1] -(.ml.fresh.feat.linTrend[xb]`rval) ~ linear_trend[xb][2] -(.ml.fresh.feat.linTrend[xi]`slope) ~ linear_trend[xi][0] -(.ml.fresh.feat.linTrend[xi]`intercept) ~ linear_trend[xi][1] -(.ml.fresh.feat.linTrend[xi]`rval) ~ linear_trend[xi][2] +(.ml.fresh.feat.linTrend[xj]`slope) 
~ .p.get[`linear_trend][xj][`:slope]` +(.ml.fresh.feat.linTrend[xj]`intercept) ~ .p.get[`linear_trend][xj][`:intercept]` +(.ml.fresh.feat.linTrend[xj]`rval) ~ .p.get[`linear_trend][xj][`:rvalue]` +(.ml.fresh.feat.linTrend[xf]`slope) ~ .p.get[`linear_trend][xf][`:slope]` +(.ml.fresh.feat.linTrend[xf]`intercept) ~ .p.get[`linear_trend][xf][`:intercept]` +(.ml.fresh.feat.linTrend[xf]`rval) ~ .p.get[`linear_trend][xf][`:rvalue]` +(.ml.fresh.feat.linTrend[xb]`slope) ~ .p.get[`linear_trend][xb][`:slope]` +(.ml.fresh.feat.linTrend[xb]`intercept) ~ .p.get[`linear_trend][xb][`:intercept]` +(.ml.fresh.feat.linTrend[xb]`rval) ~ .p.get[`linear_trend][xb][`:rvalue]` +(.ml.fresh.feat.linTrend[xi]`slope) ~ .p.get[`linear_trend][xi][`:slope]` +(.ml.fresh.feat.linTrend[xi]`intercept) ~ .p.get[`linear_trend][xi][`:intercept]` +(.ml.fresh.feat.linTrend[xi]`rval) ~ .p.get[`linear_trend][xi][`:rvalue]` (.ml.fresh.feat.linTrend[x0]`slope) ~ 0f (.ml.fresh.feat.linTrend[x0]`intercept) ~ 0f (.ml.fresh.feat.linTrend[x0]`rval) ~ 0f (.ml.fresh.feat.linTrend[x1]`slope) ~ 0f (.ml.fresh.feat.linTrend[x1]`intercept) ~ 0f (.ml.fresh.feat.linTrend[x1]`rval) ~ 0f -(.ml.fresh.feat.linTrend[x2]`slope) ~ linear_trend[x2][0] -(.ml.fresh.feat.linTrend[x2]`intercept) ~ linear_trend[x2][1] -(.ml.fresh.feat.linTrend[x2]`rval) ~ linear_trend[x2][2] +(.ml.fresh.feat.linTrend[x2]`slope) ~ .p.get[`linear_trend][x2][`:slope]` +(.ml.fresh.feat.linTrend[x2]`intercept) ~ .p.get[`linear_trend][x2][`:intercept]` +(.ml.fresh.feat.linTrend[x2]`rval) ~ .p.get[`linear_trend][x2][`:rvalue]` (.ml.fresh.feat.linTrend[xnull]`slope) ~ 0f (.ml.fresh.feat.linTrend[xnull]`intercept) ~ 0f (.ml.fresh.feat.linTrend[xnull]`rval) ~ 0f -(value .ml.fresh.feat.aggAutoCorr[xj]) ~ agg_autocorrelation[xj;]each autocorrkeys -(value .ml.fresh.feat.aggAutoCorr[xf]) ~ agg_autocorrelation[xf;]each autocorrkeys -(1_value .ml.fresh.feat.aggAutoCorr[xb]) ~ 1_agg_autocorrelation[xb;]each autocorrkeys -(value .ml.fresh.feat.aggAutoCorr[xi]) ~ 
agg_autocorrelation[xi;]each autocorrkeys +(value .ml.fresh.feat.aggAutoCorr[xj]) ~ .p.get[`agg_autocorrelation;<][xj;]each autocorrkeys +(value .ml.fresh.feat.aggAutoCorr[xf]) ~ .p.get[`agg_autocorrelation;<][xf;]each autocorrkeys +(1_value .ml.fresh.feat.aggAutoCorr[xb]) ~ 1_.p.get[`agg_autocorrelation;<][xb;]each autocorrkeys +(value .ml.fresh.feat.aggAutoCorr[xi]) ~ .p.get[`agg_autocorrelation;<][xi;]each autocorrkeys (value .ml.fresh.feat.aggAutoCorr[x0]) ~ 4#0f (value .ml.fresh.feat.aggAutoCorr[x1]) ~ 4#0f -(value .ml.fresh.feat.aggAutoCorr[x2]) ~ agg_autocorrelation[x2;]each autocorrkeys +(value .ml.fresh.feat.aggAutoCorr[x2]) ~ .p.get[`agg_autocorrelation;<][x2;]each autocorrkeys (value .ml.fresh.feat.aggAutoCorr[xnull]) ~ 4#0f -(.ml.fresh.feat.fftAggreg[xj]`centroid) ~ fft_aggregated[xj][0] -(.ml.fresh.feat.fftAggreg[xj]`variance) ~ fft_aggregated[xj][1] -(.ml.fresh.feat.fftAggreg[xi]`centroid) ~ fft_aggregated[xi][0] -(.ml.fresh.feat.fftAggreg[xi]`variance) ~ fft_aggregated[xi][1] -(.ml.fresh.feat.fftAggreg[xf]`centroid) ~ fft_aggregated[xf][0] -(.ml.fresh.feat.fftAggreg[xf]`variance) ~ fft_aggregated[xf][1] -(.ml.fresh.feat.fftAggreg[xb]`centroid) ~ fft_aggregated[xb][0] -(.ml.fresh.feat.fftAggreg[xb]`variance) ~ fft_aggregated[xb][1] -(.ml.fresh.feat.fftAggreg[x1]`centroid) ~ fft_aggregated[x1][0] -(.ml.fresh.feat.fftAggreg[x1]`variance) ~ fft_aggregated[x1][1] -(.ml.fresh.feat.fftAggreg[x2]`centroid) ~ fft_aggregated[x2][0] -(.ml.fresh.feat.fftAggreg[x2]`variance) ~ fft_aggregated[x2][1] +(.ml.fresh.feat.fftAggreg[xj]`centroid) ~ .p.get[`fft_aggregated;<][xj][0] +(.ml.fresh.feat.fftAggreg[xj]`variance) ~ .p.get[`fft_aggregated;<][xj][1] +(.ml.fresh.feat.fftAggreg[xi]`centroid) ~ .p.get[`fft_aggregated;<][xi][0] +(.ml.fresh.feat.fftAggreg[xi]`variance) ~ .p.get[`fft_aggregated;<][xi][1] +(.ml.fresh.feat.fftAggreg[xf]`centroid) ~ .p.get[`fft_aggregated;<][xf][0] +(.ml.fresh.feat.fftAggreg[xf]`variance) ~ .p.get[`fft_aggregated;<][xf][1] 
+(.ml.fresh.feat.fftAggreg[xb]`centroid) ~ .p.get[`fft_aggregated;<][xb][0] +(.ml.fresh.feat.fftAggreg[xb]`variance) ~ .p.get[`fft_aggregated;<][xb][1] +(.ml.fresh.feat.fftAggreg[x1]`centroid) ~ .p.get[`fft_aggregated;<][x1][0] +(.ml.fresh.feat.fftAggreg[x1]`variance) ~ .p.get[`fft_aggregated;<][x1][1] +(.ml.fresh.feat.fftAggreg[x2]`centroid) ~ .p.get[`fft_aggregated;<][x2][0] +(.ml.fresh.feat.fftAggreg[x2]`variance) ~ .p.get[`fft_aggregated;<][x2][1] (.ml.fresh.feat.fftAggreg[xnull]`centroid) ~ 0n (.ml.fresh.feat.fftAggreg[xnull]`variance) ~ 0n -(value .ml.fresh.feat.augFuller[xj]) ~ "f"$augmented_dickey_fuller[xj][0 1 2] -(value .ml.fresh.feat.augFuller[xf]) ~ "f"$augmented_dickey_fuller[xf][0 1 2] -(value .ml.fresh.feat.augFuller[xi]) ~ "f"$augmented_dickey_fuller[xi][0 1 2] -(value .ml.fresh.feat.augFuller[xb]) ~ "f"$augmented_dickey_fuller[xb][0 1 2] +(value .ml.fresh.feat.augFuller[xj]) ~ "f"$.p.get[`augmented_dickey_fuller;<][xj][0 1 2] +(value .ml.fresh.feat.augFuller[xf]) ~ "f"$.p.get[`augmented_dickey_fuller;<][xf][0 1 2] +(value .ml.fresh.feat.augFuller[xi]) ~ "f"$.p.get[`augmented_dickey_fuller;<][xi][0 1 2] +(value .ml.fresh.feat.augFuller[xb]) ~ "f"$.p.get[`augmented_dickey_fuller;<][xb][0 1 2] (value .ml.fresh.feat.augFuller[x0]) ~ 3#0n (value .ml.fresh.feat.augFuller[x1]) ~ 3#0n (value .ml.fresh.feat.augFuller[x2]) ~ 3#0n (value .ml.fresh.feat.augFuller[xnull]) ~ 3#0n -(.ml.fresh.feat.spktWelch[xj;til 100]) ~ spkt_welch_density[xj;til 100] -(.ml.fresh.feat.spktWelch[xf;til 100]) ~ spkt_welch_density[xf;til 100] -(.ml.fresh.feat.spktWelch[xi;til 100]) ~ spkt_welch_density[xi;til 100] -(.ml.fresh.feat.spktWelch[xb;til 100]) ~ spkt_welch_density[xb;til 100] +(.ml.fresh.feat.spktWelch[xj;til 100]) ~ .p.get[`spkt_welch_density;<][xj;til 100] +(.ml.fresh.feat.spktWelch[xf;til 100]) ~ .p.get[`spkt_welch_density;<][xf;til 100] +(.ml.fresh.feat.spktWelch[xi;til 100]) ~ .p.get[`spkt_welch_density;<][xi;til 100] +(.ml.fresh.feat.spktWelch[xb;til 100]) ~ 
.p.get[`spkt_welch_density;<][xb;til 100] (.ml.fresh.feat.spktWelch[xnull;til 100]) ~ 100#0n -(.ml.fresh.feat.spktWelch[xj;k]) ~ spkt_welch_density[xj;k] -(.ml.fresh.feat.spktWelch[xf;k]) ~ spkt_welch_density[xf;k] -(.ml.fresh.feat.spktWelch[xi;k]) ~ spkt_welch_density[xi;k] -(.ml.fresh.feat.spktWelch[xb;k]) ~ spkt_welch_density[xb;k] +(.ml.fresh.feat.spktWelch[xj;k]) ~ .p.get[`spkt_welch_density;<][xj;k] +(.ml.fresh.feat.spktWelch[xf;k]) ~ .p.get[`spkt_welch_density;<][xf;k] +(.ml.fresh.feat.spktWelch[xi;k]) ~ .p.get[`spkt_welch_density;<][xi;k] +(.ml.fresh.feat.spktWelch[xb;k]) ~ .p.get[`spkt_welch_density;<][xb;k] (.ml.fresh.feat.spktWelch[xnull;k]) ~ 100#0n -fft_coefficient[xj;`abs;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_abs -fft_coefficient[xj;`abs;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_abs -fft_coefficient[xj;`real;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_real -fft_coefficient[xj;`real;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_real -fft_coefficient[xj;`angle;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_angle -fft_coefficient[xj;`angle;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_angle -fft_coefficient[xj;`imag;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_imag -fft_coefficient[xj;`imag;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_imag -fft_coefficient[xf;`abs;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_abs -fft_coefficient[xf;`abs;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_abs -fft_coefficient[xf;`real;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_real -fft_coefficient[xf;`real;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_real -fft_coefficient[xf;`angle;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_angle -fft_coefficient[xf;`angle;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_angle -fft_coefficient[xf;`imag;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_imag -fft_coefficient[xf;`imag;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_imag -fft_coefficient[xi;`abs;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_abs -fft_coefficient[xi;`abs;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_abs 
-fft_coefficient[xi;`real;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_real -fft_coefficient[xi;`real;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_real -fft_coefficient[xi;`angle;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_angle -fft_coefficient[xi;`angle;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_angle -fft_coefficient[xi;`imag;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_imag -fft_coefficient[xi;`imag;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_imag -fft_coefficient[xb;`abs;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_abs -fft_coefficient[xb;`abs;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_abs -fft_coefficient[xb;`real;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_real -fft_coefficient[xb;`real;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_real -fft_coefficient[xb;`angle;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_angle -fft_coefficient[xb;`angle;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_angle -fft_coefficient[xb;`imag;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_imag -fft_coefficient[xb;`imag;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_imag +.p.get[`fft_coefficient;<][xj;`abs;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_abs +.p.get[`fft_coefficient;<][xj;`abs;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_abs +.p.get[`fft_coefficient;<][xj;`real;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_real +.p.get[`fft_coefficient;<][xj;`real;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_real +.p.get[`fft_coefficient;<][xj;`angle;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_angle +.p.get[`fft_coefficient;<][xj;`angle;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_angle +.p.get[`fft_coefficient;<][xj;`imag;0]~.ml.fresh.feat.fftCoeff[xj;1]`coeff_0_imag +.p.get[`fft_coefficient;<][xj;`imag;49]~.ml.fresh.feat.fftCoeff[xj;50]`coeff_49_imag +.p.get[`fft_coefficient;<][xf;`abs;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_abs +.p.get[`fft_coefficient;<][xf;`abs;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_abs +.p.get[`fft_coefficient;<][xf;`real;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_real 
+.p.get[`fft_coefficient;<][xf;`real;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_real +.p.get[`fft_coefficient;<][xf;`angle;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_angle +.p.get[`fft_coefficient;<][xf;`angle;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_angle +.p.get[`fft_coefficient;<][xf;`imag;0]~.ml.fresh.feat.fftCoeff[xf;1]`coeff_0_imag +.p.get[`fft_coefficient;<][xf;`imag;49]~.ml.fresh.feat.fftCoeff[xf;50]`coeff_49_imag +.p.get[`fft_coefficient;<][xi;`abs;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_abs +.p.get[`fft_coefficient;<][xi;`abs;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_abs +.p.get[`fft_coefficient;<][xi;`real;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_real +.p.get[`fft_coefficient;<][xi;`real;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_real +.p.get[`fft_coefficient;<][xi;`angle;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_angle +.p.get[`fft_coefficient;<][xi;`angle;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_angle +.p.get[`fft_coefficient;<][xi;`imag;0]~.ml.fresh.feat.fftCoeff[xi;1]`coeff_0_imag +.p.get[`fft_coefficient;<][xi;`imag;49]~.ml.fresh.feat.fftCoeff[xi;50]`coeff_49_imag +.p.get[`fft_coefficient;<][xb;`abs;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_abs +.p.get[`fft_coefficient;<][xb;`abs;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_abs +.p.get[`fft_coefficient;<][xb;`real;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_real +.p.get[`fft_coefficient;<][xb;`real;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_real +.p.get[`fft_coefficient;<][xb;`angle;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_angle +.p.get[`fft_coefficient;<][xb;`angle;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_angle +.p.get[`fft_coefficient;<][xb;`imag;0]~.ml.fresh.feat.fftCoeff[xb;1]`coeff_0_imag +.p.get[`fft_coefficient;<][xb;`imag;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_imag (.ml.fresh.feat.fftCoeff[xnull;50]`coeff_49_abs) ~ 0n (.ml.fresh.feat.fftCoeff[xnull;50]`coeff_49_real) ~ 0n @@ -551,8 +550,8 @@ fft_coefficient[xb;`imag;49]~.ml.fresh.feat.fftCoeff[xb;50]`coeff_49_imag 
(.ml.fresh.feat.fftCoeff[xnull;50]`coeff_49_imag) ~ 0n / -(value[.ml.fresh.feat.fftAggreg[xb]]0 1 2) ~ fft_aggregated[xb] 0 1 2 -fftAggreg[xj][3] ~ fft_aggregated[xj][3] -fftAggreg[xf][2] ~ fft_aggregated[xf][2] -fftAggreg[xf][3] ~ fft_aggregated[xf][3] +(value[.ml.fresh.feat.fftAggreg[xb]]0 1 2) ~ .p.get[`fft_aggregated;<][xb] 0 1 2 +fftAggreg[xj][3] ~ .p.get[`fft_aggregated;<][xj][3] +fftAggreg[xf][2] ~ .p.get[`fft_aggregated;<][xf][2] +fftAggreg[xf][3] ~ .p.get[`fft_aggregated;<][xf][3] \ diff --git a/fresh/tests/test.p b/fresh/tests/test.p index cf78362..505a96a 100644 --- a/fresh/tests/test.p +++ b/fresh/tests/test.p @@ -5,49 +5,49 @@ p)from scipy.stats import linregress p)from statsmodels.tsa.stattools import acf, adfuller, pacf p)from numpy.linalg import LinAlgError -p)def< _get_length_sequences_where(x): +p)def _get_length_sequences_where(x): if len(x) == 0: return [0] else: res = [len(list(group)) for value, group in itertools.groupby(x) if value == 1] return res if len(res) > 0 else [0] -p)def< aggregate_on_chunks(x, f_agg, chunk_len):return [getattr(x[i * chunk_len: (i + 1) * chunk_len], f_agg)() for i in range(int(np.ceil(len(x) / chunk_len)))] - -p)def< hasduplicate(x):return len(x) != len(set(x)) -p)def< hasduplicatemin(x):return sum(np.asarray(x) == min(x)) >= 2 -p)def< hasduplicatemax(x):return sum(np.asarray(x) == max(x)) >= 2 -p)def< abs_energy(x):x = np.asarray(x); return sum(x * x) -p)def< mean_change(x):return np.mean(np.diff(x)) -p)def< mean_abs_change(x):return np.mean(np.abs(np.diff(x))) -p)def< count_above_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x > m)[0].shape[0] -p)def< count_below_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x < m)[0].shape[0] -p)def< first_location_of_maximum(x): x = np.asarray(x); return np.argmax(x) / len(x) if len(x) > 0 else np.NaN -p)def< first_location_of_minimum(x): x = np.asarray(x); return np.argmin(x) / len(x) if len(x) > 0 else np.NaN -p)def< last_location_of_minimum(x): x = 
np.asarray(x); return 1.0 - (1+np.argmin(x[::-1]))/ len(x) if len(x) > 0 else np.NaN -p)def< last_location_of_maximum(x): x = np.asarray(x); return 1.0 - (1+np.argmax(x[::-1]))/ len(x) if len(x) > 0 else np.NaN -p)def< ratio_val_num_to_t_series(x):return len(set(x))/len(x) -p)def< ratio_beyond_r_sigma(x,r):return sum(abs(x - np.mean(x)) > r * np.std(x))/len(x) -p)def< large_standard_deviation(x,r):x = np.asarray(x);return np.std(x) > (r * (max(x) - min(x))) -p)def< absolute_sum_of_changes(x):return np.sum(abs(np.diff(x))) -p)def< longest_strike_below_mean(x):return max(_get_length_sequences_where(x <= np.mean(x))) if len(x) > 0 else 0 -p)def< longest_strike_above_mean(x):return max(_get_length_sequences_where(x >= np.mean(x))) if len(x) > 0 else 0 -p)def< skewness_py(x):x = pd.Series(x);return pd.Series.skew(x) -p)def< kurtosis_py(x):x = pd.Series(x);return pd.Series.kurtosis(x) -p)def< range_count(x,min,max):return np.sum((x >= min) & (x < max)) -p)def< variance_larger_than_standard_deviation(x):return np.var(x) > np.std(x) -p)def< number_cwt_peaks(x,n):return len(find_peaks_cwt(vector=x, widths=np.array(list(range(1, n + 1))), wavelet=ricker)) -p)def< quantile_py(x, q):x = pd.Series(x);return pd.Series.quantile(x, q) -p)def< value_count(x, value): +p)def aggregate_on_chunks(x, f_agg, chunk_len):return [getattr(x[i * chunk_len: (i + 1) * chunk_len], f_agg)() for i in range(int(np.ceil(len(x) / chunk_len)))] + +p)def hasduplicate(x):return len(x) != len(set(x)) +p)def hasduplicatemin(x):return sum(np.asarray(x) == min(x)) >= 2 +p)def hasduplicatemax(x):return sum(np.asarray(x) == max(x)) >= 2 +p)def abs_energy(x):x = np.asarray(x); return sum(x * x) +p)def mean_change(x):return np.mean(np.diff(x)) +p)def mean_abs_change(x):return np.mean(np.abs(np.diff(x))) +p)def count_above_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x > m)[0].shape[0] +p)def count_below_mean(x): x = np.asarray(x); m = np.mean(x); return np.where(x < m)[0].shape[0] +p)def 
first_location_of_maximum(x): x = np.asarray(x); return np.argmax(x) / len(x) if len(x) > 0 else np.NaN +p)def first_location_of_minimum(x): x = np.asarray(x); return np.argmin(x) / len(x) if len(x) > 0 else np.NaN +p)def last_location_of_minimum(x): x = np.asarray(x); return 1.0 - (1+np.argmin(x[::-1]))/ len(x) if len(x) > 0 else np.NaN +p)def last_location_of_maximum(x): x = np.asarray(x); return 1.0 - (1+np.argmax(x[::-1]))/ len(x) if len(x) > 0 else np.NaN +p)def ratio_val_num_to_t_series(x):return len(set(x))/len(x) +p)def ratio_beyond_r_sigma(x,r):return sum(abs(x - np.mean(x)) > r * np.std(x))/len(x) +p)def large_standard_deviation(x,r):x = np.asarray(x);return np.std(x) > (r * (max(x) - min(x))) +p)def absolute_sum_of_changes(x):return np.sum(abs(np.diff(x))) +p)def longest_strike_below_mean(x):return max(_get_length_sequences_where(x <= np.mean(x))) if len(x) > 0 else 0 +p)def longest_strike_above_mean(x):return max(_get_length_sequences_where(x >= np.mean(x))) if len(x) > 0 else 0 +p)def skewness_py(x):x = pd.Series(x);return pd.Series.skew(x) +p)def kurtosis_py(x):x = pd.Series(x);return pd.Series.kurtosis(x) +p)def range_count(x,min,max):return np.sum((x >= min) & (x < max)) +p)def variance_larger_than_standard_deviation(x):return np.var(x) > np.std(x) +p)def number_cwt_peaks(x,n):return len(find_peaks_cwt(vector=x, widths=np.array(list(range(1, n + 1))), wavelet=ricker)) +p)def quantile_py(x, q):x = pd.Series(x);return pd.Series.quantile(x, q) +p)def value_count(x, value): if np.isnan(value): return np.isnan(x) else: return x[x == value].shape[0] -p)def< percentage_recurring_all_data(x): +p)def percentage_recurring_all_data(x): unique, counts = np.unique(x, return_counts=True) return np.sum(counts > 1) / float(counts.shape[0]) -p)def< percentage_recurring_all_val(x): +p)def percentage_recurring_all_val(x): x = pd.Series(x) if len(x) == 0: return np.nan @@ -55,7 +55,7 @@ p)def< percentage_recurring_all_val(x): value_counts = x.value_counts() return 
value_counts[value_counts > 1].sum() / len(x) -p)def< number_peaks(x, n): +p)def number_peaks(x, n): x = np.asarray(x) x_reduced = x[n:-n] res = None @@ -68,7 +68,7 @@ p)def< number_peaks(x, n): res &= (x_reduced > np.roll(x, -i)[n:-n]) return sum(res) -p)def< cid_ce(x, normalize): +p)def cid_ce(x, normalize): x = np.asarray(x) if normalize: s = np.std(x) @@ -79,22 +79,22 @@ p)def< cid_ce(x, normalize): x = np.diff(x) return np.sqrt(np.sum((x * x))) -p)def< mean_second_derivative_central(x): +p)def mean_second_derivative_central(x): diff = (np.roll(x, 1) - 2 * np.array(x) + np.roll(x, -1)) / 2.0 return np.mean(diff[1:-1]) -p)def< sum_recurring_values(x): +p)def sum_recurring_values(x): unique, counts = np.unique(x, return_counts=True) counts[counts < 2] = 0 counts[counts > 1] = 1 return np.sum(counts * unique) -p)def< sum_recurring_data_points(x): +p)def sum_recurring_data_points(x): unique, counts = np.unique(x, return_counts=True) counts[counts < 2] = 0 return np.sum(counts * unique) -p)def< c3_py(x, lag): +p)def c3_py(x, lag): n = len(x) x = np.asarray(x) if 2 * lag >= n: @@ -102,20 +102,20 @@ p)def< c3_py(x, lag): else: return np.mean((np.roll(x, 2 * -lag) * np.roll(x, -lag) * x)[0:(n - 2 * lag)]) -p)def< number_crossing_m(x, m): +p)def number_crossing_m(x, m): if not isinstance(x, (np.ndarray, pd.Series)): x = np.asarray(x) positive = x > m return np.where(np.bitwise_xor(positive[1:], positive[:-1]))[0].size -p)def< binned_entropy(x, max_bins): +p)def binned_entropy(x, max_bins): if not isinstance(x, (np.ndarray, pd.Series)): x = np.asarray(x) hist, bin_edges = np.histogram(x, bins=max_bins) probs = hist / x.size return - np.sum(p * np.math.log(p) for p in probs if p != 0) -p)def< autocorrelation(x, lag): +p)def autocorrelation(x, lag): if type(x) is pd.Series: x = x.values if len(x) < lag: @@ -126,7 +126,7 @@ p)def< autocorrelation(x, lag): sum_product = np.sum((y1-x_mean)*(y2-x_mean)) return sum_product / ((len(x) - lag) * np.var(x)) -p)def< 
energy_ratio_by_chunks(x,y,z): +p)def energy_ratio_by_chunks(x,y,z): full_series_energy = np.sum(x ** 2) num_segments = y segment_focus = z @@ -137,7 +137,7 @@ p)def< energy_ratio_by_chunks(x,y,z): res_data=(np.sum(x[start:end]**2.0)/full_series_energy) return res_data -p)def< change_quantiles(x, ql, qh, isabs, f_agg): +p)def change_quantiles(x, ql, qh, isabs, f_agg): if ql >= qh: ValueError("ql={} should be lower than qh={}".format(ql, qh)) div = np.diff(x) @@ -156,7 +156,7 @@ p)def< change_quantiles(x, ql, qh, isabs, f_agg): aggregator = getattr(np, f_agg) return aggregator(div[ind_inside_corridor]) -p)def< time_reversal_asymmetry_statistic(x, lag): +p)def time_reversal_asymmetry_statistic(x, lag): n = len(x) x = np.asarray(x) if 2 * lag >= n: @@ -165,7 +165,7 @@ p)def< time_reversal_asymmetry_statistic(x, lag): return np.mean((np.roll(x, 2 * -lag) * np.roll(x, 2 * -lag) * np.roll(x, -lag) - np.roll(x, -lag) * x * x)[0:(n - 2 * lag)]) -p)def< index_mass_quantile(x, q): +p)def index_mass_quantile(x, q): x = np.asarray(x) abs_x = np.abs(x) @@ -177,13 +177,13 @@ p)def< index_mass_quantile(x, q): mass_centralized = np.cumsum(abs_x) / s return (np.argmax(mass_centralized >= q)+1)/len(x) -p)def< linear_trend(x): +p)def linear_trend(x): linReg = linregress(range(len(x)), x) return linReg -p)def< get_moment(y, moment):return y.dot(np.arange(len(y))**moment) / y.sum() -p)def< get_centroid(y):return get_moment(y, 1) -p)def< get_variance(y):return get_moment(y, 2) - get_centroid(y) ** 2 +p)def get_moment(y, moment):return y.dot(np.arange(len(y))**moment) / y.sum() +p)def get_centroid(y):return get_moment(y, 1) +p)def get_variance(y):return get_moment(y, 2) - get_centroid(y) ** 2 p)def get_skew(y): variance = get_variance(y) @@ -193,7 +193,7 @@ p)def get_skew(y): return ( get_moment(y, 3) - 3*get_centroid(y)*variance - get_centroid(y)**3 ) / get_variance(y)**(1.5) -p)def< get_kurtosis(y): +p)def get_kurtosis(y): variance = get_variance(y) if variance < 0.5: return np.nan @@ 
-203,11 +203,11 @@ p)def< get_kurtosis(y): + 6*get_moment(y, 2)*get_centroid(y)**2 - 3*get_centroid(y) ) / get_variance(y)**2 -p)def< fft_aggregated(x): +p)def fft_aggregated(x): fft_abs = abs(np.fft.rfft(x)) return get_centroid(fft_abs),get_variance(fft_abs),get_skew(fft_abs),get_kurtosis(fft_abs) -p)def< index_mass_quantile(x, q): +p)def index_mass_quantile(x, q): x = np.asarray(x) abs_x = np.abs(x) @@ -219,7 +219,7 @@ p)def< index_mass_quantile(x, q): mass_centralized = np.cumsum(abs_x) / s return (np.argmax(mass_centralized >= q)+1)/len(x) -p)def< agg_autocorrelation(x,y): +p)def agg_autocorrelation(x,y): var = np.var(x) n = len(x) if np.abs(var) < 10**-10 or n == 1: @@ -228,7 +228,7 @@ p)def< agg_autocorrelation(x,y): a = acf(x, adjusted=True, fft=n > 1250)[1:] return getattr(np, y)(a) -p)def< augmented_dickey_fuller(x): +p)def augmented_dickey_fuller(x): res = None try: res = adfuller(x) @@ -241,11 +241,11 @@ p)def< augmented_dickey_fuller(x): return res -p)def< spkt_welch_density(x, y): +p)def spkt_welch_density(x, y): freq, pxx = welch(x) return pxx[y] -p)def< fft_coefficient(x,y,z): +p)def fft_coefficient(x,y,z): fft = np.fft.rfft(x) @@ -263,7 +263,7 @@ p)def< fft_coefficient(x,y,z): return res -p)def< partial_autocorrelation(x, param): +p)def partial_autocorrelation(x, param): max_demanded_lag = max(param) n = len(x) if n <= 1: diff --git a/fresh/utils.q b/fresh/utils.q index bf09ac8..87052f2 100644 --- a/fresh/utils.q +++ b/fresh/utils.q @@ -6,12 +6,12 @@ \d .ml // Python imports -sci_ver :1.5<="F"$3#.p.import[`scipy][`:__version__]` +sci_ver :1.5<="F"$3#$[-11h=type x;string;]x:.p.import[`scipy][`:__version__]` numpy :.p.import`numpy pyStats :.p.import`scipy.stats signal :.p.import`scipy.signal stattools:.p.import`statsmodels.tsa.stattools -stats_ver:"F"$"." vs (.p.import`statsmodels)[`:__version__]` +stats_ver:"F"$"." 
vs $[-11h=type x;string;]x:.p.import[`statsmodels][`:__version__]` stats_break:$[((stats_ver[0]=0)&stats_ver[1]>=12)|stats_ver[0]>0;1b;0b] // @private diff --git a/ml.q b/ml.q index a527258..1d3b25d 100644 --- a/ml.q +++ b/ml.q @@ -4,7 +4,10 @@ // Define version, path, and loadfile -\l p.q /embedPy +@[{system"l ",x;.pykx.loaded:1b};"pykx.q";{@[{system"l ",x;.pykx.loaded:0b};"p.q";{'"Failed to load PyKX or embedPy with error: ",x}]}] +if[.pykx.loaded;.p:.pykx]; +if[not `toraw in key `.p;.p.toraw:(::)] + \d .ml version:@[{TOOLKITVERSION};`;`development] path:{string`ml^`$@[{"/"sv -1_"/"vs ssr[;"\\";"/"](-3#get .z.s)0};`;""]}` From 31d2788bfc08e8ff83721159d03dd00311237d4d Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Fri, 29 Sep 2023 14:23:22 +0100 Subject: [PATCH 2/7] Updates to utilities --- fresh/tests/significancetests.p | 8 ++++---- fresh/tests/sigtests.t | 9 ++++----- fresh/utils.q | 2 +- requirements.txt | 7 ++++--- util/tests/metric.t | 12 ++++++------ util/tests/mlpy.p | 4 ++-- util/utilities.q | 1 + 7 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fresh/tests/significancetests.p b/fresh/tests/significancetests.p index 6aaa005..7b39180 100644 --- a/fresh/tests/significancetests.p +++ b/fresh/tests/significancetests.p @@ -1,7 +1,7 @@ p)import numpy as np p)from scipy import stats -p)def< binary_feature_binary_test(x, y): +p)def binary_feature_binary_test(x, y): x0, x1 = np.unique(x) y0, y1 = np.unique(y) @@ -17,7 +17,7 @@ p)def< binary_feature_binary_test(x, y): return p_value -p)def< target_binary_feature_real_test(y, x): +p)def target_binary_feature_real_test(y, x): y0, y1 = np.unique(y) x_y1 = x[y == y1] @@ -26,11 +26,11 @@ p)def< target_binary_feature_real_test(y, x): KS, p_ks = stats.ks_2samp(x_y1, x_y0,mode='asymp') return p_ks -p)def< target_real_feature_real_test(x, y): +p)def target_real_feature_real_test(x, y): tau, p_value = stats.kendalltau(x, y) return p_value -p)def< benjamini_hochberg_test(df_pvalues, hypotheses_independent, 
fdr_level): +p)def benjamini_hochberg_test(df_pvalues, hypotheses_independent, fdr_level): df_pvalues = df_pvalues.sort_values(by="p_value") m = len(df_pvalues) K = np.arange(1, m + 1) diff --git a/fresh/tests/sigtests.t b/fresh/tests/sigtests.t index 6619325..7fadfe3 100644 --- a/fresh/tests/sigtests.t +++ b/fresh/tests/sigtests.t @@ -11,7 +11,6 @@ In each case significance tests implemented within freshq are compared to equivalent significance tests implemented previously in python. \ -\l p.q \l ml.q \l fresh/init.q \l fresh/tests/significancetests.p @@ -22,13 +21,13 @@ xb:5000#0101101011b yb:5000#0101101011b / 1a. -.ml.fresh.i.fisher[xb;yb] ~ binary_feature_binary_test[xb;yb] +.ml.fresh.i.fisher[xb;yb] ~ .p.get[`binary_feature_binary_test;<][xb;yb] / 1b. -.ml.fresh.i.ks[yb;xf] ~ target_binary_feature_real_test[yb;xf] +.ml.fresh.i.ks[yb;xf] ~ .p.get[`target_binary_feature_real_test;<][yb;xf] / 1c. -.ml.fresh.i.kTau[xf;yf] ~ target_real_feature_real_test[xf;yf] +.ml.fresh.i.kTau[xf;yf] ~ .p.get[`target_real_feature_real_test;<][xf;yf] / 2. 
@@ -45,7 +44,7 @@ table3:([]desc 1000000?1f;1000000?10f;asc 1000000?1f) table4:([]1000000?0b;1000000?1f;1000000?1f) target1:asc 1000000?100f;target2:desc 1000000?1f;target3:target4:1000000?0b bintest:{2=count distinct x} -pdmatrix:{pddf[benjamini_hochberg_test[y;"FALSE";x]][`:values]} +pdmatrix:{pddf[.p.get[`benjamini_hochberg_test;<][.p.topd y;"FALSE";x]][`:values]} k:{pdmatrix[x;y]`} vec:{k[x;y][;2]} bhfn:{[table;target] diff --git a/fresh/utils.q b/fresh/utils.q index 87052f2..3faea6d 100644 --- a/fresh/utils.q +++ b/fresh/utils.q @@ -175,7 +175,7 @@ fresh.i.expandResults:{[results;column] // @return {float} Kendall’s tau - Close to 1 shows strong agreement, close to // -1 shows strong disagreement fresh.i.kTau:{[target;feature] - fresh.i.kendallTau[<;target;feature]1 + fresh.i.kendallTau[target;feature][`:pvalue]` } // @private diff --git a/requirements.txt b/requirements.txt index 336c2a5..d94636a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ +pykx~=2.0 numpy -scipy +scipy<=1.9 scikit-learn<=0.23.0 -statsmodels +statsmodels<=0.12 matplotlib sobol-seq -pandas>=1.0 +pandas~=1.0 diff --git a/util/tests/metric.t b/util/tests/metric.t index 80ccaca..763ef57 100644 --- a/util/tests/metric.t +++ b/util/tests/metric.t @@ -116,15 +116,15 @@ plaintabn:plaintab,'([]x4:1 3 0n) .ml.mae[xf;yf]~mae[xf;yf]` .ml.mae[xb;yb]~mae["i"$xb;"i"$yb]` .ml.mae[xb;xb]~0f -(.ml.mape[x;y])~mean_absolute_percentage_error[y;x] -.ml.mape[xf;yf]~mean_absolute_percentage_error[yf;xf] -.ml.mape[xm;ym]~{mean_absolute_percentage_error[x;y]}'[flip ym;flip xm] +(.ml.mape[x;y])~.p.get[`mean_absolute_percentage_error;<][y;x] +.ml.mape[xf;yf]~.p.get[`mean_absolute_percentage_error;<][yf;xf] +.ml.mape[xm;ym]~{.p.get[`mean_absolute_percentage_error;<][x;y]}'[flip ym;flip xm] .ml.mape[x;x]~0f .ml.mape[1 0n 4 2 0n;1 2 4 3 1]~11.11111111111 -.ml.smape[x;y]~smape[x;y] -.ml.smape[xf;yf]~smape[xf;yf] -.ml.smape[xm;ym]~{smape[x;y]}'[flip xm;flip ym] +.ml.smape[x;y]~.p.get[`smape][x;y]` 
+.ml.smape[xf;yf]~.p.get[`smape][xf;yf]` +.ml.smape[xm;ym]~{.p.get[`smape][x;y]`}'[flip xm;flip ym] .ml.smape[x;x]~0f .ml.smape[1 0n 4 2 0n;1 2 4 3 1]~6.666666666666666667 .ml.r2Score[xf;yf] ~ r2[yf;xf]` diff --git a/util/tests/mlpy.p b/util/tests/mlpy.p index 5046559..99fe1e1 100644 --- a/util/tests/mlpy.p +++ b/util/tests/mlpy.p @@ -1,9 +1,9 @@ p)import numpy as np p)from sklearn.metrics import fbeta_score -p)def< mean_absolute_percentage_error(y_true, y_pred): +p)def mean_absolute_percentage_error(y_true, y_pred): y_true, y_pred = np.array(y_true), np.array(y_pred) return np.mean(np.abs((y_true - y_pred) / y_true)) * 100 -p)def< smape(A, F): +p)def smape(A, F): return 100 * np.mean(np.abs(F - A) / (np.abs(A) + np.abs(F))) diff --git a/util/utilities.q b/util/utilities.q index 281ba7c..a508a9e 100644 --- a/util/utilities.q +++ b/util/utilities.q @@ -108,6 +108,7 @@ trainTestSplit:{[data;target;size] // @param tab {table} A q table // @return {<} a Pandas dataframe tab2df:{[tab] + if[.pykx.loaded;:.p.eval["lambda x:x"].p.topd tab]; updTab:@[flip 0!tab;i.findCols[tab;"c"];enlist each]; transformTab:@[updTab;i.findCols[tab]"pmdznuvt";i.q2npDate]; pandasDF:i.pandasDF[transformTab][@;cols tab]; From 995c117ea6d9aa31ca9b0334fce4d5872af3b388 Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Mon, 2 Oct 2023 13:46:23 +0100 Subject: [PATCH 3/7] Updates --- graph/tests/graph.t | 1 - ml.q | 2 +- optimize/tests/test.t | 1 - util/tests/metric.t | 26 +++++++++++++------------- util/tests/preproctst.t | 8 ++++---- util/tests/utiltst.t | 24 ++++++++++++++---------- util/utilities.q | 3 ++- 7 files changed, 34 insertions(+), 31 deletions(-) diff --git a/graph/tests/graph.t b/graph/tests/graph.t index 852f439..ed3fedd 100644 --- a/graph/tests/graph.t +++ b/graph/tests/graph.t @@ -2,7 +2,6 @@ // which will fail to produce a valid/operational graph/pipeline in order to ensure that the // catching mechanism for the creation of such workflows is reliable and fully understood -\l p.q 
\l ml.q \l graph/utils.q \l graph/graph.q diff --git a/ml.q b/ml.q index 1d3b25d..9bf9b06 100644 --- a/ml.q +++ b/ml.q @@ -6,7 +6,7 @@ @[{system"l ",x;.pykx.loaded:1b};"pykx.q";{@[{system"l ",x;.pykx.loaded:0b};"p.q";{'"Failed to load PyKX or embedPy with error: ",x}]}] if[.pykx.loaded;.p:.pykx]; -if[not `toraw in key `.p;.p.toraw:(::)] +if[not all `topy`toraw in key `.p;.p.toraw:.p.topy:(::)] \d .ml version:@[{TOOLKITVERSION};`;`development] diff --git a/optimize/tests/test.t b/optimize/tests/test.t index 41d73b0..ecffd34 100644 --- a/optimize/tests/test.t +++ b/optimize/tests/test.t @@ -1,4 +1,3 @@ -\l p.q \l ml.q \l util/utils.q \l util/utilities.q diff --git a/util/tests/metric.t b/util/tests/metric.t index 763ef57..2982cdf 100644 --- a/util/tests/metric.t +++ b/util/tests/metric.t @@ -12,7 +12,7 @@ r2:.p.import[`sklearn.metrics]`:r2_score msle:.p.import[`sklearn.metrics]`:mean_squared_log_error mse:.p.import[`sklearn.metrics]`:mean_squared_error rocau:.p.import[`sklearn.metrics]`:roc_auc_score -logloss:.p.import[`sklearn.metrics]`:log_loss +logloss:{.p.import[`sklearn.metrics][`:log_loss][.p.toraw x;.p.toraw y]`} mae:.p.import[`sklearn.metrics]`:mean_absolute_error x:1000?1000 @@ -87,12 +87,12 @@ plaintabn:plaintab,'([]x4:1 3 0n) .ml.classReport[3 3 5 2 5 1f;3 5 2 3 5 1f]~1!flip`class`precision`recall`f1_score`support!((`$string each 1 2 3 5),`$"avg/total";1 0 0.5 0.5 0.5;1 0 0.5 0.5 0.5;1 0 0.5 0.5 0.5;1 1 2 2 6i) .ml.classReport[3 3 5 0n 5 1;3 5 2 3 5 0n]~1!flip`class`precision`recall`f1_score`support!((`$string each 0n 2 3 5),`$"avg/total";0 0n 0.5 0.5 0.33333333333333;0 0 0.5 0.5 0.25;0 0 0.5 0.5 0.25;1 1 2 2 6i) -{.ml.logLoss[x;y]~logloss[x;y]`}[1000?0b;(1-p),'p:1000?1f] -{.ml.logLoss[x;y]~logloss[x;y]`}[1000?0b;(1-p),'p:1000?1i] +{.ml.logLoss[x;y]~logloss[x;y]}[1000?0b;(1-p),'p:1000?1f] +{.ml.logLoss[x;y]~logloss[x;y]}[1000?0b;(1-p),'p:1000?1i] .ml.logLoss[10#0b;(1-p),'p:10?1i]~-0f (floor .ml.logLoss[10110b;(2 0n;1 1; 3 1;0n 2; 3 3)])~floor 6 (floor 
.ml.logLoss[1000?0b;(1-p),'p:1000#0n])~34 -{.ml.crossEntropy[x;y]~logloss[x;y]`}[(first idesc@)each p;p%:sum each p:1000 5#5000?1f] +{.ml.crossEntropy[x;y]~logloss[x;y]}[(first idesc@)each p;p%:sum each p:1000 5#5000?1f] .ml.mse[x;y] ~ skmetric[`:mean_squared_error][x;y]` .ml.mse[xf;yf] ~ skmetric[`:mean_squared_error][xf;yf]` .ml.mse[x;x]~0f @@ -132,15 +132,15 @@ plaintabn:plaintab,'([]x4:1 3 0n) .ml.r2Score[2 2 2;1 2 3] ~ r2[1 2 3;2 2 2]` .ml.r2Score[x;x]~1f .ml.r2Score[1 0n 4 2 0n;1 2 4 2 1]~1f -.ml.tScore[x;y] ~first stats[`:ttest_1samp][x;y]` -.ml.tScore[xf;yf]~first stats[`:ttest_1samp][xf;yf]` -.ml.tScore[xb;yb]~first stats[`:ttest_1samp][xb;yb]` -.ml.tScore[x;x]~first stats[`:ttest_1samp][x;x]` -.ml.tScoreEqual[x;y]~abs first stats[`:ttest_ind][x;y]` -.ml.tScoreEqual[xf;yf]~abs first stats[`:ttest_ind][xf;yf]` -.ml.tScoreEqual[xb;yb]~abs first stats[`:ttest_ind][xb;yb]` -.ml.tScoreEqual[x;x]~abs first stats[`:ttest_ind][x;x]` -.ml.covMatrix[flip value flip plaintab]~np[`:cov][flip value flip plaintab;`bias pykw 1b]` +.ml.tScore[x;y] ~ stats[`:ttest_1samp][x;y][`:statistic]` +.ml.tScore[xf;yf]~ stats[`:ttest_1samp][xf;yf][`:statistic]` +.ml.tScore[xb;yb]~ stats[`:ttest_1samp][xb;yb][`:statistic]` +.ml.tScore[x;x]~ stats[`:ttest_1samp][x;x][`:statistic]` +.ml.tScoreEqual[x;y]~abs stats[`:ttest_ind][x;y][`:statistic]` +.ml.tScoreEqual[xf;yf]~abs stats[`:ttest_ind][xf;yf][`:statistic]` +.ml.tScoreEqual[xb;yb]~abs stats[`:ttest_ind][xb;yb][`:statistic]` +.ml.tScoreEqual[x;x]~abs stats[`:ttest_ind][x;x][`:statistic]` +.ml.covMatrix[flip value flip plaintab]~np[`:cov][.p.topy flip value flip plaintab;`bias pykw 1b]` .ml.covMatrix[(10110b;01110b)]~(0.24 0.04;0.04 0.24) .ml.covMatrix[(10110b;11111b)]~(0.24 0f;0 0f) .ml.covMatrix[(11111b;11111b)]~(0 0f;0 0f) diff --git a/util/tests/preproctst.t b/util/tests/preproctst.t index 09c9782..2aa35ae 100644 --- a/util/tests/preproctst.t +++ b/util/tests/preproctst.t @@ -53,7 +53,7 @@ keyedinfs:([k:1 2]x:0 0W) 
.ml.dropConstant[nt]~([]101b;x2:1 2 0n) .ml.dropConstant[nulltab]~select x,x1,x2,x3 from nulltab -MinMaxScaler[`:fit][flip plainmat]; +MinMaxScaler[`:fit][.p.toraw flip plainmat]; minMaxKeys:`minData`maxData minMax1:.ml.minMaxScaler.fit[plainmat] minMax2:.ml.minMaxScaler.fit[scale1] @@ -67,7 +67,7 @@ minMax3[`modelInfo]~minMaxKeys!1 5f minMax4[`modelInfo]~minMaxKeys!01b minMax5[`modelInfo]~minMaxKeys!(3 1 4f;5 1 4f) -.ml.minMaxScaler.fitTransform[plainmat]~flip"f"$MinMaxScaler[`:transform][flip plainmat]` +.ml.minMaxScaler.fitTransform[plainmat]~flip"f"$MinMaxScaler[`:transform][.p.toraw flip plainmat]` .ml.minMaxScaler.fitTransform[scale1]~(0 1f;1 0f;1 0f) .ml.minMaxScaler.fitTransform[scale2]~0.5 0.25 1 0.75 0f .ml.minMaxScaler.fitTransform[scale3]~0 0 1 1f @@ -75,7 +75,7 @@ minMax5[`modelInfo]~minMaxKeys!(3 1 4f;5 1 4f) minMax2.transform[scale4]~(1 3f;-0.5 0n;0.5 0n) minMax3.transform[5#y]~5.75 1.75 9.5 5.5 4.25 -StdScaler[`:fit][flip plainmat]; +StdScaler[`:fit][.p.toraw flip plainmat]; stdScaleKeys:`avgData`devData stdScale1:.ml.stdScaler.fit[plainmat] stdScale2:.ml.stdScaler.fit[scale1] @@ -91,7 +91,7 @@ key[stdScale4[`modelInfo]]~stdScaleKeys key[stdScale5[`modelInfo]]~stdScaleKeys key[stdScale6[`modelInfo]]~stdScaleKeys -stdScale1.transform[plainmat]~flip"f"$StdScaler[`:transform][flip plainmat]` +stdScale1.transform[plainmat]~flip"f"$StdScaler[`:transform][.p.toraw flip plainmat]` stdScale2.transform[scale1]~(-1 1f;1 -1f;1 -1f) stdScale3.transform[xf]~scale[xf]` stdScale4.transform[y]~scale[y]` diff --git a/util/tests/utiltst.t b/util/tests/utiltst.t index 66ea3b0..87948da 100644 --- a/util/tests/utiltst.t +++ b/util/tests/utiltst.t @@ -35,10 +35,12 @@ dfc:.ml.tab2df ([]s:`a`b`c;j:1 2 3;c:"ABC") (dfxj:.ml.tab2df tx)[`:index][:;`:names;(::;`jcol)] (dfxx:.ml.tab2df tx)[`:index][:;`:names;(::;::)] tt2:([]date:2005.07.14 2005.07.15;timesp:("N"$"12:10:30.000500000";"N"$"12:13:30.000200007");time:20:30:00.001 19:23:20.201;str:enlist each ("h";"i");ind:1.3 
2.5;bool:10b) -112 112 112 10 -9 -1h~type each first (.ml.tab2df tt2)[`:values]` -(dfc[`:c.values]`)~enlist each "ABC" +col_types:$[.pykx.loaded;-12 112 112 -10 -9 -1h;112 112 112 10 -9 -1h]; +col_types~type each first (.ml.tab2df tt2)[`:values]` +ret_value:$[.pykx.loaded;"ABC";enlist each "ABC"] +ret_value~dfc[`:c.values]`; -.ml.shape[1 2 3*/:til 10] ~ np[`:shape][1 2 3*/:til 10]` +.ml.shape[1 2 3*/:til 10] ~ np[`:shape][.p.toraw 1 2 3*/:til 10]` .ml.shape[enlist 1] ~ np[`:shape][enlist 1]` .ml.shape[1 2] ~ np[`:shape][1 2]` .ml.shape[plaintab]~3 4 @@ -70,13 +72,15 @@ first[.ml.eye[1]] ~ enlist 1f @[{.ml.df2tab x;1b};.ml.tab2df ([]10?1f;"p"$0N,9?1000);0b] -tt~update`$scol from .ml.df2tab df -tj~update`$scol from .ml.df2tab dfj -ts~update`$scol from .ml.df2tab dfs -tx~update`$scol from .ml.df2tab dfsj -tx~update`$scol from`scol`jcol xcol .ml.df2tab dfsx -tx~update`$scol from`scol`jcol xcol .ml.df2tab dfxj -tx~update`$scol from`scol`jcol xcol .ml.df2tab dfxx +convertScol:{$[.pykx.loaded;x;update `$scol from x]} +convertSJcol:{$[.pykx.loaded;;{update`$scol from x}]`scol`jcol xcol x} +tt~convertScol .ml.df2tab df +tj~convertScol .ml.df2tab dfj +ts~convertScol .ml.df2tab dfs +tx~convertScol .ml.df2tab dfsj +tx~convertSJcol .ml.df2tab dfsx +tx~convertSJcol .ml.df2tab dfxj +tx~convertSJcol .ml.df2tab dfxx \S 43 .ml.trainTestSplit[til 10;1+til 10;0.2]~`xtrain`ytrain`xtest`ytest!(2 3 7 1 6 4 9 5;3 4 8 2 7 5 10 6;0 8;1 9) diff --git a/util/utilities.q b/util/utilities.q index a508a9e..7b3855b 100644 --- a/util/utilities.q +++ b/util/utilities.q @@ -108,7 +108,7 @@ trainTestSplit:{[data;target;size] // @param tab {table} A q table // @return {<} a Pandas dataframe tab2df:{[tab] - if[.pykx.loaded;:.p.eval["lambda x:x"].p.topd tab]; + if[.pykx.loaded;:.pykx.eval["lambda x:x"].p.topd tab]; updTab:@[flip 0!tab;i.findCols[tab;"c"];enlist each]; transformTab:@[updTab;i.findCols[tab]"pmdznuvt";i.q2npDate]; pandasDF:i.pandasDF[transformTab][@;cols tab]; @@ -129,6 +129,7 @@ 
tab2df:{[tab] // objects are returned as q (1b) or foreign objects (0b) // @return {<} a q table df2tabTimezone:{[tab;local;qObj] + if[.pykx.loaded;:.pykx.toq tab]; index:$[enlist[::]~tab[`:index.names]`;0;tab[`:index.nlevels]`]; tab:$[index;tab[`:reset_index][];tab]; numpyCols:`$tab[`:columns.to_numpy][]`; From 3023a73dad6611d91644d5bab50e37804b40bcc4 Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Mon, 2 Oct 2023 15:59:01 +0100 Subject: [PATCH 4/7] Updates to clustering and fresh tests --- clust/tests/clt.t | 8 +++---- clust/tests/score.t | 12 +++++----- fresh/tests/features.t | 52 +++++++++++++++++++++--------------------- fresh/tests/sigtests.t | 10 ++++---- fresh/tests/test.p | 1 + fresh/utils.q | 2 +- 6 files changed, 44 insertions(+), 41 deletions(-) diff --git a/clust/tests/clt.t b/clust/tests/clt.t index 3741d14..c45a8a3 100644 --- a/clust/tests/clt.t +++ b/clust/tests/clt.t @@ -22,8 +22,8 @@ qDendrogram:{asc each x(y . z)[`modelInfo;`dgram]} algoOutputs:{asc key x . y} algoOutputsFit:{asc key first x . y} countOutput:{count x y} -pythonRes :{[fclust;mat;t;clust;param]value group fclust[mat t[`modelInfo;`dgram];clust;param]`}[fclust;mat] -pythonDgram:{[lnk;d;lf;df]asc each lnk[flip d;lf;df]`}[lnk] +pythonRes :{[fclust;mat;t;clust;param]value group fclust[.p.toraw mat t[`modelInfo;`dgram];clust;param]`}[fclust;mat] +pythonDgram:{[lnk;d;lf;df]asc each lnk[.p.toraw flip d;lf;df]`}[lnk] qDgramDists:{(x . 
y)[`modelInfo;`dgram]`dist} // Datasets @@ -160,11 +160,11 @@ tab1:.ml.clust.hc.fit[d1;`mdist ;`single] tab2:.ml.clust.hc.fit[d1;`e2dist;`average] tab3:.ml.clust.hc.fit[d2;`e2dist;`centroid] tab4:.ml.clust.hc.fit[d2;`edist ;`complete] -hct1fit:"j"$fclust[mat tab1[`modelInfo;`dgram];4;`maxclust]` +hct1fit:"j"$fclust[.p.toraw mat tab1[`modelInfo;`dgram];4;`maxclust]` hcd1pred1:1 2 1 1 2 2 1 1 1 1 1 2 1 2 2 hcd1pred2:1 3 1 1 3 3 1 1 1 1 1 3 1 3 3 hcd1pred3:1 3 1 1 3 3 1 1 1 1 1 3 1 3 3 -pyDgramDists:(lnk[flip d2;`single;`sqeuclidean]`)[;2] +pyDgramDists:(lnk[.p.toraw flip d2;`single;`sqeuclidean]`)[;2] // Fit passingTest[clusterAdd1[.ml.clust.hc.cutK ];(tab1;4);1b;hct1fit] diff --git a/clust/tests/score.t b/clust/tests/score.t index 7de0bf1..470465e 100644 --- a/clust/tests/score.t +++ b/clust/tests/score.t @@ -27,14 +27,14 @@ rnd1:count[flip d1]?4 rnd2:count[flip d2]?4 // Dave Bouldin Score -passingTest[.ml.clust.daviesBouldin;(d1;clt1`clust);0b;pydb[flip d1;clt1`clust]`] -passingTest[.ml.clust.daviesBouldin;(d2;clt2`clust);0b;pydb[flip d2;clt2`clust]`] -passingTest[.ml.clust.daviesBouldin;(d2;clt3`clust);0b;pydb[flip d2;clt3`clust]`] +passingTest[.ml.clust.daviesBouldin;(d1;clt1`clust);0b;pydb[.p.toraw flip d1;clt1`clust]`] +passingTest[.ml.clust.daviesBouldin;(d2;clt2`clust);0b;pydb[.p.toraw flip d2;clt2`clust]`] +passingTest[.ml.clust.daviesBouldin;(d2;clt3`clust);0b;pydb[.p.toraw flip d2;clt3`clust]`] // Silhouette Score -passingTest[.ml.clust.silhouette;(d1;`edist;clt1`clust;1b);0b;pysil[flip d1;clt1`clust]`] -passingTest[.ml.clust.silhouette;(d2;`edist;clt2`clust;1b);0b;pysil[flip d2;clt2`clust]`] -passingTest[.ml.clust.silhouette;(d2;`edist;clt3`clust;1b);0b;pysil[flip d2;clt3`clust]`] +passingTest[.ml.clust.silhouette;(d1;`edist;clt1`clust;1b);0b;pysil[.p.toraw flip d1;clt1`clust]`] +passingTest[.ml.clust.silhouette;(d2;`edist;clt2`clust;1b);0b;pysil[.p.toraw flip d2;clt2`clust]`] +passingTest[.ml.clust.silhouette;(d2;`edist;clt3`clust;1b);0b;pysil[.p.toraw 
flip d2;clt3`clust]`] // Dunn Score passingTest[applyScoring[.ml.clust.dunn;1 ];(d1;`e2dist;clt1`clust);1b;20] diff --git a/fresh/tests/features.t b/fresh/tests/features.t index 69a8ef4..63e3181 100644 --- a/fresh/tests/features.t +++ b/fresh/tests/features.t @@ -122,13 +122,13 @@ np:.p.import[`numpy] .ml.fresh.feat.firstMax[x2] ~ .p.get[`first_location_of_maximum][x2]` .ml.fresh.feat.firstMax[xnull] ~ 1f -.ml.fresh.feat.firstMin[xj] ~ .p.get[`first_location_of_maximum][xj]` -.ml.fresh.feat.firstMin[xf] ~ .p.get[`first_location_of_maximum][xf]` -.ml.fresh.feat.firstMin[xb] ~ .p.get[`first_location_of_maximum][xb]` -.ml.fresh.feat.firstMin[xi] ~ .p.get[`first_location_of_maximum][xi]` +.ml.fresh.feat.firstMin[xj] ~ .p.get[`first_location_of_minimum][xj]` +.ml.fresh.feat.firstMin[xf] ~ .p.get[`first_location_of_minimum][xf]` +.ml.fresh.feat.firstMin[xb] ~ .p.get[`first_location_of_minimum][xb]` +.ml.fresh.feat.firstMin[xi] ~ .p.get[`first_location_of_minimum][xi]` .ml.fresh.feat.firstMin[x0] ~ 0n -.ml.fresh.feat.firstMin[x1] ~ .p.get[`first_location_of_maximum][x1]` -.ml.fresh.feat.firstMin[x2] ~ .p.get[`first_location_of_maximum][x2]` +.ml.fresh.feat.firstMin[x1] ~ .p.get[`first_location_of_minimum][x1]` +.ml.fresh.feat.firstMin[x2] ~ .p.get[`first_location_of_minimum][x2]` .ml.fresh.feat.firstMin[xnull] ~ 1f .ml.fresh.feat.ratioValNumToSeriesLength[xj] ~ .p.get[`ratio_val_num_to_t_series][xj]` @@ -173,12 +173,12 @@ np:.p.import[`numpy] .ml.fresh.feat.perRecurToAllData[x2] ~ .p.get[`percentage_recurring_all_data][x2]` .ml.fresh.feat.perRecurToAllData[xnull] ~ 1f -.ml.fresh.feat.perRecurToAllVal[xj] ~ .p.get[`percentage_recurring_all_data][xj]` -.ml.fresh.feat.perRecurToAllVal[xf] ~ .p.get[`percentage_recurring_all_data][xf]` -.ml.fresh.feat.perRecurToAllVal[xb] ~ .p.get[`percentage_recurring_all_data][xb]` -.ml.fresh.feat.perRecurToAllVal[xi] ~ .p.get[`percentage_recurring_all_data][xi]` -.ml.fresh.feat.perRecurToAllVal[x1] ~ 
.p.get[`percentage_recurring_all_data][x1]` -.ml.fresh.feat.perRecurToAllVal[x2] ~ .p.get[`percentage_recurring_all_data][x2]` +.ml.fresh.feat.perRecurToAllVal[xj] ~ .p.get[`percentage_recurring_all_val][xj]` +.ml.fresh.feat.perRecurToAllVal[xf] ~ .p.get[`percentage_recurring_all_val][xf]` +.ml.fresh.feat.perRecurToAllVal[xb] ~ .p.get[`percentage_recurring_all_val][xb]` +.ml.fresh.feat.perRecurToAllVal[xi] ~ .p.get[`percentage_recurring_all_val][xi]` +.ml.fresh.feat.perRecurToAllVal[x1] ~ .p.get[`percentage_recurring_all_val][x1]` +.ml.fresh.feat.perRecurToAllVal[x2] ~ .p.get[`percentage_recurring_all_val][x2]` .ml.fresh.feat.perRecurToAllVal[xnull] ~ 1f .ml.fresh.feat.largestDev[xj;0.5] ~ .p.get[`large_standard_deviation][xj;0.5]` @@ -292,12 +292,12 @@ np:.p.import[`numpy] .ml.fresh.feat.sumRecurringVal[x0] ~ 0f .ml.fresh.feat.sumRecurringVal[xnull] ~ 0f -.ml.fresh.feat.sumRecurringDataPoint[xj] ~ .p.get[`sum_recurring_values;<][xj] -.ml.fresh.feat.sumRecurringDataPoint[xf] ~ .p.get[`sum_recurring_values;<][xf] -.ml.fresh.feat.sumRecurringDataPoint[xb] ~ .p.get[`sum_recurring_values;<][xb] -.ml.fresh.feat.sumRecurringDataPoint[xi] ~ .p.get[`sum_recurring_values;<][xi] -.ml.fresh.feat.sumRecurringDataPoint[x1] ~ .p.get[`sum_recurring_values;<][x1] -.ml.fresh.feat.sumRecurringDataPoint[x2] ~ .p.get[`sum_recurring_values;<][x2] +.ml.fresh.feat.sumRecurringDataPoint[xj] ~ .p.get[`sum_recurring_data_points;<][xj] +.ml.fresh.feat.sumRecurringDataPoint[xf] ~ .p.get[`sum_recurring_data_points;<][xf] +.ml.fresh.feat.sumRecurringDataPoint[xb] ~ .p.get[`sum_recurring_data_points;<][xb] +.ml.fresh.feat.sumRecurringDataPoint[xi] ~ .p.get[`sum_recurring_data_points;<][xi] +.ml.fresh.feat.sumRecurringDataPoint[x1] ~ .p.get[`sum_recurring_data_points;<][x1] +.ml.fresh.feat.sumRecurringDataPoint[x2] ~ .p.get[`sum_recurring_data_points;<][x2] .ml.fresh.feat.sumRecurringDataPoint[xnull] ~ 0f .ml.fresh.feat.c3[xj;2] ~ .p.get[`c3_py;<][xj;2] @@ -428,14 +428,14 @@ 
abs[.ml.fresh.feat.binnedEntropy[xnull;50]] ~ 0f (value .ml.fresh.feat.changeQuant[xj;0.25;0.7;0b]) ~ .p.get[`change_quantiles;<][xj;0.25;0.7;0b;]each changequantkeys (value .ml.fresh.feat.changeQuant[xj;0.2;0.65;0b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.65;0b;]each changequantkeys (value .ml.fresh.feat.changeQuant[xj;0.2;0.775;0b]) ~ .p.get[`change_quantiles;<][xj;0.2;0.775;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.8;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;1b]) ~ .p.get[`change_quantiles;<][xi;0.25;0.7;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.65;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;1b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.775;1b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.8;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.8;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.25;0.7;0b]) ~ .p.get[`change_quantiles;<][xi;0.25;0.7;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.65;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.65;0b;]each changequantkeys -(value .ml.fresh.feat.changeQuant[xi;0.2;0.775;0b]) ~ .p.get[`change_quantiles;<][xi;0.2;0.775;0b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.8;1b]) = .p.get[`change_quantiles;<][xi;0.2;0.8;1b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.25;0.7;1b]) = .p.get[`change_quantiles;<][xi;0.25;0.7;1b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.65;1b]) = .p.get[`change_quantiles;<][xi;0.2;0.65;1b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.775;1b]) = .p.get[`change_quantiles;<][xi;0.2;0.775;1b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.8;0b]) = .p.get[`change_quantiles;<][xi;0.2;0.8;0b;]each changequantkeys +all (value 
.ml.fresh.feat.changeQuant[xi;0.25;0.7;0b]) = .p.get[`change_quantiles;<][xi;0.25;0.7;0b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.65;0b]) = .p.get[`change_quantiles;<][xi;0.2;0.65;0b;]each changequantkeys +all (value .ml.fresh.feat.changeQuant[xi;0.2;0.775;0b]) = .p.get[`change_quantiles;<][xi;0.2;0.775;0b;]each changequantkeys (value .ml.fresh.feat.changeQuant[x0;0.2;0.775;1b]) ~ (-0w 0w,4#0n) (value .ml.fresh.feat.changeQuant[x1;0.2;0.775;1b]) ~ (-0w 0w,4#0n) (value .ml.fresh.feat.changeQuant[x2;0.2;0.775;1b]) ~ (-0w 0w,4#0n) diff --git a/fresh/tests/sigtests.t b/fresh/tests/sigtests.t index 7fadfe3..4e507ec 100644 --- a/fresh/tests/sigtests.t +++ b/fresh/tests/sigtests.t @@ -44,14 +44,16 @@ table3:([]desc 1000000?1f;1000000?10f;asc 1000000?1f) table4:([]1000000?0b;1000000?1f;1000000?1f) target1:asc 1000000?100f;target2:desc 1000000?1f;target3:target4:1000000?0b bintest:{2=count distinct x} -pdmatrix:{pddf[.p.get[`benjamini_hochberg_test;<][.p.topd y;"FALSE";x]][`:values]} -k:{pdmatrix[x;y]`} +pdmatrix:{pddf[.p.get[`benjamini_hochberg_test][$[.pykx.loaded;.p.topd;]y;$[.pykx.loaded;0b;"FALSE"];x]][`:values]`} +k:{t:pdmatrix[x;y];@[{x`};t;{[x;y]x}[t]]} vec:{k[x;y][;2]} bhfn:{[table;target] pdict:.ml.fresh.sigFeat[table;target]; ptable:([]label:key pdict;p_value:value pdict); - dfptable:tab2df[ptable]; - ("i"$count .ml.fresh.benjhoch[0.05;pdict]) ~ sum vec[0.05;dfptable]=1b + dfptable:$[.pykx.loaded;;tab2df]ptable; + vecret:vec[0.05;dfptable]; + vecret:$[11h=type vecret;`True=;0<]vecret; + ("i"$count .ml.fresh.benjhoch[0.05;pdict]) ~ sum vecret=1b } bhfn[table1;target1] bhfn[table2;target2] diff --git a/fresh/tests/test.p b/fresh/tests/test.p index 505a96a..7330997 100644 --- a/fresh/tests/test.p +++ b/fresh/tests/test.p @@ -1,5 +1,6 @@ p)import numpy as np p)import pandas as pd +p)import itertools p)from scipy.signal import welch, cwt, ricker, find_peaks_cwt p)from scipy.stats import linregress p)from statsmodels.tsa.stattools import acf, adfuller, 
pacf diff --git a/fresh/utils.q b/fresh/utils.q index 3faea6d..a33f5a0 100644 --- a/fresh/utils.q +++ b/fresh/utils.q @@ -187,7 +187,7 @@ fresh.i.kTau:{[target;feature] // @return {float} Results of Fisher exact test fresh.i.fisher:{[target;feature] g:group@'target value group feature; - fresh.i.fisherExact[<;count@''@\:[g]distinct target]1 + fresh.i.fisherExact[<;.p.toraw count@''@\:[g]distinct target]1 } // @private From e133a1b7d741c7952f809da2420e6cd36b728004 Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Mon, 2 Oct 2023 16:16:09 +0100 Subject: [PATCH 5/7] Initial xval updates --- xval/tests/test.p | 18 ++++++++--------- xval/tests/xval.t | 50 +++++++++++++++++++++++------------------------ 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/xval/tests/test.p b/xval/tests/test.p index 14d264b..754c76a 100644 --- a/xval/tests/test.p +++ b/xval/tests/test.p @@ -1,24 +1,24 @@ -p)def< kfsplit(x,y): +p)def kfsplit(x,y): from sklearn.model_selection import KFold kf=KFold(n_splits=y) split=kf.split(x) return split -p)def< kfoldr(x,y): +p)def kfoldr(x,y): from sklearn.model_selection import cross_val_score from sklearn.linear_model import ElasticNet clf = ElasticNet() scores = cross_val_score(clf,x,y,cv=3) return scores -p)def< kfoldc(x,y): +p)def kfoldc(x,y): from sklearn.model_selection import cross_val_score from sklearn.tree import DecisionTreeClassifier clf = DecisionTreeClassifier() scores = cross_val_score(clf,x,y,cv=3) return scores -p)def< crossvalr(x,y,z,k,m): +p)def crossvalr(x,y,z,k,m): from sklearn.linear_model import LinearRegression model=LinearRegression() lst=[] @@ -35,7 +35,7 @@ p)def< crossvalr(x,y,z,k,m): lst.append(score) return lst -p)def< crossvalc(x,y,z,k,m): +p)def crossvalc(x,y,z,k,m): from sklearn.tree import DecisionTreeClassifier model=DecisionTreeClassifier() lst=[] @@ -52,7 +52,7 @@ p)def< crossvalc(x,y,z,k,m): lst.append(score) return lst -p)def< gridsearchr(x,y): +p)def gridsearchr(x,y): from sklearn.model_selection 
import GridSearchCV from sklearn.linear_model import ElasticNet regr = ElasticNet() @@ -63,7 +63,7 @@ p)def< gridsearchr(x,y): p=clf.best_params_ return(acc, p) -p)def< gridsearchc(x,y): +p)def gridsearchc(x,y): from sklearn.model_selection import GridSearchCV from sklearn.tree import DecisionTreeClassifier clf = DecisionTreeClassifier() @@ -74,9 +74,9 @@ p)def< gridsearchc(x,y): p=clf.best_params_ return (acc, p) -p)def< fitscore(xtrain,ytrain,xtest,ytest): +p)def fitscore(xtrain,ytrain,xtest,ytest): from sklearn.linear_model import ElasticNet regr = ElasticNet() regr.fit(xtrain, ytrain) score = regr.score(xtest,ytest) - return score \ No newline at end of file + return score diff --git a/xval/tests/xval.t b/xval/tests/xval.t index 18aa825..20a09c8 100644 --- a/xval/tests/xval.t +++ b/xval/tests/xval.t @@ -74,10 +74,10 @@ not(count[s]~count[yc])&(s:.ml.xv.i.shuffle[yc])~yc / xval -(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg kfoldr[xf;yf])<.05 -(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg kfoldr[xi;yi])<.05 -(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg kfoldc[xb;yb])<.05 -(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg kfoldc[xc;yc])<.05 +(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg .p.get[`kfoldr;<][xf;yf])<.05 +(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg .p.get[`kfoldr;<][xi;yi])<.05 +(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`kfoldc;<][xb;yb])<.05 +(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`kfoldc;<][xc;yc])<.05 count[.ml.xv.kfShuff[k;1;xf;yf;fs[net][]]]~3 count[.ml.xv.kfShuff[k;1;xi;yi;fs[net][]]]~3 @@ -87,15 +87,15 @@ count[.ml.xv.kfShuff[k;1;xc;yc;fs[dtc][]]]~3 count[.ml.xv.kfStrat[k;1;xb;yb;fs[dtc][]]]~3 count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 -.ml.xv.tsRolls[k;1;xf;yf;fs[lin][]]~crossvalr[xf;yf;fr;lr;3] -.ml.xv.tsRolls[k;1;xi;yi;fs[lin][]]~crossvalr[xi;yi;fr;lr;3] -(avg[.ml.xv.tsRolls[k;1;xb;yb;fs[dtc][]]]-avg crossvalc[xb;yb;fr;lr;3])<.05 -(avg[.ml.xv.tsRolls[k;1;xc;yc;fs[dtc][]]]-avg crossvalc[xc;yc;fr;lr;3])<.05 
+.ml.xv.tsRolls[k;1;xf;yf;fs[lin][]]~.p.get[`crossvalr;<][xf;yf;fr;lr;3] +.ml.xv.tsRolls[k;1;xi;yi;fs[lin][]]~.p.get[`crossvalr;<][xi;yi;fr;lr;3] +(avg[.ml.xv.tsRolls[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`crossvalc;<][xb;yb;fr;lr;3])<.05 +(avg[.ml.xv.tsRolls[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`crossvalc;<][xc;yc;fr;lr;3])<.05 -.ml.xv.tsChain[k;1;xf;yf;fs[lin][]]~crossvalr[xf;yf;fc;lc;3] -.ml.xv.tsChain[k;1;xi;yi;fs[lin][]]~crossvalr[xi;yi;fc;lc;3] -(avg[.ml.xv.tsChain[k;1;xb;yb;fs[dtc][]]]-avg crossvalc[xb;yb;fc;lc;3])<.05 -(avg[.ml.xv.tsChain[k;1;xc;yc;fs[dtc][]]]-avg crossvalc[xc;yc;fc;lc;3])<.05 +.ml.xv.tsChain[k;1;xf;yf;fs[lin][]]~.p.get[`crossvalr;<][xf;yf;fc;lc;3] +.ml.xv.tsChain[k;1;xi;yi;fs[lin][]]~.p.get[`crossvalr;<][xi;yi;fc;lc;3] +(avg[.ml.xv.tsChain[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`crossvalc;<][xb;yb;fc;lc;3])<.05 +(avg[.ml.xv.tsChain[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`crossvalc;<][xc;yc;fc;lc;3])<.05 (.ml.xv.pcSplit[p;1;xf;yf]{[d].ml.shape each d})~ms (.ml.xv.pcSplit[p;1;xi;yi]{[d].ml.shape each d})~ms @@ -115,17 +115,17 @@ count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 / grid search -(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1]gridsearchr[xf;yf] -(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1]gridsearchr[xi;yi] -(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0]gridsearchr[xf;yf])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0]gridsearchr[xi;yi])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;0]]-rnd@[;0]gridsearchc[xb;yb])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0]gridsearchc[xc;yc])<.05 +(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xf;yf] +(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xi;yi] +(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xf;yf])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xi;yi])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs 
dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xb;yb])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xc;yc])<.05 -((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0]gridsearchr[xf;yf])<.05 -((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0]gridsearchr[xi;yi])<.06 -((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0]gridsearchc[xb;yb])<.05 -((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0]gridsearchc[xc;yc])<.05 +((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xf;yf])<.05 +((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xi;yi])<.06 +((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xb;yb])<.05 +((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xc;yc])<.05 (key@[;1].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])~`alpha`max_iter (key@[;1].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])~`alpha`max_iter @@ -180,6 +180,6 @@ $[0b=@[{.ml.rs.mcSplit[ 4;2;xf;yf;.ml.xv.fitScore dtc;x;-.2];};rs_pr_err;{[err]e / scoring -fs[net;::;df]~fitscore[df[0]0;df[0]1;df[1]0;df[1]1] -fs[net;::;di]~fitscore[di[0]0;di[0]1;di[1]0;di[1]1] -fs[net;::;db]~fitscore[db[0]0;db[0]1;db[1]0;db[1]1] +fs[net;::;df]~.p.get[`fitscore;<][df[0]0;df[0]1;df[1]0;df[1]1] +fs[net;::;di]~.p.get[`fitscore;<][di[0]0;di[0]1;di[1]0;di[1]1] +fs[net;::;db]~.p.get[`fitscore;<][db[0]0;db[0]1;db[1]0;db[1]1] From 97dd22e254fd2c3a8ddcc1f81700bb49697a45e9 Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Mon, 2 Oct 2023 20:20:38 +0100 Subject: [PATCH 6/7] Update to xval tests --- xval/tests/xval.t | 51 ++++++++++++++++++++++++----------------------- xval/xval.q | 4 ++-- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/xval/tests/xval.t b/xval/tests/xval.t index 20a09c8..0b6d507 100644 --- a/xval/tests/xval.t +++ b/xval/tests/xval.t @@ -46,6 +46,7 @@ fr:first each ridx fc:first each cidx lr:last each ridx lc:last each cidx 
+.p.list:.p.import[`builtins;`:list] / utils @@ -54,10 +55,10 @@ not(count[s]~count[yi])&(s:.ml.xv.i.shuffle[yi])~yi not(count[s]~count[yb])&(s:.ml.xv.i.shuffle[yb])~yb not(count[s]~count[yc])&(s:.ml.xv.i.shuffle[yc])~yc -(`int$.ml.xv.i.splitIdx[2;yf])~`int$reverse first(.p.list kfsplit[yf;2])` -(`int$.ml.xv.i.splitIdx[2;yi])~`int$reverse first(.p.list kfsplit[yi;2])` -(`int$.ml.xv.i.splitIdx[2;yb])~`int$reverse first(.p.list kfsplit[yb;2])` -(`int$.ml.xv.i.splitIdx[2;yc])~`int$reverse first(.p.list kfsplit[yc;2])` +(`int$.ml.xv.i.splitIdx[2;yf])~`int$reverse first(.p.list .p.get[`kfsplit][yf;2])` +(`int$.ml.xv.i.splitIdx[2;yi])~`int$reverse first(.p.list .p.get[`kfsplit][yi;2])` +(`int$.ml.xv.i.splitIdx[2;yb])~`int$reverse first(.p.list .p.get[`kfsplit][yb;2])` +(`int$.ml.xv.i.splitIdx[2;yc])~`int$reverse first(.p.list .p.get[`kfsplit][yc;2])` (.ml.shape .ml.xv.i.shuffIdx[k;yf])~3 333 (.ml.shape .ml.xv.i.shuffIdx[5;yi])~5 200 @@ -74,10 +75,10 @@ not(count[s]~count[yc])&(s:.ml.xv.i.shuffle[yc])~yc / xval -(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg .p.get[`kfoldr;<][xf;yf])<.05 -(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg .p.get[`kfoldr;<][xi;yi])<.05 -(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`kfoldc;<][xb;yb])<.05 -(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`kfoldc;<][xc;yc])<.05 +(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg .p.get[`kfoldr;<] . .p.toraw@/:(xf;yf))<.05 +(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg .p.get[`kfoldr;<] . .p.toraw@/:(xi;yi))<.05 +(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`kfoldc;<] . .p.toraw@/:(xb;yb))<.05 +(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`kfoldc;<] . 
.p.toraw@/:(xc;yc))<.05 count[.ml.xv.kfShuff[k;1;xf;yf;fs[net][]]]~3 count[.ml.xv.kfShuff[k;1;xi;yi;fs[net][]]]~3 @@ -115,17 +116,17 @@ count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 / grid search -(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xf;yf] -(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xi;yi] -(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xf;yf])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xi;yi])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xb;yb])<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xc;yc])<.05 +(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf) +(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<] . .p.toraw@/:(xi;yi) +(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf))<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xi;yi))<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xb;yb))<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xc;yc))<.05 -((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xf;yf])<.05 -((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xi;yi])<.06 -((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xb;yb])<.05 -((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xc;yc])<.05 +((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf))<.05 +((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<] . 
.p.toraw@/:(xi;yi))<.06 +((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xb;yb))<.05 +((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xc;yc))<.05 (key@[;1].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])~`alpha`max_iter (key@[;1].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])~`alpha`max_iter @@ -150,10 +151,10 @@ count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 .ml.shape[.ml.rs.kfSplit[ 4;2;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~3 8 8 .ml.shape[.ml.rs.kfShuff[ 4;2;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~3 8 8 .ml.shape[.ml.rs.kfStrat[ 4;2;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm;-.2]]~3 7 8 -.ml.shape[.ml.rs.tsRolls[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~3 7 5 -.ml.shape[.ml.rs.tsChain[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~3 7 5 -.ml.shape[.ml.rs.pcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~3 8 5 -.ml.shape[.ml.rs.mcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~3 8 5 +any .ml.shape[.ml.rs.tsRolls[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~/:(3 7 5; 3 8 5) +any .ml.shape[.ml.rs.tsChain[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~/:(3 7 5; 3 8 5) +any .ml.shape[.ml.rs.pcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~/:(3 7 5; 3 8 5) +any .ml.shape[.ml.rs.mcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~/:(3 7 5; 3 8 5) / sobol search @@ -180,6 +181,6 @@ $[0b=@[{.ml.rs.mcSplit[ 4;2;xf;yf;.ml.xv.fitScore dtc;x;-.2];};rs_pr_err;{[err]e / scoring -fs[net;::;df]~.p.get[`fitscore;<][df[0]0;df[0]1;df[1]0;df[1]1] -fs[net;::;di]~.p.get[`fitscore;<][di[0]0;di[0]1;di[1]0;di[1]1] -fs[net;::;db]~.p.get[`fitscore;<][db[0]0;db[0]1;db[1]0;db[1]1] +fs[net;::;df]~.p.get[`fitscore;<] . .p.toraw@/:(df[0]0;df[0]1;df[1]0;df[1]1) +fs[net;::;di]~.p.get[`fitscore;<] . .p.toraw@/:(di[0]0;di[0]1;di[1]0;di[1]1) +fs[net;::;db]~.p.get[`fitscore;<] . 
.p.toraw@/:(db[0]0;db[0]1;db[1]0;db[1]1) diff --git a/xval/xval.q b/xval/xval.q index b9ba6b6..6b6c1a6 100644 --- a/xval/xval.q +++ b/xval/xval.q @@ -104,8 +104,8 @@ xv.mcSplit:xv.i.applyIdx{[pc;n;features;target] // @return {float[]} Scores outputted by function applied to p and data xv.fitScore:{[function;p;data] fitFunc:function[][p]`:fit; - scoreFunc:.[fitFunc;numpyArray each data 0]`:score; - .[scoreFunc;numpyArray each data 1]` + scoreFunc:.[fitFunc;{.p.toraw numpyArray .p.toraw x}each data 0]`:score; + .[scoreFunc;{.p.toraw numpyArray x}each data 1]` } // Hyperparameter search procedures From a06914540b4432aad4eca3c90be914414953df9d Mon Sep 17 00:00:00 2001 From: Conor McCarthy Date: Fri, 17 Nov 2023 10:28:35 +0000 Subject: [PATCH 7/7] Additional updates --- clust/tests/clt.t | 8 +++---- clust/tests/score.t | 12 +++++----- fresh/utils.q | 2 +- util/tests/metric.t | 2 +- util/tests/preproctst.t | 8 +++---- util/tests/utiltst.t | 2 +- xval/tests/test.p | 2 +- xval/tests/xval.t | 53 ++++++++++++++++++++--------------------- xval/utils.q | 3 ++- 9 files changed, 46 insertions(+), 46 deletions(-) diff --git a/clust/tests/clt.t b/clust/tests/clt.t index c45a8a3..3741d14 100644 --- a/clust/tests/clt.t +++ b/clust/tests/clt.t @@ -22,8 +22,8 @@ qDendrogram:{asc each x(y . z)[`modelInfo;`dgram]} algoOutputs:{asc key x . y} algoOutputsFit:{asc key first x . y} countOutput:{count x y} -pythonRes :{[fclust;mat;t;clust;param]value group fclust[.p.toraw mat t[`modelInfo;`dgram];clust;param]`}[fclust;mat] -pythonDgram:{[lnk;d;lf;df]asc each lnk[.p.toraw flip d;lf;df]`}[lnk] +pythonRes :{[fclust;mat;t;clust;param]value group fclust[mat t[`modelInfo;`dgram];clust;param]`}[fclust;mat] +pythonDgram:{[lnk;d;lf;df]asc each lnk[flip d;lf;df]`}[lnk] qDgramDists:{(x . 
y)[`modelInfo;`dgram]`dist} // Datasets @@ -160,11 +160,11 @@ tab1:.ml.clust.hc.fit[d1;`mdist ;`single] tab2:.ml.clust.hc.fit[d1;`e2dist;`average] tab3:.ml.clust.hc.fit[d2;`e2dist;`centroid] tab4:.ml.clust.hc.fit[d2;`edist ;`complete] -hct1fit:"j"$fclust[.p.toraw mat tab1[`modelInfo;`dgram];4;`maxclust]` +hct1fit:"j"$fclust[mat tab1[`modelInfo;`dgram];4;`maxclust]` hcd1pred1:1 2 1 1 2 2 1 1 1 1 1 2 1 2 2 hcd1pred2:1 3 1 1 3 3 1 1 1 1 1 3 1 3 3 hcd1pred3:1 3 1 1 3 3 1 1 1 1 1 3 1 3 3 -pyDgramDists:(lnk[.p.toraw flip d2;`single;`sqeuclidean]`)[;2] +pyDgramDists:(lnk[flip d2;`single;`sqeuclidean]`)[;2] // Fit passingTest[clusterAdd1[.ml.clust.hc.cutK ];(tab1;4);1b;hct1fit] diff --git a/clust/tests/score.t b/clust/tests/score.t index 470465e..7de0bf1 100644 --- a/clust/tests/score.t +++ b/clust/tests/score.t @@ -27,14 +27,14 @@ rnd1:count[flip d1]?4 rnd2:count[flip d2]?4 // Dave Bouldin Score -passingTest[.ml.clust.daviesBouldin;(d1;clt1`clust);0b;pydb[.p.toraw flip d1;clt1`clust]`] -passingTest[.ml.clust.daviesBouldin;(d2;clt2`clust);0b;pydb[.p.toraw flip d2;clt2`clust]`] -passingTest[.ml.clust.daviesBouldin;(d2;clt3`clust);0b;pydb[.p.toraw flip d2;clt3`clust]`] +passingTest[.ml.clust.daviesBouldin;(d1;clt1`clust);0b;pydb[flip d1;clt1`clust]`] +passingTest[.ml.clust.daviesBouldin;(d2;clt2`clust);0b;pydb[flip d2;clt2`clust]`] +passingTest[.ml.clust.daviesBouldin;(d2;clt3`clust);0b;pydb[flip d2;clt3`clust]`] // Silhouette Score -passingTest[.ml.clust.silhouette;(d1;`edist;clt1`clust;1b);0b;pysil[.p.toraw flip d1;clt1`clust]`] -passingTest[.ml.clust.silhouette;(d2;`edist;clt2`clust;1b);0b;pysil[.p.toraw flip d2;clt2`clust]`] -passingTest[.ml.clust.silhouette;(d2;`edist;clt3`clust;1b);0b;pysil[.p.toraw flip d2;clt3`clust]`] +passingTest[.ml.clust.silhouette;(d1;`edist;clt1`clust;1b);0b;pysil[flip d1;clt1`clust]`] +passingTest[.ml.clust.silhouette;(d2;`edist;clt2`clust;1b);0b;pysil[flip d2;clt2`clust]`] 
+passingTest[.ml.clust.silhouette;(d2;`edist;clt3`clust;1b);0b;pysil[flip d2;clt3`clust]`] // Dunn Score passingTest[applyScoring[.ml.clust.dunn;1 ];(d1;`e2dist;clt1`clust);1b;20] diff --git a/fresh/utils.q b/fresh/utils.q index a33f5a0..3faea6d 100644 --- a/fresh/utils.q +++ b/fresh/utils.q @@ -187,7 +187,7 @@ fresh.i.kTau:{[target;feature] // @return {float} Results of Fisher exact test fresh.i.fisher:{[target;feature] g:group@'target value group feature; - fresh.i.fisherExact[<;.p.toraw count@''@\:[g]distinct target]1 + fresh.i.fisherExact[<;count@''@\:[g]distinct target]1 } // @private diff --git a/util/tests/metric.t b/util/tests/metric.t index 2982cdf..f0910b0 100644 --- a/util/tests/metric.t +++ b/util/tests/metric.t @@ -140,7 +140,7 @@ plaintabn:plaintab,'([]x4:1 3 0n) .ml.tScoreEqual[xf;yf]~abs stats[`:ttest_ind][xf;yf][`:statistic]` .ml.tScoreEqual[xb;yb]~abs stats[`:ttest_ind][xb;yb][`:statistic]` .ml.tScoreEqual[x;x]~abs stats[`:ttest_ind][x;x][`:statistic]` -.ml.covMatrix[flip value flip plaintab]~np[`:cov][.pykx.topy flip value flip plaintab;`bias pykw 1b]` +.ml.covMatrix[flip value flip plaintab]~np[`:cov][flip value flip plaintab;`bias pykw 1b]` .ml.covMatrix[(10110b;01110b)]~(0.24 0.04;0.04 0.24) .ml.covMatrix[(10110b;11111b)]~(0.24 0f;0 0f) .ml.covMatrix[(11111b;11111b)]~(0 0f;0 0f) diff --git a/util/tests/preproctst.t b/util/tests/preproctst.t index 2aa35ae..09c9782 100644 --- a/util/tests/preproctst.t +++ b/util/tests/preproctst.t @@ -53,7 +53,7 @@ keyedinfs:([k:1 2]x:0 0W) .ml.dropConstant[nt]~([]101b;x2:1 2 0n) .ml.dropConstant[nulltab]~select x,x1,x2,x3 from nulltab -MinMaxScaler[`:fit][.p.toraw flip plainmat]; +MinMaxScaler[`:fit][flip plainmat]; minMaxKeys:`minData`maxData minMax1:.ml.minMaxScaler.fit[plainmat] minMax2:.ml.minMaxScaler.fit[scale1] @@ -67,7 +67,7 @@ minMax3[`modelInfo]~minMaxKeys!1 5f minMax4[`modelInfo]~minMaxKeys!01b minMax5[`modelInfo]~minMaxKeys!(3 1 4f;5 1 4f) 
-.ml.minMaxScaler.fitTransform[plainmat]~flip"f"$MinMaxScaler[`:transform][.p.toraw flip plainmat]` +.ml.minMaxScaler.fitTransform[plainmat]~flip"f"$MinMaxScaler[`:transform][flip plainmat]` .ml.minMaxScaler.fitTransform[scale1]~(0 1f;1 0f;1 0f) .ml.minMaxScaler.fitTransform[scale2]~0.5 0.25 1 0.75 0f .ml.minMaxScaler.fitTransform[scale3]~0 0 1 1f @@ -75,7 +75,7 @@ minMax5[`modelInfo]~minMaxKeys!(3 1 4f;5 1 4f) minMax2.transform[scale4]~(1 3f;-0.5 0n;0.5 0n) minMax3.transform[5#y]~5.75 1.75 9.5 5.5 4.25 -StdScaler[`:fit][.p.toraw flip plainmat]; +StdScaler[`:fit][flip plainmat]; stdScaleKeys:`avgData`devData stdScale1:.ml.stdScaler.fit[plainmat] stdScale2:.ml.stdScaler.fit[scale1] @@ -91,7 +91,7 @@ key[stdScale4[`modelInfo]]~stdScaleKeys key[stdScale5[`modelInfo]]~stdScaleKeys key[stdScale6[`modelInfo]]~stdScaleKeys -stdScale1.transform[plainmat]~flip"f"$StdScaler[`:transform][.p.toraw flip plainmat]` +stdScale1.transform[plainmat]~flip"f"$StdScaler[`:transform][flip plainmat]` stdScale2.transform[scale1]~(-1 1f;1 -1f;1 -1f) stdScale3.transform[xf]~scale[xf]` stdScale4.transform[y]~scale[y]` diff --git a/util/tests/utiltst.t b/util/tests/utiltst.t index 87948da..cfb743c 100644 --- a/util/tests/utiltst.t +++ b/util/tests/utiltst.t @@ -40,7 +40,7 @@ col_types~type each first (.ml.tab2df tt2)[`:values]` ret_value:$[.pykx.loaded;"ABC";enlist each "ABC"] ret_value~dfc[`:c.values]`; -.ml.shape[1 2 3*/:til 10] ~ np[`:shape][.p.toraw 1 2 3*/:til 10]` +.ml.shape[1 2 3*/:til 10] ~ np[`:shape][1 2 3*/:til 10]` .ml.shape[enlist 1] ~ np[`:shape][enlist 1]` .ml.shape[1 2] ~ np[`:shape][1 2]` .ml.shape[plaintab]~3 4 diff --git a/xval/tests/test.p b/xval/tests/test.p index 754c76a..b496516 100644 --- a/xval/tests/test.p +++ b/xval/tests/test.p @@ -2,7 +2,7 @@ p)def kfsplit(x,y): from sklearn.model_selection import KFold kf=KFold(n_splits=y) split=kf.split(x) - return split + return list(split) p)def kfoldr(x,y): from sklearn.model_selection import cross_val_score diff --git 
a/xval/tests/xval.t b/xval/tests/xval.t index 0b6d507..0b00766 100644 --- a/xval/tests/xval.t +++ b/xval/tests/xval.t @@ -11,7 +11,7 @@ yi:asc 1000?10000 xb:flip(1000?0101101011b;asc 1000?0101101011b) yb:1000#110011001100b xc:flip(1000?100f;asc 1000?100f) -yc:1000#`A`B`A`C`B`C +yc:1000#string`A`B`A`C`B`C df:(2;0N)#value .ml.trainTestSplit[xf;yf;.2] di:(2;0N)#value .ml.trainTestSplit[xi;yi;.2] @@ -46,7 +46,6 @@ fr:first each ridx fc:first each cidx lr:last each ridx lc:last each cidx -.p.list:.p.import[`builtins;`:list] / utils @@ -55,10 +54,10 @@ not(count[s]~count[yi])&(s:.ml.xv.i.shuffle[yi])~yi not(count[s]~count[yb])&(s:.ml.xv.i.shuffle[yb])~yb not(count[s]~count[yc])&(s:.ml.xv.i.shuffle[yc])~yc -(`int$.ml.xv.i.splitIdx[2;yf])~`int$reverse first(.p.list .p.get[`kfsplit][yf;2])` -(`int$.ml.xv.i.splitIdx[2;yi])~`int$reverse first(.p.list .p.get[`kfsplit][yi;2])` -(`int$.ml.xv.i.splitIdx[2;yb])~`int$reverse first(.p.list .p.get[`kfsplit][yb;2])` -(`int$.ml.xv.i.splitIdx[2;yc])~`int$reverse first(.p.list .p.get[`kfsplit][yc;2])` +(`int$.ml.xv.i.splitIdx[2;yf])~`int$reverse first .p.get[`kfsplit;<][yf;2] +(`int$.ml.xv.i.splitIdx[2;yi])~`int$reverse first .p.get[`kfsplit;<][yi;2] +(`int$.ml.xv.i.splitIdx[2;yb])~`int$reverse first .p.get[`kfsplit;<][yb;2] +(`int$.ml.xv.i.splitIdx[2;yc])~`int$reverse first .p.get[`kfsplit;<][yc;2] (.ml.shape .ml.xv.i.shuffIdx[k;yf])~3 333 (.ml.shape .ml.xv.i.shuffIdx[5;yi])~5 200 @@ -75,10 +74,10 @@ not(count[s]~count[yc])&(s:.ml.xv.i.shuffle[yc])~yc / xval -(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg .p.get[`kfoldr;<] . .p.toraw@/:(xf;yf))<.05 -(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg .p.get[`kfoldr;<] . .p.toraw@/:(xi;yi))<.05 -(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`kfoldc;<] . .p.toraw@/:(xb;yb))<.05 -(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`kfoldc;<] . 
.p.toraw@/:(xc;yc))<.05 +(avg[.ml.xv.kfSplit[k;1;xf;yf;fs[net][]]]-avg .p.get[`kfoldr;<][xf;yf])<.05 +(avg[.ml.xv.kfSplit[k;1;xi;yi;fs[net][]]]-avg .p.get[`kfoldr;<][xi;yi])<.05 +(avg[.ml.xv.kfSplit[k;1;xb;yb;fs[dtc][]]]-avg .p.get[`kfoldc;<][xb;yb])<.05 +(avg[.ml.xv.kfSplit[k;1;xc;yc;fs[dtc][]]]-avg .p.get[`kfoldc;<][xc;yc])<.05 count[.ml.xv.kfShuff[k;1;xf;yf;fs[net][]]]~3 count[.ml.xv.kfShuff[k;1;xi;yi;fs[net][]]]~3 @@ -116,17 +115,17 @@ count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 / grid search -(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf) -(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<] . .p.toraw@/:(xi;yi) -(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf))<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xi;yi))<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xb;yb))<.05 -(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xc;yc))<.05 +(bp .ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xf;yf] +(bp .ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0])~@[;1].p.get[`gridsearchr;<][xi;yi] +(rnd[(avg/).ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xf;yf])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;0]]-rnd@[;0].p.get[`gridsearchr;<][xi;yi])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xb;yb])<.05 +(rnd[(avg/).ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;0]]-rnd@[;0].p.get[`gridsearchc;<][xc;yc])<.05 -((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xf;yf))<.05 -((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<] . .p.toraw@/:(xi;yi))<.06 -((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<] . 
.p.toraw@/:(xb;yb))<.05 -((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<] . .p.toraw@/:(xc;yc))<.05 +((@[;2].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xf;yf])<.05 +((@[;2].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])-@[;0].p.get[`gridsearchr;<][xi;yi])<.06 +((@[;2].ml.gs.kfSplit[k;1;xb;yb;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xb;yb])<.05 +((@[;2].ml.gs.kfSplit[k;1;xc;yc;fs dtc;gs_pc;.2])-@[;0].p.get[`gridsearchc;<][xc;yc])<.05 (key@[;1].ml.gs.kfSplit[k;1;xf;yf;fs net;gs_pr;.2])~`alpha`max_iter (key@[;1].ml.gs.kfSplit[k;1;xi;yi;fs net;gs_pr;.2])~`alpha`max_iter @@ -151,10 +150,10 @@ count[.ml.xv.kfStrat[k;1;xc;yc;fs[dtc][]]]~3 .ml.shape[.ml.rs.kfSplit[ 4;2;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~3 8 8 .ml.shape[.ml.rs.kfShuff[ 4;2;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~3 8 8 .ml.shape[.ml.rs.kfStrat[ 4;2;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm;-.2]]~3 7 8 -any .ml.shape[.ml.rs.tsRolls[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~/:(3 7 5; 3 8 5) -any .ml.shape[.ml.rs.tsChain[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~/:(3 7 5; 3 8 5) -any .ml.shape[.ml.rs.pcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~/:(3 7 5; 3 8 5) -any .ml.shape[.ml.rs.mcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~/:(3 7 5; 3 8 5) +.ml.shape[.ml.rs.tsRolls[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~3 7 5 +any .ml.shape[.ml.rs.tsChain[ 2;5;xb;yb;.ml.xv.fitScore dtc;rs_pc_rdm; .2]]~/:(3 7 5;3 8 5) +.ml.shape[.ml.rs.pcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm; .2]]~3 8 5 +.ml.shape[.ml.rs.mcSplit[.3;5;xf;yf;.ml.xv.fitScore net;rs_pr_rdm;-.2]]~3 8 5 / sobol search @@ -181,6 +180,6 @@ $[0b=@[{.ml.rs.mcSplit[ 4;2;xf;yf;.ml.xv.fitScore dtc;x;-.2];};rs_pr_err;{[err]e / scoring -fs[net;::;df]~.p.get[`fitscore;<] . .p.toraw@/:(df[0]0;df[0]1;df[1]0;df[1]1) -fs[net;::;di]~.p.get[`fitscore;<] . .p.toraw@/:(di[0]0;di[0]1;di[1]0;di[1]1) -fs[net;::;db]~.p.get[`fitscore;<] . 
.p.toraw@/:(db[0]0;db[0]1;db[1]0;db[1]1)
+fs[net;::;df]~.p.get[`fitscore;<][df[0]0;df[0]1;df[1]0;df[1]1]
+fs[net;::;di]~.p.get[`fitscore;<][di[0]0;di[0]1;di[1]0;di[1]1]
+fs[net;::;db]~.p.get[`fitscore;<][db[0]0;db[0]1;db[1]0;db[1]1]
diff --git a/xval/utils.q b/xval/utils.q
index 0e8a23e..ff3f0dd 100644
--- a/xval/utils.q
+++ b/xval/utils.q
@@ -152,7 +152,8 @@ xv.i.idxN:{[func1;func2;k;n;features;target]
 // @return {any} Output of func with idx applied to data
 xv.i.applyIdx:{[idx;k;n;features;target;function]
   // idx yields the splits; each element is later invoked with no arguments
   // (data[]) to produce the data handed to `function`
   splitData:raze idx[k;n;features;target];
   // When PyKX is loaded and the razed target has type 10h or -11h, wrap item 1
   // of every element of a split with .pykx.toraw; otherwise pass the split
   // through untouched (::).
   // NOTE(review): item 1 is presumably the target of each train/test pair -
   // confirm against the layout produced by idx
   convFunc:$[.pykx.loaded & type[raze target]in 10 -11h;{x[;1]:.pykx.toraw each x[;1];x};::];
   // q lambdas do not capture enclosing locals, so convFunc is an explicit
   // parameter; binding 2 of the 3 arguments leaves a unary function for peach
   {[function;convFunc;data]function convFunc data[]}[function;convFunc]peach splitData
   }
 
 // Python utilities required for xval.q