diff --git a/scripts/modeldata/nuc/analysis.txt b/scripts/modeldata/nuc/analysis.txt
new file mode 100644
index 00000000..e4e94e96
--- /dev/null
+++ b/scripts/modeldata/nuc/analysis.txt
@@ -0,0 +1,24 @@
+loading modeltable.txt
+chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
+
+target = stage1_cycle_count
+ 0.55 cycles per byte_count
+R2 = 0.9952005292028262
+
+target = stage2_cycle_count
+ 2 cycles per structural_indexes_count
+ 0.11 cycles per byte_count
+R2 = 0.9941606366930587
+
+target = stage3_cycle_count
+ 14 cycles per float_count
+ 11 cycles per structural_indexes_count
+ 0.31 cycles per byte_count
+R2 = 0.9824350906350493
+
+target = total_cycles
+ 17 cycles per float_count
+ 13 cycles per structural_indexes_count
+ 0.96 cycles per byte_count
+R2 = 0.991605569037089
+
diff --git a/scripts/modeldata/nuc/learn.py b/scripts/modeldata/nuc/learn.py
new file mode 100644
index 00000000..48cf9266
--- /dev/null
+++ b/scripts/modeldata/nuc/learn.py
@@ -0,0 +1,58 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import Lasso
+from sklearn.preprocessing import normalize
+from sklearn import metrics
+
+def displaycoefs(coef_name):
+    """Print (coefficient, predictor name) pairs, largest coefficient first."""
+    # sorted() leaves the caller's list untouched.
+    for c,n in sorted(coef_name, reverse=True):
+        print("\t%0.2g cycles per %s "%(c,n))
+
+datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
+
+predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
+targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
+
+print("loading", datafile)
+# Whitespace-separated table without a header row; '#' starts a comment.
+# sep=r"\s+" replaces delim_whitespace=True, which pandas has deprecated.
+dataset = pd.read_csv(datafile, sep=r"\s+", skip_blank_lines=True, comment="#", header=None, names = predictors + targets)
+
+dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
+dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
+#print(dataset[['ratio']])
+
+chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
+print("chosenpredictors=",chosenpredictors)
+print()
+chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
+x = dataset[chosenpredictors]
+for t in chosentargets:
+    print("target = ", t)
+    howmany = 1 # we want at most one predictor
+    if t.startswith("stage2"):
+        howmany = 2 # we allow for two
+    if t.startswith(("stage3", "total")):
+        howmany = 3 # we allow for three
+    # 1-D target, so coef_ holds exactly one entry per predictor.
+    y = dataset[t]
+    # Increase the L1 penalty until at most `howmany` coefficients remain nonzero.
+    A=10000000.0
+    while True:
+        # normalize=False was the default; the parameter is gone from recent scikit-learn.
+        regressor = Lasso(max_iter=100000, alpha=A, positive = True, fit_intercept=False)
+        regressor.fit(x, y)
+        rest = [(c, n) for c, n in zip(regressor.coef_, chosenpredictors) if c != 0]
+        if len(rest) <= howmany:
+            break
+        A *= 1.2
+    displaycoefs(rest)
+    print("R2 = ", regressor.score(x,y))
+    print()
diff --git a/scripts/modeldata/nuc/runanalysis.sh b/scripts/modeldata/nuc/runanalysis.sh
new file mode 100755
index 00000000..53b48605
--- /dev/null
+++ b/scripts/modeldata/nuc/runanalysis.sh
@@ -0,0 +1 @@
+python learn.py > analysis.txt
diff --git a/scripts/plots/nuc/stackedperf.pdf b/scripts/plots/nuc/stackedperf.pdf index
a2a82c1a..6a2b6553 100644 Binary files a/scripts/plots/nuc/stackedperf.pdf and b/scripts/plots/nuc/stackedperf.pdf differ diff --git a/scripts/plots/skylake/stackedperf.pdf b/scripts/plots/skylake/stackedperf.pdf index 1235a67b..f464f434 100644 Binary files a/scripts/plots/skylake/stackedperf.pdf and b/scripts/plots/skylake/stackedperf.pdf differ diff --git a/scripts/refreshplot.sh b/scripts/refreshplot.sh index 4c7b1c44..d4108c60 100755 --- a/scripts/refreshplot.sh +++ b/scripts/refreshplot.sh @@ -4,5 +4,7 @@ SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" gnuplot -e "filename='plots/skylake/parselinuxtable.txt';name='plots/skylake/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot +gnuplot -e "filename='plots/nuc/parselinuxtable.txt';name='plots/nuc/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot -echo "plots/skylake/stackedperf.pdf" \ No newline at end of file +echo "plots/skylake/stackedperf.pdf" +echo "plots/nuc/stackedperf.pdf" \ No newline at end of file