Updating.

2018-12-24 16:02:53 -05:00 · 2018-12-24 16:02:53 -05:00 · 2afff77567
parent d975fc7543
commit 2afff77567
6 changed files with 90 additions and 1 deletions
--- a/scripts/modeldata/nuc/analysis.txt
+++ b/scripts/modeldata/nuc/analysis.txt
@ -0,0 +1,24 @@
+loading modeltable.txt
+chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
+
+target =  stage1_cycle_count
+	0.55 cycles per byte_count 
+R2 =  0.9952005292028262
+
+target =  stage2_cycle_count
+	2 cycles per structural_indexes_count 
+	0.11 cycles per byte_count 
+R2 =  0.9941606366930587
+
+target =  stage3_cycle_count
+	14 cycles per float_count 
+	11 cycles per structural_indexes_count 
+	0.31 cycles per byte_count 
+R2 =  0.9824350906350493
+
+target =  total_cycles
+	17 cycles per float_count 
+	13 cycles per structural_indexes_count 
+	0.96 cycles per byte_count 
+R2 =  0.991605569037089
+
--- a/scripts/modeldata/nuc/learn.py
+++ b/scripts/modeldata/nuc/learn.py
@ -0,0 +1,62 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import Lasso
+from sklearn.preprocessing import normalize
+from sklearn import metrics
+
+def displaycoefs(coef_name): # print each (coefficient, name) pair of coef_name, largest first
+    coef_name.sort() # sorts the caller's list in place, ascending by tuple comparison
+    coef_name.reverse() # flip to descending order
+    for c,n in coef_name:
+        print("\t%0.2g cycles per %s "%(c,n)) # coefficient formatted with %0.2g
+
+datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
+
+predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
+targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
+
+print("loading", datafile)
+dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets) # whitespace-delimited, no header row; columns are named predictors then targets
+
+
+dataset.columns = predictors + targets # NOTE(review): same names were already passed via names= above; looks redundant
+
+dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count'] # sum of the three per-stage cycle counts
+dataset['ratio']=dataset['total_cycles']/dataset['byte_count'] # total_cycles divided by byte_count
+#print(dataset[['ratio']])
+
+chosenpredictors = predictors # currently all predictors; a smaller subset was: ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
+print("chosenpredictors=",chosenpredictors)
+print()
+chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
+for t in chosentargets:
+    print("target = ", t)
+    howmany = 1 # default: keep at most one predictor
+    if(t.startswith("stage2")):
+        howmany = 2 # stage 2 targets may keep up to two predictors
+    if(t.startswith("stage3")):
+        howmany = 3 # stage 3 targets may keep up to three predictors
+    if(t.startswith("total")):
+        howmany = 3 # the total may keep up to three predictors
+    A=10000000.0 # starting Lasso alpha; multiplied by 1.2 below until at most howmany coefficients are nonzero
+    while(True):
+      regressor = Lasso(max_iter=100000, alpha=A, positive = True, normalize=False,  fit_intercept=False) # previously tried: LinearRegression(normalize=False,  fit_intercept=False)
+      x = dataset[chosenpredictors]
+      y = dataset[[t]]
+      regressor.fit(x, y)
+      rest = list(filter(lambda z:  z[0] != 0, zip(regressor.coef_,chosenpredictors) )) # (coefficient, predictor) pairs whose coefficient is nonzero
+      nonzero = len(rest)
+      if(nonzero > howmany):
+        A *= 1.2 # too many predictors kept: raise alpha and refit on the next iteration
+      else:
+       #print(rest)
+       displaycoefs(rest)
+       print("R2 = ", regressor.score(x,y))
+       Y_pred = regressor.predict(x) # NOTE(review): Y_pred is never read in the visible code
+       break
+    print()
--- a/scripts/modeldata/nuc/runanalysis.sh
+++ b/scripts/modeldata/nuc/runanalysis.sh
@ -0,0 +1 @@
+python learn.py > analysis.txt
--- a/scripts/plots/nuc/stackedperf.pdf
+++ b/scripts/plots/nuc/stackedperf.pdf
--- a/scripts/plots/skylake/stackedperf.pdf
+++ b/scripts/plots/skylake/stackedperf.pdf
--- a/scripts/refreshplot.sh
+++ b/scripts/refreshplot.sh
@ -4,5 +4,7 @@
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"

 gnuplot -e "filename='plots/skylake/parselinuxtable.txt';name='plots/skylake/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot
+gnuplot -e "filename='plots/nuc/parselinuxtable.txt';name='plots/nuc/stackedperf.pdf'" $SCRIPTPATH/stackbar.gnuplot

-echo "plots/skylake/stackedperf.pdf"
+echo "plots/skylake/stackedperf.pdf"
+echo "plots/nuc/stackedperf.pdf"