Done analysis.

2018-12-24 15:10:55 -05:00 · 2018-12-24 15:10:55 -05:00 · 579bc0d848
parent 95d1dc24f6
commit 579bc0d848
4 changed files with 93 additions and 2 deletions
--- a/benchmark/statisticalmodel.cpp
+++ b/benchmark/statisticalmodel.cpp
@ -149,8 +149,8 @@ int main(int argc, char *argv[]) {
         "false_count byte_count structural_indexes_count ");
 #ifdef __linux__
  printf(
-      " stage1_instruction_count stage1_cycle_count stage2_instruction_count "
-      "stage2_cycle_count  stage3_instruction_count stage3_cycle_count ");
+      "  stage1_cycle_count stage1_instruction_count  stage2_cycle_count "
+      " stage2_instruction_count  stage3_cycle_count stage3_instruction_count  ");
 #else
  printf("(you are not under linux, so perf counters are disaabled)");
 #endif
--- a/scripts/modeldata/skylake/analysis.txt
+++ b/scripts/modeldata/skylake/analysis.txt
@ -0,0 +1,28 @@
+loading modeltable.txt
+chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
+
+target =  stage1_cycle_count
+	0.98 cycles per nonasciibyte_count 
+	0.46 cycles per byte_count 
+R2 =  0.9987789976254756
+
+target =  stage2_cycle_count
+	2 cycles per structural_indexes_count 
+	0.11 cycles per byte_count 
+R2 =  0.9944144115615688
+
+target =  stage3_cycle_count
+	2.5 cycles per float_count 
+	2.2 cycles per string_count 
+	1.4 cycles per structural_indexes_count 
+	0.095 cycles per byte_count 
+R2 =  0.9983269621964512
+
+target =  total_cycles
+	6.6 cycles per string_count 
+	6 cycles per float_count 
+	2.6 cycles per structural_indexes_count 
+	0.82 cycles per nonasciibyte_count 
+	0.64 cycles per byte_count 
+R2 =  0.9994265124423222
+
--- a/scripts/modeldata/skylake/learn.py
+++ b/scripts/modeldata/skylake/learn.py
@ -0,0 +1,62 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import Ridge
+from sklearn.linear_model import Lasso
+from sklearn.preprocessing import normalize
+from sklearn import metrics
+
+def displaycoefs(coef_name):
+    """Print each (coefficient, name) pair as "<coef> cycles per <name>", largest pair first."""
+    # sorted() builds a new list, so the caller's sequence is left untouched
+    for c,n in sorted(coef_name, reverse=True):
+        print("\t%0.2g cycles per %s "%(c,n))
+
+# Fit a sparse, non-negative linear model of cycle counts for each parsing stage.
+datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
+
+predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
+targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
+
+print("loading", datafile)
+# The table is whitespace-separated with no header row; names= labels the columns in file order.
+# sep=r"\s+" is the documented equivalent of delim_whitespace=True, which pandas has deprecated.
+dataset = pd.read_csv(datafile, sep=r"\s+", skip_blank_lines=True, comment="#", header=None, names=predictors + targets)
+
+dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
+dataset['ratio']=dataset['total_cycles']/dataset['byte_count'] # cycles per byte; not used by the fits below
+
+chosenpredictors = predictors # shorten this list to restrict the features a model may use
+print("chosenpredictors=",chosenpredictors)
+print()
+
+chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
+# Largest number of predictors a model may keep, by target-name prefix; other targets get 2.
+maxpredictors = {"stage3": 4, "total": 5}
+x = dataset[chosenpredictors] # same feature matrix for every target
+
+for t in chosentargets:
+    print("target = ", t)
+    howmany = 2
+    for prefix, limit in maxpredictors.items():
+        if t.startswith(prefix):
+            howmany = limit
+    y = dataset[t] # 1-D target, so coef_ is a flat vector that zips with the predictor names
+    A = 10000000.0
+    while True:
+        # positive=True forces coefficients >= 0; without an intercept an all-zero input predicts 0 cycles.
+        # normalize=False was the default and scikit-learn has since removed the keyword, so it is
+        # not passed here.
+        regressor = Lasso(max_iter=100000, alpha=A, positive=True, fit_intercept=False)
+        regressor.fit(x, y)
+        rest = [(c, n) for c, n in zip(regressor.coef_, chosenpredictors) if c != 0]
+        if len(rest) <= howmany:
+            break
+        # Too many predictors survived: strengthen the L1 penalty by 20% and refit.
+        A *= 1.2
+    displaycoefs(rest)
+    print("R2 = ", regressor.score(x,y))
+    print()
--- a/scripts/modeldata/skylake/runanalysis.sh
+++ b/scripts/modeldata/skylake/runanalysis.sh
@ -0,0 +1 @@
+python learn.py > analysis.txt # rerun the fits; learn.py opens modeltable.txt by relative path, and analysis.txt is overwritten