Updating.

2018-12-24 16:02:53 -05:00 · 2018-12-24 16:02:53 -05:00 · 2afff77567
parent d975fc7543
commit 2afff77567
6 changed files with 90 additions and 1 deletions
--- a/scripts/modeldata/nuc/analysis.txt
+++ b/scripts/modeldata/nuc/analysis.txt
@ -0,0 +1,24 @@
 loading modeltable.txt
 chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
 target =  stage1_cycle_count
 	0.55 cycles per byte_count 
 R2 =  0.9952005292028262
 target =  stage2_cycle_count
 	2 cycles per structural_indexes_count 
 	0.11 cycles per byte_count 
 R2 =  0.9941606366930587
 target =  stage3_cycle_count
 	14 cycles per float_count 
 	11 cycles per structural_indexes_count 
 	0.31 cycles per byte_count 
 R2 =  0.9824350906350493
 target =  total_cycles
 	17 cycles per float_count 
 	13 cycles per structural_indexes_count 
 	0.96 cycles per byte_count 
 R2 =  0.991605569037089
--- a/scripts/modeldata/nuc/learn.py
+++ b/scripts/modeldata/nuc/learn.py
@ -0,0 +1,62 @@
 import os
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge
 from sklearn.linear_model import Lasso
 from sklearn.preprocessing import normalize
 from sklearn import metrics
 def displaycoefs(coef_name):
     """Print regression coefficients, largest first.

     coef_name is an iterable of (coefficient, predictor_name) pairs.  Each
     pair is printed on its own tab-indented line, with the coefficient
     formatted to two significant digits (``%0.2g``).

     The argument is not modified: ordering is done on a sorted copy, so the
     caller's sequence keeps its original order and any iterable (not only a
     list) is accepted.
     """
     # Pairs compare by coefficient first, then by name; reverse=True puts the
     # largest coefficient at the top of the report.
     for c, n in sorted(coef_name, reverse=True):
         print("\t%0.2g cycles per %s " % (c, n))
 # Whitespace-separated table of counts and timings, produced by
 # ./scripts/statisticalmodel.sh.  Lines starting with '#' are comments.
 datafile = "modeltable.txt"
 # The table has no header row; its columns are the predictors followed by the
 # targets, in exactly this order.
 predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
 targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
 print("loading", datafile)
 # sep=r"\s+" is the documented equivalent of delim_whitespace=True, which is
 # deprecated in recent pandas.  names= already labels every column, so no
 # separate assignment to dataset.columns is required.
 dataset = pd.read_csv(
     datafile,
     sep=r"\s+",
     skip_blank_lines=True,
     comment="#",
     header=None,
     names=predictors + targets,
 )
 # Derived columns: cycles summed over the three stages, and cycles per input byte.
 dataset['total_cycles'] = dataset['stage1_cycle_count'] + dataset['stage2_cycle_count'] + dataset['stage3_cycle_count']
 dataset['ratio'] = dataset['total_cycles'] / dataset['byte_count']
 # All predictors are offered to the model; the Lasso penalty does the selection.
 chosenpredictors = predictors
 print("chosenpredictors=",chosenpredictors)
 print()
 chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
 # For every target, fit a non-negative, intercept-free Lasso model and keep
 # increasing the L1 penalty until at most `howmany` predictors remain.
 for t in chosentargets:
     print("target = ", t)
     # Sparsity budget: maximum number of predictors kept for this target.
     if t.startswith("stage2"):
         howmany = 2
     elif t.startswith(("stage3", "total")):
         howmany = 3
     else:
         howmany = 1  # stage 1: a single predictor
     # The training data does not depend on the penalty, so build it once.
     x = dataset[chosenpredictors]
     y = dataset[[t]]
     A = 10000000.0  # initial L1 penalty (alpha); grown by 20% per retry
     while True:
         # normalize= is intentionally not passed: it defaulted to False and
         # was removed from scikit-learn, where passing it raises TypeError.
         regressor = Lasso(max_iter=100000, alpha=A, positive=True, fit_intercept=False)
         regressor.fit(x, y)
         # Keep only the predictors the penalty did not shrink to exactly zero.
         rest = [(c, n) for c, n in zip(regressor.coef_, chosenpredictors) if c != 0]
         if len(rest) > howmany:
             # Too many predictors survive: penalise harder and refit.
             A *= 1.2
             continue
         displaycoefs(rest)
         print("R2 = ", regressor.score(x, y))
         break
     print()
--- a/scripts/modeldata/nuc/runanalysis.sh
+++ b/scripts/modeldata/nuc/runanalysis.sh
@ -0,0 +1 @@
 # Run the model-fitting script and save everything it prints to analysis.txt
 # (overwriting any previous report).
 python learn.py > analysis.txt
--- a/scripts/plots/nuc/stackedperf.pdf
+++ b/scripts/plots/nuc/stackedperf.pdf
--- a/scripts/plots/skylake/stackedperf.pdf
+++ b/scripts/plots/skylake/stackedperf.pdf
--- a/scripts/refreshplot.sh
+++ b/scripts/refreshplot.sh
@ -4,5 +4,7 @@
 # Absolute physical path of the directory that contains this script.
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
 # Render one stacked-performance PDF per machine.  `filename` and `name` are
 # handed to stackbar.gnuplot as gnuplot variables (presumably the input table
 # and the output file -- confirm against stackbar.gnuplot).  The plots/...
 # paths are relative to the current working directory.
 # The script path is quoted so a checkout directory containing spaces works.
 gnuplot -e "filename='plots/skylake/parselinuxtable.txt';name='plots/skylake/stackedperf.pdf'" "$SCRIPTPATH/stackbar.gnuplot"
 gnuplot -e "filename='plots/nuc/parselinuxtable.txt';name='plots/nuc/stackedperf.pdf'" "$SCRIPTPATH/stackbar.gnuplot"
 # Report the files that were (re)generated.
 echo "plots/skylake/stackedperf.pdf"
 echo "plots/nuc/stackedperf.pdf"