Updating.

2019-01-03 15:57:34 -05:00 · 2019-01-03 15:57:34 -05:00 · 58f7b55a09
parent 2e9a92588b
commit 58f7b55a09
6 changed files with 71 additions and 29 deletions
--- a/scripts/data/nuc/gbps.pdf
+++ b/scripts/data/nuc/gbps.pdf
--- a/scripts/data/skylake/gbps.pdf
+++ b/scripts/data/skylake/gbps.pdf
--- a/scripts/modeldata/nuc/analysis.txt
+++ b/scripts/modeldata/nuc/analysis.txt
@ -1,24 +1,31 @@
 loading modeltable.txt
+       integer_count    float_count    ...      total_cycles      ratio
+count      14.000000      14.000000    ...      1.400000e+01  14.000000
+mean    17156.642857   21488.071429    ...      2.910166e+06   2.517336
+std     35520.191644   40466.023650    ...      3.465531e+06   1.043643
+min         0.000000       0.000000    ...      9.816417e+04   1.209798
+25%        13.000000       0.000000    ...      6.484984e+05   1.643587
+50%      2108.000000       0.500000    ...      1.607978e+06   2.305758
+75%     12044.500000   26800.250000    ...      3.766565e+06   3.436660
+max    130225.000000  114950.000000    ...      1.227446e+07   4.129712
+
+[8 rows x 18 columns]
 chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']

 target =  stage1_cycle_count
-	0.55 cycles per byte_count 
-R2 =  0.9952005292028262
+	1.8 cycles per structural_indexes_count 
+	0.62 cycles per byte_count 
+R2 =  0.9966890133532899

 target =  stage2_cycle_count
-	2 cycles per structural_indexes_count 
-	0.11 cycles per byte_count 
-R2 =  0.9941606366930587
-
-target =  stage3_cycle_count
-	14 cycles per float_count 
-	11 cycles per structural_indexes_count 
-	0.31 cycles per byte_count 
-R2 =  0.9824350906350493
+	19 cycles per float_count 
+	9.5 cycles per structural_indexes_count 
+	0.33 cycles per byte_count 
+R2 =  0.9868882924152415

 target =  total_cycles
-	17 cycles per float_count 
-	13 cycles per structural_indexes_count 
-	0.96 cycles per byte_count 
-R2 =  0.991605569037089
+	19 cycles per float_count 
+	11 cycles per structural_indexes_count 
+	0.95 cycles per byte_count 
+R2 =  0.9923672903089373

--- a/scripts/modeldata/nuc/learn.py
+++ b/scripts/modeldata/nuc/learn.py
@ -1,7 +1,7 @@
 import os
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
+#import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge
@ -18,28 +18,30 @@ def displaycoefs(coef_name):
 datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh

 predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
-targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
+targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"]

 print("loading", datafile)
 dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)


+
 dataset.columns = predictors + targets

-dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
+dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']
 dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
 #print(dataset[['ratio']])
+print(dataset.describe())

-chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
+chosenpredictors = predictors
 print("chosenpredictors=",chosenpredictors)
 print()
-chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
+chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"]
 for t in chosentargets:
    print("target = ", t)
    howmany = 1 # we want at most one predictor
-    if(t.startswith("stage2")):
+    if(t.startswith("stage1")):
        howmany = 2 # we allow for less
-    if(t.startswith("stage3")):
+    if(t.startswith("stage2")):
        howmany = 3 # we allow for more
    if(t.startswith("total")):
        howmany = 3 # we allow for more
--- a/scripts/modeldata/skylake/analysis.txt
+++ b/scripts/modeldata/skylake/analysis.txt
@ -0,0 +1,31 @@
+loading modeltable.txt
+       integer_count    float_count    ...      total_cycles      ratio
+count      14.000000      14.000000    ...      1.400000e+01  14.000000
+mean    17156.642857   21488.071429    ...      2.898374e+06   2.523610
+std     35520.191644   40466.023650    ...      3.408262e+06   1.021949
+min         0.000000       0.000000    ...      9.934419e+04   1.225950
+25%        13.000000       0.000000    ...      6.545444e+05   1.650991
+50%      2108.000000       0.500000    ...      1.611746e+06   2.369115
+75%     12044.500000   26800.250000    ...      3.803468e+06   3.441740
+max    130225.000000  114950.000000    ...      1.205456e+07   4.110868
+
+[8 rows x 18 columns]
+chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
+
+target =  stage1_cycle_count
+	1.9 cycles per structural_indexes_count 
+	0.63 cycles per byte_count 
+R2 =  0.9965695015271681
+
+target =  stage2_cycle_count
+	19 cycles per float_count 
+	9 cycles per structural_indexes_count 
+	0.36 cycles per byte_count 
+R2 =  0.9858116267470738
+
+target =  total_cycles
+	19 cycles per float_count 
+	11 cycles per structural_indexes_count 
+	0.98 cycles per byte_count 
+R2 =  0.9919590553913162
+
--- a/scripts/modeldata/skylake/learn.py
+++ b/scripts/modeldata/skylake/learn.py
@ -1,7 +1,7 @@
 import os
 import pandas as pd
 import numpy as np
-import matplotlib.pyplot as plt
+#import matplotlib.pyplot as plt
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge
@ -18,28 +18,30 @@ def displaycoefs(coef_name):
 datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh

 predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
-targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
+targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"]

 print("loading", datafile)
 dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)


+
 dataset.columns = predictors + targets

-dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
+dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']
 dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
 #print(dataset[['ratio']])
+print(dataset.describe())

-chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
+chosenpredictors = predictors
 print("chosenpredictors=",chosenpredictors)
 print()
-chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
+chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"]
 for t in chosentargets:
    print("target = ", t)
    howmany = 1 # we want at most one predictor
-    if(t.startswith("stage2")):
+    if(t.startswith("stage1")):
        howmany = 2 # we allow for less
-    if(t.startswith("stage3")):
+    if(t.startswith("stage2")):
        howmany = 3 # we allow for more
    if(t.startswith("total")):
        howmany = 3 # we allow for more