diff --git a/scripts/data/nuc/gbps.pdf b/scripts/data/nuc/gbps.pdf index e69de29b..efaef573 100644 Binary files a/scripts/data/nuc/gbps.pdf and b/scripts/data/nuc/gbps.pdf differ diff --git a/scripts/data/skylake/gbps.pdf b/scripts/data/skylake/gbps.pdf index e69de29b..ceff7574 100644 Binary files a/scripts/data/skylake/gbps.pdf and b/scripts/data/skylake/gbps.pdf differ diff --git a/scripts/modeldata/nuc/analysis.txt b/scripts/modeldata/nuc/analysis.txt index e4e94e96..f0322efb 100644 --- a/scripts/modeldata/nuc/analysis.txt +++ b/scripts/modeldata/nuc/analysis.txt @@ -1,24 +1,31 @@ loading modeltable.txt + integer_count float_count ... total_cycles ratio +count 14.000000 14.000000 ... 1.400000e+01 14.000000 +mean 17156.642857 21488.071429 ... 2.910166e+06 2.517336 +std 35520.191644 40466.023650 ... 3.465531e+06 1.043643 +min 0.000000 0.000000 ... 9.816417e+04 1.209798 +25% 13.000000 0.000000 ... 6.484984e+05 1.643587 +50% 2108.000000 0.500000 ... 1.607978e+06 2.305758 +75% 12044.500000 26800.250000 ... 3.766565e+06 3.436660 +max 130225.000000 114950.000000 ... 1.227446e+07 4.129712 + +[8 rows x 18 columns] chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count'] target = stage1_cycle_count - 0.55 cycles per byte_count -R2 = 0.9952005292028262 + 1.8 cycles per structural_indexes_count + 0.62 cycles per byte_count +R2 = 0.9966890133532899 target = stage2_cycle_count - 2 cycles per structural_indexes_count - 0.11 cycles per byte_count -R2 = 0.9941606366930587 - -target = stage3_cycle_count - 14 cycles per float_count - 11 cycles per structural_indexes_count - 0.31 cycles per byte_count -R2 = 0.9824350906350493 + 19 cycles per float_count + 9.5 cycles per structural_indexes_count + 0.33 cycles per byte_count +R2 = 0.9868882924152415 target = total_cycles - 17 cycles per float_count - 13 cycles per structural_indexes_count - 0.96 cycles per byte_count -R2 = 0.991605569037089 + 19 cycles per float_count + 11 cycles per structural_indexes_count + 0.95 cycles per byte_count +R2 = 0.9923672903089373 diff --git a/scripts/modeldata/nuc/learn.py b/scripts/modeldata/nuc/learn.py index 48cf9266..76cfc329 100644 --- a/scripts/modeldata/nuc/learn.py +++ b/scripts/modeldata/nuc/learn.py @@ -1,7 +1,7 @@ import os import pandas as pd import numpy as np -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn.linear_model import Ridge @@ -18,28 +18,30 @@ def displaycoefs(coef_name): datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"] -targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"] +targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"] print("loading", datafile) dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets) + dataset.columns = predictors + targets -dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count'] +dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count'] dataset['ratio']=dataset['total_cycles']/dataset['byte_count'] #print(dataset[['ratio']]) +print(dataset.describe()) -chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"] +chosenpredictors = predictors print("chosenpredictors=",chosenpredictors) print() -chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"] +chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"] for t in chosentargets: print("target = ", t) howmany = 1 # we want at most one predictors - if(t.startswith("stage2")): + if(t.startswith("stage1")): howmany = 2 # we allow for less - if(t.startswith("stage3")): + if(t.startswith("stage2")): howmany = 3 # we allow for more if(t.startswith("total")): howmany = 3 # we allow for more diff --git a/scripts/modeldata/skylake/analysis.txt b/scripts/modeldata/skylake/analysis.txt index e69de29b..9ce146ad 100644 --- a/scripts/modeldata/skylake/analysis.txt +++ b/scripts/modeldata/skylake/analysis.txt @@ -0,0 +1,31 @@ +loading modeltable.txt + integer_count float_count ... total_cycles ratio +count 14.000000 14.000000 ... 1.400000e+01 14.000000 +mean 17156.642857 21488.071429 ... 2.898374e+06 2.523610 +std 35520.191644 40466.023650 ... 3.408262e+06 1.021949 +min 0.000000 0.000000 ... 9.934419e+04 1.225950 +25% 13.000000 0.000000 ... 6.545444e+05 1.650991 +50% 2108.000000 0.500000 ... 1.611746e+06 2.369115 +75% 12044.500000 26800.250000 ... 3.803468e+06 3.441740 +max 130225.000000 114950.000000 ... 1.205456e+07 4.110868 + +[8 rows x 18 columns] +chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count'] + +target = stage1_cycle_count + 1.9 cycles per structural_indexes_count + 0.63 cycles per byte_count +R2 = 0.9965695015271681 + +target = stage2_cycle_count + 19 cycles per float_count + 9 cycles per structural_indexes_count + 0.36 cycles per byte_count +R2 = 0.9858116267470738 + +target = total_cycles + 19 cycles per float_count + 11 cycles per structural_indexes_count + 0.98 cycles per byte_count +R2 = 0.9919590553913162 + diff --git a/scripts/modeldata/skylake/learn.py b/scripts/modeldata/skylake/learn.py index 48cf9266..76cfc329 100644 --- a/scripts/modeldata/skylake/learn.py +++ b/scripts/modeldata/skylake/learn.py @@ -1,7 +1,7 @@ import os import pandas as pd import numpy as np -import matplotlib.pyplot as plt +#import matplotlib.pyplot as plt from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression from sklearn.linear_model import Ridge @@ -18,28 +18,30 @@ def displaycoefs(coef_name): datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"] -targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"] +targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"] print("loading", datafile) dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets) + dataset.columns = predictors + targets -dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count'] +dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count'] dataset['ratio']=dataset['total_cycles']/dataset['byte_count'] #print(dataset[['ratio']]) +print(dataset.describe()) -chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"] +chosenpredictors = predictors print("chosenpredictors=",chosenpredictors) print() -chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"] +chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"] for t in chosentargets: print("target = ", t) howmany = 1 # we want at most one predictors - if(t.startswith("stage2")): + if(t.startswith("stage1")): howmany = 2 # we allow for less - if(t.startswith("stage3")): + if(t.startswith("stage2")): howmany = 3 # we allow for more if(t.startswith("total")): howmany = 3 # we allow for more