Updating.
This commit is contained in:
parent
2e9a92588b
commit
58f7b55a09
Binary file not shown.
Binary file not shown.
|
@ -1,24 +1,31 @@
|
||||||
loading modeltable.txt
|
loading modeltable.txt
|
||||||
|
integer_count float_count ... total_cycles ratio
|
||||||
|
count 14.000000 14.000000 ... 1.400000e+01 14.000000
|
||||||
|
mean 17156.642857 21488.071429 ... 2.910166e+06 2.517336
|
||||||
|
std 35520.191644 40466.023650 ... 3.465531e+06 1.043643
|
||||||
|
min 0.000000 0.000000 ... 9.816417e+04 1.209798
|
||||||
|
25% 13.000000 0.000000 ... 6.484984e+05 1.643587
|
||||||
|
50% 2108.000000 0.500000 ... 1.607978e+06 2.305758
|
||||||
|
75% 12044.500000 26800.250000 ... 3.766565e+06 3.436660
|
||||||
|
max 130225.000000 114950.000000 ... 1.227446e+07 4.129712
|
||||||
|
|
||||||
|
[8 rows x 18 columns]
|
||||||
chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
|
chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
|
||||||
|
|
||||||
target = stage1_cycle_count
|
target = stage1_cycle_count
|
||||||
0.55 cycles per byte_count
|
1.8 cycles per structural_indexes_count
|
||||||
R2 = 0.9952005292028262
|
0.62 cycles per byte_count
|
||||||
|
R2 = 0.9966890133532899
|
||||||
|
|
||||||
target = stage2_cycle_count
|
target = stage2_cycle_count
|
||||||
2 cycles per structural_indexes_count
|
19 cycles per float_count
|
||||||
0.11 cycles per byte_count
|
9.5 cycles per structural_indexes_count
|
||||||
R2 = 0.9941606366930587
|
0.33 cycles per byte_count
|
||||||
|
R2 = 0.9868882924152415
|
||||||
target = stage3_cycle_count
|
|
||||||
14 cycles per float_count
|
|
||||||
11 cycles per structural_indexes_count
|
|
||||||
0.31 cycles per byte_count
|
|
||||||
R2 = 0.9824350906350493
|
|
||||||
|
|
||||||
target = total_cycles
|
target = total_cycles
|
||||||
17 cycles per float_count
|
19 cycles per float_count
|
||||||
13 cycles per structural_indexes_count
|
11 cycles per structural_indexes_count
|
||||||
0.96 cycles per byte_count
|
0.95 cycles per byte_count
|
||||||
R2 = 0.991605569037089
|
R2 = 0.9923672903089373
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
#import matplotlib.pyplot as plt
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.linear_model import LinearRegression
|
from sklearn.linear_model import LinearRegression
|
||||||
from sklearn.linear_model import Ridge
|
from sklearn.linear_model import Ridge
|
||||||
|
@ -18,28 +18,30 @@ def displaycoefs(coef_name):
|
||||||
datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
|
datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
|
||||||
|
|
||||||
predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
|
predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
|
||||||
targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
|
targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"]
|
||||||
|
|
||||||
print("loading", datafile)
|
print("loading", datafile)
|
||||||
dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)
|
dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
dataset.columns = predictors + targets
|
dataset.columns = predictors + targets
|
||||||
|
|
||||||
dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
|
dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']
|
||||||
dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
|
dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
|
||||||
#print(dataset[['ratio']])
|
#print(dataset[['ratio']])
|
||||||
|
print(dataset.describe())
|
||||||
|
|
||||||
chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
|
chosenpredictors = predictors
|
||||||
print("chosenpredictors=",chosenpredictors)
|
print("chosenpredictors=",chosenpredictors)
|
||||||
print()
|
print()
|
||||||
chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
|
chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"]
|
||||||
for t in chosentargets:
|
for t in chosentargets:
|
||||||
print("target = ", t)
|
print("target = ", t)
|
||||||
howmany = 1 # we want at most one predictors
|
howmany = 1 # we want at most one predictors
|
||||||
if(t.startswith("stage2")):
|
if(t.startswith("stage1")):
|
||||||
howmany = 2 # we allow for less
|
howmany = 2 # we allow for less
|
||||||
if(t.startswith("stage3")):
|
if(t.startswith("stage2")):
|
||||||
howmany = 3 # we allow for more
|
howmany = 3 # we allow for more
|
||||||
if(t.startswith("total")):
|
if(t.startswith("total")):
|
||||||
howmany = 3 # we allow for more
|
howmany = 3 # we allow for more
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
loading modeltable.txt
|
||||||
|
integer_count float_count ... total_cycles ratio
|
||||||
|
count 14.000000 14.000000 ... 1.400000e+01 14.000000
|
||||||
|
mean 17156.642857 21488.071429 ... 2.898374e+06 2.523610
|
||||||
|
std 35520.191644 40466.023650 ... 3.408262e+06 1.021949
|
||||||
|
min 0.000000 0.000000 ... 9.934419e+04 1.225950
|
||||||
|
25% 13.000000 0.000000 ... 6.545444e+05 1.650991
|
||||||
|
50% 2108.000000 0.500000 ... 1.611746e+06 2.369115
|
||||||
|
75% 12044.500000 26800.250000 ... 3.803468e+06 3.441740
|
||||||
|
max 130225.000000 114950.000000 ... 1.205456e+07 4.110868
|
||||||
|
|
||||||
|
[8 rows x 18 columns]
|
||||||
|
chosenpredictors= ['integer_count', 'float_count', 'string_count', 'backslash_count', 'nonasciibyte_count', 'object_count', 'array_count', 'null_count', 'true_count', 'false_count', 'byte_count', 'structural_indexes_count']
|
||||||
|
|
||||||
|
target = stage1_cycle_count
|
||||||
|
1.9 cycles per structural_indexes_count
|
||||||
|
0.63 cycles per byte_count
|
||||||
|
R2 = 0.9965695015271681
|
||||||
|
|
||||||
|
target = stage2_cycle_count
|
||||||
|
19 cycles per float_count
|
||||||
|
9 cycles per structural_indexes_count
|
||||||
|
0.36 cycles per byte_count
|
||||||
|
R2 = 0.9858116267470738
|
||||||
|
|
||||||
|
target = total_cycles
|
||||||
|
19 cycles per float_count
|
||||||
|
11 cycles per structural_indexes_count
|
||||||
|
0.98 cycles per byte_count
|
||||||
|
R2 = 0.9919590553913162
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib.pyplot as plt
|
#import matplotlib.pyplot as plt
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.linear_model import LinearRegression
|
from sklearn.linear_model import LinearRegression
|
||||||
from sklearn.linear_model import Ridge
|
from sklearn.linear_model import Ridge
|
||||||
|
@ -18,28 +18,30 @@ def displaycoefs(coef_name):
|
||||||
datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
|
datafile = "modeltable.txt" ## from ./scripts/statisticalmodel.sh
|
||||||
|
|
||||||
predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
|
predictors = ["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "object_count", "array_count", "null_count", "true_count", "false_count", "byte_count", "structural_indexes_count"]
|
||||||
targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count", "stage3_cycle_count", "stage3_instruction_count"]
|
targets = ["stage1_cycle_count", "stage1_instruction_count", "stage2_cycle_count", "stage2_instruction_count"]
|
||||||
|
|
||||||
print("loading", datafile)
|
print("loading", datafile)
|
||||||
dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)
|
dataset = pd.read_csv(datafile, delim_whitespace=True, skip_blank_lines=True, comment="#", header=None, names = predictors + targets)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
dataset.columns = predictors + targets
|
dataset.columns = predictors + targets
|
||||||
|
|
||||||
dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']+dataset['stage3_cycle_count']
|
dataset['total_cycles']=dataset['stage1_cycle_count']+dataset['stage2_cycle_count']
|
||||||
dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
|
dataset['ratio']=dataset['total_cycles']/dataset['byte_count']
|
||||||
#print(dataset[['ratio']])
|
#print(dataset[['ratio']])
|
||||||
|
print(dataset.describe())
|
||||||
|
|
||||||
chosenpredictors = predictors #["integer_count", "float_count", "string_count", "backslash_count", "nonasciibyte_count", "byte_count", "structural_indexes_count"]
|
chosenpredictors = predictors
|
||||||
print("chosenpredictors=",chosenpredictors)
|
print("chosenpredictors=",chosenpredictors)
|
||||||
print()
|
print()
|
||||||
chosentargets=["stage1_cycle_count", "stage2_cycle_count", "stage3_cycle_count","total_cycles"]
|
chosentargets=["stage1_cycle_count", "stage2_cycle_count","total_cycles"]
|
||||||
for t in chosentargets:
|
for t in chosentargets:
|
||||||
print("target = ", t)
|
print("target = ", t)
|
||||||
howmany = 1 # we want at most one predictors
|
howmany = 1 # we want at most one predictors
|
||||||
if(t.startswith("stage2")):
|
if(t.startswith("stage1")):
|
||||||
howmany = 2 # we allow for less
|
howmany = 2 # we allow for less
|
||||||
if(t.startswith("stage3")):
|
if(t.startswith("stage2")):
|
||||||
howmany = 3 # we allow for more
|
howmany = 3 # we allow for more
|
||||||
if(t.startswith("total")):
|
if(t.startswith("total")):
|
||||||
howmany = 3 # we allow for more
|
howmany = 3 # we allow for more
|
||||||
|
|
Loading…
Reference in New Issue