Commit 6e022f6d authored by robertfrankzhang's avatar robertfrankzhang
Browse files

notation changes

parent d1a1dcf0
......@@ -29,7 +29,7 @@ In addition, powerful data tracking collection methods are built into the scikit
<li>Times for matching, deletion, subsumption, selection, evaluation</li>
</ul>
These values can then be exported as a csv after training is complete for analysis using the built in "exportIterationTrackingData" method.
These values can then be exported as a CSV file after training is complete, for analysis, using the built-in "export_iteration_tracking_data" method.
In addition, the package includes functionality that allows the final rule population to be exported as a CSV file after training.
......@@ -56,7 +56,7 @@ dataFeatures = np.delete(formatted,-1,axis=1)
dataActions = formatted[:,-1]
#Initialize XCS Model
model = XCS(learningIterations = 5000)
model = XCS(learning_iterations = 5000)
#3-fold CV
print(np.mean(cross_val_score(model,dataFeatures,dataActions,cv=3)))
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
......@@ -89,9 +89,9 @@ class ClassifierSet:
classifier.updateActionSetSize(actionSetNumerositySum,xcs)
self.updateFitnessSet(xcs)
if xcs.doActionSetSubsumption:
if xcs.do_action_set_subsumption:
xcs.timer.startTimeSubsumption()
self.doActionSetSubsumption(xcs)
self.do_action_set_subsumption(xcs)
xcs.timer.stopTimeSubsumption()
def updateFitnessSet(self,xcs):
......@@ -112,7 +112,7 @@ class ClassifierSet:
i+=1
####Action Set Subsumption####
def doActionSetSubsumption(self,xcs):
def do_action_set_subsumption(self,xcs):
subsumer = None
for clRef in self.actionSet:
classifier = self.popSet[clRef]
......@@ -177,14 +177,14 @@ class ClassifierSet:
if changedByCrossover:
childClassifier1.prediction = (childClassifier1.prediction + childClassifier2.prediction)/2
childClassifier2.predictionError = xcs.predictionErrorReduction*(childClassifier1.predictionError + childClassifier2.predictionError)/2
childClassifier1.fitness = xcs.fitnessReduction*(childClassifier1.fitness+childClassifier2.fitness)/2
childClassifier2.predictionError = xcs.prediction_error_reduction*(childClassifier1.predictionError + childClassifier2.predictionError)/2
childClassifier1.fitness = xcs.fitness_reduction*(childClassifier1.fitness+childClassifier2.fitness)/2
childClassifier2.prediction = childClassifier1.prediction
childClassifier2.predictionError = childClassifier1.predictionError
childClassifier2.fitness = childClassifier1.fitness
else:
childClassifier1.fitness = xcs.fitnessReduction * childClassifier1.fitness
childClassifier2.fitness = xcs.fitnessReduction * childClassifier2.fitness
childClassifier1.fitness = xcs.fitness_reduction * childClassifier1.fitness
childClassifier2.fitness = xcs.fitness_reduction * childClassifier2.fitness
changedByMutation1 = childClassifier1.mutation(state,xcs)
changedByMutation2 = childClassifier2.mutation(state,xcs)
......@@ -198,7 +198,7 @@ class ClassifierSet:
self.insertDiscoveredClassifiers(childClassifier1,childClassifier2,parentClassifier1,parentClassifier2,xcs)
def insertDiscoveredClassifiers(self,child1,child2,parent1,parent2,xcs):
if xcs.doGASubsumption:
if xcs.do_GA_subsumption:
xcs.timer.startTimeSubsumption()
self.subsumeClassifier(child1,parent1,parent2,xcs)
self.subsumeClassifier(child2,parent1,parent2,xcs)
......
......@@ -15,7 +15,7 @@ class DataManagement:
self.isDefault = True # Is discrete attribute limit an int or string
try:
int(xcs.discreteAttributeLimit)
int(xcs.discrete_attribute_limit)
except:
self.isDefault = False
......@@ -46,7 +46,7 @@ class DataManagement:
if self.isDefault:
currentInstanceIndex = 0
stateDict = {}
while attIsDiscrete and len(list(stateDict.keys())) <= xcs.discreteAttributeLimit and currentInstanceIndex < self.numTrainInstances:
while attIsDiscrete and len(list(stateDict.keys())) <= xcs.discrete_attribute_limit and currentInstanceIndex < self.numTrainInstances:
target = features[currentInstanceIndex,att]
if target in list(stateDict.keys()):
stateDict[target] += 1
......@@ -56,15 +56,15 @@ class DataManagement:
stateDict[target] = 1
currentInstanceIndex+=1
if len(list(stateDict.keys())) > xcs.discreteAttributeLimit:
if len(list(stateDict.keys())) > xcs.discrete_attribute_limit:
attIsDiscrete = False
elif xcs.discreteAttributeLimit == "c":
if att in xcs.specifiedAttributes:
elif xcs.discrete_attribute_limit == "c":
if att in xcs.specified_attributes:
attIsDiscrete = False
else:
attIsDiscrete = True
elif xcs.discreteAttributeLimit == "d":
if att in xcs.specifiedAttributes:
elif xcs.discrete_attribute_limit == "d":
if att in xcs.specified_attributes:
attIsDiscrete = True
else:
attIsDiscrete = False
......
......@@ -5,7 +5,7 @@ class Environment:
def __init__(self,X,y,xcs):
self.dataRef = 0
self.formatData = DataManagement(X,y,xcs)
self.maxPayoff = xcs.maxPayoff
self.max_payoff = xcs.max_payoff
self.currentTrainState = self.formatData.trainFormatted[0][self.dataRef]
self.currentTrainPhenotype = self.formatData.trainFormatted[1][self.dataRef]
......@@ -28,6 +28,6 @@ class Environment:
def executeAction(self,action):
    """Return the payoff earned by ``action`` on the current training instance.

    Returns ``self.max_payoff`` when ``action`` equals
    ``self.currentTrainPhenotype`` and ``0`` otherwise.
    """
    # Only the post-rename attribute (max_payoff) is read; the stale
    # pre-rename `return self.maxPayoff` line is dropped.
    if action == self.currentTrainPhenotype:
        return self.max_payoff
    return 0
......@@ -27,9 +27,9 @@ class StringEnumerator:
tempFeatureArray[instanceIndex][attrInst] = str(self.dataFeatures[instanceIndex][attrInst])
self.dataFeatures = tempFeatureArray
self.deleteAllInstancesWithoutPhenotype()
self.delete_all_instances_without_phenotype()
def printInvalidAttributes(self):
def print_invalid_attributes(self):
print("ALL INVALID ATTRIBUTES & THEIR DISTINCT VALUES")
for attr in range(len(self.dataHeaders)):
distinctValues = []
......@@ -66,14 +66,14 @@ class StringEnumerator:
print(str(i)+"\t",end="")
print()
def change_class_name(self,newName):
    """Rename the class (phenotype) label.

    If a converter is registered in ``self.map`` under the current class
    label, it is re-keyed under ``newName`` so it stays attached to the class.

    Raises:
        Exception: if ``newName`` is already one of ``self.dataHeaders``.
    """
    if newName in self.dataHeaders:
        raise Exception("New Class Name Cannot Be An Already Existing Data Header Name")
    if self.classLabel in self.map:
        # Use the `newName` argument here: the instance has no `newName`
        # attribute, so `self.newName` raised AttributeError whenever a class
        # converter existed.
        self.map[newName] = self.map.pop(self.classLabel)
    self.classLabel = newName
def changeHeaderName(self,currentName,newName):
def change_header_name(self,currentName,newName):
if newName in self.dataHeaders or newName == self.classLabel:
raise Exception("New Class Name Cannot Be An Already Existing Data Header or Phenotype Name")
if currentName in self.dataHeaders:
......@@ -84,7 +84,7 @@ class StringEnumerator:
else:
raise Exception("Current Header Doesn't Exist")
def addAttributeConverter(self,headerName,array):#map is an array of strings, ordered by how it is to be enumerated enumeration
def add_attribute_converter(self,headerName,array):#map is an array of strings, ordered by how it is to be enumerated enumeration
if headerName in self.dataHeaders and not (headerName in self.map):
newAttributeConverter = {}
for index in range(len(array)):
......@@ -92,37 +92,37 @@ class StringEnumerator:
newAttributeConverter[str(array[index])] = str(index)
self.map[headerName] = newAttributeConverter
def add_attribute_converter_map(self,headerName,map):
    """Register a caller-supplied converter for one attribute.

    ``map`` is stored as-is in ``self.map[headerName]``. The parameter name
    shadows the ``map`` builtin; it is kept so keyword callers keep working.

    Raises:
        Exception: ("Invalid Map") if ``headerName`` is not in
            ``self.dataHeaders``, already has a converter, or ``map``
            contains any of the keys "", "NA" or "NaN".
    """
    # NOTE(review): these look like reserved missing-value markers -- confirm.
    reservedKeys = ("", "NA", "NaN")
    isValid = (
        headerName in self.dataHeaders
        and headerName not in self.map
        and not any(key in map for key in reservedKeys)
    )
    if not isValid:
        raise Exception("Invalid Map")
    self.map[headerName] = map
def add_attribute_converter_random(self,headerName):
    """Build a converter for ``headerName`` from the values seen in the data.

    Collects the distinct values of that column in ``self.dataFeatures`` in
    first-seen order, skipping "NA", and passes them to
    ``self.add_attribute_converter``. Does nothing if ``headerName`` is not
    in ``self.dataHeaders`` or already has an entry in ``self.map``.
    """
    if headerName not in self.dataHeaders or headerName in self.map:
        return
    headerIndex = np.where(self.dataHeaders == headerName)[0][0]
    # dict.fromkeys de-duplicates in one pass and keeps first-seen order,
    # replacing the repeated np.append onto a growing float-seeded array.
    distinctValues = dict.fromkeys(
        instance[headerIndex]
        for instance in self.dataFeatures
        if instance[headerIndex] != "NA"
    )
    uniqueItems = np.array(list(distinctValues))
    self.add_attribute_converter(headerName,uniqueItems)
def add_class_converter(self,array):
    """Register a converter for the class label from an ordered sequence.

    Each element of ``array`` is mapped (as a string) to the string form of
    its position, and the result is stored in ``self.map[self.classLabel]``.
    Does nothing if the class label already has a converter.
    """
    if self.classLabel in self.map:
        return
    # Repeated values keep the last index seen, as in the index-based loop.
    self.map[self.classLabel] = {
        str(value): str(index) for index, value in enumerate(array)
    }
def add_class_converter_random(self):
    """Build the class converter from the phenotypes seen in the data.

    Collects the distinct values of ``self.dataPhenotypes`` in first-seen
    order, skipping "NA", and passes them to ``self.add_class_converter``.
    Does nothing if the class label already has a converter in ``self.map``.
    """
    if self.classLabel in self.map:
        return
    # dict.fromkeys de-duplicates in one pass and keeps first-seen order,
    # replacing the repeated np.append onto a growing float-seeded array.
    distinctPhenotypes = dict.fromkeys(
        phenotype for phenotype in self.dataPhenotypes if phenotype != "NA"
    )
    uniqueItems = np.array(list(distinctPhenotypes))
    self.add_class_converter(uniqueItems)
def convertAllAttributes(self):
def convert_all_attributes(self):
for attribute in self.dataHeaders:
if attribute in self.map.keys():
i = np.where(self.dataHeaders == attribute)[0][0]
......@@ -136,7 +136,7 @@ class StringEnumerator:
i = np.where(self.dataPhenotypes == state)
self.dataPhenotypes[i] = self.map[self.classLabel][state]
def deleteAttribute(self,headerName):
def delete_attribute(self,headerName):
if headerName in self.dataHeaders:
i = np.where(headerName == self.dataHeaders)[0][0]
newFeatures = np.array([[2,3]])
......@@ -154,7 +154,7 @@ class StringEnumerator:
else:
raise Exception("Header Doesn't Exist")
def deleteAllInstancesWithoutHeaderData(self,headerName):
def delete_all_instances_without_header_data(self,headerName):
newFeatures = np.array([[2,3]])
newPhenotypes = np.array([])
attributeIndex = np.where(self.dataHeaders == headerName)[0][0]
......@@ -173,7 +173,7 @@ class StringEnumerator:
self.dataFeatures = newFeatures
self.dataPhenotypes = newPhenotypes
def deleteAllInstancesWithoutPhenotype(self):
def delete_all_instances_without_phenotype(self):
newFeatures = np.array([[2,3]])
newPhenotypes = np.array([])
firstTime = True
......@@ -191,7 +191,7 @@ class StringEnumerator:
self.dataPhenotypes = newPhenotypes
def print(self):
isFullNumber = self.checkIsFullNumeric()
isFullNumber = self.check_is_full_numeric()
print("Converted Data Features and Phenotypes")
for header in self.dataHeaders:
print(header,end="\t")
......@@ -214,7 +214,7 @@ class StringEnumerator:
print("NA")
print()
def printAttributeConversions(self):
def print_attribute_conversions(self):
print("Changed Attribute Conversions")
for headerName,conversions in self.map:
print(headerName + " conversions:")
......@@ -223,7 +223,7 @@ class StringEnumerator:
print()
print()
def checkIsFullNumeric(self):
def check_is_full_numeric(self):
try:
for instance in self.dataFeatures:
for value in instance:
......@@ -238,8 +238,8 @@ class StringEnumerator:
return True
def getParams(self):
if not(self.checkIsFullNumeric()):
def get_params(self):
if not(self.check_is_full_numeric()):
raise Exception("Features and Phenotypes must be fully numeric")
newFeatures = np.array([[2,3]],dtype=float)
......
This diff is collapsed.
......@@ -46,12 +46,12 @@ class test_StringEnumerator(unittest.TestCase):
# Changes header and class names. Checks map, and classLabel/dataHeaders correctness
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeClassName("country")
se.changeHeaderName("N1","gender")
se.changeHeaderName("N2","N1")
se.changeHeaderName("N1","floats")
se.changeHeaderName("N3","phenotype")
se.changeHeaderName("phenotype","age")
se.change_class_name("country")
se.change_header_name("N1","gender")
se.change_header_name("N2","N1")
se.change_header_name("N1","floats")
se.change_header_name("N3","phenotype")
se.change_header_name("phenotype","age")
cHeaders = np.array(["gender","floats","age"])
self.assertTrue(np.array_equal(cHeaders,se.dataHeaders))
self.assertTrue(np.array_equal("country", se.classLabel))
......@@ -60,15 +60,15 @@ class test_StringEnumerator(unittest.TestCase):
# Changes header and class names. Checks map, and classLabel/dataHeaders correctness
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.addClassConverterRandom()
se.changeHeaderName("N1","gender")
se.addAttributeConverterRandom("gender")
se.changeHeaderName("gender","Gender")
se.addAttributeConverterRandom("Gender")
se.addAttributeConverterRandom("Gender")
se.addAttributeConverterRandom("gender")
se.addAttributeConverterRandom("N3")
se.changeHeaderName("N3","Age")
se.add_class_converter_random()
se.change_header_name("N1","gender")
se.add_attribute_converter_random("gender")
se.change_header_name("gender","Gender")
se.add_attribute_converter_random("Gender")
se.add_attribute_converter_random("Gender")
se.add_attribute_converter_random("gender")
se.add_attribute_converter_random("N3")
se.change_header_name("N3","Age")
cHeaders = np.array(["Gender","N2","Age"])
cMap = {"phenotype":{"china":"0","japan":"1","russia":"2"},"Gender":{"male":"0","female":"1"},"Age":{"young":"0","old":"1"}}
......@@ -76,41 +76,41 @@ class test_StringEnumerator(unittest.TestCase):
self.assertTrue(np.array_equal("phenotype", se.classLabel))
self.assertTrue(se.map == cMap)
def testchange_class_nameInvalid(self):
    """Renaming the class to an existing header name must raise."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    with self.assertRaises(Exception) as context:
        se.change_class_name("N1")
    # Checked after the `with` block so the assertion actually runs;
    # assertIn reports both operands on failure.
    self.assertIn("New Class Name Cannot Be An Already Existing Data Header Name", str(context.exception))
def testchange_header_nameInvalid(self):
    """Renaming a header to an existing header or class name must raise."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    with self.assertRaises(Exception) as context:
        se.change_header_name("N1","N2")
    # Checked after the `with` block so the assertion actually runs;
    # assertIn reports both operands on failure.
    self.assertIn("New Class Name Cannot Be An Already Existing Data Header or Phenotype Name", str(context.exception))
def testchange_header_nameInvalid2(self):
    """Renaming a header that does not exist must raise."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    with self.assertRaises(Exception) as context:
        se.change_header_name("N", "N5")
    # Checked after the `with` block so the assertion actually runs;
    # assertIn reports both operands on failure.
    self.assertIn("Current Header Doesn't Exist", str(context.exception))
def testDeleteAttribute(self):
def testdelete_attribute(self):
# Deletes attributes and checks map, headers, and arrays for correctness
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeHeaderName("N1","gender")
se.addAttributeConverterRandom("gender")
se.addAttributeConverterRandom("N3")
se.deleteAttribute("gender")
se.change_header_name("N1","gender")
se.add_attribute_converter_random("gender")
se.add_attribute_converter_random("N3")
se.delete_attribute("gender")
cHeaders = np.array(["N2","N3"])
cMap = {"N3": {"young": "0", "old": "1"}}
self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
......@@ -122,21 +122,21 @@ class test_StringEnumerator(unittest.TestCase):
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
with self.assertRaises(Exception) as context:
se.deleteAttribute("N")
se.delete_attribute("N")
self.assertTrue("Header Doesn't Exist" in str(context.exception))
def testDeleteInstancesWithMissing(self):
# Deletes instances and checks arrays for correctness
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeHeaderName("N1","gender")
se.addAttributeConverterRandom("gender")
se.addAttributeConverterRandom("N3")
se.addClassConverterRandom()
se.convertAllAttributes()
se.deleteAllInstancesWithoutHeaderData("gender")
se.deleteAllInstancesWithoutHeaderData("N2")
se.deleteAllInstancesWithoutHeaderData("N3")
se.change_header_name("N1","gender")
se.add_attribute_converter_random("gender")
se.add_attribute_converter_random("N3")
se.add_class_converter_random()
se.convert_all_attributes()
se.delete_all_instances_without_header_data("gender")
se.delete_all_instances_without_header_data("N2")
se.delete_all_instances_without_header_data("N3")
cHeaders = np.array(["gender","N2","N3"])
cMap = {"phenotype":{"china":"0","japan":"1","russia":"2"},"gender":{"male":"0","female":"1"},"N3":{"young":"0","old":"1"}}
cArray = np.array([["0","1.2","0"],["1","-0.4","1"]])
......@@ -151,15 +151,15 @@ class test_StringEnumerator(unittest.TestCase):
# Deletes instances and checks arrays for correctness
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeHeaderName("N1","gender")
se.deleteAllInstancesWithoutHeaderData("gender")
se.deleteAllInstancesWithoutHeaderData("N2")
se.deleteAllInstancesWithoutHeaderData("N3")
se.change_header_name("N1","gender")
se.delete_all_instances_without_header_data("gender")
se.delete_all_instances_without_header_data("N2")
se.delete_all_instances_without_header_data("N3")
se.addAttributeConverterRandom("gender")
se.addAttributeConverterRandom("N3")
se.addClassConverterRandom()
se.convertAllAttributes()
se.add_attribute_converter_random("gender")
se.add_attribute_converter_random("N3")
se.add_class_converter_random()
se.convert_all_attributes()
cHeaders = np.array(["gender","N2","N3"])
cMap = {"phenotype":{"china":"0"},"gender":{"male":"0","female":"1"},"N3":{"young":"0","old":"1"}}
......@@ -175,41 +175,41 @@ class test_StringEnumerator(unittest.TestCase):
# Checks non missing numeric
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
self.assertFalse(se.checkIsFullNumeric())
se.addAttributeConverterRandom("N1")
se.convertAllAttributes()
self.assertFalse(se.checkIsFullNumeric())
se.addAttributeConverterRandom("N3")
se.addClassConverterRandom()
se.convertAllAttributes()
self.assertTrue(se.checkIsFullNumeric())
self.assertFalse(se.check_is_full_numeric())
se.add_attribute_converter_random("N1")
se.convert_all_attributes()
self.assertFalse(se.check_is_full_numeric())
se.add_attribute_converter_random("N3")
se.add_class_converter_random()
se.convert_all_attributes()
self.assertTrue(se.check_is_full_numeric())
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
se2 = StringEnumerator(dataPath, "phenotype")
self.assertTrue(se2.checkIsFullNumeric())
self.assertTrue(se2.check_is_full_numeric())
def testget_paramsFail(self):
    """get_params must raise while features/class are not fully enumerated."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    with self.assertRaises(Exception) as context:
        se.get_params()
    # Checked after the `with` block so the assertion actually runs;
    # assertIn reports both operands on failure.
    self.assertIn("Features and Phenotypes must be fully numeric", str(context.exception))
def testGetParams1(self):
def testget_params1(self):
# Get Params Test
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeHeaderName("N1","gender")
se.changeHeaderName("N2","floats")
se.changeHeaderName("N3","age")
se.changeClassName("country")
se.addAttributeConverterRandom("gender")
se.addAttributeConverterRandom("age")
#se.addAttributeConverterRandom("floats") #You can convert "floats" to discrete values as well
se.addClassConverterRandom()
se.convertAllAttributes()
dataHeaders,classLabel,dataFeatures,dataPhenotypes = se.getParams()
se.change_header_name("N1","gender")
se.change_header_name("N2","floats")
se.change_header_name("N3","age")
se.change_class_name("country")
se.add_attribute_converter_random("gender")
se.add_attribute_converter_random("age")
#se.add_attribute_converter_random("floats") #You can convert "floats" to discrete values as well
se.add_class_converter_random()
se.convert_all_attributes()
dataHeaders,classLabel,dataFeatures,dataPhenotypes = se.get_params()
cHeaders = np.array(["gender","floats","age"])
cFeatures = np.array([[0,1.2,0],[1,0.3,np.nan],[1,-0.4,1],[np.nan,0,0]])
cPhenotypes = np.array([0,1,0,2])
......@@ -218,19 +218,19 @@ class test_StringEnumerator(unittest.TestCase):
self.assertTrue(np.allclose(cFeatures,dataFeatures,equal_nan=True))
self.assertTrue(np.allclose(cPhenotypes, dataPhenotypes, equal_nan=True))
def testGetParams2(self):
def testget_params2(self):
# Get Params Test
dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
se = StringEnumerator(dataPath, "phenotype")
se.changeHeaderName("N1", "gender")
se.changeHeaderName("N2", "floats")
se.changeHeaderName("N3", "age")
se.changeClassName("country")
se.addAttributeConverter("gender",np.array(["female","male","NA","other"]))
se.addAttributeConverter("age",np.array(["old","young"]))
se.addClassConverterRandom()
se.convertAllAttributes()
dataHeaders, classLabel, dataFeatures, dataPhenotypes = se.getParams()
se.change_header_name("N1", "gender")
se.change_header_name("N2", "floats")
se.change_header_name("N3", "age")
se.change_class_name("country")
se.add_attribute_converter("gender",np.array(["female","male","NA","other"]))
se.add_attribute_converter("age",np.array(["old","young"]))
se.add_class_converter_random()
se.convert_all_attributes()
dataHeaders, classLabel, dataFeatures, dataPhenotypes = se.get_params()
cHeaders = np.array(["gender", "floats", "age"])
cFeatures = np.array([[1, 1.2, 1], [0, 0.3, np.nan], [0, -0.4, 0], [np.nan, 0, 1]])
cPhenotypes = np.array([0, 1, 0, 2])
......@@ -242,4 +242,4 @@ class test_StringEnumerator(unittest.TestCase):
# def testPrintInvalids(self):
# dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData2.csv")
# se = DataCleanup.StringEnumerator(dataPath, "phenotype")
# se.printInvalidAttributes()
\ No newline at end of file
# se.print_invalid_attributes()
\ No newline at end of file
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment