2016-06-27 9 views
1

Also ich bin sehr neu in R. Ich hatte Probleme beim Importieren von Daten mit Mathematica, also entschied ich mich für einen Wechsel, da R viel besser für die Analytik geeignet ist. Ich erstelle ein paar maschinelle Lerntechniken, um die Daten zu analysieren, die ich jetzt importieren kann. Dies ist eine genetische Programmierimplementierung, die, wenn sie fertiggestellt ist, eine symbolische Regression bei einigen Daten durchführen sollte. Abgesehen von den Fehlern sollte das Skript fast vollständig sein (ich muss den Kompositionsoperator programmieren, die Division schützen und die Liste der Basisfunktionen abschließen). Ich hatte ein vorheriges Problem beim Programmieren des Skripts, das aufgelöst wurde (R Error Genetic Programming Implementation). Ich habe das Skript für einen Tag debuggen lassen und ich habe keine Ideen mehr.R GP Implementation Error

Meine Fehlermeldung lautet:

Error in makeStrName(nextGen) : object 'nextGen' not found 
> 
> #Print the string versions of the five functions with the lowest RMSE evolved. 
> byRMSEList<-sortByRMSE(populationsBestTenStr) 
Error: object 'totalTwo' not found 
> for(i in 1:5) 
+ { 
+ byRMSEList[[i]] 
+ } 
Error: object 'byRMSEList' not found 

Hier mein Skript. Ich verwende derzeit RStudio. Vielen Dank für die Zeit nehmen, zu helfen:

library("datasets") 

operators<-list("+","*","-","/","o") 
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)") 

#Allows me to map a name to each element in a numerical list. 
makeStrName<-function(listOfItems) 
{ 
    for(i in 1:length(listOfItems)) 
    { 
    names(listOfItems)[i]=paste("x",i,sep="") 
    } 
    return(listOfItems) 
} 

#Allows me to replace each random number in a vector with the corresponding 
#function in a list of functions. 

mapFuncList<-function(funcList,rndNumVector) 
{ 
    for(i in 1:length(funcList)) 
    { 
    rndNumVector[rndNumVector==i]<-funcList[i] 
    } 
    return(rndNumVector) 
} 

#Will generate a random function from the list of functions and a random sample. 
generateOrganism<-function(inputLen,inputSeed, funcList) 
{ 
    set.seed(inputSeed) 
    rnd<-sample(1:length(funcList),inputLen,replace=T) 
    Org<-mapFuncList(funcList,rnd) 
    return(Org) 
} 

#Will generate a series of "Organisms" 
genPopulation<-function(popSize,initialSeed,initialSize,functions) 
{ 
    population<-list() 
    for(i in 1:popSize) 
    { 
    population <- c(population,generateOrganism(initialSize,initialSeed+i,functions)) 
    } 
    populationWithNames<-makeStrName(population) 
    return(populationWithNames) 
} 

#Turns the population of functions (which are actually strings in "") into 
#actual functions. (i.e. changes the mode of the list from string to function). 

funCreator<-function(snippet) 
{ 
    txt=snippet 
    function(x) 
    { 
    exprs <- parse(text = txt) 
    eval(exprs) 
    } 
} 

#Applies a fitness function to the population. Puts the best organism in 
#the hallOfFame. 
evalPopulation<-function(populationFuncList, inputData, outputData, populationStringList) 
{ 
    #rmse <- sqrt(mean((sim - obs)^2)) 
    for(i in 1:length(populationStringList)) 
    { 
    stringFunc<-populationStringList[[i]] 
    total<-list(mode="numeric",length=length(inputData)) 
    topTenPercentFunctionList<-list() 
    topTenPercentRMSEList<-list() 
    topTenPercentStringFunctionList<-list() 
    tempFunc<-function(x){x} 
    for(z in 1:length(inputData)) 
    { 
     total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]]))) 
     tempFunc<-populationFuncList[[i]] 
    } 
    rmse<-sqrt(mean(total*total)) 
    topTenPercentVal<-length(populationFuncList)*0.1 
    if(length(topTenPercentFunctionList)<topTenPercentVal||RMSE<min(topTenPercentRMSEList)) 
    { 
     topTenPercentStringFunctionList<-c(topTenPercentStringFunctionList,stringFunc) 
     topTenPercentRMSEList<-c(topTenPercentRMSEList, rmse) 
     topTenPercentFunctionList<-c(topTenPercentFunctionList, tempFunc) 
    } 
    } 
    return(topTenPercentStringFunctionList) 
} 
#Get random operator 
getRndOp<-function(seed) 
{ 
    set.seed(seed) 
    rndOpNum<-sample(1:length(operators),1,replace=T) 
    operation<-operators[[rndOpNum]] 
    return(operation) 
} 

#Mutation Operators 

#This attaches a new appendage to an organism 
endNodeMutation<-function(strFunc,seed) 
{ 
    op<-getRndOp(seed) 
    strFunc<-c(strFunc,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFunc<-c(strFunc,newAppendage) 
    return(strFunc) 
} 

#This is a mutation that occurs at a random locaiton in an organism 
rndNodeMutation<-function(strFunc,seed,secondSeed) 
{ 
    op<-getRndOp(seed) 
    halfStrFunc<-((length(strFunc))/2) 
    set.seed(seed) 
    randomStart<-sample(1:halfStrFunc,1,replace=T) 
    set.seed(secondSeed) 
    randomEnd<-2*(sample(1:length(halfStrFunc),1,replace=T)) 
    strFuncUpdate<-substr(strFunc,randomStart,randomEnd) 
    strFuncUpdate<-c(strFuncUpdate,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFuncUpdate<-c(strFuncUpdate,newAppendage) 
    return(strFuncUpdate) 
} 

#Crossover Operators 

#Crossover operator that attaches otherStrFunc to strFunc at the endpoint of strFunc 
crossoverConcatenationOperator<-function(strFunc,otherStrFunc) 
{ 
    newStrFunc<-c(strFunc,otherStrFunc) 
    return(newStrFunc) 
} 

#Crossover Operation that starts and ends at random points in the concatenation 
randomCrossoverOperator<-function(strFunc,otherStrFunc,seed,secondSeed) 
{ 
    set.seed(seed) 
    wholeLength<-(length(strFunc)+length(otherStrFunc)) 
    startRndNum<-sample(1:length(strFunc),1,replace=T) 
    set.seed(secondSeed) 
    endRndNum<-sample(length(strFunc):wholeLength,1,replace=T) 
    concatenatedFunc<-c(strFunc,otherStrFunc) 
    newFunc<-substr(concatenatedFunc,startRndNum,endRndNum) 
    return(newFunc) 
} 
evolve<-function(strFuncList,tenPercentStrFuncList) 
{ 
    #Detach the bottom ninety percent to the top ten percent 
    evolveList<-substr(strFuncList,length(tenPercentStrFuncList),length(strFuncList)) 
    #Get sizes. Will use a random mutation, then random crossover, then 
    #random mutation, then random crossover at percentages with 0.05,0.45,0.05,0.45 
    #respectively 
    size<-length(evolveList) 
    mutateNum<-0.1*size 
    crossoverNum<-0.9*size 
    halfMutateNum<-0.05*size 
    halfCrossoverNum<-0.45*size 
    roundedMutateNum<-floor(mutateNum) 
    roundedCrossoverNum<-floor(crossoverNum) 
    roundedHalfMutateNum<-floor(halfMutateNum) 
    roundedHalfCrossoverNum<-floor(halfCrossoverNum) 

    #Calls the functions for those percentage of organisms in that order 
    for(i in 1:roundedHalfMutateNum) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTWo) 
    evolveList[[i]]<-newFunc 
    } 
    for (i in roundedHalfMutateNum:(roundedHalfCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveLIst,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
    for(i in (roundedHalfCrossoverNum+roundedHalfMutateNum):(roundedHalfCrossoverNum+roundedMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTWo) 
    evolveList[[i]]<-newFunc 
    } 
    for(i in (roundedHalfCrossoverNum+roundedMutateNum):(roundedCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveLIst,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
} 

#Calculates the root mean squared of the functions in a string list. 
#Then sorts the list by RMSE. 
sortByRMSE<-function(strL) 
{ 
    for (z in 1:length(strL)) 
    { 
    for(i in 1:length(strL)) 
    { 
     nonStrFuncList<-lapply(strL,function(x){funCreator(x)}) 
     totalTwo<-c(totalTwo,(abs(nonStrFuncList[[z]](inputData[[i]])-outputData[[i]]))) 
    } 
    rmse<-sqrt(mean(totalTwo*totalTwo)) 
    strFuncsLists<-strL[order(sapply(strL, '[[', rmse))] 
    } 
    return(strFuncsLists) 
} 

#Data, Output Goal 
desiredFuncOutput<-list(1,4,9,16,25) 
dataForInput<-list(1,2,3,4,5) 

#Generate Initial Population 
POpulation<-genPopulation(4,1,1,funcs) 
POpulationFuncList <- lapply(setNames(POpulation,names(POpulation)),function(x){funCreator(x)}) 

#Get and save top ten percent in bestDudes 
bestDudes<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput,POpulation) 
#Evolve the rest 
NewBottomNinetyPercent<-evolve(POpulation,bestDudes) 
#Concatenate the two to make a new generation 
nextGen<-c(bestDudes,NewBottomNinetyPercent) 

#Declare lists, 
populationsBestTenStr<-list() 
populationsFuncList<-list() 

#Run ten generations. 
for(i in 1:10) 
{ 
    nextGen<-makeStrName(nextGen) 
    populationsFuncList<-lapply(setNames(nextGen,names(nextGen)),function(x){funCreator(x)}) 
    populationsBestTenStr<-evalPopulation(populationsFuncList,dataForInput,desiredFuncOutput,nextGen) 
    nextGen<-evolve(populations,populationsBestTenStr) 
} 

#Print the string versions of the five functions with the lowest RMSE evolved. 
byRMSEList<-sortByRMSE(populationsBestTenStr) 
for(i in 1:5) 
{ 
    byRMSEList[[i]] 
} 

Antwort

0
library("datasets") 

operators<-list("+","*","-","/","o") 
funcs<-list("x","log(x)","sin(x)","cos(x)","tan(x)") 

# Fixed: 
# evolveLIst inconsistently typed as evolveList 
# rndCrossoverOperation inconsistently typed as randomCrossoverOperator 
# rndTWo inconsistently typed as rndTwo 
# broken substr 
# broken condition leading to for(i in 1:0) 
# misc. others 

#Allows me to map a name to each element in a numerical list. 
makeStrName<-function(listOfItems) 
{ 
    for(i in 1:length(listOfItems)) 
    { 
    names(listOfItems)[i]=paste("x",i,sep="") 
    } 
    return(listOfItems) 
} 

#Allows me to replace each random number in a vector with the corresponding 
#function in a list of functions. 

mapFuncList<-function(funcList,rndNumVector) 
{ 
    for(i in 1:length(funcList)) 
    { 
    rndNumVector[rndNumVector==i]<-funcList[i] 
    } 
    return(rndNumVector) 
} 

#Will generate a random function from the list of functions and a random sample. 
generateOrganism<-function(inputLen,inputSeed, funcList) 
{ 
    set.seed(inputSeed) 
    rnd<-sample(1:length(funcList),inputLen,replace=T) 
    Org<-mapFuncList(funcList,rnd) 
    return(Org) 
} 

#Will generate a series of "Organisms" 
genPopulation<-function(popSize,initialSeed,initialSize,functions) 
{ 
    population<-list() 
    for(i in 1:popSize) 
    { 
    population <- c(population,generateOrganism(initialSize,initialSeed+i,functions)) 
    } 
    populationWithNames<-makeStrName(population) 
    return(populationWithNames) 
} 

#Turns the population of functions (which are actually strings in "") into 
#actual functions. (i.e. changes the mode of the list from string to function). 

funCreator<-function(snippet) 
{ 
    txt=snippet 
    function(x) 
    { 
    exprs <- parse(text = txt) 
    eval(exprs) 
    } 
} 

#Applies a fitness function to the population. Puts the best organism in 
#the hallOfFame. 
evalPopulation<-function(populationFuncList=POpulationFuncList, inputData=dataForInput, outputData=desiredFuncOutput, 
         populationStringList=POpulation) 
{ 
    #rmse <- sqrt(mean((sim - obs)^2)) 
    for(i in 1:length(populationStringList)) 
    { 
    stringFunc<-populationStringList[[i]] 
    total<-as.numeric(length(inputData)) 
    topTenPercentFunctionList<-list() 
    topTenPercentRMSEList<-list() 
    topTenPercentStringFunctionList<-list() 
    tempFunc<-function(x){x} 
    for(z in 1:length(inputData)) 
    { 
     total<-c(total,(abs(populationFuncList[[i]](inputData[[z]])-outputData[[z]]))) 
     tempFunc<-populationFuncList[[i]] 
    } 
    rmse<-sqrt(mean(total^2)) 
    topTenPercentVal<-length(populationFuncList)*0.1 
    if(length(topTenPercentFunctionList)<topTenPercentVal||RMSE<min(topTenPercentRMSEList)) 
    { 
     topTenPercentStringFunctionList<-c(topTenPercentStringFunctionList,stringFunc) 
     topTenPercentRMSEList<-c(topTenPercentRMSEList, rmse) 
     topTenPercentFunctionList<-c(topTenPercentFunctionList, tempFunc) 
    } 
    } 
    return(topTenPercentStringFunctionList) 
} 
#Get random operator 
getRndOp<-function(seed) 
{ 
    set.seed(seed) 
    rndOpNum<-sample(1:length(operators),1,replace=T) 
    operation<-operators[[rndOpNum]] 
    return(operation) 
} 

#Mutation Operators 

#This attaches a new appendage to an organism 
endNodeMutation<-function(strFunc,seed) 
{ 
    op<-getRndOp(seed) 
    strFunc<-c(strFunc,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFunc<-c(strFunc,newAppendage) 
    return(strFunc) 
} 

#This is a mutation that occurs at a random locaiton in an organism 
rndNodeMutation<-function(strFunc,seed,secondSeed) 
{ 
    op<-getRndOp(seed) 
    halfStrFunc<-((length(strFunc))/2) 
    set.seed(seed) 
    randomStart<-sample(1:halfStrFunc,1,replace=T) 
    set.seed(secondSeed) 
    randomEnd<-2*(sample(1:length(halfStrFunc),1,replace=T)) 
    strFuncUpdate<-substr(strFunc,randomStart,randomEnd) 
    strFuncUpdate<-c(strFuncUpdate,op) 
    newAppendage<-generateOrganism(1,seed+2,funcs) 
    strFuncUpdate<-c(strFuncUpdate,newAppendage) 
    return(strFuncUpdate) 
} 

#Crossover Operators 

#Crossover operator that attaches otherStrFunc to strFunc at the endpoint of strFunc 
crossoverConcatenationOperator<-function(strFunc,otherStrFunc) 
{ 
    newStrFunc<-c(strFunc,otherStrFunc) 
    return(newStrFunc) 
} 

#Crossover Operation that starts and ends at random points in the concatenation 
rndCrossoverOperation<-function(strFunc,otherStrFunc,seed,secondSeed) # fixed function name 
{ 
    set.seed(seed) 
    wholeLength<-(length(strFunc)+length(otherStrFunc)) 
    startRndNum<-sample(1:length(strFunc),1,replace=T) 
    set.seed(secondSeed) 
    endRndNum<-sample(length(strFunc):wholeLength,1,replace=T) 
    concatenatedFunc<-c(strFunc,otherStrFunc) 
    newFunc<-substr(concatenatedFunc,startRndNum,endRndNum) 
    return(newFunc) 
} 
evolve<-function(strFuncList=POpulation,tenPercentStrFuncList=bestDudes) 
{ 
    #Detach the bottom ninety percent to the top ten percent 
    evolveList<-strFuncList[!strFuncList %in% tenPercentStrFuncList] # fixed broken substring 
    #Get sizes. Will use a random mutation, then random crossover, then 
    #random mutation, then random crossover at percentages with 0.05,0.45,0.05,0.45 
    #respectively 
    size<-length(evolveList) 
    mutateNum<-0.1*size 
    crossoverNum<-0.9*size 
    halfMutateNum<-0.05*size 
    halfCrossoverNum<-0.45*size 
    roundedMutateNum<-floor(mutateNum) 
    roundedCrossoverNum<-floor(crossoverNum) 
    roundedHalfMutateNum<-floor(halfMutateNum) 
    roundedHalfCrossoverNum<-floor(halfCrossoverNum) 

    #Calls the functions for those percentage of organisms in that order 
    if(roundedHalfMutateNum < 1) roundedHalfMutateNum <- 1 
    for(i in 1:roundedHalfMutateNum) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTwo) # fixed case 
    evolveList[[i]]<-newFunc 
    } 
    for (i in roundedHalfMutateNum:(roundedHalfCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveList,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
    for(i in (roundedHalfCrossoverNum+roundedHalfMutateNum):(roundedHalfCrossoverNum+roundedMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndNodeMutation(evolveList[[i]],rndOne,rndTwo) 
    evolveList[[i]]<-newFunc 
    } 
    for(i in (roundedHalfCrossoverNum+roundedMutateNum):(roundedCrossoverNum+roundedHalfMutateNum)) 
    { 
    set.seed(i) 
    rndOne<-sample(0:1000,1,replace=T) 
    set.seed(i+10000) 
    rndTwo<-sample(0:10000,1,replace=T) 
    newFunc<-rndCrossoverOperation(evolveList[[i]],evolveList[[i+1]],rndOne,rndTwo) 
    firstSubstr<-substr(evolveList,1,i-1) 
    secondSubstr<-substr(evolveList,i+2,length(evolveList)) 
    halfSubstr<-c(firstSubstr,newFunc) 
    evolveList<-c(halfSubstr,secondSubstr) 
    } 
} 

#Calculates the root mean squared of the functions in a string list. 
#Then sorts the list by RMSE. 
sortByRMSE<-function(strL) 
{ 
    for (z in 1:length(strL)) 
    { 
    for(i in 1:length(strL)) 
    { 
     nonStrFuncList<-lapply(strL,function(x){funCreator(x)}) 
     totalTwo<-c(totalTwo,(abs(nonStrFuncList[[z]](inputData[[i]])-outputData[[i]]))) 
    } 
    rmse<-sqrt(mean(totalTwo*totalTwo)) 
    strFuncsLists<-strL[order(sapply(strL, '[[', rmse))] 
    } 
    return(strFuncsLists) 
} 

#Data, Output Goal 
desiredFuncOutput<-list(1,4,9,16,25) 
dataForInput<-list(1,2,3,4,5) 

#Generate Initial Population 
POpulation<-genPopulation(4,1,1,funcs) 
POpulationFuncList <- lapply(setNames(POpulation,names(POpulation)),function(x){funCreator(x)}) 

#Get and save top ten percent in bestDudes 
bestDudes<-evalPopulation(POpulationFuncList,dataForInput,desiredFuncOutput,POpulation) 
#Evolve the rest 
NewBottomNinetyPercent<-evolve(POpulation,bestDudes) 
#Concatenate the two to make a new generation 
nextGen<-c(bestDudes,NewBottomNinetyPercent) 

#Declare lists, 
populationsBestTenStr<-list() 
populationsFuncList<-list() 

#Run ten generations. 
for(i in 1:10) 
{ 
    nextGen<-makeStrName(nextGen) 
    populationsFuncList<-lapply(setNames(nextGen,names(nextGen)),function(x){funCreator(x)}) 
    populationsBestTenStr<-evalPopulation(populationsFuncList,dataForInput,desiredFuncOutput,nextGen) 
    nextGen<-evolve(populations,populationsBestTenStr) 
} 

#Print the string versions of the five functions with the lowest RMSE evolved. 
byRMSEList<-sortByRMSE(populationsBestTenStr) 
for(i in 1:5) 
{ 
    byRMSEList[[i]] 
} 
+0

ich, dass in meinem Code zu tun haben. Siehe "#Data, Output Goal sitedFuncOutput <-list (1,4,9,16,25) dataForInput <-list (1,2,3,4,5)" Aus irgendeinem Grund hat es nicht zu Ihrem kopiert . Fixed mapStrFunc, danke für den Tipp –

+0

Es liegt direkt über dem Aufruf von evalPopulation in den Code, den ich gepostet habe. –

+0

@ Novize-Polymath OK Ich habe einen anderen Fehler behoben und jetzt funktioniert es durch 'evolve', aber ich weiß nicht, was du mit dem 'substr' machen wolltest. Es scheint die "POpulation" -Werte willkürlich abzuschneiden. Ich sehe den Kommentar für diese Zeile, aber ich verstehe es nicht wirklich. Kannst du mir helfen? Ich muss zu einer Besprechung gehen, aber ich komme später darauf zurück. –