Ich erzeuge mit Groovy eine ARFF-Datei aus einer XLSX-Datei, aber wenn ich versuche, diese Datei in Weka zu öffnen, bekomme ich folgende Fehlermeldung (sinngemäß: „Nominalwert nicht im Header deklariert“):
File "..." not recognised as an 'Arff data files' file. Reason: nominal value not declared in header, read Token[Ativo], line 16
Ich verstehe nicht, warum ich diesen Fehler bekomme. Kann mir jemand helfen, ihn zu beheben, und erklären, warum er auftritt?
Generierte Datei:
@relation kd-itempedido
@attribute tipopedido {Assistencia,Recompra,Venda,Troca}
@attribute aprovado {0.0,1.0}
@attribute fasepedido {Aprovado,Cancelado,EmAprovacao,Liberado,Novo}
@attribute statusinternopedido {NegociarPagamento,PedidosDeTeste,AguardandoOcorrencia,Nada,AguardandoBoletoDeposito,PedidoDuplicado,SuspeitaDeFraude}
@attribute canal {Marketplace,Desktop}
@attribute origem {LojasAmericanas,Optimise,MercadoLivre,Cityads,Zanox,Zoom,Rakuten,Lomadee,Facebook,Viptarget,Submarino,Criteo,Muccashop,Chaordic,Walmart,Googlead,Nada,Extra,Lojaskd,Shopback,Afilio,Shoptime,Nextperformance,CarrinhoAbandonado,Bing}
@attribute mercado {S,N}
@attribute cluster {EntregaImediata,Fiprec,Icconv,Esgotado}
@attribute statusitem {Ativo}
@attribute statusproduto {Inativo,Ativo,AtivoSemEstoque,ForaDeLinha}
@attribute polo {Polo1,Polo3,Polo2}
@data
Venda,0.0,Novo,Nada,Desktop,Googlead,S,Fiprec,Ativo,Ativo,Polo2
Venda,0.0,Novo,Nada,Desktop,Googlead,S,Fiprec,Ativo,Ativo,Polo2
Venda,0.0,Novo,Nada,Desktop,Googlead,S,Ativo,Inativo,Polo2
Venda,0.0,Novo,Nada,Desktop,Muccashop,N,Ativo,Ativo,Polo3
Groovy-Skript (VM-Optionen: -Dfile.encoding=ascii UTF8 UTF8):
@Grapes([
@Grab('org.apache.poi:poi:3.10.1'),
@Grab('org.apache.poi:poi-ooxml:3.10.1')])
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import java.text.Normalizer
import static org.apache.poi.ss.usermodel.Cell.*
import java.nio.file.Paths
// Conversion settings: source workbook, target ARFF file, relation name and
// the (normalised, lower-case) header names of the columns to export.
def xlsxPath = "/home/eric/Documents/development/ufpr/Solid Eric/ItemPedido1000.xlsx"
def arffPath = "ItemPedido.arff"
def relationName = "kd-itempedido"
def selectedColumns = ["tipopedido", "aprovado", "fasepedido", "statusinternopedido", "canal", "origem", "mercado", "cluster", "statusitem","statusproduto", "polo"]

// The constructor performs the whole conversion (load, collect, write).
new XslxToArffParser(xlsxPath, relationName, selectedColumns, arffPath)
/**
 * Holds the data rows that are written to the {@code @data} section of the
 * ARFF file. Each row is a list of cell values.
 */
class Data {
    /** Rows in insertion order; every element is the value list of one row. */
    def rows = new ArrayList<List>();

    /**
     * Renders all rows as text.
     *
     * @return one line per row, values separated by "," and every line
     *         terminated by "\n"; an empty string when there are no rows
     */
    @Override
    String toString() {
        def builder = new StringBuilder()
        for (r in rows) {
            // join() separates by position. The previous indexOf()-based check
            // looked up the FIRST occurrence of a value, so a row whose last
            // value also appeared earlier received a trailing comma.
            builder.append(r.join(",")).append("\n")
        }
        return builder.toString()
    }
}
/**
 * A nominal ARFF attribute: its name, the spreadsheet column it comes from and
 * the set of distinct values seen in that column.
 */
class Atributo {
    /** Attribute name as written after {@code @attribute}. */
    def descricao;
    /**
     * Distinct values of the attribute. A LinkedHashSet keeps first-seen order,
     * so the generated header is the same on every run.
     */
    def possibilidades = new LinkedHashSet<Object>();
    /** Zero-based column index of the attribute in the sheet. */
    def index;

    /**
     * Renders the attribute declaration.
     *
     * @return {@code "@attribute <name> {v1,v2,...}"} followed by "\n"
     */
    @Override
    String toString() {
        def builder = new StringBuilder()
        builder.append("@attribute ").append(descricao)
        // join() walks the set once; indexing a set by position walks it
        // again for every element.
        builder.append(" {").append(possibilidades.join(",")).append("}").append("\n")
        return builder.toString();
    }
}
/**
 * Converts the first sheet of an XLSX workbook into an ARFF file with nominal
 * attributes. Constructing an instance runs the whole conversion.
 */
class XslxToArffParser {
    /** ARFF notation for a missing value. */
    static final String MISSING_VALUE = "?"

    /** Selected attributes keyed by column index, in header (column) order. */
    def attributes =[:];
    /** Collected data rows. */
    def data = new Data();
    /** First sheet of the loaded workbook. */
    def sheet = null;

    /**
     * Loads the workbook, selects the attributes, collects the rows and writes
     * the ARFF file.
     *
     * @param path     path of the XLSX file to read
     * @param relation name written after {@code @relation}
     * @param columns  normalised, lower-case header names of the columns to export
     * @param arffPath path of the ARFF file to write
     */
    XslxToArffParser(path, relation, columns, arffPath){
        load(path)
        getAttributes(columns)
        collectData()
        saveArff(relation, arffPath)
    }

    /**
     * Normalises a cell text into an identifier: strips diacritics, splits on
     * non-word characters, capitalises every part, joins the parts and removes
     * everything that is not an ASCII letter or digit.
     *
     * @param s raw cell text
     * @return the normalised text, or "Nada" when nothing is left
     */
    String parse(String s){
        s = Normalizer.normalize(s, Normalizer.Form.NFD)
        s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "")
        s = s.split(/[^\w]/).collect { it.toLowerCase().capitalize() }.join("")
        // Also drops underscores, which \w let through in the split above.
        s = s.replaceAll("[^A-Za-z0-9]", "")
        return s.isEmpty() ? "Nada" : s
    }

    /**
     * Opens the workbook at the given path and keeps its first sheet.
     *
     * @param path path of the XLSX file
     */
    def load(path) {
        Paths.get(path).withInputStream { input ->
            def workbook = new XSSFWorkbook(input)
            sheet = workbook.getSheetAt(0)
        }
    }

    /**
     * Reads the header row (row 0) and registers an attribute for every column
     * whose normalised, lower-cased header is contained in {@code columns}.
     *
     * @param columns header names to export
     */
    def getAttributes(columns){
        for (cell in sheet.getRow(0).cellIterator()) {
            def index = cell.columnIndex
            def description = parse(cell.stringCellValue).toLowerCase()
            if(columns.contains(description)){
                attributes << [(index):new Atributo(descricao: description, index: index)]
            }
        }
    }

    /**
     * Collects one data row per sheet row after the header.
     *
     * Every selected column is looked up by index, so each row yields exactly
     * one value per attribute. row.cellIterator() skips undefined cells, which
     * made rows with an empty cell shorter and shifted the remaining values
     * into the wrong attribute. Missing or blank cells are written as "?" and
     * are not added to the attribute's declared values.
     */
    def collectData(){
        def headerFlag = true
        for (row in sheet.rowIterator()) {
            if (headerFlag) {
                headerFlag = false
                continue
            }
            def r = []
            for (attr in attributes.values()) {
                def cell = row.getCell(attr.index)
                if (cell == null || cell.cellType == CELL_TYPE_BLANK) {
                    r << MISSING_VALUE
                    continue
                }
                def value = cell.cellType == CELL_TYPE_STRING ? parse(cell.stringCellValue) : cell.numericCellValue
                attr.possibilidades.add(value)
                r << value
            }
            data.rows.add(r)
        }
    }

    /**
     * Writes the relation name, the attribute declarations and the data rows
     * to the ARFF file.
     *
     * @param relation name written after {@code @relation}
     * @param path     path of the ARFF file to write
     */
    def saveArff(relation, path){
        Paths.get(path).withWriter { writer ->
            writer.write "@relation " + relation
            writer.write "\n"
            for(a in attributes.values())
                writer.write a.toString()
            writer.write "@data"
            writer.write "\n"
            writer.write data.toString()
        }
    }
}
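Gelöst: „row.cellIterator()“ iteriert nicht über fehlende (null) bzw. leere Zellen. Dadurch enthalten Zeilen mit leeren Zellen zu wenige Werte, und die restlichen Werte rutschen in die falsche Spalte (z. B. „Ativo“ in die Spalte „cluster“).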