2017-06-11 4 views
0

Eine neue Spalte auf der Grundlage einer bestehenden Spalte erzeugen

Angenommen, ich möchte eine neue Spalte b basierend auf der vorhandenen Spalte a erzeugen. In Spalte a habe ich folgende Werte:

3,3,5,5,7,7,9,9 

Liegt ein Wert von a im Perzentilbereich [0,25], (25,50], (50,75] bzw. (75,100], soll die neue Spalte den Wert 1, 2, 3 bzw. 4 erhalten. Für die neue Spalte b ergeben sich also die Werte 1,1,2,2,3,3,4,4.

Wie könnte diese neue Spalte in RapidMiner erzeugt werden?

Antwort

0

Eine Kombination aus den Operatoren „Normalize“ und „Discretize by User Specification“ ist ein möglicher Ansatz.

Der Normalize-Operator skaliert die Werte des Attributs auf den Bereich zwischen 0 und 1, und der Discretize-Operator bestimmt, in welchem Perzentilbereich der jeweilige Wert liegt.

Hier ist ein Beispielprozess, den Sie als Basis nutzen könnten.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner example process: bins the numeric attribute "a" into four classes.

  1. "Subprocess" builds sample data in which a takes the values 3, 5, 7 and 9
     and every example occurs twice.
  2. "Normalize" rescales a with the "range transformation" method; the target
     bounds are left at the operator defaults (0 to 1).
  3. "Discretize" maps the rescaled values onto the classes 1, 2, 3 and 4 using
     the upper limits 0.25, 0.5, 0.75 and 1.0.

  Both operators are restricted to attribute a via attribute_filter_type="single";
  without it the "attribute" parameter has no effect and all eligible attributes
  of the example set would be transformed.

  NOTE(review): the cut points are fractions of the value range, not percentiles
  of the value distribution. The two agree for evenly spread data such as the
  sample set; confirm this is acceptable for real data.
  NOTE(review): the discretized result replaces attribute a in place; copy a
  beforehand if the original column must be kept.
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Sample data: four generated sets (a = 3, 5, 7, 9) are appended, duplicated and appended again. -->
      <operator activated="true" class="subprocess" compatibility="7.5.000" expanded="true" height="82" name="Subprocess" width="90" x="179" y="85">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="a" value="3"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="a" value="5"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="238">
            <list key="attribute_values">
              <parameter key="a" value="7"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.5.000" expanded="true" height="68" name="Generate Data by User Specification (4)" width="90" x="45" y="340">
            <list key="attribute_values">
              <parameter key="a" value="9"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="145" name="Append" width="90" x="313" y="34"/>
          <!-- Multiply feeds two copies of the merged set into Append (2), so each example appears twice. -->
          <operator activated="true" class="multiply" compatibility="7.5.000" expanded="true" height="103" name="Multiply" width="90" x="447" y="34"/>
          <operator activated="true" class="append" compatibility="7.5.000" expanded="true" height="103" name="Append (2)" width="90" x="648" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Append" from_port="merged set" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Append (2)" to_port="example set 1"/>
          <connect from_op="Multiply" from_port="output 2" to_op="Append (2)" to_port="example set 2"/>
          <connect from_op="Append (2)" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Rescale only attribute a; "single" makes the "attribute" parameter take effect. -->
      <operator activated="true" class="normalize" compatibility="7.5.000" expanded="true" height="103" name="Normalize" width="90" x="380" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <parameter key="method" value="range transformation"/>
      </operator>
      <!-- Each list entry is class name (key) and upper limit (value) on the rescaled attribute a. -->
      <operator activated="true" class="discretize_by_user_specification" compatibility="7.5.000" expanded="true" height="103" name="Discretize" width="90" x="581" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a"/>
        <list key="classes">
          <parameter key="1" value="0.25"/>
          <parameter key="2" value="0.5"/>
          <parameter key="3" value="0.75"/>
          <parameter key="4" value="1.0"/>
        </list>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Normalize" to_port="example set input"/>
      <connect from_op="Normalize" from_port="example set output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Verwandte Themen