2016-06-11 7 views
0

Ich habe eine Liste, bei der die Gesamtzahl der Attribute zunächst unbekannt ist. (Titel der Frage: Alle möglichen Paare von Attributen in RapidMiner auswählen)

Ich möchte alle Paare von Attributen berücksichtigen, ohne vorher zu wissen, wie viele Attribute es gibt.

Es gibt zwar den Operator Loop Attribute Subsets, aber leider hat er keinen Ausgang.

Derzeit sieht mein Prozess wie folgt aus:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process from the question. It builds a small demo example set and
  concatenates the attribute pairs att1/att2, att1/att3 and att2/att3. The three
  pairs are wired by hand as parallel branches, so this layout is fixed to the
  pairs listed here and does not adapt to a different number of attributes.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Demo data: five user-specified example sets with Group_1, Group_2 and Group_3 are appended; the nominal value "M" is then declared as missing. -->
      <operator activated="true" class="subprocess" compatibility="7.1.001" expanded="true" height="82" name="Generate Data" width="90" x="45" y="75">
        <process expanded="true">
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="180" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="315" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- The last two generators contain the value "M", which Declare Missing Value below turns into missing values. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="450" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="585" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;M&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="148" name="Append" width="90" x="720" y="30"/>
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="855" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="M"/>
          </operator>
          <!-- Wiring note: the first generator feeds Append port 2 and the second feeds port 1. -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stores the number of attributes in the macro num_attr; no other operator in this process references that macro. -->
      <operator activated="true" class="extract_macro" compatibility="7.1.001" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="75">
        <parameter key="macro" value="num_attr"/>
        <parameter key="macro_type" value="number_of_attributes"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Runs with default parameters; presumably produces the names att1, att2, att3 that the operators below refer to (TODO confirm against the operator defaults). -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (2)" width="90" x="313" y="75"/>
      <!-- Fan-out into three branches, one per hard-coded attribute pair. -->
      <operator activated="true" class="multiply" compatibility="7.1.001" expanded="true" height="124" name="Multiply (2)" width="90" x="179" y="300"/>
      <!-- Branch 1: pair att1/att2. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|att1|att2"/>
      </operator>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (5)" width="90" x="514" y="210"/>
      <!-- Branch 2: pair att1/att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1||att3"/>
      </operator>
      <!-- Branch 3: pair att2/att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (4)" width="90" x="380" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2||att3"/>
      </operator>
      <!-- Each branch is renamed again after the selection; presumably so that every pair ends up as att1/att2, which is what the concatenation operators below expect (TODO confirm). -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (3)" width="90" x="514" y="390"/>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (4)" width="90" x="514" y="300"/>
      <!-- Per branch: keep only the examples selected by the no_missing_attributes condition. -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="648" y="210">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="648" y="300">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (4)" width="90" x="648" y="390">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Per branch: concatenate att1 and att2. Note that the numbering is reversed: Concatenation (2) belongs to branch 3 and Concatenation (4) to branch 1 (see the connections below). -->
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (2)" width="90" x="782" y="390">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (3)" width="90" x="782" y="300">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (4)" width="90" x="782" y="210">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Collects the three branch results into one example set. -->
      <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="124" name="Append (3)" width="90" x="916" y="255"/>
      <!-- Main chain: data generation, macro extraction, generic renaming, fan-out. -->
      <connect from_op="Generate Data" from_port="out 1" to_op="Extract Macro (2)" to_port="example set"/>
      <connect from_op="Extract Macro (2)" from_port="example set" to_op="Rename by Generic Names (2)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <!-- The "original" port of the first rename is delivered as the second process result. -->
      <connect from_op="Rename by Generic Names (2)" from_port="original" to_port="result 2"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Generic Names (5)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (5)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename by Generic Names (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Rename by Generic Names (3)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (4)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generate Concatenation (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Generate Concatenation (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Generate Concatenation (2)" to_port="example set input"/>
      <connect from_op="Generate Concatenation (2)" from_port="example set output" to_op="Append (3)" to_port="example set 3"/>
      <connect from_op="Generate Concatenation (3)" from_port="example set output" to_op="Append (3)" to_port="example set 2"/>
      <connect from_op="Generate Concatenation (4)" from_port="example set output" to_op="Append (3)" to_port="example set 1"/>
      <connect from_op="Append (3)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

Antwort

1

Das ist eine schwierige Frage. Der Operator Loop Subsets gibt kein einzelnes Beispielset zurück, da er mehrere verschiedene Beispielsets erzeugt, die jeweils aus Attributen bestehen, die aus Kombinationen der Eingabeattribute gebildet wurden. Um dies zu umgehen, können die Operatoren Recall und Remember verwendet werden, um ein laufendes Gesamtergebnis zu speichern. Damit ist die Sache aber noch nicht erledigt, denn in der Regel wird ein einzelnes Beispielset benötigt, sodass einige Verrenkungen nötig sind, um die Attribute umzubenennen und die Ergebnisse zu verknüpfen.

Um es kurz zu machen, ich habe einen eigenständigen Beispielprozess beigefügt, der all dies veranschaulicht. Es wird nicht ohne Anpassung an Ihre Daten funktionieren.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process from the answer. Loop Subsets iterates over attribute
  subsets of the Iris sample data with minimum and maximum size 2. Each
  iteration concatenates the two attributes, names the result after the current
  subset and joins it onto an example set kept under the name runningTotal.
  After the loop the remembered result is joined with the original data.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- One copy goes into the loop, the other is joined with the loop result at the end. -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="136"/>
      <!-- The subset size is pinned to 2 through min_number_of_attributes and max_number_of_attributes (with limit_max_number enabled). -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="true"/>
        <parameter key="max_number_of_attributes" value="2"/>
        <process expanded="true">
          <!-- Logs the feature_names value of Loop Subsets under the column name Attributes. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="112" y="34">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
            </list>
          </operator>
          <!-- Turns the log table named Log into an example set. -->
          <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="112" y="238">
            <parameter key="log_name" value="Log"/>
          </operator>
          <!-- Copies the logged Attributes value (example index 1) into the macro "remember", then clears the log table. Input 1 is passed through unchanged to output 1. -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="Subprocess" width="90" x="246" y="238">
            <process expanded="true">
              <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="136">
                <parameter key="macro" value="remember"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Attributes"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <operator activated="true" class="clear_log" compatibility="7.0.001" expanded="true" height="82" name="Clear Log" width="90" x="380" y="136">
                <parameter key="log_name" value="Log"/>
                <parameter key="delete_table" value="true"/>
              </operator>
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Clear Log" to_port="through 1"/>
              <connect from_op="Clear Log" from_port="through 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <!-- The accompanying answer text states that this operator is required although it should not be. -->
          <operator activated="true" class="materialize_data" compatibility="7.0.001" expanded="true" height="82" name="Materialize Data" width="90" x="246" y="34"/>
          <!-- Runs with default parameters; presumably yields att1 and att2 for the current pair, as the following operators reference those names (TODO confirm). -->
          <operator activated="true" class="rename_by_generic_names" compatibility="7.0.001" expanded="true" height="82" name="Rename by Generic Names" width="90" x="380" y="34"/>
          <operator activated="true" class="generate_concatenation" compatibility="7.0.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="380" y="136">
            <parameter key="first_attribute" value="att1"/>
            <parameter key="second_attribute" value="att2"/>
          </operator>
          <!-- Inverted subset selection: att1 and att2 themselves are removed, the remaining attributes are kept. -->
          <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="238">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="att2|att1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Renames att1_att2 to the value of the "remember" macro extracted above. -->
          <operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
            <parameter key="old_name" value="att1_att2"/>
            <parameter key="new_name" value="%{remember}"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- Accumulates the results. According to the accompanying answer text, the first iteration has no previous result and that case is handled by this operator. -->
          <operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="514" y="136">
            <!-- First subprocess: recall runningTotal, join the current result onto it and remember the joined set again. -->
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="45" y="187">
                <parameter key="name" value="runningTotal"/>
                <parameter key="remove_from_store" value="false"/>
              </operator>
              <!-- NOTE(review): the key attribute Play is not referenced anywhere else in this process; presumably a leftover that is ignored if Join uses the id attribute as key by default. Confirm before relying on it. -->
              <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="179" y="34">
                <list key="key_attributes">
                  <parameter key="Play" value="Play"/>
                </list>
              </operator>
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="246" y="187">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Join" to_port="left"/>
              <connect from_op="Recall (2)" from_port="result" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
              <connect from_op="Remember" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <!-- Second subprocess: presumably the fallback that runs when the first one fails; it simply stores the current result as runningTotal (TODO confirm the subprocess roles). -->
            <process expanded="true">
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="179" y="34">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
              <connect from_op="Remember (2)" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <!-- Loop body wiring; the output of Handle Exception is not connected to a loop output port. -->
          <connect from_port="example set" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_op="Subprocess" to_port="in 2"/>
          <connect from_op="Log to Data" from_port="through 1" to_op="Subprocess" to_port="in 1"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Rename by Generic Names" to_port="example set input"/>
          <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
          <connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!-- Recalls runningTotal after the loop. Its input "in 1" is connected from Loop Subsets but not used inside; per the accompanying answer text this wiring is there to get the execution order right. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="246" y="85">
            <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_op="Recall" from_port="result" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Joins the accumulated result (left) with the original Iris copy (right); no explicit key attributes are configured. -->
      <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="581" y="136">
        <list key="key_attributes"/>
      </operator>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>

Punkte

  • Der Operator Loop Subsets ist so eingestellt, dass er Attributpaare liefert.
  • Die Verwendung von Log und Log to Data innerhalb von Loop Subsets ermöglicht es, das aktuelle Attributpaar zu protokollieren, in ein Beispielset zu überführen und anschließend in ein Makro zu kopieren.
  • Attribute werden in einen allgemeinen Namen umbenannt, verkettet und anschließend wird das Ergebnis wieder in den ursprünglichen Namen umbenannt.
  • Ein laufender Gesamtbeispielsatz wird erstellt, indem Join für die vorherige Iteration verwendet wird. Beim ersten Mal gibt es keine vorherige Iteration und dies wird vom Operator Handle Exception gehandhabt.
  • Außerhalb des Operators Loop Subsets wird das Beispielset mit dem laufenden Gesamtergebnis innerhalb eines Subprocess abgerufen, um sicherzustellen, dass die Ausführungsreihenfolge stimmt.
  • Die laufende Summe wird mit den Originaldaten verknüpft, um zu sehen, ob sie funktioniert oder nicht.

Ein letzter Punkt: Der Operator Materialize Data ist erforderlich, obwohl er es eigentlich nicht sein sollte.

+0

Vielen Dank, Andrew. Ich lerne tatsächlich eine Menge aus diesem Prozess. Ich habe aber ein paar Fragen, deren Beantwortung mir hoffentlich beim Verständnis hilft. Warum rufen Sie innerhalb des Unterprozesses von 'Loop Subsets' den Operator "Ergebnisse (Protokoll)" direkt nach dem Operator 'Log' auf? Wie können Sie den Operator 'Recall (2) (Recall)' verwenden, ohne vorher den Operator 'Remember' zu benutzen? Und wofür ist der Operator 'Remember (3)' direkt nach dem Operator 'Recall' innerhalb des 'Subprocess' da? – Ali

+0

Ich habe den Prozess bearbeitet, um ihn zu vereinfachen, und einige Erklärungen hinzugefügt. Die erste Version führte eine zusätzliche Protokollierung durch, die nicht benötigt wird, und es gab einen zusätzlichen 'Recall', der ebenfalls nicht benötigt wurde. – awchisholm

Verwandte Themen