The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
Split attributes from one to many repositories
Marco_P
RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 2 Contributor I
Hey,
I have a repository with over 20,000 attributes (different types etc.). I'd like to split the repository into many, whereby each new repository should contain 600 attributes out of the 20,000. If I joined them again, I would get the original data set back.
Ideas?
Thx
Marco_P
Tagged:
0
Best Answer
-
MartinLiebig Administrator, Moderator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,533 RM Data Scientist
Hi Marco,
That turned out to be trickier than I expected. Have a look at the attached process; I think this solves it.
~Martin
<?xml version="1.0" encoding="UTF-8"?><process version="7.2.001">
<!--
  Demo process: cut an example set into consecutive groups of attributes.
  A Loop runs a fixed number of iterations; each iteration computes a
  min/max position range from the iteration number, builds a weight vector
  for the attributes in that range, and applies it to a copy of the input
  data, so each iteration delivers one slice of the attributes.
  Shown here on the Sonar sample data with a range width of 5 and 5 iterations.
-->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="7.2.001" expanded="true" name="Process">
<process expanded="true">
<!-- Input data: the Sonar sample set. Point repository_entry at your own data to reuse the process. -->
<operator activated="true" class="retrieve" compatibility="7.2.001" expanded="true" height="68" name="Retrieve Sonar" width="90" x="45" y="85">
<parameter key="repository_entry" value="//Samples/data/Sonar"/>
</operator>
<!--
  One iteration per attribute group. set_iteration_macro is enabled; the
  inner process reads the iteration number through the "iteration" macro.
  NOTE(review): to cover every attribute, iterations presumably has to be
  ceil(number of attributes / group size), e.g. 34 for 20,000 attributes in
  groups of 600. With the values below only positions 1 to 25 are covered.
-->
<operator activated="true" class="loop" compatibility="7.2.001" expanded="true" height="82" name="Loop" width="90" x="179" y="85">
<parameter key="set_iteration_macro" value="true"/>
<parameter key="iterations" value="5"/>
<process expanded="true">
<!--
  Defines the range for the current iteration:
    max = iteration * 5
    min = (iteration - 1) * 5 + 1
  so iteration 1 gives 1..5, iteration 2 gives 6..10, and so on.
  The group size 5 appears in both expressions; change both together.
-->
<operator activated="true" class="generate_macro" compatibility="7.2.001" expanded="true" height="82" name="Generate Macro" width="90" x="45" y="136">
<list key="function_descriptions">
<parameter key="max" value="(eval(%{iteration}))*5"/>
<parameter key="min" value="(eval(%{iteration})-1)*5+1"/>
</list>
<description align="center" color="transparent" colored="false" width="126">Change the #attributes here</description>
</operator>
<!-- Two copies of the data: output 1 goes to Select by Weights, output 2 into the Subprocess that builds the weights. -->
<operator activated="true" class="multiply" compatibility="7.2.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="136"/>
<!--
  Builds the weight vector for the current range. Chain, as wired below:
  Transpose, Select Attributes (single attribute "id"), Filter Example Range
  (first = min macro, last = max macro), Transpose (2), Data to Weights.
  NOTE(review): presumably the first Transpose turns attributes into rows
  whose "id" column holds the original attribute names, so the row range
  picks attributes by position, and the second Transpose turns those names
  back into attributes. Confirm against the operator documentation.
-->
<operator activated="true" class="subprocess" compatibility="7.2.001" expanded="true" height="103" name="Subprocess" width="90" x="313" y="187">
<process expanded="true">
<operator activated="true" class="transpose" compatibility="7.2.001" expanded="true" height="82" name="Transpose" width="90" x="45" y="34"/>
<!-- Keep only the "id" attribute of the transposed table. -->
<operator activated="true" class="select_attributes" compatibility="7.2.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="id"/>
</operator>
<!-- Keep only the rows from position min to position max, as set by Generate Macro for this iteration. -->
<operator activated="true" class="filter_example_range" compatibility="7.2.001" expanded="true" height="82" name="Filter Example Range" width="90" x="313" y="34">
<parameter key="first_example" value="%{min}"/>
<parameter key="last_example" value="%{max}"/>
</operator>
<operator activated="true" class="transpose" compatibility="7.2.001" expanded="true" height="82" name="Transpose (2)" width="90" x="447" y="34"/>
<!-- Produces the weights delivered on out 1; the example set on out 2 is not used by the enclosing process. -->
<operator activated="true" class="data_to_weights" compatibility="7.2.001" expanded="true" height="82" name="Data to Weights" width="90" x="581" y="34"/>
<connect from_port="in 1" to_op="Transpose" to_port="example set input"/>
<connect from_op="Transpose" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
<connect from_op="Select Attributes" from_port="example set output" to_op="Filter Example Range" to_port="example set input"/>
<connect from_op="Filter Example Range" from_port="example set output" to_op="Transpose (2)" to_port="example set input"/>
<connect from_op="Transpose (2)" from_port="example set output" to_op="Data to Weights" to_port="example set"/>
<connect from_op="Data to Weights" from_port="weights" to_port="out 1"/>
<connect from_op="Data to Weights" from_port="example set" to_port="out 2"/>
<portSpacing port="source_in 1" spacing="0"/>
<portSpacing port="source_in 2" spacing="0"/>
<portSpacing port="sink_out 1" spacing="0"/>
<portSpacing port="sink_out 2" spacing="0"/>
<portSpacing port="sink_out 3" spacing="0"/>
</process>
<description align="center" color="transparent" colored="false" width="126">Get a good weight vector</description>
</operator>
<!--
  Applies the weights from the Subprocess to the untouched copy of the data
  and passes the result to the loop output.
  NOTE(review): presumably this keeps exactly the attributes present in the
  weight vector; no parameters are set here, so operator defaults apply.
-->
<operator activated="true" class="select_by_weights" compatibility="7.2.001" expanded="true" height="103" name="Select by Weights" width="90" x="447" y="136"/>
<!--
  Deactivated and not connected; no repository_entry is set.
  NOTE(review): to write each slice to its own repository entry this
  operator would presumably have to be activated, wired after Select by
  Weights, and given an entry name that includes the iteration macro.
-->
<operator activated="false" class="store" compatibility="7.2.001" expanded="true" height="68" name="Store" width="90" x="581" y="187"/>
<connect from_port="input 1" to_op="Generate Macro" to_port="through 1"/>
<connect from_op="Generate Macro" from_port="through 1" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Select by Weights" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_op="Subprocess" to_port="in 1"/>
<connect from_op="Subprocess" from_port="out 1" to_op="Select by Weights" to_port="weights"/>
<connect from_op="Select by Weights" from_port="example set output" to_port="output 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="source_input 2" spacing="0"/>
<portSpacing port="sink_output 1" spacing="0"/>
<portSpacing port="sink_output 2" spacing="0"/>
</process>
</operator>
<!-- Top-level wiring: the retrieved data feeds the Loop, and the Loop's output 1 is delivered as process result 1. -->
<connect from_op="Retrieve Sonar" from_port="output" to_op="Loop" to_port="input 1"/>
<connect from_op="Loop" from_port="output 1" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
</process>
</operator>
</process>- Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany
0
Answers
Hey Martin,
thx a lot. Very nice solution and works well.
Best Regards
Marco