Combining values from common examples into new attributes
Hi All
I have the following list:
Drug ID Drug Name Ingredient
23 Centrum Vitamin D
23 Centrum Vitamin B
23 Centrum Vitamin C
I need to convert this into:
Drug ID Drug Name Ingredient1 Ingredient 2 Ingredient 3
23 Centrum Vitamin D Vitamin B Vitamin C
I have tried everything and played around, but could not get an answer...
Help ??
Arsalan
Best Answer
-
MartinLiebig Administrator, Moderator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,533 RM Data Scientist
Hey,
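you can use the Aggregate operator, grouping by Drug ID and Drug Name and applying the concatenation function to the ingredient attribute (the result column is named concat(Ingridient)). This gives you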
Drug ID Drug Name Ingredient
23 Centrum Vitamin D|Vitamin B|Vitamin C
Afterwards you can use a Split operator to split the Ingredient on \| to get the desired output. Attached is the demo process.
Best,
Martin
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Demo process: collapse several rows that share Drug ID / Drug Name into one
  row whose ingredients are spread over separate attributes.
  Pipeline: Subprocess (sample data) -> Aggregate -> Rename -> Split.
  The attribute name "Ingridient" (sic) is kept as-is because every operator
  below refers to it by exactly this spelling.
-->
<process version="7.3.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.3.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Builds the sample data: three generated example sets appended into one. -->
      <operator activated="true" breakpoints="after" class="subprocess" compatibility="7.3.001" expanded="true" height="82" name="Subprocess" width="90" x="45" y="85">
        <process expanded="true">
          <!-- String values are wrapped in escaped quotes (&quot;) so that the
               attribute value stays well-formed XML while still carrying a
               quoted string literal. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.001" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="Drug ID" value="23"/>
              <parameter key="Drug Name" value="&quot;Centrum&quot;"/>
              <parameter key="Ingridient" value="&quot;Vitamin A&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.001" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="187">
            <list key="attribute_values">
              <parameter key="Drug ID" value="23"/>
              <parameter key="Drug Name" value="&quot;Centrum&quot;"/>
              <parameter key="Ingridient" value="&quot;Vitamin B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="7.3.001" expanded="true" height="68" name="Generate Data by User Specification (3)" width="90" x="45" y="289">
            <list key="attribute_values">
              <parameter key="Drug ID" value="23"/>
              <parameter key="Drug Name" value="&quot;Centrum&quot;"/>
              <parameter key="Ingridient" value="&quot;Vitamin C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- Stack the three generated example sets into a single example set. -->
          <operator activated="true" class="append" compatibility="7.3.001" expanded="true" height="124" name="Append" width="90" x="380" y="85"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Generate Data</description>
      </operator>
      <!-- Concatenate all Ingridient values per (Drug Name, Drug ID) group.
           The accompanying post shows the result as a pipe-separated string. -->
      <operator activated="true" class="aggregate" compatibility="7.3.001" expanded="true" height="82" name="Aggregate" width="90" x="179" y="85">
        <list key="aggregation_attributes">
          <parameter key="Ingridient" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="Drug Name|Drug ID"/>
      </operator>
      <!-- Give the aggregated column its original name back. -->
      <operator activated="true" class="rename" compatibility="7.3.001" expanded="true" height="82" name="Rename" width="90" x="313" y="85">
        <parameter key="old_name" value="concat(Ingridient)"/>
        <parameter key="new_name" value="Ingridient"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <!-- Split the concatenated value on a literal pipe; the backslash escapes
           "|" because the split pattern is a regular expression. -->
      <operator activated="true" class="split" compatibility="7.3.001" expanded="true" height="82" name="Split" width="90" x="514" y="85">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Ingridient"/>
        <parameter key="split_pattern" value="\|"/>
      </operator>
      <connect from_op="Subprocess" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Split" to_port="example set input"/>
      <connect from_op="Split" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
- Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany0
Answers
That's a tricky one. Maybe @mschmitz might know.
Wooh.. that was super... I did some playing around as I had 7 more attributes to handle as well.. but your tip helped me solve this...
Thanks
Arsalan