change attributes order
Hi there
I have a data set with a patient_ID column and several other attributes, one per disease, where each column contains 1 or 0.
ID Diabetes Hypertension Depression
102 1 0 1
So how can I generate a new attribute that lists all current diseases for each patient_ID, such as this:
ID Disease
102 Diabetes, Depression
Any thoughts?
Thanks,
Abbas
Best Answer
-
JEdward RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 578 Unicorn
@abbaszavar
I'd take a very similar approach to @Telcontar120 to do this. Macros are variables that you can generate and use within a process in many different ways. In this example, the macro holds the attribute name, which is then used to replace values in your dataset.
Here is an example process you can paste into RapidMiner.
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example RapidMiner process: turn one 0/1 indicator column per disease into a
  single "Disease" attribute that lists, per ID, the diseases flagged with 1.

  Steps, in connection order:
    1. Create ExampleSet: builds the sample data from inline CSV text.
    2. Data Prep: marks ID as the id role and converts numerical columns to
       polynominal.
    3. Loop Attributes: for each attribute, renames it to "Disease", keeps only
       that attribute, and replaces 1 with the attribute name (via the
       loop_attribute macro) and 0 with nothing.
    4. Append: merges the per-attribute results into one example set.
    5. Aggregate: concatenates Disease grouped by ID.
    6. Tidy Names: renames concat(Disease) back to Disease.
-->
<process version="8.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.1.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="operator_toolbox:create_exampleset" compatibility="1.0.000" expanded="true" height="68" name="Create ExampleSet" width="90" x="45" y="34">
        <parameter key="generator_type" value="comma_separated_text"/>
        <list key="function_descriptions"/>
        <list key="numeric_series_configuration"/>
        <list key="date_series_configuration"/>
        <list key="date_series_configuration (interval)"/>
        <!-- Records are separated with &#10; (line feed). A literal line break or a
             space inside the attribute value would collapse all records into the
             header row. -->
        <parameter key="input_csv_text" value="ID,Diabetes,Hypertension,Depression&#10;102,1,0,1&#10;103,1,1,1&#10;104,0,1,1"/>
      </operator>
      <operator activated="true" class="subprocess" compatibility="8.1.001" expanded="true" height="82" name="Data Prep" width="90" x="179" y="34">
        <process expanded="true">
          <operator activated="true" class="set_role" compatibility="8.1.001" expanded="true" height="82" name="Set Role" width="90" x="45" y="34">
            <parameter key="attribute_name" value="ID"/>
            <parameter key="target_role" value="id"/>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="numerical_to_polynominal" compatibility="8.1.001" expanded="true" height="82" name="Numerical to Polynominal" width="90" x="179" y="34"/>
          <connect from_port="in 1" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Numerical to Polynominal" to_port="example set input"/>
          <connect from_op="Numerical to Polynominal" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="concurrency:loop_attributes" compatibility="8.1.001" expanded="true" height="82" name="Loop Attributes" width="90" x="313" y="34">
        <process expanded="true">
          <!-- %{loop_attribute} is the macro holding the name of the attribute
               handled in the current iteration. -->
          <operator activated="true" class="rename" compatibility="8.1.001" expanded="true" height="82" name="Rename" width="90" x="45" y="34">
            <parameter key="old_name" value="%{loop_attribute}"/>
            <parameter key="new_name" value="Disease"/>
            <list key="rename_additional_attributes"/>
            <description align="center" color="transparent" colored="false" width="126">Rename the target attribute as Disease.</description>
          </operator>
          <operator activated="true" class="select_attributes" compatibility="8.1.001" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Disease"/>
            <description align="center" color="transparent" colored="false" width="126">Keep only our Disease Attribute</description>
          </operator>
          <!-- A value of 1 becomes the original attribute (disease) name. -->
          <operator activated="true" class="replace" compatibility="8.1.001" expanded="true" height="82" name="Replace 1" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Disease"/>
            <parameter key="replace_what" value="1"/>
            <parameter key="replace_by" value="%{loop_attribute}"/>
          </operator>
          <!-- No replace_by is given here, so a value of 0 is presumably replaced
               by the operator's empty default. -->
          <operator activated="true" class="replace" compatibility="8.1.001" expanded="true" height="82" name="Replace 0" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="Disease"/>
            <parameter key="replace_what" value="0"/>
          </operator>
          <connect from_port="input 1" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Replace 1" to_port="example set input"/>
          <connect from_op="Replace 1" from_port="example set output" to_op="Replace 0" to_port="example set input"/>
          <connect from_op="Replace 0" from_port="example set output" to_port="output 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="source_input 2" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
          <portSpacing port="sink_output 2" spacing="0"/>
          <description align="center" color="yellow" colored="false" height="56" resized="true" width="247" x="315" y="157">Replace the values in the attribute.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">This generates a collection of each disease which are then joined back together into a single example set.</description>
      </operator>
      <operator activated="true" class="append" compatibility="8.1.001" expanded="true" height="82" name="Append" width="90" x="447" y="34"/>
      <operator activated="true" class="aggregate" compatibility="8.1.001" expanded="true" height="82" name="Aggregate" width="90" x="581" y="34">
        <list key="aggregation_attributes">
          <parameter key="Disease" value="concatenation"/>
        </list>
        <parameter key="group_by_attributes" value="ID"/>
        <description align="center" color="transparent" colored="false" width="126">Concatenate Disease grouped by ID.</description>
      </operator>
      <!-- Rename the aggregate's concat(Disease) output column back to Disease. -->
      <operator activated="true" class="rename" compatibility="8.1.001" expanded="true" height="82" name="Tidy Names" width="90" x="715" y="34">
        <parameter key="old_name" value="concat(Disease)"/>
        <parameter key="new_name" value="Disease"/>
        <list key="rename_additional_attributes"/>
      </operator>
      <connect from_op="Create ExampleSet" from_port="output" to_op="Data Prep" to_port="in 1"/>
      <connect from_op="Data Prep" from_port="out 1" to_op="Loop Attributes" to_port="input 1"/>
      <connect from_op="Loop Attributes" from_port="output 1" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_op="Tidy Names" to_port="example set input"/>
      <connect from_op="Tidy Names" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Answers
If you use Loop Attributes, you should be able to replace the 0/1 value with the name of the attribute itself or blank using an IF expression and a macro for the attribute name inside Generate Attributes.
Then you can concatenate using Generate Attributes again to put that all together into a single attribute. There might be other more elegant ways as well, but this one should work.
Lindon Ventures
Data Science Consulting from Certified RapidMiner Experts
Thanks, Brian.
I haven't used Loop, macro or others that you mentioned before.
Could you please explain more for me?
Thanks in advance,
Abbas
Thank you so much!
It is an amazing solution.
May I have your email address so I can send you a sample of the data set and the output? There seems to be a problem, because the process only considers one of my attributes.
Thanks again,
Abbas