The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
weight of attributes -
Dear RapidMiner Community!
I am a newbie here, the same as in data science. I am doing my first analysis project for the college assignment.
I tried to find the answer here in the forum and followed the suggestions, but still I am stuck.
The data set I am working on has got 35 attributes, the target one is binominal (yes/no).
Before I choose the most relevant attributes for further exploration and for examining correlation, I want to see what percentage of positive values ('Yes') there is for every attribute.
I will appreciate any help as a beginner student.
I am a newbie here, the same as in data science. I am doing my first analysis project for the college assignment.
I tried to find the answer here in the forum and followed the suggestions, but still I am stuck.
The data set I am working on has got 35 attributes, the target one is binominal (yes/no).
Before I choose the most relevant attributes for further exploration and for examining correlation, I want to see what percentage of positive values ('Yes') there is for every attribute.
I will appreciate any help as a beginner student.
Best Answers
MartinLiebig Administrator, Moderator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, University Professor Posts: 3,533
RM Data Scientist
Hi @GosiaRze, you can use Aggregate with a default aggregation of sum and group by your assignment attribute. Best, Martin - Sr. Director Data Solutions, Altair RapidMiner -
Dortmund, Germany6 -
GosiaRze Member Posts: 3
Learner I
@mschmitz - Thank you! I tried the Aggregate operator yesterday, but I am still making a mistake somewhere.
If I understood correctly:
"Aggregate-> Default Aggregation -> Sum -> Group by attributes -> (my attribute) "
What I get is the sum of the data in the different columns, e.g. for the column "Age" I got the sum of the age values for "Yes" and for "No" of my target attribute. That is not what I am looking for.
I changed "Default Aggregation -> Sum" to "Default Aggregation -> Count (percentage)", but the results for every column are the same - every column shows the % of Yes and No from my target attribute.
What I am trying to get is: what % of Yes from my target attribute is linked to every column?
In other words, what % of examples in every column is defined by Yes and No from the target column?
What mistake am I making?
0 -
Telcontar120 RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 1,635
You can also run a Naive Bayes classifier and then output the model, which shows the distribution table which will have the % of Yes and No for each value of each attribute.
<!-- RapidMiner process: for every nominal attribute of the Golf sample data, pivot value counts against the "Play" attribute. -->
<!-- NOTE(review): the pasted snippet had lost all closing tags; they are restored here from the operator/parameter nesting. Verify against the original export. -->
<!-- NOTE(review): exported processes are usually wrapped in a <process version="..."> root with an XML declaration; that header is not visible in this snippet. Confirm before importing. -->
<operator activated="true" class="process" compatibility="9.7.000" expanded="true" name="Process">
  <parameter key="logverbosity" value="init"/>
  <parameter key="random_seed" value="2001"/>
  <parameter key="send_mail" value="never"/>
  <parameter key="notification_email" value=""/>
  <parameter key="process_duration_for_mail" value="30"/>
  <parameter key="encoding" value="SYSTEM"/>
  <process expanded="true">
    <!-- Load the Golf sample data set from the repository. -->
    <operator activated="true" class="retrieve" compatibility="9.7.000" expanded="true" height="68" name="Retrieve Golf" width="90" x="179" y="136">
      <parameter key="repository_entry" value="//Samples/data/Golf"/>
    </operator>
    <!-- Filter by value type so that only nominal attributes are passed on. -->
    <operator activated="true" class="select_attributes" compatibility="9.7.000" expanded="true" height="82" name="Select Attributes" width="90" x="313" y="136">
      <parameter key="attribute_filter_type" value="value_type"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="nominal"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <description align="center" color="transparent" colored="false" width="126">only nominal</description>
    </operator>
    <!-- Run the inner process once per attribute; the current attribute name is exposed as the macro %{loop_attribute}. -->
    <operator activated="true" class="concurrency:loop_attributes" compatibility="9.7.000" expanded="true" height="82" name="Loop Attributes" width="90" x="447" y="136">
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="attribute_name_macro" value="loop_attribute"/>
      <parameter key="reuse_results" value="false"/>
      <parameter key="enable_parallel_execution" value="true"/>
      <process expanded="true">
        <!-- Group rows by "Play", spread the values of the current attribute into columns, and count them. -->
        <operator activated="true" class="blending:pivot" compatibility="9.7.000" expanded="true" height="82" name="Pivot (2)" width="90" x="179" y="34">
          <parameter key="group_by_attributes" value="Play"/>
          <parameter key="column_grouping_attribute" value="%{loop_attribute}"/>
          <list key="aggregation_attributes">
            <parameter key="%{loop_attribute}" value="count"/>
          </list>
          <parameter key="use_default_aggregation" value="false"/>
          <parameter key="default_aggregation_function" value="first"/>
        </operator>
        <!-- NOTE(review): presumably adds a column named "attribute" built from the loop macro so each result can be traced to its source attribute; confirm the intended expression. -->
        <operator activated="true" class="generate_attributes" compatibility="9.7.000" expanded="true" height="82" name="Generate Attributes" width="90" x="514" y="34">
          <list key="function_descriptions">
            <parameter key="attribute" value="%{loop_attribute}"/>
          </list>
          <parameter key="keep_all" value="true"/>
        </operator>
        <connect from_port="input 1" to_op="Pivot (2)" to_port="input"/>
        <connect from_op="Pivot (2)" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
        <connect from_op="Generate Attributes" from_port="example set output" to_port="output 1"/>
        <portSpacing port="source_input 1" spacing="0"/>
        <portSpacing port="source_input 2" spacing="0"/>
        <portSpacing port="sink_output 1" spacing="0"/>
        <portSpacing port="sink_output 2" spacing="0"/>
      </process>
    </operator>
    <!-- Deactivated (activated="false") and not connected below. -->
    <operator activated="false" class="operator_toolbox:advanced_append" compatibility="2.7.000-SNAPSHOT" expanded="true" height="68" name="Append (Superset)" width="90" x="581" y="238"/>
    <!-- Deactivated (activated="false") and not connected below: counts "Outlook" grouped by Play and Outlook. -->
    <operator activated="false" class="aggregate" compatibility="9.7.000" expanded="true" height="82" name="Aggregate" width="90" x="447" y="289">
      <parameter key="use_default_aggregation" value="false"/>
      <parameter key="attribute_filter_type" value="all"/>
      <parameter key="attribute" value=""/>
      <parameter key="attributes" value=""/>
      <parameter key="use_except_expression" value="false"/>
      <parameter key="value_type" value="attribute_value"/>
      <parameter key="use_value_type_exception" value="false"/>
      <parameter key="except_value_type" value="time"/>
      <parameter key="block_type" value="attribute_block"/>
      <parameter key="use_block_type_exception" value="false"/>
      <parameter key="except_block_type" value="value_matrix_row_start"/>
      <parameter key="invert_selection" value="false"/>
      <parameter key="include_special_attributes" value="false"/>
      <parameter key="default_aggregation_function" value="count"/>
      <list key="aggregation_attributes">
        <parameter key="Outlook" value="count"/>
      </list>
      <parameter key="group_by_attributes" value="Play|Outlook"/>
      <parameter key="count_all_combinations" value="false"/>
      <parameter key="only_distinct" value="false"/>
      <parameter key="ignore_missings" value="true"/>
    </operator>
    <connect from_op="Retrieve Golf" from_port="output" to_op="Select Attributes" to_port="example set input"/>
    <connect from_op="Select Attributes" from_port="example set output" to_op="Loop Attributes" to_port="input 1"/>
    <connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
    <portSpacing port="source_input 1" spacing="0"/>
    <portSpacing port="sink_result 1" spacing="0"/>
    <portSpacing port="sink_result 2" spacing="0"/>
  </process>
</operator>
Dortmund, Germany
Dortmund, Germany