The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is in read-only mode from October 28 to November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here

Row name changes in Turbo Prep and apply calculations to all cells

graceweigracewei Member Posts: 9 Learner III
Hi, 

Brief summary of my data set: 
I have 6152 columns/attributes and 156 rows/examples. My columns are DNA names, and my rows are patient IDs. Simply put, the values are the number of mRNAs in each DNA for each patient, and these are transformed by log. 

I have two separate questions about RapidMiner: 

1. Every time I run the process, the first column of the result table is the row names, as I wanted. However, after I click on Turbo Prep, the row name column always becomes the last column. I have tried the "Set Role" operator, setting the target role to "label" and to "id", but it still doesn't work. How can I fix this problem? 

2. I want to apply the same formula to all the cells/values (except for the column names and row names) in my table. I want to reverse the log base 10 transformation; in other words, each value x should become 10^x. How can I achieve that? I was thinking of using the GENERATE function in Turbo Prep, but it seems that it only creates a new column, and my dataset is too large for that. 

Thank you in advance! 
Tagged:

Answers

  • cdapontecdaponte Member Posts: 29 Maven
    Hi, for the first question you can try using the Reorder Attributes operator. And for the second question, you can do it with operators like Replace or Map. 
  • yyhuangyyhuang Administrator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    Hi @gracewei,

    I don't think RapidMiner uses row names the way R does. If your mRNA data has meaningful row names, you can convert the row names to a proper column of the data and set it to a special role (the role can be called rname or name).

    If you want to generate new columns with a formula like 10^(att) for 6000+ columns, the "Loop Attributes" operator is suggested here.
    Inside "Loop Attributes", you will use "Generate Attributes" for the transformations.

    Cheers,

    YY




  • yyhuangyyhuang Administrator, Employee-RapidMiner, RapidMiner Certified Analyst, RapidMiner Certified Expert, Member Posts: 364 RM Data Scientist
    Example "Loop Attributes" with "Generate Attributes"

    <?xml version="1.0" encoding="UTF-8"?><process version="9.3.001">
      <!-- Example process: load a sample data set, then loop over its attributes and
           replace the values v of each looped attribute with 10^v (undoing a log10 transform).
           Result 1 is the transformed data, result 2 is the untouched input. -->
      <context>
        <input/>
        <output/>
        <macros/>
      </context>
      <operator activated="true" class="process" compatibility="9.3.001" expanded="true" name="Process">
        <parameter key="logverbosity" value="init"/>
        <parameter key="random_seed" value="2001"/>
        <parameter key="send_mail" value="never"/>
        <parameter key="notification_email" value=""/>
        <parameter key="process_duration_for_mail" value="30"/>
        <parameter key="encoding" value="SYSTEM"/>
        <process expanded="true">
          <!-- Load the example data from the repository entry named below; point
               repository_entry at your own data to reuse this process. -->
          <operator activated="true" class="retrieve" compatibility="9.3.001" expanded="true" height="68" name="Retrieve boston-housing-prices" width="90" x="45" y="34">
            <parameter key="repository_entry" value="//Keras Samples/boston_housing_prices_regression/boston-housing-prices"/>
          </operator>
          <!-- Duplicate the data: output 1 feeds the loop, output 2 is wired straight to
               result 2 (see the connect elements near the end of this process). -->
          <operator activated="true" class="multiply" compatibility="9.3.001" expanded="true" height="103" name="Multiply" width="90" x="179" y="187"/>
          <!-- Loop over the attributes. The filter is "all" with include_special_attributes=false,
               so presumably only regular (non-special) attributes are visited; TODO confirm.
               The name of the current attribute is published in the macro "loop_attribute". -->
          <operator activated="true" class="concurrency:loop_attributes" compatibility="9.3.001" expanded="true" height="82" name="Loop Attributes" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="all"/>
            <parameter key="attribute" value=""/>
            <parameter key="attributes" value=""/>
            <parameter key="use_except_expression" value="false"/>
            <parameter key="value_type" value="attribute_value"/>
            <parameter key="use_value_type_exception" value="false"/>
            <parameter key="except_value_type" value="time"/>
            <parameter key="block_type" value="attribute_block"/>
            <parameter key="use_block_type_exception" value="false"/>
            <parameter key="except_block_type" value="value_matrix_row_start"/>
            <parameter key="invert_selection" value="false"/>
            <parameter key="include_special_attributes" value="false"/>
            <parameter key="attribute_name_macro" value="loop_attribute"/>
            <!-- NOTE(review): reuse_results=true presumably passes each iteration's output to the
                 next iteration, so the per-attribute replacements accumulate in one data set.
                 It is set together with enable_parallel_execution=true; confirm in the operator
                 documentation how these two settings interact. -->
            <parameter key="reuse_results" value="true"/>
            <parameter key="enable_parallel_execution" value="true"/>
            <process expanded="true">
              <!-- Step 1: add a new attribute NEW_ATT = 10^(current loop attribute).
                   The #{loop_attribute} form presumably resolves the macro to the attribute
                   it names rather than to a plain string; TODO confirm. -->
              <operator activated="true" class="generate_attributes" compatibility="9.3.001" expanded="true" height="82" name="Generate Attributes" width="90" x="246" y="34">
                <list key="function_descriptions">
                  <parameter key="NEW_ATT" value="10^(#{loop_attribute})"/>
                </list>
                <parameter key="keep_all" value="true"/>
              </operator>
              <!-- Step 2: select the single attribute named by the macro with invert_selection=true,
                   i.e. drop the original attribute and keep everything else, including NEW_ATT. -->
              <operator activated="true" class="select_attributes" compatibility="9.3.001" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
                <parameter key="attribute_filter_type" value="single"/>
                <parameter key="attribute" value="%{loop_attribute}"/>
                <parameter key="attributes" value=""/>
                <parameter key="use_except_expression" value="false"/>
                <parameter key="value_type" value="attribute_value"/>
                <parameter key="use_value_type_exception" value="false"/>
                <parameter key="except_value_type" value="time"/>
                <parameter key="block_type" value="attribute_block"/>
                <parameter key="use_block_type_exception" value="false"/>
                <parameter key="except_block_type" value="value_matrix_row_start"/>
                <parameter key="invert_selection" value="true"/>
                <parameter key="include_special_attributes" value="false"/>
              </operator>
              <!-- Step 3: rename NEW_ATT to the original attribute's name, so the transformed
                   values take over the old name. -->
              <operator activated="true" class="rename" compatibility="9.3.001" expanded="true" height="82" name="Rename" width="90" x="648" y="34">
                <parameter key="old_name" value="NEW_ATT"/>
                <parameter key="new_name" value="%{loop_attribute}"/>
                <list key="rename_additional_attributes"/>
              </operator>
              <!-- Inner wiring: loop input, then Generate Attributes, Select Attributes, Rename, loop output. -->
              <connect from_port="input 1" to_op="Generate Attributes" to_port="example set input"/>
              <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
              <connect from_op="Rename" from_port="example set output" to_port="output 1"/>
              <portSpacing port="source_input 1" spacing="0"/>
              <portSpacing port="source_input 2" spacing="0"/>
              <portSpacing port="sink_output 1" spacing="0"/>
              <portSpacing port="sink_output 2" spacing="0"/>
            </process>
          </operator>
          <!-- Outer wiring: Retrieve into Multiply; Multiply output 1 into the loop and output 2 to
               result 2 (original data); the loop output goes to result 1 (transformed data). -->
          <connect from_op="Retrieve boston-housing-prices" from_port="output" to_op="Multiply" to_port="input"/>
          <connect from_op="Multiply" from_port="output 1" to_op="Loop Attributes" to_port="input 1"/>
          <connect from_op="Multiply" from_port="output 2" to_port="result 2"/>
          <connect from_op="Loop Attributes" from_port="output 1" to_port="result 1"/>
          <portSpacing port="source_input 1" spacing="0"/>
          <portSpacing port="sink_result 1" spacing="0"/>
          <portSpacing port="sink_result 2" spacing="0"/>
          <portSpacing port="sink_result 3" spacing="0"/>
        </process>
      </operator>
    </process>
    


Sign In or Register to comment.