Non-nominal label: the label attribute must be nominal

rgavankeulen · December 2016

Hi,

I encountered a problem that I'm unable to solve.

When I do cross-validation for KNN, I get an error pop-up for the function Performance (classification): "Non-nominal label: the label attribute must be nominal"

I don't understand why I get this pop-up, since the data is all nominal before the KNN function?

Also, when I use the function "polynominal to binominal", I get the error below: "Numerical label not supported"

Kind regards,

k

IngoRM · December 2016

Hi,

Can you please post the XML of your process here? If possible including the data. Hard to tell what is going on otherwise...

Best,

Ingo

alinebora · August 2018

I am having the exact same problem, and I came here looking for a solution in previous questions, but our colleague here either threw in the towel or may have found another way and didn't share it with us.

Anyway... Please help unicorns T-T @IngoRM

Here is the 2nd part of my process (after the data was cleaned, which I posted separately as process 1 below). I have already tried the operators 'nominal to text', 'text to nominal'... I don't know what else to do.

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <!--
    Overview: two parallel branches read the same repository entry.
    Branch 1 (operators at y=34): Retrieve, Select Attributes, Set Role,
    Process Documents from Data, Set Role (3), Cross Validation (SVM).
    Branch 2 (operators at y=136): Retrieve (2), Select Attributes (2),
    Set Role (2), Process Documents from Data (2), Set Role (4); its output is
    scored by Apply Model (2) using the model port of Cross Validation.
    Results: result 1 = Cross Validation example set, result 2 = Apply Model (2)
    labelled data, result 3 = Cross Validation test result set.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Branch 1: training data. -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//AirlineData/AirData/Analise do Sucesso (Resultado) 1 Fellowship"/>
      </operator>
      <!-- Keeps the five attributes listed in "attributes" (filter type "subset"). -->
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="User-ID"/>
        <parameter key="attributes" value="User-ID|followers_count|friends_count|sentiment|text"/>
      </operator>
      <!-- NOTE(review): User-ID receives the "label" role here, and Set Role (3) below
           assigns "label" to sentiment. Presumably User-ID was meant to be an id rather
           than a label; confirm the intended role. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="313" y="34">
        <parameter key="attribute_name" value="User-ID"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Text preprocessing chain: Tokenize, Transform Cases, Filter Stopwords (English),
           Filter Tokens (by Length), all with default parameters. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="34">
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="179" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="447" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Makes sentiment the label used by Cross Validation.
           NOTE(review): the value type of sentiment is not visible in this XML. If it is
           numerical rather than nominal, that would be consistent with a
           "Non-nominal label" error downstream; verify the type in the repository entry. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (3)" width="90" x="581" y="34">
        <parameter key="attribute_name" value="sentiment"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- 4-fold cross validation with stratified sampling.
           First subprocess = training, second subprocess = testing. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="9.0.000" expanded="true" height="145" name="Cross Validation" width="90" x="849" y="34">
        <parameter key="number_of_folds" value="4"/>
        <parameter key="sampling_type" value="stratified sampling"/>
        <process expanded="true">
          <!-- Training: Select Attributes (3) with filter "no_missing_values", then
               Nominal to Text (defaults), then SVM (defaults). The SVM model goes to the
               model port and its weights to "through 1".
               NOTE(review): which attributes Nominal to Text affects depends on operator
               defaults not shown here; confirm it is needed before the SVM. -->
          <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (3)" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="no_missing_values"/>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="179" y="34"/>
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="246" y="187"/>
          <connect from_port="training set" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <connect from_op="SVM" from_port="weights" to_port="through 1"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <portSpacing port="sink_through 2" spacing="0"/>
        </process>
        <process expanded="true">
          <!-- Testing: applies the fold model to the test set and feeds the labelled data
               to Performance. Note that the test set does not pass through the
               Select Attributes (3) / Nominal to Text steps used in training. -->
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="112" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="246" y="34"/>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="source_through 2" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
        </process>
      </operator>
      <!-- Branch 2: scoring data. Reads the same repository entry as branch 1 and repeats
           the same attribute selection, role assignment and text preprocessing. -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve (2)" width="90" x="45" y="136">
        <parameter key="repository_entry" value="//AirlineData/AirData/Analise do Sucesso (Resultado) 1 Fellowship"/>
      </operator>
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="179" y="136">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attribute" value="User-ID"/>
        <parameter key="attributes" value="User-ID|followers_count|friends_count|sentiment|text"/>
      </operator>
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="313" y="136">
        <parameter key="attribute_name" value="User-ID"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- NOTE(review): no connection in this process feeds a word list from
           "Process Documents from Data" into this operator, so the word vector built
           here may not match the attributes the model was trained on; confirm. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data (2)" width="90" x="447" y="136">
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize (2)" width="90" x="45" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases (2)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (2)" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (2)" width="90" x="447" y="34"/>
          <connect from_port="document" to_op="Tokenize (2)" to_port="document"/>
          <connect from_op="Tokenize (2)" from_port="document" to_op="Transform Cases (2)" to_port="document"/>
          <connect from_op="Transform Cases (2)" from_port="document" to_op="Filter Stopwords (2)" to_port="document"/>
          <connect from_op="Filter Stopwords (2)" from_port="document" to_op="Filter Tokens (2)" to_port="document"/>
          <connect from_op="Filter Tokens (2)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- NOTE(review): this branch assigns the label role to "text", while the training
           branch uses "sentiment" as label. Verify that "text" still exists after
           Process Documents from Data (2) and that this role is intended. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (4)" width="90" x="581" y="136">
        <parameter key="attribute_name" value="text"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Scores the branch 2 data with the model delivered by Cross Validation. -->
      <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model (2)" width="90" x="983" y="238">
        <list key="application_parameters"/>
      </operator>
      <!-- Wiring of branch 1, then branch 2, then the result ports. -->
      <connect from_op="Retrieve" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Set Role (3)" to_port="example set input"/>
      <connect from_op="Set Role (3)" from_port="example set output" to_op="Cross Validation" to_port="example set"/>
      <connect from_op="Cross Validation" from_port="model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Cross Validation" from_port="example set" to_port="result 1"/>
      <connect from_op="Cross Validation" from_port="test result set" to_port="result 3"/>
      <connect from_op="Retrieve (2)" from_port="output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
      <connect from_op="Set Role (2)" from_port="example set output" to_op="Process Documents from Data (2)" to_port="example set"/>
      <connect from_op="Process Documents from Data (2)" from_port="example set" to_op="Set Role (4)" to_port="example set input"/>
      <connect from_op="Set Role (4)" from_port="example set output" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

alinebora · August 2018

Below is process 1, in which I loaded the datasets, cleaned them, and so on. After this process was done, I saved the result and retrieved it in process 2, trying to perform Cross Validation with SVM.

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Data Files" width="90" x="45" y="34">
        <process expanded="true">
          <!--
            Read Airline 1: reads the Excel file Alitalia.xlsx and declares meta data
            for 88 columns (keys 0 to 87).
            Each entry value appears to be "name.selected.value_type.role"
            (TODO confirm against the Read Excel documentation).
            Declared value types:
              integer:   display_text_width, favorite_count, retweet_count,
                         followers_count, friends_count, listed_count,
                         statuses_count, favourites_count
              date_time: created_at, account_created_at
              all other columns: polynominal
            NOTE(review): the quoted_* and retweet_* count columns (keys 34, 35, 39 to 41,
            49, 50, 54 to 56) and quoted_created_at / retweet_created_at are declared
            polynominal, unlike the account level counts and dates; confirm this is intended.
            NOTE(review): excel_file is an absolute path on one user's machine, so the
            process will presumably not run elsewhere without changing it.
          -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 1" width="90" x="179" y="34">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\Alitalia.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!--
            Read Airline 2: reads the Excel file airfrance.xlsx and declares meta data
            for 88 columns (keys 0 to 87).
            Each entry value appears to be "name.selected.value_type.role"
            (TODO confirm against the Read Excel documentation).
            Declared value types:
              integer:   display_text_width, favorite_count, retweet_count,
                         followers_count, friends_count, listed_count,
                         statuses_count, favourites_count
              date_time: created_at, account_created_at
              all other columns: polynominal
            NOTE(review): the quoted_* and retweet_* count columns (keys 34, 35, 39 to 41,
            49, 50, 54 to 56) and quoted_created_at / retweet_created_at are declared
            polynominal, unlike the account level counts and dates; confirm this is intended.
            NOTE(review): excel_file is an absolute path on one user's machine, so the
            process will presumably not run elsewhere without changing it.
          -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 2" width="90" x="179" y="85">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\airfrance.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 3" width="90" x="179" y="187">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\easyJet.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Read Airline 4: read_excel operator that loads klm.xlsx from an absolute, user-specific
               Windows path. The data_set_meta_data_information list declares 88 columns (keys 0 to 87),
               each written as name.true.type.attribute. created_at and account_created_at are date_time;
               display_text_width, favorite_count, retweet_count, followers_count, friends_count,
               listed_count, statuses_count and favourites_count are integer; every other column is
               polynominal, including the quoted_* and retweet_* counts and timestamps.
               NOTE(review): the absolute path presumably has to be adjusted on any other machine. -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 4" width="90" x="179" y="289">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\klm.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Read Airline 5: read_excel operator that loads ryanair.xlsx from an absolute, user-specific
               Windows path. It declares the same 88-column layout (keys 0 to 87) as the "Read Airline 4"
               operator: created_at and account_created_at are date_time, eight count/width columns are
               integer, and every other column is polynominal. The date_format and
               read_not_matching_values_as_missings settings are also the same.
               NOTE(review): the absolute path presumably has to be adjusted on any other machine. -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 5" width="90" x="179" y="391">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\ryanair.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Append: receives the outputs of Read Airline 1 to 5 on ports "example set 1" to
               "example set 5" and passes its "merged set" output to the subprocess port "out 1". -->
          <operator activated="true" class="append" compatibility="9.0.000" expanded="true" height="166" name="Append" width="90" x="380" y="34"/>
          <connect from_port="in 1" to_op="Read Airline 1" to_port="file"/>
          <connect from_op="Read Airline 1" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Read Airline 2" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Read Airline 3" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Read Airline 4" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Read Airline 5" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Clean up data: subprocess chaining Select Attributes, Rename and Replace.
           Select Attributes keeps the subset text|status_id|user_id|followers_count|friends_count.
           Rename changes user_id to User-ID.
           Replace targets User-ID|status_id with replace_what set to "x"; no replace_by parameter
           is present, so the match is presumably replaced by the operator default (TODO confirm
           that this strips the "x" from the id values). -->
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Clean up data" width="90" x="179" y="34">
        <process expanded="true">
          <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="text|status_id|user_id|followers_count|friends_count"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
            <parameter key="old_name" value="user_id"/>
            <parameter key="new_name" value="User-ID"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.0.000" expanded="true" height="82" name="Replace" width="90" x="313" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="User-ID"/>
            <parameter key="attributes" value="User-ID|status_id"/>
            <parameter key="replace_what" value="x"/>
          </operator>
          <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Replace" to_port="example set input"/>
          <connect from_op="Replace" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Removing unneeded attributes, renaming the needed ones.</description>
      </operator>
      <!-- ETL Subprocess: chains Remove Duplicates, Filter Examples, Set Role and Nominal to Text.
           Remove Duplicates compares on the single attribute User-ID.
           Filter Examples uses the condition text.contains.RT with invert_filter set to true,
           so it presumably keeps only rows whose text does not contain "RT" (TODO confirm).
           Nominal to Text converts the single attribute text.
           NOTE(review): Set Role assigns the label role to status_id. The reply later in this
           thread states that status_id should carry the id role rather than label, because label
           is reserved for the attribute being predicted. -->
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="ETL Subprocess" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="remove_duplicates" compatibility="9.0.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="User-ID"/>
            <description align="center" color="transparent" colored="false" width="126">Remove Duplicate Tweets from same user</description>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="text.contains.RT"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="313" y="34">
            <parameter key="attribute_name" value="status_id"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
            <description align="center" color="transparent" colored="false" width="126">Set Role for Label</description>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
          </operator>
          <connect from_port="in 1" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Binning for Label subprocess - suspect</description>
      </operator>
      <!-- Process Documents from Data: text-extension operator whose inner process pipes each
           document through Tokenize, Transform Cases, Filter Stopwords (English) and
           Extract Sentiment (English). Extract Sentiment also receives a dictionary from
           Open WordNet Dictionary, which reads the local directory C:\WordNet\dict.
           The text operators declare compatibility 8.1.000 and the wordnet operators 5.3.000.
           NOTE(review): the WordNet directory is a machine-specific absolute path. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="34">
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="112" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="246" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="380" y="34"/>
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="380" y="136">
            <parameter key="directory" value="C:\WordNet\dict"/>
          </operator>
          <operator activated="true" class="wordnet:find_sentiment_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Extract Sentiment (English)" width="90" x="514" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Extract Sentiment (English)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Extract Sentiment (English)" to_port="dictionary"/>
          <connect from_op="Extract Sentiment (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Free Memory: sits between the "example set" output of Process Documents from Data
           and the Store operator, passing the data along its "through 1" port. -->
      <operator activated="true" class="free_memory" compatibility="9.0.000" expanded="true" height="82" name="Free Memory" width="90" x="581" y="34"/>
      <!-- Store: writes its input to the repository entry "Analise do Sucesso (Resultado)" and
           forwards it to the process port "result 1". The entry is given without a leading
           repository path, so it is presumably resolved relative to the process location
           (TODO confirm). -->
      <operator activated="true" class="store" compatibility="9.0.000" expanded="true" height="68" name="Store" width="90" x="715" y="34">
        <parameter key="repository_entry" value="Analise do Sucesso (Resultado)"/>
      </operator>
      <!-- WordList to Data: takes the "word list" output of Process Documents from Data and
           hands its "example set" output to the Sort operator. -->
      <operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="WordList to Data" width="90" x="581" y="187"/>
      <!-- Sort: orders the WordList to Data output by the attribute "total" in decreasing order.
           NOTE(review): no connection in this process leads out of Sort, so its result is not
           delivered to any result port. -->
      <operator activated="true" class="sort" compatibility="9.0.000" expanded="true" height="82" name="Sort" width="90" x="715" y="238">
        <parameter key="attribute_name" value="total"/>
        <parameter key="sorting_direction" value="decreasing"/>
      </operator>
      <connect from_op="Data Files" from_port="out 1" to_op="Clean up data" to_port="in 1"/>
      <connect from_op="Clean up data" from_port="out 1" to_op="ETL Subprocess" to_port="in 1"/>
      <connect from_op="ETL Subprocess" from_port="out 1" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Free Memory" to_port="through 1"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
      <connect from_op="Free Memory" from_port="through 1" to_op="Store" to_port="input"/>
      <connect from_op="Store" from_port="through" to_port="result 1"/>
      <connect from_op="WordList to Data" from_port="example set" to_op="Sort" to_port="example set input"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <description align="right" color="blue" colored="true" height="277" resized="true" width="262" x="423" y="10">Word Relevance Operators</description>
      <description align="center" color="transparent" colored="true" height="103" resized="false" width="126" x="25" y="124">Extracted data from Twitter</description>
    </process>
  </operator>
</process>

Telcontar120 · August 2018

@alinebora Can you also post a zip with the Wordnet dictionary you are using in this? It's required to complete your initial process and I want to make sure I replicate the same analysis in order to correctly troubleshoot process #2. Thanks.

alinebora · August 2018

@Telcontar120 I tried to attach the zip file here but it didn't work.

But I easily downloaded it from the link below ("WordNet 3.1 DATABASE FILES ONLY").

https://wordnet.princeton.edu/download/current-version

Telcontar120 · August 2018

@alinebora I was able to discover what your problem was (and it was likely the problem with the OP as well).

In your original dataset, for whatever reason, you have the "status_id" field set to the role of label. In RapidMiner, the role of label is for the thing you are trying to predict, so the correct role for "status_id" would never be label. It should in fact have the role id.

When you then later switch the role of label to the attribute "sentiment", that leaves status_id as a regular attribute, which RapidMiner tries to use for modeling, but it has the type polynominal. That's what is causing your error message. So you can make that go away by changing the role of status_id to "id" first. Take a look at the simplified process below.

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <!--
    Simplified sentiment-classification process:
      Retrieve -> Select Attributes -> Generate Attributes -> Set Role -> Cross Validation (SVM).
    The three preparation operators carry breakpoints="after", so execution pauses
    after each of them; resume the run to reach the validation results.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the stored example set; adjust repository_entry to your own repository location. -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve Analise do Sucesso (Resultado)" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Lindon Ventures Repository/Misc/Airline tweets/Analise do Sucesso (Resultado)"/>
      </operator>
      <!-- invert_selection=true: the listed attributes are removed, everything else is kept. -->
      <operator activated="true" breakpoints="after" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="User-ID|followers|followers_count|friends|friends_count"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Recode the numeric sentiment score into a two-class nominal value (positive / negative). -->
      <operator activated="true" breakpoints="after" class="generate_attributes" compatibility="9.0.000" expanded="true" height="82" name="Generate Attributes" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="sentiment" value="if(sentiment&gt;0,&quot;positive&quot;,&quot;negative&quot;)"/>
        </list>
      </operator>
      <!-- sentiment becomes the label; status_id gets the id role so it is not used as a predictor. -->
      <operator activated="true" breakpoints="after" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="648" y="34">
        <parameter key="attribute_name" value="sentiment"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles">
          <parameter key="status_id" value="id"/>
        </list>
      </operator>
      <!-- 5-fold cross-validation: first inner process trains the SVM, second applies and scores it. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="782" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
          <connect from_port="training set" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of the data in each of the 5 folds)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM model.</description>
      </operator>
      <connect from_op="Retrieve Analise do Sucesso (Resultado)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Validation" to_port="example set"/>
      <!-- Results delivered: model (result 1), scored test examples (result 2), performance (result 3). -->
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

When I reviewed your processes, I had a few other suggestions for you. Your original dataset contains over 12k attributes that you generated from the original text processing. Many of these have hardly any occurrences, and many are also meaningless (such as single letters or two letters). You can add some additional operators to your original text processing task, such as "Filter Tokens (by Length)", to get rid of these, and you can also turn on the pruning option in the "Process Documents from Data" operator to remove low-occurrence values that will not be useful in modeling because they occur too infrequently. This is highly recommended.

You also are trying to predict the numerical sentiment score from Wordnet, which is likely to be quite difficult. You may be better off recoding this as a simple positive/negative nominal attribute and then predicting that, at least to start. My example process above shows how to do that as well.

Finally, in your process #2, you seem to be taking the same dataset through both branches of your process, the first to build the score and the 2nd to apply that score. That's extra work that isn't needed, since you can output the scored records directly from the cross-validation, as I show in the sample process above. If you have a separate dataset to score, then your 2nd branch would be needed.

I hope you find this all helpful with your project. Don't hesitate to come back and ask more questions if needed.

alinebora · August 2018

Dear Brian @Telcontar120 thank you so much for your reply! :catvery-happy::catvery-happy:

They were very useful, I applied both 'filter by token' operator (process 2) and prune method in 'Process documents' (process 1).

However, I noticed that you replaced the 'Support Vector Machine' operator with 'Classification by Regression'. Is the SVM operator no longer applicable to this process? Could you explain why you used one instead of the other?

Also, when I run the process, it does not give me the stats informing performance accuracy, as I'd like to see like the example in the picture.

I attached again my latest process in xml.

Thank you again :cathappy::cathappy::cathappy:

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <!--
    Process 2 (modeling step):
      Retrieve -> Select Attributes -> Generate Attributes -> Set Role -> Cross Validation.
    The learner is an SVM nested inside Classification by Regression.
    The three preparation operators carry breakpoints="after", so execution pauses
    after each of them; resume the run to reach the validation results.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the example set stored by process 1 (repository path is relative here). -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve Results Process 1" width="90" x="45" y="34">
        <parameter key="repository_entry" value="Analise do Sucesso (Resultado)"/>
      </operator>
      <!-- invert_selection=true: the listed attributes are removed, everything else is kept. -->
      <operator activated="true" breakpoints="after" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="User-ID|friends|friends_count|quoted_created_at|followers"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Recode the numeric sentiment score into a two-class nominal value (positive / negative). -->
      <operator activated="true" breakpoints="after" class="generate_attributes" compatibility="9.0.000" expanded="true" height="82" name="Generate Attributes" width="90" x="380" y="34">
        <list key="function_descriptions">
          <parameter key="sentiment" value="if(sentiment&gt;0,&quot;positive&quot;,&quot;negative&quot;)"/>
        </list>
      </operator>
      <!-- sentiment becomes the label; status_id gets the id role so it is not used as a predictor. -->
      <operator activated="true" breakpoints="after" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="514" y="34">
        <parameter key="attribute_name" value="sentiment"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles">
          <parameter key="status_id" value="id"/>
        </list>
      </operator>
      <!-- 5-fold cross-validation: first inner process trains the model, second applies and scores it. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="648" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <!-- Classification by Regression wraps the SVM learner defined in its inner process. -->
          <operator activated="true" class="classification_by_regression" compatibility="9.0.000" expanded="true" height="82" name="Classification by Regression" width="90" x="153" y="34">
            <process expanded="true">
              <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
              <connect from_port="training set" to_op="SVM" to_port="training set"/>
              <connect from_op="SVM" from_port="model" to_port="model"/>
              <portSpacing port="source_training set" spacing="0"/>
              <portSpacing port="sink_model" spacing="0"/>
            </process>
          </operator>
          <connect from_port="training set" to_op="Classification by Regression" to_port="training set"/>
          <connect from_op="Classification by Regression" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="transparent" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of the data in each of the 5 folds)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="transparent" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM wrapped in Classification by Regression.</description>
      </operator>
      <connect from_op="Retrieve Results Process 1" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Validation" to_port="example set"/>
      <!-- Results delivered: model (result 1), scored test examples (result 2), performance (result 3). -->
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <description align="center" color="transparent" colored="true" height="105" resized="false" width="180" x="10" y="113">Retrieving the results performed in step 1, where data was extracted and cleaned for analysis</description>
    </process>
  </operator>
</process>

I'd like to describe these performance statistics in my analysis. Could you advise me on that?

Telcontar120 · August 2018

@alinebora I'm not sure what you mean here. When I look at my process in RapidMiner Studio 9.0, I see the SVM model in the training side of the Cross-Validation inner process. You can also see the operator name clearly in the raw XML code I posted here as well.

And I also see the Performance (Binominal Classification) operator there, which outputs the confusion matrix as shown in your picture, as long as you output the "per" port from the Cross Validation (which is already configured to do in my process). Are you certain you pasted my code and didn't modify it before you attempted to run it? As long as you paste the code into your XML panel and then press the green "check" mark, it should render the process as I have created it. If you are doing that, then I have no idea what might be going on.

Assuming you do get the code running, remember also that SVM has a number of parameters that probably need to be tuned (like kernel type, C, and gamma) to get the best model. Reviewing the training videos regarding model optimization would be a good starting point for that.

Happy data mining!

alinebora · August 2018

Dear Brian @Telcontar120 I copied and ran your XML process again. The SVM operator there was correct (I think I did something before - sorry about that). Then, the process runs smoothly but the results only show the ExampleSet (no performance measure). One interesting fact about your process is that the run button changes as well, and every time I click on it, it shows me the ExampleSet first just with the sentiment values, and the second time it shows the sentiment attribute with negative or positive. (It's the first time I've seen that, which is why I mention it.) Below is the result I get running your process.

Telcontar120 · August 2018

@alinebora Those are just the breakpoints in the process, which are there so you can see what happens at each step. Keep pressing the play button after each pause and the process will complete normally (until you get the normal triangle again).

As long as you connect all the outputs from the last Cross Validation operator, you will also see the performance statistics. You want the "per" and "mod" and "tes" ports connected. They are connected in my process already but sometimes if you switch the data input source then connections are dropped.

alinebora · August 2018

Dear Brian @Telcontar120

The connections are all there for Cross Validation; the only change I made was to locate the data to run the process. If I keep pressing the button as you suggested, I get this error message:

Telcontar120 · August 2018

@alinebora

That's the same error as before, which originally was being thrown because of the polynominal attribute status_id. The problem is that SVM cannot handle nominal attributes as predictors, only numerical. Did you do something else upstream that could have added more polynominal attributes to the dataset?

I know that running that process that I posted works on the original dataset, which was based on your original input files. Perhaps you can post your current version of the process that you are running? Also check your data file for any polynominal attributes. You could add a "Select Attributes" right before the Cross Validation and filter for only attributes of type "numerical" and that would probably also fix the issue.

EDITED: updated sample process attached---this definitely works on the input data, so if it doesn't work for you it is because you have changed something else.

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <!--
    Updated sentiment-classification process:
      Retrieve -> Select Attributes -> Generate Attributes -> Set Role
               -> Select Attributes (2) -> Cross Validation (SVM).
    The three preparation operators carry breakpoints="after", so execution pauses
    after each of them; resume the run to reach the validation results.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the stored example set; adjust repository_entry to your own repository location. -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve Analise do Sucesso (Resultado)" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Lindon Ventures Repository/Misc/Airline tweets/Analise do Sucesso (Resultado)"/>
      </operator>
      <!-- invert_selection=true: the listed attributes are removed, everything else is kept. -->
      <operator activated="true" breakpoints="after" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="179" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="User-ID|followers|followers_count|friends|friends_count"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Recode the numeric sentiment score into a two-class nominal value (positive / negative). -->
      <operator activated="true" breakpoints="after" class="generate_attributes" compatibility="9.0.000" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="sentiment" value="if(sentiment&gt;0,&quot;positive&quot;,&quot;negative&quot;)"/>
        </list>
      </operator>
      <!-- sentiment becomes the label; status_id gets the id role so it is not used as a predictor. -->
      <operator activated="true" breakpoints="after" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="447" y="34">
        <parameter key="attribute_name" value="sentiment"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles">
          <parameter key="status_id" value="id"/>
        </list>
      </operator>
      <!--
        Keep only numeric attributes so that no nominal predictor reaches the SVM.
        NOTE(review): the nominal label and the id are expected to survive this filter
        because special attributes are not filtered by default; confirm that
        include_special_attributes is left at its default.
      -->
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="581" y="34">
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="value_type" value="numeric"/>
      </operator>
      <!-- 5-fold cross-validation: first inner process trains the SVM, second applies and scores it. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="715" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
          <connect from_port="training set" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of the data in each of the 5 folds)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM model.</description>
      </operator>
      <connect from_op="Retrieve Analise do Sucesso (Resultado)" from_port="output" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Validation" to_port="example set"/>
      <!-- Results delivered: model (result 1), scored test examples (result 2), performance (result 3). -->
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

alinebora · August 2018

Dear Brian @Telcontar120 ever since the very first time I ran your process here, I did not get any %Performance results :catsad:

Nevertheless, I must say that afterwards I did make a small change in my data preparation (process 1): :catwink:

In 'Select Attributes' I selected the attribute 'created_at' (instead of another one I had, called 'followers count'). I renamed it to "Date" with the 'Rename' operator and ran the process again (just because I wanted to see the sentiment per day in a graph afterwards)... Even though I did this, the new 'Date' attribute now shows me only '?'

I didn't see any change in the ExampleSet result to be honest.

It could be that that may have had some impact, I'm not sure... This is the result I got running the latter suggested by you. I attached the xml of Process 1 again (with the small mentioned change).

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Data Files" width="90" x="45" y="85">
        <process expanded="true">
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 1" width="90" x="179" y="34">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\Alitalia.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Read Excel operator "Read Airline 2": loads the workbook airfrance.xlsx.
               The path below is an absolute Windows path under one user's OneDrive folder,
               so the process only runs on a machine where that exact file exists. -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 2" width="90" x="179" y="85">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\airfrance.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <!-- Column metadata with keys 0 to 87 (presumably the 0-based column position in
                 the sheet; confirm). Each value is a dot-separated tuple: column name, a "true"
                 flag (presumably "column selected"; confirm against the Read Excel
                 documentation), value type, and role. Dots that belong to a column name are
                 backslash-escaped (e.g. urls_t\.co).
                 Every column carries the role "attribute"; no label is declared here.
                 NOTE(review): the quoted_* and retweet_* count columns and quoted_created_at /
                 retweet_created_at are typed polynominal, unlike favorite_count (integer) and
                 created_at (date_time); confirm this is intended. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Read Excel operator "Read Airline 3": loads the workbook easyJet.xlsx.
               The path below is an absolute Windows path under one user's OneDrive folder,
               so the process only runs on a machine where that exact file exists. -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 3" width="90" x="179" y="187">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\easyJet.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <!-- Column metadata with keys 0 to 87 (presumably the 0-based column position in
                 the sheet; confirm). Each value is a dot-separated tuple: column name, a "true"
                 flag (presumably "column selected"; confirm against the Read Excel
                 documentation), value type, and role. Dots that belong to a column name are
                 backslash-escaped (e.g. urls_t\.co).
                 Every column carries the role "attribute"; no label is declared here.
                 NOTE(review): the quoted_* and retweet_* count columns and quoted_created_at /
                 retweet_created_at are typed polynominal, unlike favorite_count (integer) and
                 created_at (date_time); confirm this is intended. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!-- Read Excel operator "Read Airline 4": loads the workbook klm.xlsx.
               The path below is an absolute Windows path under one user's OneDrive folder,
               so the process only runs on a machine where that exact file exists. -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 4" width="90" x="179" y="289">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\klm.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <!-- Column metadata with keys 0 to 87 (presumably the 0-based column position in
                 the sheet; confirm). Each value is a dot-separated tuple: column name, a "true"
                 flag (presumably "column selected"; confirm against the Read Excel
                 documentation), value type, and role. Dots that belong to a column name are
                 backslash-escaped (e.g. urls_t\.co).
                 Every column carries the role "attribute"; no label is declared here.
                 NOTE(review): the quoted_* and retweet_* count columns and quoted_created_at /
                 retweet_created_at are typed polynominal, unlike favorite_count (integer) and
                 created_at (date_time); confirm this is intended. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 5" width="90" x="179" y="391">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\ryanair.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <operator activated="true" class="append" compatibility="9.0.000" expanded="true" height="166" name="Append" width="90" x="380" y="34"/>
          <connect from_port="in 1" to_op="Read Airline 1" to_port="file"/>
          <connect from_op="Read Airline 1" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Read Airline 2" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Read Airline 3" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Read Airline 4" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Read Airline 5" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Clean up data" width="90" x="179" y="85">
        <!-- Keeps only the columns used later, renames user_id to User-ID and created_at to Date,
             and removes the "x" characters from the two ID columns. -->
        <process expanded="true">
          <!-- created_at (declared date_time by the Excel readers) is the source of the Date column.
               quoted_created_at is declared polynominal and is presumably only populated for quote
               tweets, so it is not selected here. -->
          <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="created_at|followers_count|friends_count|status_id|text|user_id"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
            <parameter key="old_name" value="user_id"/>
            <parameter key="new_name" value="User-ID"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename (2)" width="90" x="313" y="34">
            <parameter key="old_name" value="created_at"/>
            <parameter key="new_name" value="Date"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <!-- replace_what is given without a replace_by value, so every "x" in User-ID and
               status_id is presumably replaced by an empty string (TODO confirm the default). -->
          <operator activated="true" class="replace" compatibility="9.0.000" expanded="true" height="82" name="Replace" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="User-ID"/>
            <parameter key="attributes" value="User-ID|status_id"/>
            <parameter key="replace_what" value="x"/>
          </operator>
          <!-- Linear chain: in 1, Select Attributes, Rename, Rename (2), Replace, out 1. -->
          <connect from_port="in 1" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
          <connect from_op="Rename (2)" from_port="example set output" to_op="Replace" to_port="example set input"/>
          <connect from_op="Replace" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Removing unneeded attributes, renaming the needed ones.</description>
      </operator>
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="ETL Subprocess" width="90" x="313" y="85">
        <!-- Keeps one row per User-ID, drops retweets, marks User-ID as the label and converts the
             text column with Nominal to Text so the text operators downstream can use it. -->
        <process expanded="true">
          <operator activated="true" class="remove_duplicates" compatibility="9.0.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="User-ID"/>
            <description align="center" color="transparent" colored="false" width="126">Remove Duplicate Tweets from same user</description>
          </operator>
          <!-- Inverted filter: rows whose text starts with the retweet prefix "RT @" are removed.
               A plain "contains RT" test would also remove any tweet that merely has the letters
               RT somewhere in its text. -->
          <operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="text.starts_with.RT @"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="313" y="34">
            <parameter key="attribute_name" value="User-ID"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
            <description align="center" color="transparent" colored="false" width="126">Set Role for Label</description>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
          </operator>
          <!-- Linear chain: in 1, Remove Duplicates, Filter Examples, Set Role, Nominal to Text, out 1. -->
          <connect from_port="in 1" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role" to_port="example set input"/>
          <connect from_op="Set Role" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Deduplicate by user, drop retweets, set User-ID as label, convert text to Text type</description>
      </operator>
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Process Documents from Data" width="90" x="447" y="85">
        <!-- Text-processing step: takes an example set and exposes an "example set" and a
             "word list" output port. Each document runs through the inner pipeline below. -->
        <!-- Absolute pruning with bounds 4 and 1000; presumably tokens occurring in fewer than 4
             or more than 1000 documents are dropped (TODO confirm against the operator docs). -->
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="4"/>
        <parameter key="prune_above_absolute" value="1000"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <!-- Token pipeline, in connection order: Tokenize, Filter Tokens (by Length),
               Transform Cases, Filter Stopwords (English), Extract Sentiment (English). -->
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34"/>
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
          <!-- Supplies the dictionary input of Extract Sentiment from a fixed local Windows path.
               NOTE(review): this operator sits inside the document subprocess, so it is presumably
               executed once per document; confirm whether that is intended. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="447" y="136">
            <parameter key="directory" value="C:\WordNet\dict"/>
          </operator>
          <operator activated="true" class="wordnet:find_sentiment_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Extract Sentiment (English)" width="90" x="648" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Extract Sentiment (English)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Extract Sentiment (English)" to_port="dictionary"/>
          <connect from_op="Extract Sentiment (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Pass-through step placed between the document processing and the Store operator. -->
      <operator activated="true" class="free_memory" compatibility="9.0.000" expanded="true" height="82" name="Free Memory" width="90" x="581" y="85"/>
      <!-- Writes its input to the repository entry named below. The entry has no leading
           path, so it is presumably resolved relative to the process location (TODO confirm). -->
      <operator activated="true" class="store" compatibility="9.0.000" expanded="true" height="68" name="Store" width="90" x="715" y="34">
        <parameter key="repository_entry" value="Analise do Sucesso (Resultado 1)"/>
      </operator>
      <!-- Converts a word list into an example set. -->
      <operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="WordList to Data" width="90" x="581" y="187"/>
      <!-- Orders the rows by the "total" attribute, largest value first. -->
      <operator activated="true" class="sort" compatibility="9.0.000" expanded="true" height="82" name="Sort" width="90" x="715" y="238">
        <parameter key="attribute_name" value="total"/>
        <parameter key="sorting_direction" value="decreasing"/>
      </operator>
      <!-- Main data path: Data Files, Clean up data, ETL Subprocess, Process Documents from Data,
           Free Memory, Store, result 1. -->
      <connect from_op="Data Files" from_port="out 1" to_op="Clean up data" to_port="in 1"/>
      <connect from_op="Clean up data" from_port="out 1" to_op="ETL Subprocess" to_port="in 1"/>
      <connect from_op="ETL Subprocess" from_port="out 1" to_op="Process Documents from Data" to_port="example set"/>
      <connect from_op="Process Documents from Data" from_port="example set" to_op="Free Memory" to_port="through 1"/>
      <connect from_op="Process Documents from Data" from_port="word list" to_op="WordList to Data" to_port="word list"/>
      <connect from_op="Free Memory" from_port="through 1" to_op="Store" to_port="input"/>
      <connect from_op="Store" from_port="through" to_port="result 1"/>
      <connect from_op="WordList to Data" from_port="example set" to_op="Sort" to_port="example set input"/>
      <!-- Expose the sorted word list as a second process result; without this connection the
           Sort output is computed but never delivered anywhere. -->
      <connect from_op="Sort" from_port="example set output" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <description align="center" color="orange" colored="true" height="277" resized="true" width="262" x="423" y="10">Word Relevance Operators</description>
      <description align="center" color="transparent" colored="true" height="103" resized="false" width="126" x="25" y="124">Extracted data from Twitter</description>
    </process>
  </operator>
</process>

Telcontar120 · August 2018

And what happens if you simply take the latest output from your data ETL process and run it through the process I posted (you just need to point the first Retrieve operator to your stored repository entry)? That should work fine since I added the extra operator to get rid of nominal attributes, and it should also show you the performance data you want.

alinebora · August 2018

I just did this (XML attached), but the result remains the same. But again... ever since the very first time I ran the process, the %Performance stat has never appeared for me. T-T :catsad:

By the way, is there a way that I can visualize the Date again (which was previously the attribute "created_at")?

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Data Files" width="90" x="45" y="34">
        <process expanded="true">
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 1" width="90" x="179" y="34">
            <!-- Reads the Twitter export Alitalia.xlsx from a fixed local Windows path. -->
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\Alitalia.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <!-- One entry per column, indexed 0 to 87. Each value has four dot-separated parts,
                 presumably column name, selected flag, value type and role (TODO confirm against
                 the Read Excel docs). Dots inside a column name are backslash-escaped, as in
                 urls_t\.co. Only created_at and account_created_at are declared date_time. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 2" width="90" x="179" y="85">
            <!-- Reads the Twitter export airfrance.xlsx from a fixed local Windows path. -->
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\airfrance.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <!-- One entry per column, indexed 0 to 87. Each value has four dot-separated parts,
                 presumably column name, selected flag, value type and role (TODO confirm against
                 the Read Excel docs). Dots inside a column name are backslash-escaped, as in
                 urls_t\.co. Only created_at and account_created_at are declared date_time. -->
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!--
            Read Excel operator "Read Airline 3": loads easyJet.xlsx from an absolute local Windows
            path and declares 88 columns (keys 0 to 87) in data_set_meta_data_information.
            Each entry has the form name.true.type.attribute; presumably the second field marks the
            column as selected and the last field is its role (TODO confirm against the Read Excel docs).
            Typed as integer: display_text_width, favorite_count, retweet_count, followers_count,
            friends_count, listed_count, statuses_count, favourites_count.
            Typed as date_time: created_at, account_created_at.
            Every other column is declared polynominal, including numeric-looking or date-looking
            ones such as quoted_favorite_count, retweet_retweet_count and quoted_created_at.
            The backslash in names like urls_t\.co presumably escapes a literal dot, since the dot
            also separates the fields of each entry (verify).
            The output port feeds "example set 3" of the Append operator.
            NOTE(review): the absolute path will not resolve on other machines unless the same file
            exists there.
          -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 3" width="90" x="179" y="187">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\easyJet.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!--
            Read Excel operator "Read Airline 4": loads klm.xlsx from an absolute local Windows path
            and declares 88 columns (keys 0 to 87), each entry in the form name.true.type.attribute
            (presumably name, selected flag, value type, role; TODO confirm).
            Only display_text_width, favorite_count, retweet_count, followers_count, friends_count,
            listed_count, statuses_count and favourites_count are typed integer; created_at and
            account_created_at are typed date_time; all remaining columns are polynominal, which
            includes the quoted_ and retweet_ count columns and quoted_created_at.
            The output port feeds "example set 4" of the Append operator.
            NOTE(review): the absolute path will not resolve on other machines unless the same file
            exists there.
          -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 4" width="90" x="179" y="289">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\klm.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!--
            Read Excel operator "Read Airline 5": loads ryanair.xlsx from an absolute local Windows
            path and declares 88 columns (keys 0 to 87), each entry in the form
            name.true.type.attribute (presumably name, selected flag, value type, role; TODO confirm).
            Only display_text_width, favorite_count, retweet_count, followers_count, friends_count,
            listed_count, statuses_count and favourites_count are typed integer; created_at and
            account_created_at are typed date_time; all remaining columns are polynominal, which
            includes the quoted_ and retweet_ count columns and quoted_created_at.
            The output port feeds "example set 5" of the Append operator.
            NOTE(review): the absolute path will not resolve on other machines unless the same file
            exists there.
          -->
          <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Airline 5" width="90" x="179" y="391">
            <parameter key="excel_file" value="C:\Users\aline\OneDrive\Documentos\ryanair.xlsx"/>
            <list key="annotations"/>
            <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
            <list key="data_set_meta_data_information">
              <parameter key="0" value="user_id.true.polynominal.attribute"/>
              <parameter key="1" value="status_id.true.polynominal.attribute"/>
              <parameter key="2" value="created_at.true.date_time.attribute"/>
              <parameter key="3" value="screen_name.true.polynominal.attribute"/>
              <parameter key="4" value="text.true.polynominal.attribute"/>
              <parameter key="5" value="source.true.polynominal.attribute"/>
              <parameter key="6" value="display_text_width.true.integer.attribute"/>
              <parameter key="7" value="reply_to_status_id.true.polynominal.attribute"/>
              <parameter key="8" value="reply_to_user_id.true.polynominal.attribute"/>
              <parameter key="9" value="reply_to_screen_name.true.polynominal.attribute"/>
              <parameter key="10" value="is_quote.true.polynominal.attribute"/>
              <parameter key="11" value="is_retweet.true.polynominal.attribute"/>
              <parameter key="12" value="favorite_count.true.integer.attribute"/>
              <parameter key="13" value="retweet_count.true.integer.attribute"/>
              <parameter key="14" value="hashtags.true.polynominal.attribute"/>
              <parameter key="15" value="symbols.true.polynominal.attribute"/>
              <parameter key="16" value="urls_url.true.polynominal.attribute"/>
              <parameter key="17" value="urls_t\.co.true.polynominal.attribute"/>
              <parameter key="18" value="urls_expanded_url.true.polynominal.attribute"/>
              <parameter key="19" value="media_url.true.polynominal.attribute"/>
              <parameter key="20" value="media_t\.co.true.polynominal.attribute"/>
              <parameter key="21" value="media_expanded_url.true.polynominal.attribute"/>
              <parameter key="22" value="media_type.true.polynominal.attribute"/>
              <parameter key="23" value="ext_media_url.true.polynominal.attribute"/>
              <parameter key="24" value="ext_media_t\.co.true.polynominal.attribute"/>
              <parameter key="25" value="ext_media_expanded_url.true.polynominal.attribute"/>
              <parameter key="26" value="ext_media_type.true.polynominal.attribute"/>
              <parameter key="27" value="mentions_user_id.true.polynominal.attribute"/>
              <parameter key="28" value="mentions_screen_name.true.polynominal.attribute"/>
              <parameter key="29" value="lang.true.polynominal.attribute"/>
              <parameter key="30" value="quoted_status_id.true.polynominal.attribute"/>
              <parameter key="31" value="quoted_text.true.polynominal.attribute"/>
              <parameter key="32" value="quoted_created_at.true.polynominal.attribute"/>
              <parameter key="33" value="quoted_source.true.polynominal.attribute"/>
              <parameter key="34" value="quoted_favorite_count.true.polynominal.attribute"/>
              <parameter key="35" value="quoted_retweet_count.true.polynominal.attribute"/>
              <parameter key="36" value="quoted_user_id.true.polynominal.attribute"/>
              <parameter key="37" value="quoted_screen_name.true.polynominal.attribute"/>
              <parameter key="38" value="quoted_name.true.polynominal.attribute"/>
              <parameter key="39" value="quoted_followers_count.true.polynominal.attribute"/>
              <parameter key="40" value="quoted_friends_count.true.polynominal.attribute"/>
              <parameter key="41" value="quoted_statuses_count.true.polynominal.attribute"/>
              <parameter key="42" value="quoted_location.true.polynominal.attribute"/>
              <parameter key="43" value="quoted_description.true.polynominal.attribute"/>
              <parameter key="44" value="quoted_verified.true.polynominal.attribute"/>
              <parameter key="45" value="retweet_status_id.true.polynominal.attribute"/>
              <parameter key="46" value="retweet_text.true.polynominal.attribute"/>
              <parameter key="47" value="retweet_created_at.true.polynominal.attribute"/>
              <parameter key="48" value="retweet_source.true.polynominal.attribute"/>
              <parameter key="49" value="retweet_favorite_count.true.polynominal.attribute"/>
              <parameter key="50" value="retweet_retweet_count.true.polynominal.attribute"/>
              <parameter key="51" value="retweet_user_id.true.polynominal.attribute"/>
              <parameter key="52" value="retweet_screen_name.true.polynominal.attribute"/>
              <parameter key="53" value="retweet_name.true.polynominal.attribute"/>
              <parameter key="54" value="retweet_followers_count.true.polynominal.attribute"/>
              <parameter key="55" value="retweet_friends_count.true.polynominal.attribute"/>
              <parameter key="56" value="retweet_statuses_count.true.polynominal.attribute"/>
              <parameter key="57" value="retweet_location.true.polynominal.attribute"/>
              <parameter key="58" value="retweet_description.true.polynominal.attribute"/>
              <parameter key="59" value="retweet_verified.true.polynominal.attribute"/>
              <parameter key="60" value="place_url.true.polynominal.attribute"/>
              <parameter key="61" value="place_name.true.polynominal.attribute"/>
              <parameter key="62" value="place_full_name.true.polynominal.attribute"/>
              <parameter key="63" value="place_type.true.polynominal.attribute"/>
              <parameter key="64" value="country.true.polynominal.attribute"/>
              <parameter key="65" value="country_code.true.polynominal.attribute"/>
              <parameter key="66" value="geo_coords.true.polynominal.attribute"/>
              <parameter key="67" value="coords_coords.true.polynominal.attribute"/>
              <parameter key="68" value="bbox_coords.true.polynominal.attribute"/>
              <parameter key="69" value="status_url.true.polynominal.attribute"/>
              <parameter key="70" value="name.true.polynominal.attribute"/>
              <parameter key="71" value="location.true.polynominal.attribute"/>
              <parameter key="72" value="description.true.polynominal.attribute"/>
              <parameter key="73" value="url.true.polynominal.attribute"/>
              <parameter key="74" value="protected.true.polynominal.attribute"/>
              <parameter key="75" value="followers_count.true.integer.attribute"/>
              <parameter key="76" value="friends_count.true.integer.attribute"/>
              <parameter key="77" value="listed_count.true.integer.attribute"/>
              <parameter key="78" value="statuses_count.true.integer.attribute"/>
              <parameter key="79" value="favourites_count.true.integer.attribute"/>
              <parameter key="80" value="account_created_at.true.date_time.attribute"/>
              <parameter key="81" value="verified.true.polynominal.attribute"/>
              <parameter key="82" value="profile_url.true.polynominal.attribute"/>
              <parameter key="83" value="profile_expanded_url.true.polynominal.attribute"/>
              <parameter key="84" value="account_lang.true.polynominal.attribute"/>
              <parameter key="85" value="profile_banner_url.true.polynominal.attribute"/>
              <parameter key="86" value="profile_background_url.true.polynominal.attribute"/>
              <parameter key="87" value="profile_image_url.true.polynominal.attribute"/>
            </list>
            <parameter key="read_not_matching_values_as_missings" value="false"/>
          </operator>
          <!--
            Append receives the outputs of Read Airline 1 to 5 on its ports "example set 1" to
            "example set 5" and sends its "merged set" to this subprocess's "out 1" port.
            The subprocess input "in 1" is wired to the "file" port of Read Airline 1.
            The portSpacing entries carry only a spacing value per port (presumably layout-only).
          -->
          <operator activated="true" class="append" compatibility="9.0.000" expanded="true" height="166" name="Append" width="90" x="380" y="34"/>
          <connect from_port="in 1" to_op="Read Airline 1" to_port="file"/>
          <connect from_op="Read Airline 1" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Read Airline 2" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Read Airline 3" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Read Airline 4" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Read Airline 5" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!--
        Subprocess "Clean up data": a linear chain of four operators.
          1. Select Attributes (3) keeps the subset followers_count, friends_count, status_id,
             text, user_id and quoted_created_at.
          2. Rename turns user_id into User-ID.
          3. Rename (2) turns quoted_created_at into Date.
          4. Replace targets User-ID and status_id with replace_what="x"; no replace_by value is
             given, so presumably the default (empty replacement) strips the "x" (verify).
        The single-value "attribute" parameter on Replace is also set to User-ID although the
        filter type is subset; presumably it is ignored in that mode.
        NOTE(review): the visible Read Excel operators import quoted_created_at as polynominal,
        so the renamed Date column would be nominal rather than a date type. Confirm that this is
        intended.
      -->
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="Clean up data" width="90" x="179" y="34">
        <process expanded="true">
          <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (3)" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="followers_count|friends_count|status_id|text|user_id|quoted_created_at"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename" width="90" x="179" y="34">
            <parameter key="old_name" value="user_id"/>
            <parameter key="new_name" value="User-ID"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="rename" compatibility="9.0.000" expanded="true" height="82" name="Rename (2)" width="90" x="313" y="34">
            <parameter key="old_name" value="quoted_created_at"/>
            <parameter key="new_name" value="Date"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="replace" compatibility="9.0.000" expanded="true" height="82" name="Replace" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attribute" value="User-ID"/>
            <parameter key="attributes" value="User-ID|status_id"/>
            <parameter key="replace_what" value="x"/>
          </operator>
          <connect from_port="in 1" to_op="Select Attributes (3)" to_port="example set input"/>
          <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
          <connect from_op="Rename (2)" from_port="example set output" to_op="Replace" to_port="example set input"/>
          <connect from_op="Replace" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Removing unneeded attributes, renaming the needed ones.</description>
      </operator>
      <!--
        Subprocess "ETL Subprocess": a linear chain of four operators.
          1. Remove Duplicates compares rows on the single attribute User-ID.
          2. Filter Examples uses the condition text.contains.RT with invert_filter=true, so rows
             whose text contains "RT" are the ones dropped.
          3. Set Role (2) assigns the label role to User-ID.
          4. Nominal to Text converts the single attribute text.
        NOTE(review): the description below mentions binning, but no binning operator is present
        in this subprocess.
        NOTE(review): deduplicating on User-ID alone presumably keeps one row per user rather than
        one row per distinct tweet; confirm that this matches the intent stated in the
        Remove Duplicates description.
        NOTE(review): User-ID is made the label here, while a later Set Role operator in the
        parent process assigns the label role to sentiment. Confirm which attribute is meant to be
        the label.
      -->
      <operator activated="true" class="subprocess" compatibility="9.0.000" expanded="true" height="82" name="ETL Subprocess" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="remove_duplicates" compatibility="9.0.000" expanded="true" height="103" name="Remove Duplicates" width="90" x="45" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="User-ID"/>
            <description align="center" color="transparent" colored="false" width="126">Remove Duplicate Tweets from same user</description>
          </operator>
          <operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Filter Examples" width="90" x="179" y="34">
            <parameter key="invert_filter" value="true"/>
            <list key="filters_list">
              <parameter key="filters_entry_key" value="text.contains.RT"/>
            </list>
          </operator>
          <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="313" y="34">
            <parameter key="attribute_name" value="User-ID"/>
            <parameter key="target_role" value="label"/>
            <list key="set_additional_roles"/>
            <description align="center" color="transparent" colored="false" width="126">Set Role for Label</description>
          </operator>
          <operator activated="true" class="nominal_to_text" compatibility="9.0.000" expanded="true" height="82" name="Nominal to Text" width="90" x="447" y="34">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="text"/>
          </operator>
          <connect from_port="in 1" to_op="Remove Duplicates" to_port="example set input"/>
          <connect from_op="Remove Duplicates" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
          <connect from_op="Filter Examples" from_port="example set output" to_op="Set Role (2)" to_port="example set input"/>
          <connect from_op="Set Role (2)" from_port="example set output" to_op="Nominal to Text" to_port="example set input"/>
          <connect from_op="Nominal to Text" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="transparent" colored="false" width="126">Binning for Label subprocess - suspect</description>
      </operator>
      <!--
        Select Attributes with invert_selection=true: the listed subset (User-ID, followers,
        followers_count, friends, friends_count) is the set to drop, not the set to keep.
        breakpoints="after" presumably pauses execution once this operator has run.
        NOTE(review): "followers" and "friends" are not among the columns kept by the
        "Clean up data" subprocess. User-ID already holds the label role at this point; whether
        special-role attributes are touched depends on include_special_attributes, which is not
        set here (verify the default).
      -->
      <operator activated="true" breakpoints="after" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="User-ID|followers|followers_count|friends|friends_count"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!--
        Generate Attributes defines "sentiment" with the expression
        if(sentiment>0,"positive","negative"): values greater than 0 become "positive" and all
        other values, including exactly 0, become "negative". The expression reads an existing
        attribute that is also named sentiment.
        NOTE(review): the "Clean up data" subprocess keeps only followers_count, friends_count,
        status_id, text, user_id and quoted_created_at, and no visible operator creates a
        sentiment attribute before this point. Confirm where the input sentiment score comes from.
      -->
      <operator activated="true" breakpoints="after" class="generate_attributes" compatibility="9.0.000" expanded="true" height="82" name="Generate Attributes" width="90" x="581" y="34">
        <list key="function_descriptions">
          <parameter key="sentiment" value="if(sentiment&gt;0,&quot;positive&quot;,&quot;negative&quot;)"/>
        </list>
      </operator>
      <!--
        Set Role assigns the label role to sentiment and, through set_additional_roles, the id
        role to status_id.
        NOTE(review): the "ETL Subprocess" earlier assigned the label role to User-ID; confirm how
        the two label assignments are resolved.
      -->
      <operator activated="true" breakpoints="after" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="447" y="187">
        <parameter key="attribute_name" value="sentiment"/>
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles">
          <parameter key="status_id" value="id"/>
        </list>
      </operator>
      <!--
        Select Attributes (2) filters by value_type=numeric, without inversion.
        Special-role attributes (label, id) presumably pass through untouched because
        include_special_attributes is not set (verify the default).
        NOTE(review): followers_count and friends_count were listed for removal by the earlier
        inverted Select Attributes, and the remaining regular columns (text, Date) are not
        numeric, so this filter may leave the learner with no regular attributes. Confirm with
        the breakpoint output.
      -->
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select Attributes (2)" width="90" x="581" y="187">
        <parameter key="attribute_filter_type" value="value_type"/>
        <parameter key="value_type" value="numeric"/>
      </operator>
      <!--
        Cross Validation with number_of_folds=5.
        Training subprocess: an SVM learner (class support_vector_machine, no parameters
        overridden) is fitted on the "training set" port and its model is passed on.
        Testing subprocess: Apply Model scores the "test set", then Performance
        (class performance_binominal_classification) reports classification_error, AUC and
        f_measure; the performance vector goes to "performance 1" and the scored examples to
        "test set results".
        The description texts were corrected to match this configuration: the learner is an SVM,
        not a decision tree, and 5 folds mean an 80 % / 20 % split repeated 5 times.
        NOTE(review): the binominal performance operator presumably requires a two-class nominal
        label; verify the label type that reaches this operator.
      -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="715" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
          <connect from_port="training set" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="true" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of data with 5 folds, 5 times)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="true" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A 5-fold cross-validation evaluating an SVM model.</description>
      </operator>
      <!--
        Top-level wiring, in execution order:
        Data Files, Clean up data, ETL Subprocess, Select Attributes, Generate Attributes,
        Set Role, Select Attributes (2), Validation.
        Validation exposes its model on "result 1", the test result set on "result 2" and the
        performance vector on "result 3". The portSpacing entries carry only a spacing value per
        port (presumably layout-only).
      -->
      <connect from_op="Data Files" from_port="out 1" to_op="Clean up data" to_port="in 1"/>
      <connect from_op="Clean up data" from_port="out 1" to_op="ETL Subprocess" to_port="in 1"/>
      <connect from_op="ETL Subprocess" from_port="out 1" to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Validation" to_port="example set"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

Telcontar120 · August 2018

@alinebora the latest version of the process you posted appears to be missing the entire section where all the text is processed from the documents and the sentiment score from WordNet is appended. What happened to that? It seems like you keep changing things around here and it is making it a bit hard to track what is happening. No wonder the modeling process is throwing errors for this.

Basically, you need to make sure that everything is still being done in order. Originally, your process #1 prepared the data from the Excel files and did the text processing and then appended the sentiment scoring from WordNet and stored it in the repository. The process I supplied here was supposed to be used on the output from that process and effectively replaced your entire process #2. This process looks like it is some kind of hybrid of your original process #1, process #2, and the process I posted??

Just take the stored output from your original process #1 (which you may have modified slightly by adding the Filter Token by Length and pruning options), then retrieve it and run it through the process I posted, and see what happens.

alinebora · August 2018

@Telcontar120 Dear Brian,

I did the process several times, deleted everything, took the original data and started everything over, but it wasn't working. I had just given up on Cross Validation, but now I found that I need to include it in my analysis; that's why I'm back to bother you again if you don't mind :cattongue:

Back then, I had identified 2 constraints:

My machine could not handle processing Cross Validation given the amount of data;
I learned that I could not visualize the performance results (found in other posts) because RapidMiner generates a vlperspective file in my RapidMiner home folder that for some reason prevents that.

All that said, I have decided to get a sample from my data and perform my Cross Validation there (It's allowed because even with the stratified sample I have enough rows for my analysis).

However, when I apply the Cross Validation operator, the subprocess within SVM presents 2 errors:

It asks for a label attribute, I had selected 'Set role' operator and selected the attribute name Status ID as id as you previously advised in this post;
I tried to add another 'Set role' with label but it doesn't show me the attributes to be selected anymore.
It asks to add a 'Nominal to numerical' operator, and even when I add it it does not run.

If you could check my process please and identify what I am doing wrong... I added the dataset again to save you from going back and look for it. It's the same one.

Thanks in advance.

“Keep trying no matter how many times you have failed.
If you fail, try, try again.
Never stop trying.
You success will come unexpected.” -Lailah GiftyAkita

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the stored example set from the repository location given in repository_entry. -->
      <operator activated="true" class="retrieve" compatibility="9.0.000" expanded="true" height="68" name="Retrieve Analise do Sucesso (Resultado)" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Saint Seiya/Data/02/Airline Tweets"/>
      </operator>
      <!-- Removes duplicate rows; duplicates are detected on the single attribute "User ID". -->
      <operator activated="true" class="remove_duplicates" compatibility="9.0.000" expanded="true" height="103" name="Remove dups" width="90" x="45" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="User ID"/>
      </operator>
      <!-- Filter on "Content contains RT" with invert_filter=true, so rows matching the condition
           are the ones removed (retweets, judging by the operator name).
           NOTE(review): "contains" presumably matches the substring "RT" anywhere in Content; confirm that is intended. -->
      <operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Remove RT's" width="90" x="45" y="289">
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Content.contains.RT"/>
        </list>
      </operator>
      <!-- Keeps only the listed subset of attributes.
           NOTE(review): the list contains no sentiment (or other candidate label) column. -->
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select features" width="90" x="246" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Content|Followers|Friends|Status ID|Tweet Date|User ID"/>
      </operator>
      <!-- Assigns the id role to "Status ID". No additional roles (such as label) are set here. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="246" y="136">
        <parameter key="attribute_name" value="Status ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Draws a relative sample (ratio 0.2) using a local random seed.
           NOTE(review): the only role assigned on the path into this operator is id;
           stratified sampling presumably needs a label attribute, so verify one exists on the incoming data. -->
      <operator activated="true" class="sample_stratified" compatibility="9.0.000" expanded="true" height="82" name="Sample (Stratified)" width="90" x="514" y="136">
        <parameter key="sample" value="relative"/>
        <parameter key="sample_ratio" value="0.2"/>
        <parameter key="use_local_random_seed" value="true"/>
      </operator>
      <!-- 5-fold cross-validation.
           Training subprocess: Nominal to Numerical, then SVM.
           Testing subprocess: Apply Model, then binominal classification Performance.
           NOTE(review): Nominal to Numerical runs only on the training side and only its example set
           output is wired; confirm the test data reaches Apply Model in the form the SVM expects. -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="782" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <operator activated="true" class="nominal_to_numerical" compatibility="9.0.000" expanded="true" height="103" name="Nominal to Numerical" width="90" x="44" y="34">
            <list key="comparison_groups"/>
          </operator>
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
          <connect from_port="training set" to_op="Nominal to Numerical" to_port="example set input"/>
          <connect from_op="Nominal to Numerical" from_port="example set output" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="false" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of data with 5 folds, 5 times)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="false" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM model.</description>
      </operator>
      <!-- Deactivated (activated="false"). No connect element in this process feeds its example set
           input, so nothing here contributes to the data that reaches Validation.
           Inner chain: Tokenize → Filter Tokens (by Length) → Transform Cases → Filter Stopwords (English)
           → Stem (Porter) → Generate n-Grams (max_length 3) → Extract Sentiment (English), with the
           WordNet dictionary opened from a local directory. -->
      <operator activated="false" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Text Processing" width="90" x="380" y="289">
        <parameter key="data_management" value="memory-optimized"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34"/>
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
          <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="581" y="34"/>
          <!-- Machine-specific path to the WordNet dict folder. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="715" y="136">
            <parameter key="directory" value="C:\WordNet\dict"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="715" y="34">
            <parameter key="max_length" value="3"/>
          </operator>
          <operator activated="true" class="wordnet:find_sentiment_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Extract Sentiment (English)" width="90" x="849" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Extract Sentiment (English)" to_port="dictionary"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_op="Extract Sentiment (English)" to_port="document"/>
          <connect from_op="Extract Sentiment (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Deactivated (activated="false"); presumably turns the Text Processing word list into an example set. -->
      <operator activated="false" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="Word List" width="90" x="514" y="289"/>
      <!-- Meant to assign the label role, but no attribute_name parameter is set and no connect element
           in this process references "Set Role (2)", so it does not affect the data sent to Validation. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role (2)" width="90" x="380" y="136">
        <parameter key="target_role" value="label"/>
        <list key="set_additional_roles"/>
      </operator>
      <!-- Main data flow: Retrieve → Remove dups → Remove RT's → Select features → Set Role
           → Sample (Stratified) → Validation.
           Validation's model, example set and test result set ports feed results 1 to 3;
           none of its performance ports is wired to a result here. -->
      <connect from_op="Retrieve Analise do Sucesso (Resultado)" from_port="output" to_op="Remove dups" to_port="example set input"/>
      <connect from_op="Remove dups" from_port="example set output" to_op="Remove RT's" to_port="example set input"/>
      <connect from_op="Remove RT's" from_port="example set output" to_op="Select features" to_port="example set input"/>
      <connect from_op="Select features" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Sample (Stratified)" to_port="example set input"/>
      <connect from_op="Sample (Stratified)" from_port="example set output" to_op="Validation" to_port="example set"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="example set" to_port="result 2"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 3"/>
      <!-- Links the two deactivated text operators to each other only. -->
      <connect from_op="Text Processing" from_port="word list" to_op="Word List" to_port="word list"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
    </process>
  </operator>
</process>

Telcontar120 · August 2018

@alinebora I wondered what happened after I never heard back after the last exchange!

The process contained in this post starts from the Excel file that you provided in the last post (airlinex.xlsx). It takes that file, does the text processing (which is required for modeling) and adds pruning, which was still not being done. Without pruning it was generating too many useless attributes. You may want to play around with the pruning threshold in Process Documents from Data parameters, but I do not suggest the option "none" when pruning.

It then selects out the non-text features and transforms the sentiment score into a simple negative/positive binominal attribute, which is required for the SVM model (and the source of your original errors). The role is then set for the id and the transformed sentiment is the label, and the dataset is sampled down to 500 of the positive class and 500 of the negative class (and you could adjust this further if you need to based on your memory constraints). Finally it performs the SVM cross validation and outputs the performance as well as the scored records and the model.

This model is not optimized at all so the performance is mediocre at best. But at least you should have a working process here (just change the path of the Excel file in the first operator) to get started!

<?xml version="1.0" encoding="UTF-8"?><process version="9.0.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Reads the tweets workbook. The excel_file path is machine-specific and must be adjusted.
           Column metadata below: Content as text, User ID and Status ID as real,
           Tweet Date as date_time, Followers and Friends as integer. -->
      <operator activated="true" class="read_excel" compatibility="9.0.000" expanded="true" height="68" name="Read Excel" width="90" x="45" y="34">
        <parameter key="excel_file" value="D:\RapidMiner\LindonVentures\Misc\Airline tweets\AirlinesX.xlsx"/>
        <list key="annotations"/>
        <parameter key="date_format" value="MMM d, yyyy h:mm:ss a z"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="Content.true.text.attribute"/>
          <parameter key="1" value="User ID.true.real.attribute"/>
          <parameter key="2" value="Tweet Date.true.date_time.attribute"/>
          <parameter key="3" value="Followers.true.integer.attribute"/>
          <parameter key="4" value="Friends.true.integer.attribute"/>
          <parameter key="5" value="Status ID.true.real.attribute"/>
        </list>
        <parameter key="read_not_matching_values_as_missings" value="false"/>
      </operator>
      <!-- Removes duplicate rows; duplicates are detected on the single attribute "User ID". -->
      <operator activated="true" class="remove_duplicates" compatibility="9.0.000" expanded="true" height="103" name="Remove dups" width="90" x="45" y="136">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="User ID"/>
      </operator>
      <!-- Filter on "Content contains RT" with invert_filter=true, so rows matching the condition
           are the ones removed (retweets, judging by the operator name).
           NOTE(review): "contains" presumably matches the substring "RT" anywhere in Content; confirm that is intended. -->
      <operator activated="true" class="filter_examples" compatibility="9.0.000" expanded="true" height="103" name="Remove RT's" width="90" x="179" y="136">
        <parameter key="invert_filter" value="true"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="Content.contains.RT"/>
        </list>
      </operator>
      <!-- Text processing with absolute pruning (prune_below_absolute 5, prune_above_absolute 1000),
           which limits how many term attributes are generated.
           Inner chain: Tokenize → Filter Tokens (by Length) → Transform Cases → Filter Stopwords (English)
           → Stem (Porter) → Generate n-Grams (Terms) → Extract Sentiment (English), with the
           WordNet dictionary opened from a local directory. -->
      <operator activated="true" class="text:process_document_from_data" compatibility="8.1.000" expanded="true" height="82" name="Text Processing" width="90" x="246" y="34">
        <parameter key="prune_method" value="absolute"/>
        <parameter key="prune_below_absolute" value="5"/>
        <parameter key="prune_above_absolute" value="1000"/>
        <parameter key="data_management" value="memory-optimized"/>
        <list key="specify_weights"/>
        <process expanded="true">
          <operator activated="true" class="text:tokenize" compatibility="8.1.000" expanded="true" height="68" name="Tokenize" width="90" x="45" y="34"/>
          <operator activated="true" class="text:filter_by_length" compatibility="8.1.000" expanded="true" height="68" name="Filter Tokens (by Length)" width="90" x="179" y="34"/>
          <operator activated="true" class="text:transform_cases" compatibility="8.1.000" expanded="true" height="68" name="Transform Cases" width="90" x="313" y="34"/>
          <operator activated="true" class="text:filter_stopwords_english" compatibility="8.1.000" expanded="true" height="68" name="Filter Stopwords (English)" width="90" x="447" y="34"/>
          <operator activated="true" class="text:stem_porter" compatibility="8.1.000" expanded="true" height="68" name="Stem (Porter)" width="90" x="581" y="34"/>
          <!-- Machine-specific path to the WordNet dict folder; adjust to the local installation. -->
          <operator activated="true" class="wordnet:open_wordnet_dictionary" compatibility="5.3.000" expanded="true" height="68" name="Open WordNet Dictionary" width="90" x="715" y="136">
            <parameter key="directory" value="D:\RapidMiner\LindonVentures\Misc\Airline tweets\dict"/>
          </operator>
          <operator activated="true" class="text:generate_n_grams_terms" compatibility="8.1.000" expanded="true" height="68" name="Generate n-Grams (Terms)" width="90" x="715" y="34"/>
          <operator activated="true" class="wordnet:find_sentiment_wordnet" compatibility="5.3.000" expanded="true" height="82" name="Extract Sentiment (English)" width="90" x="849" y="34"/>
          <connect from_port="document" to_op="Tokenize" to_port="document"/>
          <connect from_op="Tokenize" from_port="document" to_op="Filter Tokens (by Length)" to_port="document"/>
          <connect from_op="Filter Tokens (by Length)" from_port="document" to_op="Transform Cases" to_port="document"/>
          <connect from_op="Transform Cases" from_port="document" to_op="Filter Stopwords (English)" to_port="document"/>
          <connect from_op="Filter Stopwords (English)" from_port="document" to_op="Stem (Porter)" to_port="document"/>
          <connect from_op="Stem (Porter)" from_port="document" to_op="Generate n-Grams (Terms)" to_port="document"/>
          <connect from_op="Open WordNet Dictionary" from_port="dictionary" to_op="Extract Sentiment (English)" to_port="dictionary"/>
          <connect from_op="Generate n-Grams (Terms)" from_port="document" to_op="Extract Sentiment (English)" to_port="document"/>
          <connect from_op="Extract Sentiment (English)" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>
      <!-- Presumably turns the word list produced by Text Processing into an example set for inspection. -->
      <operator activated="true" class="text:wordlist_to_data" compatibility="8.1.000" expanded="true" height="82" name="Word List" width="90" x="380" y="187"/>
      <!-- invert_selection=true: the listed attributes (Followers, Friends, Tweet Date, User ID)
           are dropped and every other attribute is kept. -->
      <operator activated="true" class="select_attributes" compatibility="9.0.000" expanded="true" height="82" name="Select features" width="90" x="380" y="34">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="Followers|Friends|Tweet Date|User ID"/>
        <parameter key="invert_selection" value="true"/>
      </operator>
      <!-- Redefines "sentiment" as a two-valued nominal: "negative" when the score is below 0,
           otherwise "positive" (a score of exactly 0 therefore counts as positive). -->
      <operator activated="true" class="generate_attributes" compatibility="9.0.000" expanded="true" height="82" name="Generate Attributes" width="90" x="514" y="34">
        <list key="function_descriptions">
          <parameter key="sentiment" value="if(sentiment&lt;0,&quot;negative&quot;,&quot;positive&quot;)"/>
        </list>
      </operator>
      <!-- Assigns both roles in one operator: "Status ID" becomes the id and "sentiment" becomes the label. -->
      <operator activated="true" class="set_role" compatibility="9.0.000" expanded="true" height="82" name="Set Role" width="90" x="648" y="34">
        <parameter key="attribute_name" value="Status ID"/>
        <parameter key="target_role" value="id"/>
        <list key="set_additional_roles">
          <parameter key="sentiment" value="label"/>
        </list>
      </operator>
      <!-- Balanced sample: sample_size_per_class requests 500 rows of class "negative" and 500 of "positive".
           Lower these sizes if memory is a constraint. -->
      <operator activated="true" class="sample" compatibility="9.0.000" expanded="true" height="82" name="Sample" width="90" x="782" y="34">
        <parameter key="balance_data" value="true"/>
        <list key="sample_size_per_class">
          <parameter key="negative" value="500"/>
          <parameter key="positive" value="500"/>
        </list>
        <list key="sample_ratio_per_class"/>
        <list key="sample_probability_per_class"/>
      </operator>
      <!-- 5-fold cross-validation of an SVM.
           Training subprocess: SVM trained on the training folds.
           Testing subprocess: Apply Model, then binominal classification Performance
           (classification_error, AUC and f_measure enabled). -->
      <operator activated="true" class="concurrency:cross_validation" compatibility="8.2.000" expanded="true" height="145" name="Validation" width="90" x="916" y="34">
        <parameter key="number_of_folds" value="5"/>
        <process expanded="true">
          <operator activated="true" class="support_vector_machine" compatibility="9.0.000" expanded="true" height="124" name="SVM" width="90" x="179" y="34"/>
          <connect from_port="training set" to_op="SVM" to_port="training set"/>
          <connect from_op="SVM" from_port="model" to_port="model"/>
          <portSpacing port="source_training set" spacing="0"/>
          <portSpacing port="sink_model" spacing="0"/>
          <portSpacing port="sink_through 1" spacing="0"/>
          <description align="left" color="green" colored="true" height="80" resized="false" width="248" x="37" y="137">In the training phase, a model is built on the current training data set. (80 % of data with 5 folds, 5 times)</description>
        </process>
        <process expanded="true">
          <operator activated="true" class="apply_model" compatibility="9.0.000" expanded="true" height="82" name="Apply Model" width="90" x="45" y="34">
            <list key="application_parameters"/>
          </operator>
          <operator activated="true" class="performance_binominal_classification" compatibility="9.0.000" expanded="true" height="82" name="Performance" width="90" x="179" y="34">
            <parameter key="classification_error" value="true"/>
            <parameter key="AUC" value="true"/>
            <parameter key="f_measure" value="true"/>
          </operator>
          <connect from_port="model" to_op="Apply Model" to_port="model"/>
          <connect from_port="test set" to_op="Apply Model" to_port="unlabelled data"/>
          <connect from_op="Apply Model" from_port="labelled data" to_op="Performance" to_port="labelled data"/>
          <connect from_op="Performance" from_port="performance" to_port="performance 1"/>
          <connect from_op="Performance" from_port="example set" to_port="test set results"/>
          <portSpacing port="source_model" spacing="0"/>
          <portSpacing port="source_test set" spacing="0"/>
          <portSpacing port="source_through 1" spacing="0"/>
          <portSpacing port="sink_test set results" spacing="0"/>
          <portSpacing port="sink_performance 1" spacing="0"/>
          <portSpacing port="sink_performance 2" spacing="0"/>
          <description align="left" color="blue" colored="true" height="103" resized="false" width="315" x="38" y="137">The model created in the Training step is applied to the current test set (20 %).&lt;br/&gt;The performance is evaluated and sent to the operator results.</description>
        </process>
        <description align="center" color="transparent" colored="false" width="126">A cross-validation evaluating an SVM model.</description>
      </operator>
      <!-- Main data flow: Read Excel → Remove dups → Remove RT's → Text Processing → Select features
           → Generate Attributes → Set Role → Sample → Validation.
           Results: 1 = model, 2 = test result set, 3 = performance, 4 = word list (side branch). -->
      <connect from_op="Read Excel" from_port="output" to_op="Remove dups" to_port="example set input"/>
      <connect from_op="Remove dups" from_port="example set output" to_op="Remove RT's" to_port="example set input"/>
      <connect from_op="Remove RT's" from_port="example set output" to_op="Text Processing" to_port="example set"/>
      <connect from_op="Text Processing" from_port="example set" to_op="Select features" to_port="example set input"/>
      <connect from_op="Text Processing" from_port="word list" to_op="Word List" to_port="word list"/>
      <connect from_op="Word List" from_port="word list" to_port="result 4"/>
      <connect from_op="Select features" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Set Role" to_port="example set input"/>
      <connect from_op="Set Role" from_port="example set output" to_op="Sample" to_port="example set input"/>
      <connect from_op="Sample" from_port="example set output" to_op="Validation" to_port="example set"/>
      <connect from_op="Validation" from_port="model" to_port="result 1"/>
      <connect from_op="Validation" from_port="test result set" to_port="result 2"/>
      <connect from_op="Validation" from_port="performance 1" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="84"/>
      <portSpacing port="sink_result 5" spacing="0"/>
    </process>
  </operator>
</process>

alinebora · August 2018

@Telcontar120 Dear Brian,

I just ran the process here and it worked! I cannot believe it! I'm so happy!

Thank you so much for your help - Unicorn's magic :cathappy:

Howdy, Stranger!

Quick Links

Categories

Altair RapidMiner Community

GET HELP. LEARN BEST PRACTICES. NETWORK WITH YOUR PEERS.

Non-nominal label: the lavel attribute must be nominal

Answers