Enrich Data by Web Service for Twitter
What am I doing wrong here, says I canot connect to the url:
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="read_excel" compatibility="7.5.003" expanded="true" height="68" name="Read Excel" width="90" x="112" y="187">
<parameter key="excel_file" value="/Users/xlsfile.xlsx"/>
<parameter key="sheet_number" value="1"/>
<parameter key="imported_cell_range" value="A1:B2364"/>
<parameter key="encoding" value="SYSTEM"/>
<parameter key="first_row_as_names" value="false"/>
<list key="annotations">
<parameter key="0" value="Name"/>
</list>
<parameter key="date_format" value=""/>
<parameter key="time_zone" value="SYSTEM"/>
<parameter key="locale" value="English (United States)"/>
<list key="data_set_meta_data_information">
<parameter key="0" value="Name.true.polynominal.attribute"/>
<parameter key="1" value="f_id.true.numeric.attribute"/>
</list>
<parameter key="read_not_matching_values_as_missings" value="true"/>
<parameter key="datamanagement" value="double_array"/>
<parameter key="data_management" value="auto"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="format_numbers" compatibility="7.5.003" expanded="true" height="82" name="Format Numbers" width="90" x="246" y="187">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="f_id"/>
<parameter key="attributes" value=""/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="numeric"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="real"/>
<parameter key="block_type" value="value_series"/>
<parameter key="use_block_type_exception" value="false"/>
<parameter key="except_block_type" value="value_series_end"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
<parameter key="format_type" value="pattern"/>
<parameter key="pattern" value="0"/>
<parameter key="locale" value="English (United States)"/>
<parameter key="use_grouping" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="web:enrich_data_by_webservice" compatibility="7.3.000" expanded="true" height="68" name="Enrich Data by Webservice" width="90" x="514" y="187">
<parameter key="query_type" value="JsonPath"/>
<list key="string_machting_queries"/>
<parameter key="attribute_type" value="Nominal"/>
<list key="regular_expression_queries"/>
<list key="regular_region_queries"/>
<list key="xpath_queries"/>
<list key="namespaces"/>
<parameter key="ignore_CDATA" value="true"/>
<parameter key="assume_html" value="true"/>
<list key="index_queries"/>
<list key="jsonpath_queries"/>
<parameter key="request_method" value="GET"/>
<parameter key="url" value="https://api.twitter.com/1.1/users/lookup.json?userid=<%f_id%>,twitter"/>
<parameter key="delay" value="0"/>
<list key="request_properties">
<parameter key="Authorisation" value="OAuth oauth_consumer_key="n4XUp0xONUyJ7HxPdAMzg", oauth_nonce="TgmOuBfDLuu1DQNlM6BxrbyCmOJk9pkOGfi0TzJIA", oauth_signature="tnnArxj06cWHq44gCs1OSKk%2FjLY%3D", oauth_signature_method="HMAC-SHA1", oauth_timestamp="OAuth oauth_consumer_key=&<mykey>;, oauth_nonce="<mytoken>", oauth_signature="tnnArxj06cWHq44gCs1OSKk%2FjLY%3D", oauth_signature_method="HMAC-SHA1", oauth_timestamp="1318622958", oauth_token="<mytoken>;, oauth_version="1.0"", oauth_token="<mytoken>", oauth_version="1.0""/>
</list>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
Best Answer
-
robin Member Posts: 100 Guru
Solved the problem, on the data to docments it is important to use the combine documents operator. Once combined they need to be read in to continue processing.
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="productivity:execute_program" compatibility="7.5.003" expanded="true" height="103" name="Execute Program (2)" width="90" x="45" y="85">
<parameter key="command" value="./tw_imp_id.sh"/>
<parameter key="log_stdout" value="true"/>
<parameter key="log_stderr" value="true"/>
<parameter key="working_directory" value="/Users/Robin/Dropbox/import_id"/>
<list key="env_variables"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:read_document" compatibility="7.5.000" expanded="true" height="68" name="Read Document (2)" width="90" x="179" y="85">
<parameter key="extract_text_only" value="false"/>
<parameter key="use_file_extension_as_type" value="true"/>
<parameter key="content_type" value="txt"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:json_to_data" compatibility="7.5.000" expanded="true" height="82" name="JSON To Data (2)" width="90" x="313" y="85">
<parameter key="ignore_arrays" value="false"/>
<parameter key="limit_attributes" value="false"/>
<parameter key="skip_invalid_documents" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:data_to_documents" compatibility="7.5.000" expanded="true" height="68" name="Data to Documents" width="90" x="447" y="85">
<parameter key="select_attributes_and_weights" value="true"/>
<list key="specify_weights">
<parameter key="id" value="1.0"/>
</list>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:combine_documents" compatibility="7.5.000" expanded="true" height="82" name="Combine Documents" width="90" x="581" y="85"/>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:write_document" compatibility="7.5.000" expanded="true" height="82" name="Write Document" width="90" x="715" y="85">
<parameter key="file" value="/Users/Robin/test.csv.*"/>
<parameter key="overwrite" value="true"/>
<parameter key="encoding" value="SYSTEM"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="text:documents_to_data" compatibility="7.5.000" expanded="true" height="82" name="Documents to Data" width="90" x="849" y="85">
<parameter key="text_attribute" value="text"/>
<parameter key="label_attribute" value="text"/>
<parameter key="add_meta_information" value="true"/>
<parameter key="datamanagement" value="double_sparse_array"/>
<parameter key="data_management" value="auto"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="select_attributes" compatibility="7.5.003" expanded="true" height="82" name="Select Attributes (3)" width="90" x="983" y="85">
<parameter key="attribute_filter_type" value="regular_expression"/>
<parameter key="attribute" value=""/>
<parameter key="attributes" value="[0]._id"/>
<parameter key="regular_expression" value=".*_id"/>
<parameter key="use_except_expression" value="false"/>
<parameter key="value_type" value="attribute_value"/>
<parameter key="use_value_type_exception" value="false"/>
<parameter key="except_value_type" value="time"/>
<parameter key="block_type" value="attribute_block"/>
<parameter key="use_block_type_exception" value="true"/>
<parameter key="except_block_type" value="attribute_block"/>
<parameter key="invert_selection" value="false"/>
<parameter key="include_special_attributes" value="false"/>
</operator>
</process>
<?xml version="1.0" encoding="UTF-8"?><process version="7.5.003">
<operator activated="true" class="transpose" compatibility="7.5.003" expanded="true" height="82" name="Transpose" width="90" x="1117" y="85"/>
</process>1
Answers
hi @robin - can you please repost that XML directly from your RapidMiner Studio XML panel and into a "code" pane here (use the </> button)? The XML you posted looks like three processes mixed together. Thanks.
Scott
The data for the file is as follows:
I have left the API and secret inside the post to facilitate the answer, I will edit and remove once we have worked this out.
There still is something wrong with the XML, it's like its nesting multiple processes.