The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
XPath problem: not getting attribute values for simple XPath expressions
Hi
I need to extract column values from HTML, but whenever I extract the attribute value the result always shows a ? symbol instead of any output.
I have tried many simple XPath expressions, such as:
//h:tr[@class='c0']/text()
but the output is not shown correctly. I have already changed the process as described in the RapidMiner forum thread named "Xpath commands working in google docs not in rapidminer".
My process is as follows
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.3.015).
  Flow, as wired by the connect elements below:
    Process Documents from Files (inner: Extract Information) -> Write CSV -> result 1
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Reads documents from the directory registered under the key "all"; content type is html. -->
      <operator activated="true"
                class="text:process_document_from_file"
                compatibility="5.3.002"
                expanded="true"
                height="76"
                name="Process Documents from Files"
                width="90"
                x="246"
                y="75">
        <list key="text_directories">
          <parameter key="all" value="/home/user/Desktop/march_04/single"/>
        </list>
        <parameter key="content_type" value="html"/>

        <!-- Inner process: each document is passed through Extract Information. -->
        <process expanded="true">
          <operator activated="true"
                    class="text:extract_information"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Extract Information"
                    width="90"
                    x="313"
                    y="165">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!-- The only query defined: its result is stored under the key "Temp". -->
            <list key="xpath_queries">
              <parameter key="Temp" value="//h:section[@class='fixed']/text()"/>
            </list>
            <!--
              NOTE(review): the query above uses the prefix "h:", but this namespaces
              list is empty. Presumably the operator binds "h" by default for HTML
              input; confirm against the operator documentation.
            -->
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Writes the example set produced above to the CSV file given in csv_file. -->
      <operator activated="true"
                class="write_csv"
                compatibility="5.3.015"
                expanded="true"
                height="76"
                name="Write CSV"
                width="90"
                x="447"
                y="75">
        <parameter key="csv_file" value="/home/user/Desktop/march_04/crawl_web_output.csv"/>
      </operator>

      <!-- Top-level wiring: example set into Write CSV, then its through port to result 1. -->
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
I need to extract column values from HTML, but whenever I extract the attribute value the result always shows a ? symbol instead of any output.
I have tried many simple XPath expressions, such as:
//h:tr[@class='c0']/text()
but the output is not shown correctly. I have already changed the process as described in the RapidMiner forum thread named "Xpath commands working in google docs not in rapidminer".
My process is as follows
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process definition (process version 5.3.015).
  Flow, as wired by the connect elements below:
    Process Documents from Files (inner: Extract Information) -> Write CSV -> result 1
-->
<process version="5.3.015">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true"
            class="process"
            compatibility="5.3.015"
            expanded="true"
            name="Process">
    <process expanded="true">

      <!-- Reads documents from the directory registered under the key "all"; content type is html. -->
      <operator activated="true"
                class="text:process_document_from_file"
                compatibility="5.3.002"
                expanded="true"
                height="76"
                name="Process Documents from Files"
                width="90"
                x="246"
                y="75">
        <list key="text_directories">
          <parameter key="all" value="/home/user/Desktop/march_04/single"/>
        </list>
        <parameter key="content_type" value="html"/>

        <!-- Inner process: each document is passed through Extract Information. -->
        <process expanded="true">
          <operator activated="true"
                    class="text:extract_information"
                    compatibility="5.3.002"
                    expanded="true"
                    height="60"
                    name="Extract Information"
                    width="90"
                    x="313"
                    y="165">
            <parameter key="query_type" value="XPath"/>
            <list key="string_machting_queries"/>
            <list key="regular_expression_queries"/>
            <list key="regular_region_queries"/>
            <!-- The only query defined: its result is stored under the key "Temp". -->
            <list key="xpath_queries">
              <parameter key="Temp" value="//h:section[@class='fixed']/text()"/>
            </list>
            <!--
              NOTE(review): the query above uses the prefix "h:", but this namespaces
              list is empty. Presumably the operator binds "h" by default for HTML
              input; confirm against the operator documentation.
            -->
            <list key="namespaces"/>
            <list key="index_queries"/>
          </operator>
          <connect from_port="document" to_op="Extract Information" to_port="document"/>
          <connect from_op="Extract Information" from_port="document" to_port="document 1"/>
          <portSpacing port="source_document" spacing="0"/>
          <portSpacing port="sink_document 1" spacing="0"/>
          <portSpacing port="sink_document 2" spacing="0"/>
        </process>
      </operator>

      <!-- Writes the example set produced above to the CSV file given in csv_file. -->
      <operator activated="true"
                class="write_csv"
                compatibility="5.3.015"
                expanded="true"
                height="76"
                name="Write CSV"
                width="90"
                x="447"
                y="75">
        <parameter key="csv_file" value="/home/user/Desktop/march_04/crawl_web_output.csv"/>
      </operator>

      <!-- Top-level wiring: example set into Write CSV, then its through port to result 1. -->
      <connect from_op="Process Documents from Files" from_port="example set" to_op="Write CSV" to_port="input"/>
      <connect from_op="Write CSV" from_port="through" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
0