The Altair Community is migrating to a new platform to provide a better experience for you. In preparation for the migration, the Altair Community is on read-only mode from October 28 - November 6, 2024. Technical support via cases will continue to work as is. For any urgent requests from Students/Faculty members, please submit the form linked here
"Need to crawl websites requiring authentication"
Hello,
I am trying to crawl a website which requires authentication.
For that, I have used the Get Page operator and enabled cookies, entering my details.
Still, I am not able to log in using the Get Pages operator, as discussed in: http://rapid-i.com/rapidforum/index.php/topic,6106.0.html
Here is my process.
Please guide me.
Regards,
Vineet
I am trying to crawl a website which requires authentication.
For that, I have used the Get Page operator and enabled cookies, entering my details.
Still, I am not able to log in using the Get Pages operator, as discussed in: http://rapid-i.com/rapidforum/index.php/topic,6106.0.html
Here is my process.
Please guide me.
Regards,
Vineet
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process (version 5.3.005) containing three operators:
    * "Get Page"   : sends a POST with login form fields to a Google ServiceLogin URL.
    * "Read Excel" : reads cell range A1:B1 from a local workbook; column B is declared as file_path.
    * "Get Pages"  : retrieves pages using attribute B of the incoming example set as the link.

  Every literal ampersand inside an attribute value is written as the entity "amp" so that the
  document is well-formed XML; a parser yields the same URL / parameter text as before.

  NOTE(review): account credentials and session cookies are stored in plain text below.
  Treat them as compromised and replace them before sharing or reusing this process.
-->
<process version="5.3.005">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.005" expanded="true" name="Process">
    <process expanded="true">
      <!-- Login request: POST to the ServiceLogin URL with cookies accepted. -->
      <operator activated="true" class="web:get_webpage" compatibility="5.3.000" expanded="true" height="60" name="Get Page" width="90" x="179" y="30">
        <parameter key="url" value="https://accounts.google.com/ServiceLogin?service=mail&amp;passive=true&amp;rm=false&amp;continue=https://mail.google.com/mail/&amp;ss=1&amp;scc=1&amp;ltmpl=default&amp;ltmplcache=2"/>
        <parameter key="user_agent" value="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0 "/>
        <parameter key="accept_cookies" value="all"/>
        <parameter key="request_method" value="POST"/>
        <!--
          Form fields sent with the request. The values are already percent-encoded.
          NOTE(review): "scc" and "pstMsg" each appear twice with identical values, and the key
          "ServiceLoginAuth continue" looks like a pasted raw request body; confirm these are intended.
          NOTE(review): values such as dsh, GALX and bgresponse look like per-session tokens copied
          from a browser capture; presumably they expire, verify against a fresh login page.
        -->
        <list key="query_parameters">
          <parameter key="Email" value="infospace007%40gmail.com"/>
          <parameter key="Passwd" value="infospace"/>
          <parameter key="signIn" value="Sign+in"/>
          <parameter key="_utf8" value="%E2%98%83"/>
          <parameter key="service" value="mail"/>
          <parameter key="continue" value="https%3A%2F%2Fmail.google.com%2Fmail%2F%3Ftab%3Dwm"/>
          <parameter key="rm" value="false"/>
          <parameter key="dsh" value="-6653757825738056045"/>
          <parameter key="ltmpl" value="default"/>
          <parameter key="scc" value="1"/>
          <parameter key="pstMsg" value="1"/>
          <parameter key="checkedConnection" value="youtube%3A164%3A1"/>
          <parameter key="bgresponse" value="%21A0J8pNRL3fHDlkSpYLs1st775gIAAABBUgAAAAkqAQXRmXOYFfgQ8E-HKcxRaASVrT6PYOYVYqHciZ4i69haFUqHy15D-LE069TDBl9TaU6Jd_qURb1T5swIKm-JTGKhRnPOaXNDZNkNk3a-qkfh_q9F7fEicPTC8ovAY6PkLaF2UFn9P-Iwzc0Hw4337oLj-WqUHVNNBw4R4qIU_2uMBSj6g7pBi96Cywk8Keplxk-q6UATUVSCJleWXkY5XQBzCU27cbKp5VP9C8VvOK9IMRyYdJSRfvEt-siU02XsealJr4Jx6r6VB6rXfeDc-g-JuBp9HXmu3BZhfIZmd7l_InxQVDrgNMD7XoitF2mHOqamgk_IIYAHp1IT9DG09abzLGRSrF0"/>
          <parameter key="ServiceLoginAuth continue" value="https%3A%2F%2Fmail.google.com%2Fmail%2F%3Ftab%3Dwm&amp;service=mail&amp;rm=false&amp;dsh=-6653757825738056045"/>
          <parameter key="scc" value="1"/>
          <parameter key="GALX" value="HEa-r2pPMxw"/>
          <parameter key="pstMsg" value="1"/>
          <parameter key="checkedDomains" value="youtube"/>
          <parameter key="rmshown" value="1"/>
        </list>
        <!--
          HTTP request headers sent with the POST.
          NOTE(review): "Host" is mail.google.com while the url parameter targets accounts.google.com.
          NOTE(review): "Content-Length" is hard-coded to 53; presumably it does not match the real
          request body, verify whether the operator needs or honours it.
          NOTE(review): the "Cookie" header is a captured browser session; confirm it is still valid.
        -->
        <list key="request_properties">
          <parameter key="Host" value="mail.google.com"/>
          <parameter key="User-Agent" value="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0"/>
          <parameter key="Accept" value="text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"/>
          <parameter key="Accept-Language" value="en-US,en;q=0.5"/>
          <parameter key="Accept-Encoding" value="gzip, deflate"/>
          <parameter key="Content-Type" value="application/x-www-form-urlencoded; charset=UTF-8"/>
          <parameter key="Referer" value="https://mail.google.com/_/mail-static/_/js/main/m_i,t/rt=h/ver=Phek_zmgDkw.en./sv=1/am=!e0FPFCylvh30BOXaPKvsAjcptCvQCCVC9BId1VwCFx8MmmElTTaKOWsaXu4VvCDP35Vm5w/d=1"/>
          <parameter key="Content-Length" value="53"/>
          <parameter key="Cookie" value="S=gmail=fVbtStnM0ILFS8KWFN-3Qw; GX=DQAAALwAAAChzU23SXgpyDRPJTmxXVjoBigeLCrhmYVDKgu2u4VsKYlQhOvrOgvKDe89R62ZGL88RtsvU-N8j24DLvuXE_4sR-kuS5qcFvZ9cTzBMwVFexU6GLDYHe7lFdcmI-3sDan_0RK4_3zbLTJ1NGqf1P2wgFeKfz6vIGOSzkFm-f81jsYjF0nTSKh1dT4qdaE2WRVoq01jBBV-a01GchGz_aU_4DQC-DAlw9bOCLGHU5dEl6YW3K40nHz7eCwdABGGuEs; GMAIL_AT=AF6bupOzN7Bbjpx2Bf-GpEdzRHGwivBcgw; gmailchat=infospace007@gmail.com/692382; GMAIL_IMP=v*2%2Ftl-inv*0!inbox!unk%2Ftl-si-inbox*22%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-si-inbox*22%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-si-inbox*22%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-si-inbox*22%2Ftl-inv*0!inbox!unk%2Ftl-inv*0!inbox!unk%2Ftl-si-inbox*22%2Ftl-inv*0!inbox!unk%2Fr-cs*312%2Ftl-inv*0!inbox!unk; NID=67=sxcfkTjwFxz6m2hog_RyfOMpCvnCFBjYdHHYzV89lTqTOaiO0saz2asvZ9ksSpyYjXtJMQKcyUsN274rj0uTLSWOfUBWvKS0GF08bW2NEJWmTK9zOWj7rhPFKCmsDAgsDPjmou4L7AmsmK-nUGmBImVLo3P_aLaIMK43HLpbfthH4pPcpBo; PREF=ID=ed62523eeec51027:U=cdf6c6a482def491:FF=0:LD=en:TM=1358143677:LM=1358143941:GM=1:S=L6h3Y5ZDjDyAPhbJ; SID=DQAAALgAAAD_jVZHoPU3VifOjphHsX8jERNCurrJ3YfQpKKChF3NgDMOamsK6mSb-31ZZK1N40TSLsKsL6wnlnad6PuQTdFosh83cv7rVUIphdm4pBoI-4K71C_fymyLHR6L0mbtFFygDQVXiC_2afM8szMnagfR1zJ2wkC6TBJlRlqEXtOmOaxg6Tzhrx6wvK47AGBDd2xnDJS7oo6roLCP2KAraSjVFP5laBJeaX5yk2YzV54gg03YqEN1_kmFCpvqDR5G4Bw; HSID=ATb9cr-saLj_-NbeH; SSID=AvFOA5coM2-AT321Q; APISID=-hBge7bhsykHMDcM/AvF1ZY6w7uSivHp7u; SAPISID=Mi7Dh04BKMjP3vsB/AJvvoSHxTkWgyfhlU"/>
          <parameter key="Connection" value="keep-alive"/>
          <parameter key="Pragma" value="no-cache"/>
          <parameter key="Cache-Control" value="no-cache"/>
        </list>
      </operator>
      <!-- Source of the links to fetch: one row (A1:B1), no header row; column B holds the link. -->
      <operator activated="true" class="read_excel" compatibility="5.3.005" expanded="true" height="60" name="Read Excel" width="90" x="112" y="210">
        <parameter key="excel_file" value="C:\Users\vnagpal\Desktop\Book1.xlsx"/>
        <parameter key="imported_cell_range" value="A1:B1"/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="A.true.integer.attribute"/>
          <parameter key="1" value="B.true.file_path.attribute"/>
        </list>
      </operator>
      <!--
        Fetches one page per example, taking the link from attribute B.
        NOTE(review): this operator has no connection from "Get Page" and carries no login fields
        or Cookie header of its own; whether a session opened by "Get Page" is reused here cannot
        be determined from this file, confirm against the Web Mining extension documentation.
      -->
      <operator activated="true" class="web:retrieve_webpages" compatibility="5.3.000" expanded="true" height="60" name="Get Pages" width="90" x="313" y="210">
        <parameter key="link_attribute" value="B"/>
        <parameter key="user_agent" value="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0 "/>
        <parameter key="accept_cookies" value="all"/>
        <parameter key="request_method" value="POST"/>
        <parameter key="delay" value="random"/>
      </operator>
      <!-- Wiring: the login response goes to result 1; the Excel rows feed Get Pages, whose output goes to result 2. -->
      <connect from_op="Get Page" from_port="output" to_port="result 1"/>
      <connect from_op="Read Excel" from_port="output" to_op="Get Pages" to_port="Example Set"/>
      <connect from_op="Get Pages" from_port="Example Set" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
Tagged:
0