JFIFXX    $.' ",#(7),01444'9=82<.342  2!!22222222222222222222222222222222222222222222222222"4 ,PG"Z_4˷kjزZ,F+_z,© zh6٨icfu#ډb_N?wQ5-~I8TK<5oIv-k_U_~bMdӜUHh?]EwQk{_}qFW7HTՑYF?_'ϔ_Ջt=||I 6έ"D/[k9Y8ds|\Ҿp6Ҵ].6znopM[mei$[soᘨ˸ nɜG-ĨUycP3.DBli;hjx7Z^NhN3u{:jx힞#M&jL P@_ P&o89@Sz6t7#Oߋ s}YfTlmrZ)'Nk۞pw\Tȯ?8`Oi{wﭹW[r Q4F׊3m&L=h3z~#\l :F,j@ ʱwQT8"kJO6֚l}R>ډK]y&p}b;N1mr$|7>e@BTM*-iHgD) Em|ؘbҗaҾt4oG*oCNrPQ@z,|?W[0:n,jWiEW$~/hp\?{(0+Y8rΟ+>S-SVN;}s?. w9˟<Mq4Wv'{)01mBVW[8/< %wT^5b)iM pgN&ݝVO~qu9 !J27$O-! :%H ـyΠM=t{!S oK8txA& j0 vF Y|y ~6@c1vOpIg4lODL Rcj_uX63?nkWyf;^*B @~a`Eu+6L.ü>}y}_O6͐:YrGXkGl^w~㒶syIu! W XN7BVO!X2wvGRfT#t/?%8^WaTGcLMI(J1~8?aT ]ASE(*E} 2#I/׍qz^t̔bYz4xt){ OH+(EA&NXTo"XC')}Jzp ~5}^+6wcQ|LpdH}(.|kc4^"Z?ȕ a<L!039C EuCFEwç ;n?*oB8bʝ'#RqfM}7]s2tcS{\icTx;\7KPʇ Z O-~c>"?PEO8@8GQgaՎ󁶠䧘_%#r>1zaebqcPѵn#L =׀t L7`VA{C:ge@w1 Xp3c3ġpM"'-@n4fGB3DJ8[JoߐgK)ƛ$ 83+ 6ʻ SkI*KZlT _`?KQKdB`s}>`*>,*@JdoF*弝O}ks]yߘc1GV<=776qPTtXԀ!9*44Tހ3XΛex46YD  BdemDa\_l,G/֌7Y](xTt^%GE4}bTڹ;Y)BQu>J/J ⮶.XԄjݳ+Ed r5_D1 o Bx΢#<W8R6@gM. drD>(otU@x=~v2 ӣdoBd3eO6㣷ݜ66YQz`S{\P~z m5{J/L1xO\ZFu>ck#&:`$ai>2ΔloF[hlEܺΠk:)` $[69kOw\|8}ބ:񶐕IA1/=2[,!.}gN#ub ~݊}34qdELc$"[qU硬g^%B zrpJru%v\h1Yne`ǥ:gpQM~^Xi `S:V29.PV?Bk AEvw%_9CQwKekPؠ\;Io d{ ߞoc1eP\ `E=@KIRYK2NPlLɀ)&eB+ь( JTx_?EZ }@ 6U뙢طzdWIn` D噥[uV"G&Ú2g}&m?ċ"Om# {ON"SXNeysQ@FnVgdX~nj]J58up~.`r\O,ư0oS _Ml4kv\JSdxSW<AeIX$Iw:Sy›R9Q[,5;@]%u@ *rolbI  +%m:͇ZVủθau,RW33 dJeTYE.Mϧ-oj3+yy^cVO9NV\nd1 !͕_)av;թMlWR1)ElP;yوÏu 3k5Pr6<⒲l!˞*u־n!l:UNW %Chx8vL'X@*)̮ˍ D-M+JUkvK+x8cY?Ԡ~3mo|u@[XeYC\Kpx8oCC&N~3-H MXsu<`~"WL$8ξ3a)|:@m\^`@ҷ)5p+6p%i)P Mngc#0AruzRL+xSS?ʮ}()#tmˇ!0}}y$6Lt;$ʳ{^6{v6ķܰgVcnn ~zx«,2u?cE+ȘH؎%Za)X>uWTzNyosFQƤ$*&LLXL)1" LeOɟ9=:tZcŽY?ӭVwv~,Yrۗ|yGaFC.+ v1fήJ]STBn5sW}y$~z'c 8  ,! 
pVNSNNqy8z˱A4*'2n<s^ǧ˭PJޮɏUGLJ*#i}K%,)[z21z ?Nin1?TIR#m-1lA`fT5+ܐcq՝ʐ,3f2Uեmab#ŠdQy>\)SLYw#.ʑf ,"+w~N'cO3FN<)j&,- љ֊_zSTǦw>?nU仆Ve0$CdrP m׈eXmVu L.bֹ [Դaզ*\y8Է:Ez\0KqC b̘cөQ=0YsNS.3.Oo:#v7[#߫ 5܎LEr49nCOWlG^0k%;YߝZǓ:S#|}y,/kLd TA(AI$+I3;Y*Z}|ӧOdv..#:nf>>ȶITX 8y"dR|)0=n46ⲑ+ra ~]R̲c?6(q;5% |uj~z8R=XIV=|{vGj\gcqz؋%Mߍ1y#@f^^>N#x#۹6Y~?dfPO{P4Vu1E1J *|%JN`eWuzk M6q t[ gGvWIGu_ft5j"Y:Tɐ*; e54q$C2d} _SL#mYpO.C;cHi#֩%+) ӍƲVSYźg |tj38r|V1#;.SQA[S#`n+$$I P\[@s(EDzP])8G#0B[ىXIIq<9~[Z멜Z⊔IWU&A>P~#dp]9 "cP Md?٥Ifتuk/F9c*9Ǎ:ØFzn*@|Iށ9N3{'['ͬҲ4#}!V Fu,,mTIkv C7vB6kT91*l '~ƞFlU'M ][ΩũJ_{iIn$L jOdxkza۪#EClx˘oVɞljr)/,߬hL#^Lф,íMƁe̩NBLiLq}(q6IçJ$WE$:=#(KBzђ xlx?>Պ+>W,Ly!_DŌlQ![ SJ1ƐY}b,+Loxɓ)=yoh@꥟/Iѭ=Py9 ۍYӘe+pJnϱ?V\SO%(t =?MR[Șd/ nlB7j !;ӥ/[-A>dNsLj ,ɪv=1c.SQO3UƀܽE̻9GϷD7(}Ävӌ\y_0[w <΍>a_[0+LF.޺f>oNTq;y\bՃyjH<|q-eɏ_?_9+PHp$[uxK wMwNی'$Y2=qKBP~Yul:[<F12O5=d]Ysw:ϮEj,_QXz`H1,#II dwrP˂@ZJVy$\y{}^~[:NߌUOdؾe${p>G3cĖlʌ ת[`ϱ-WdgIig2 }s ؤ(%#sS@~3XnRG~\jc3vӍLM[JBTs3}jNʖW;7ç?=XF=-=qߚ#='c7ڑWI(O+=:uxqe2zi+kuGR0&eniT^J~\jyp'dtGsO39* b#Ɋ p[BwsT>d4ۧsnvnU_~,vƜJ1s QIz)(lv8MU=;56Gs#KMP=LvyGd}VwWBF'à ?MHUg2 !p7Qjڴ=ju JnA suMeƆҔ!)'8Ϣٔޝ(Vpצ֖d=ICJǠ{qkԭ߸i@Ku|p=..*+xz[Aqġ#s2aƊRR)*HRsi~a &fMP-KL@ZXy'x{}Zm+:)) IJ-iu ܒH'L(7yGӜq j 6ߌg1go,kرtY?W,pefOQS!K۟cҒA|սj>=⬒˧L[ ߿2JaB~Ru:Q] 0H~]7ƼI(}cq 'ήETq?fabӥvr )o-Q_'ᴎoK;Vo%~OK *bf:-ťIR`B5!RB@ï u ̯e\_U_ gES3QTaxU<~c?*#]MW,[8Oax]1bC|踤Plw5V%){t<d50iXSUm:Z┵i"1^B-PhJ&)O*DcWvM)}Pܗ-q\mmζZ-l@}aE6F@&Sg@ݚM ȹ 4#p\HdYDoH"\..RBHz_/5˘6KhJRPmƶim3,#ccoqa)*PtRmk7xDE\Y閣_X<~)c[[BP6YqS0%_;Àv~| VS؇ 'O0F0\U-d@7SJ*z3nyPOm~P3|Yʉr#CSN@ ƮRN)r"C:: #qbY. 
6[2K2uǦHYRQMV G$Q+.>nNHq^ qmMVD+-#*U̒ p욳u:IBmPV@Or[b= 1UE_NmyKbNOU}the`|6֮P>\2PVIDiPO;9rmAHGWS]J*_G+kP2KaZH'KxWMZ%OYDRc+o?qGhmdSoh\D|:WUAQc yTq~^H/#pCZTI1ӏT4"ČZ}`w#*,ʹ 0i課Om*da^gJ݅{le9uF#Tֲ̲ٞC"qߍ ոޑo#XZTp@ o8(jdxw],f`~|,s^f1t|m򸄭/ctr5s79Q4H1꠲BB@l9@C+wpxu£Yc9?`@#omHs2)=2.ljg9$YS%*LRY7Z,*=䷘$armoϰUW.|rufIGwtZwo~5 YյhO+=8fF)W7L9lM̘·Y֘YLf큹pRF99.A "wz=E\Z'a 2Ǚ#;'}G*l^"q+2FQ hjkŦ${ޮ-T٭cf|3#~RJt$b(R(rdx >U b&9,>%E\ Άe$'q't*אެb-|dSBOO$R+H)܎K1m`;J2Y~9Og8=vqD`K[F)k[1m޼cn]skz$@)!I x՝"v9=ZA=`Ɠi :E)`7vI}dYI_ o:obo 3Q&D&2= Ά;>hy.*ⅥSӬ+q&j|UƧ}J0WW< ۋS)jQRjƯrN)Gű4Ѷ(S)Ǣ8iW52No˓ ۍ%5brOnL;n\G=^UdI8$&h'+(cȁ߫klS^cƗjԌEꭔgFȒ@}O*;evWVYJ\]X'5ղkFb 6Ro՜mi Ni>J?lPmU}>_Z&KKqrIDՉ~q3fL:Se>E-G{L6pe,8QIhaXaUA'ʂs+טIjP-y8ۈZ?J$WP Rs]|l(ԓsƊio(S0Y 8T97.WiLc~dxcE|2!XKƘਫ਼$((6~|d9u+qd^389Y6L.I?iIq9)O/뚅OXXVZF[یgQLK1RҖr@v#XlFНyS87kF!AsM^rkpjPDyS$Nqnxҍ!Uf!ehi2m`YI9r6 TFC}/y^Η5d'9A-J>{_l+`A['յϛ#w:݅%X}&PStQ"-\縵/$ƗhXb*yBS;Wջ_mcvt?2}1;qSdd~u:2k52R~z+|HE!)Ǟl7`0<,2*Hl-x^'_TVgZA'j ^2ΪN7t?w x1fIzC-ȖK^q;-WDvT78Z hK(P:Q- 8nZ܃e貾<1YT<,"6{/ ?͟|1:#gW>$dJdB=jf[%rE^il:BxSּ1հ,=*7 fcG#q eh?27,!7x6nLC4x},GeǝtC.vS F43zz\;QYC,6~;RYS/6|25vTimlv& nRh^ejRLGf? ۉҬܦƩ|Ȱ>3!viʯ>vオX3e_1zKȗ\qHS,EW[㺨uch⍸O}a>q6n6N6qN ! 1AQaq0@"2BRb#Pr3C`Scst$4D%Td ?Na3mCwxAmqmm$4n淿t'C"wzU=D\R+wp+YT&պ@ƃ3ޯ?AﶂaŘ@-Q=9Dռѻ@MVP܅G5fY6# ?0UQ,IX(6ڵ[DIMNލc&υj\XR|,4 jThAe^db#$]wOӪ1y%LYm뭛CUƃߜ}Cy1XνmF8jI]HۺиE@Ii;r8ӭVFՇ| &?3|xBMuSGe=Ӕ#BE5GY!z_eqр/W>|-Ci߇t1ޯќdR3ug=0 5[?#͏qcfH{ ?u=??ǯ}ZzhmΔBFTWPxs}G93 )gGR<>r h$'nchPBjJҧH -N1N?~}-q!=_2hcMlvY%UE@|vM2.Y[|y"EïKZF,ɯ?,q?vM 80jx";9vk+ ֧ ȺU?%vcVmA6Qg^MA}3nl QRNl8kkn'(M7m9وq%ޟ*h$Zk"$9: ?U8Sl,,|ɒxH(ѷGn/Q4PG%Ա8N! &7;eKM749R/%lc>x;>C:th?aKXbheᜋ^$Iհ hr7%F$EFdt5+(M6tÜUU|zW=aTsTgdqPQb'm1{|YXNb P~F^F:k6"j! 
Ir`1&-$Bevk:y#ywI0x=D4tUPZHڠ底taP6b>xaQ# WeFŮNjpJ* mQN*I-*ȩFg3 5Vʊɮa5FO@{NX?H]31Ri_uѕ 0 F~:60p͈SqX#a5>`o&+<2D: ڝ$nP*)N|yEjF5ټeihyZ >kbHavh-#!Po=@k̆IEN@}Ll?jO߭ʞQ|A07xwt!xfI2?Z<ץTcUj]陎Ltl }5ϓ$,Omˊ;@OjEj(ا,LXLOЦ90O .anA7j4 W_ٓzWjcBy՗+EM)dNg6y1_xp$Lv:9"zpʙ$^JԼ*ϭo=xLj6Ju82AH3$ٕ@=Vv]'qEz;I˼)=ɯx /W(Vp$ mu񶤑OqˎTr㠚xsrGCbypG1ߠw e8$⿄/M{*}W]˷.CK\ުx/$WPwr |i&}{X >$-l?-zglΆ(FhvS*b߲ڡn,|)mrH[a3ר[13o_U3TC$(=)0kgP u^=4 WYCҸ:vQרXàtkm,t*^,}D* "(I9R>``[~Q]#afi6l86:,ssN6j"A4IuQ6E,GnHzSHOuk5$I4ؤQ9@CwpBGv[]uOv0I4\yQѸ~>Z8Taqޣ;za/SI:ܫ_|>=Z8:SUIJ"IY8%b8H:QO6;7ISJҌAά3>cE+&jf$eC+z;V rʺmyeaQf&6ND.:NTvm<- uǝ\MvZYNNT-A>jr!SnO 13Ns%3D@`ܟ 1^c< aɽ̲Xë#w|ycW=9I*H8p^(4՗karOcWtO\ƍR8'KIQ?5>[}yUײ -h=% qThG2)"ו3]!kB*pFDlA,eEiHfPs5H:Փ~H0DتDIhF3c2E9H5zԑʚiX=:mxghd(v׊9iSOd@0ڽ:p5h-t&Xqӕ,ie|7A2O%PEhtjY1wЃ!  ࢽMy7\a@ţJ 4ȻF@o̒?4wx)]P~u57X 9^ܩU;Iꭆ 5 eK27({|Y׎ V\"Z1 Z}(Ǝ"1S_vE30>p; ΝD%xW?W?vo^Vidr[/&>~`9Why;R ;;ɮT?r$g1KACcKl:'3 cﳯ*"t8~l)m+U,z`(>yJ?h>]vЍG*{`;y]IT ;cNUfo¾h/$|NS1S"HVT4uhǜ]v;5͠x'C\SBplh}N ABx%ޭl/Twʽ]D=Kžr㻠l4SO?=k M: cCa#ha)ѐxcsgPiG{+xQI= zԫ+ 8"kñj=|c yCF/*9жh{ ?4o kmQNx;Y4膚aw?6>e]Qr:g,i"ԩA*M7qB?ӕFhV25r[7 Y }LR}*sg+xr2U=*'WSZDW]WǞ<叓{$9Ou4y90-1'*D`c^o?(9uݐ'PI& fJݮ:wSjfP1F:X H9dԯ˝[_54 }*;@ܨ ðynT?ןd#4rGͨH1|-#MrS3G3).᧏3vz֑r$G"`j 1tx0<ƆWh6y6,œGagAyb)hDß_mü gG;evݝnQ C-*oyaMI><]obD":GA-\%LT8c)+y76oQ#*{(F⽕y=rW\p۩cA^e6KʐcVf5$'->ՉN"F"UQ@fGb~#&M=8טJNu9D[̤so~ G9TtW^g5y$bY'سǴ=U-2 #MCt(i lj@Q 5̣i*OsxKf}\M{EV{υƇ);HIfeLȣr2>WIȂ6ik 5YOxȺ>Yf5'|H+98pjn.OyjY~iw'l;s2Y:'lgꥴ)o#'SaaKZ m}`169n"xI *+ }FP"l45'ZgE8?[X7(.Q-*ތL@̲v.5[=t\+CNܛ,gSQnH}*FG16&:t4ُ"Ạ$b |#rsaT ]ӽDP7ո0y)e$ٕvIh'QEAm*HRI=: 4牢) %_iNݧl] NtGHL ɱg<1V,J~ٹ"KQ 9HS9?@kr;we݁]I!{ @G["`J:n]{cAEVʆ#U96j#Ym\qe4hB7Cdv\MNgmAyQL4uLjj9#44tl^}LnR!t±]rh6ٍ>yҏNfU  Fm@8}/ujb9he:AyծwGpΧh5l}3p468)Udc;Us/֔YX1O2uqs`hwgr~{ RmhN؎*q 42*th>#E#HvOq}6e\,Wk#Xb>p}դ3T5†6[@Py*n|'f֧>lư΂̺SU'*qp_SM 'c6m ySʨ;MrƋmKxo,GmPAG:iw9}M(^V$ǒѽ9| aJSQarB;}ٻ֢2%Uc#gNaݕ'v[OY'3L3;,p]@S{lsX'cjwk'a.}}& dP*bK=ɍ!;3ngΊUߴmt'*{,=SzfD Ako~Gaoq_mi}#mPXhύmxǍ΂巿zfQc|kc?WY$_Lvl߶c`?ljݲˏ!V6UЂ(A4y)HpZ_x>eR$/`^'3qˏ-&Q=?CFVR 
DfV9{8gnh(P"6[D< E~0<@`G6Hгcc cK.5DdB`?XQ2ٿyqo&+1^ DW0ꊩG#QnL3c/x 11[yxპCWCcUĨ80me4.{muI=f0QRls9f9~fǨa"@8ȁQ#cicG$Gr/$W(WV"m7[mAmboD j۳ l^kh׽ # iXnveTka^Y4BNĕ0 !01@Q"2AaPq3BR?@4QT3,㺠W[=JKϞ2r^7vc:9 EߴwS#dIxu:Hp9E! V 2;73|F9Y*ʬFDu&y؟^EAA(ɩ^GV:ݜDy`Jr29ܾ㝉[E;FzxYGUeYC v-txIsםĘqEb+P\ :>iC';k|zرny]#ǿbQw(r|ӹs[D2v-%@;8<a[\o[ϧwI!*0krs)[J9^ʜp1) "/_>o<1AEy^C`x1'ܣnps`lfQ):lb>MejH^?kl3(z:1ŠK&?Q~{ٺhy/[V|6}KbXmn[-75q94dmc^h X5G-}دBޟ |rtMV+]c?-#ڛ^ǂ}LkrOu>-Dry D?:ޞUǜ7V?瓮"#rչģVR;n/_ ؉vݶe5db9/O009G5nWJpA*r9>1.[tsFnQ V 77R]ɫ8_0<՜IFu(v4Fk3E)N:yڮeP`1}$WSJSQNjٺ޵#lј(5=5lǏmoWv-1v,Wmn߀$x_DȬ0¤#QR[Vkzmw"9ZG7'[=Qj8R?zf\a=OU*oBA|G254 p.w7  &ξxGHp B%$gtЏ򤵍zHNuЯ-'40;_3 !01"@AQa2Pq#3BR?ʩcaen^8F<7;EA{EÖ1U/#d1an.1ě0ʾRh|RAo3m3 % 28Q yφHTo7lW>#i`qca m,B-j݋'mR1Ήt>Vps0IbIC.1Rea]H64B>o]($Bma!=?B KǾ+Ծ"nK*+[T#{EJSQs5:U\wĐf3܆&)IԆwE TlrTf6Q|Rh:[K zc֧GC%\_a84HcObiؖV7H )*ģK~Xhչ04?0 E<}3#u? |gS6ꊤ|I#Hڛ աwX97Ŀ%SLy6č|Fa 8b$sקhb9RAu7˨pČ_\*w묦F 4D~f|("mNKiS>$d7SlA/²SL|6N}S˯g]6; #. 403WebShell
403Webshell
Server IP : 45.32.152.128  /  Your IP : 216.73.216.91
Web Server : nginx/1.24.0
System : Linux stage-vultr 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64
User : forge ( 1000)
PHP Version : 8.2.14
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : ON
Directory :  /usr/share/nmap/nselib/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /usr/share/nmap/nselib/httpspider.lua
---
-- A smallish httpspider library providing basic spidering capabilities
-- It consists of the following classes:
--
-- * <code>Options</code>
-- ** This class is responsible for handling library options.
--
-- * <code>LinkExtractor</code>
-- ** This class contains code responsible for extracting urls from web pages.
--
-- * <code>URL</code>
-- ** This class contains code to parse and process URLs.
--
-- * <code>UrlQueue</code>
-- ** This class contains a queue of the next links to process.
--
-- * <code>Crawler</code>
-- ** This class is responsible for the actual crawling.
--
-- The following sample code shows how the spider could be used:
-- <code>
--   local crawler = httpspider.Crawler:new( host, port, '/', { scriptname = SCRIPT_NAME } )
--   crawler:set_timeout(10000)
--
--   local result
--   while(true) do
--     local status, r = crawler:crawl()
--     if ( not(status) ) then
--       break
--     end
--     if ( r.response.body:match(str_match) ) then
--        crawler:stop()
--        result = r.url
--        break
--     end
--   end
--
--   return result
-- </code>
--
-- For advanced use, the library currently supports a number of closures (withinhost,
-- withindomain, doscraping). Please note, that withinhost and withindomain options also
-- support boolean values. You will want to override them only for advanced use. You can
-- define them using the following utilities:
--
-- * <code>iswithinhost</code>
-- ** You can use this utility to check if the resource exists within the host.
--
-- * <code>iswithindomain</code>
-- ** You can use this utility to check if the resource exists within the domain.
--
-- * <code>isresource</code>
-- ** You can use this utility to check the type of the resource (for example "js").
-- ** A third option may hold a number of signs that may exist after the extension
-- ** of the resource. By default, these are [#, ?]. For example, if we want to return
-- only php resources, the function will also return example.php?query=foo or
-- example.php#foo.
--
-- The following sample code shows an example usage. We override the default
-- withinhost method and we allow spidering only on resources within the host
-- that are not "js" or "css".
-- <code>
--   crawler.options.withinhost = function(url)
--       if crawler:iswithinhost(url)
--       and not crawler:isresource(url, "js")
--       and not crawler:isresource(url, "css") then
--           return true
--       end
--    end
-- </code>
--
-- @author Patrik Karlsson <patrik@cqure.net>
--
-- @args httpspider.maxdepth the maximum amount of directories beneath
--       the initial url to spider. A negative value disables the limit.
--       (default: 3)
-- @args httpspider.maxpagecount the maximum amount of pages to visit.
--       A negative value disables the limit (default: 20)
-- @args httpspider.url the url to start spidering. This is a URL
--       relative to the scanned host eg. /default.html (default: /)
-- @args httpspider.withinhost Closure that overrides the default withinhost
--       function that only spiders URLs within the same host. If this is
--       set to false the crawler will spider URLs both inside and outside
--       the host. See the closure section above to override the default
--       behaviour. (default: true)
-- @args httpspider.withindomain Closure that overrides the default
--       withindomain function that only spiders URLs within the same
--       domain. This widens the scope from <code>withinhost</code> and can
--       not be used in combination. See the closure section above to
--       override the default behaviour. (default: false)
-- @args httpspider.noblacklist if set, doesn't load the default blacklist
-- @args httpspider.useheadfornonwebfiles if set, the crawler would use
--       HEAD instead of GET for files that do not have extensions indicating
--       that they are webpages (the list of webpage extensions is located in
--       nselib/data/http-web-files-extensions.lst)
-- @args httpspider.doscraping Closure that overrides the default doscraping
--       function used to check if the resource should be scraped (in terms
--       of extracting any links within it). See the closure section above to
--       override the default behaviour.
---

local coroutine = require "coroutine"
local http = require "http"
local io = require "io"
local nmap = require "nmap"
local stdnse = require "stdnse"
local string = require "string"
local table = require "table"
local url = require "url"
_ENV = stdnse.module("httpspider", stdnse.seeall)

local LIBRARY_NAME = "httpspider"
local PREFETCH_SIZE = 5

-- The Options class, handling all spidering options
Options = {

  -- Creates a new Options instance.
  -- All key/value pairs from <code>options</code> are copied onto the new
  -- object, defaults are applied for timeout (10000 ms), whitelist and
  -- blacklist, and truthy <code>withinhost</code>/<code>withindomain</code>
  -- values are replaced with the default scope-checking closures.
  -- @param options table of options to copy onto the instance
  -- @return o new Options instance
  new = function(self, options)
    local o = { }

    -- copy all options as class members
    for k, v in pairs(options) do o[k] = v  end

    -- set a few default values
    o.timeout  = options.timeout or 10000
    o.whitelist = o.whitelist or {}
    o.blacklist = o.blacklist or {}
    -- strips a leading "www." so hosts differing only in that prefix compare equal
    local removewww = function(url) return string.gsub(url, "^www%.", "") end

    -- set up the appropriate matching functions
    -- NOTE(review): any truthy withinhost/withindomain value (including a
    -- user-supplied closure passed in via options) is replaced here with the
    -- default closure; custom closures appear to be assigned after
    -- construction (see the library header example) -- confirm against callers.
    if ( o.withinhost ) then
      o.withinhost = function(u)
        local parsed_u = url.parse(tostring(u))
        local host = parsed_u.ascii_host or parsed_u.host

        -- on a non-standard base port, the candidate must use the exact same
        -- port; otherwise the scheme and (www-stripped) hostname must match
        if ( o.base_url:getPort() ~= 80 and o.base_url:getPort() ~= 443 ) then
          if ( parsed_u.port ~= tonumber(o.base_url:getPort()) ) then
            return false
          end
        elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
          return false
          -- if urls don't match only on the "www" prefix, then they are probably the same
        elseif ( host == nil or removewww(host:lower()) ~= removewww(o.base_url:getHost():lower()) ) then
          return false
        end
        return true
      end
    end
    if ( o.withindomain ) then
      o.withindomain = function(u)
        local parsed_u = url.parse(tostring(u))
        local host = parsed_u.ascii_host or parsed_u.host
        -- same port/scheme precedence as withinhost above
        if ( o.base_url:getPort() ~= 80 and o.base_url:getPort() ~= 443 ) then
          if ( parsed_u.port ~= tonumber(o.base_url:getPort()) ) then
            return false
          end
        elseif ( parsed_u.scheme ~= o.base_url:getProto() ) then
          return false
          -- the candidate host must end with the base_url's domain suffix
        elseif ( host == nil or host:sub(-#o.base_url:getDomain()):lower() ~= o.base_url:getDomain():lower() ) then
          return false
        end
        return true
      end
    end

    -- default doscraping closure: scrape (extract links from) every resource
    if (not o.doscraping) then

      o.doscraping = function(u)
        return true
      end
    end

    setmetatable(o, self)
    self.__index = self
    return o
  end,

  -- registers a whitelist function; a link passing any whitelist entry is kept
  addWhitelist = function(self, func) table.insert(self.whitelist, func) end,
  -- registers a blacklist function; a link matching any blacklist entry is dropped
  addBlacklist = function(self, func) table.insert(self.blacklist, func) end,

}

-- Placeholder for form extraction code
-- (intentionally empty; presumably reserved for future <form> parsing
-- support -- TODO confirm before removing)
FormExtractor = {

}

LinkExtractor = {

  -- Creates a new instance of LinkExtractor
  -- @param url URL instance of the page the html was retrieved from
  -- @param html string containing the page body to extract links from
  -- @param options Options instance controlling link filtering (depth,
  --        host/domain scope, white- and blacklists)
  -- @return o instance of LinkExtractor
  new = function(self, url, html, options)
    local o = {
      url = url,
      html = html,
      links = {},
      options = options,
    }
    setmetatable(o, self)
    self.__index = self
    -- extract links immediately; results are available through getLinks()
    o:parse()

    return o
  end,

  -- is the link absolute or not?
  -- @param url string containing the link to test
  -- @return true if the link starts with a scheme prefix (e.g. "http:", "feed:")
  isAbsolute = function(url)
    -- at this point we don't care about the protocol
    -- also, we don't add // to cover stuff like:
    -- feed:http://example.com/rss.xml
    return ( url:match('^%w*:') ~= nil )
  end,

  -- Creates an absolute link from a relative one based on the base_url
  -- The functionality is very simple and does not take any ../../ in
  -- consideration.
  --
  -- @param base_url URL containing the page url from which the links were
  --        extracted
  -- @param rel_url string containing the relative portion of the URL
  -- @param base_href optional string from a <base href="..."> tag, if any
  -- @return link string containing the absolute link
  createAbsolute = function(base_url, rel_url, base_href)

    -- is protocol-relative? (e.g. //cdn.example.com/x.js) -- just prepend the scheme
    if rel_url:match("^//") then
      return ("%s%s%s"):format(base_url:getProto(), ":", rel_url)
    end

    -- is relative with leading slash? ie /dir1/foo.html
    local leading_slash = rel_url:match("^/")
    rel_url = rel_url:match("^/?(.*)") or '/'

    -- check for tailing slash
    if ( base_href and not(base_href:match("/$") ) ) then
      base_href = base_href .. '/'
    end

    if base_url:getPort() == url.get_default_port(base_url:getProto()) then
      -- default port: omit the port from the generated URL
      if ( leading_slash ) then
        return ("%s://%s/%s"):format(base_url:getProto(), base_url:getHost(), rel_url)
      else
        if ( base_href ) then
          return ("%s%s"):format(base_href, rel_url)
        else
          return ("%s://%s%s%s"):format(base_url:getProto(), base_url:getHost(), base_url:getDir(), rel_url)
        end
      end
    else
      -- non-default port: include the port explicitly
      if ( leading_slash ) then
        return ("%s://%s:%d/%s"):format(base_url:getProto(), base_url:getHost(), base_url:getPort(), rel_url)
      else
        if ( base_href ) then
          return ("%s%s"):format(base_href, rel_url)
        else
          -- BUGFIX: was 'base_href or base_url:getPort()'; base_href is
          -- always nil on this branch (handled just above), and a string
          -- value here would have broken the %d format specifier
          return ("%s://%s:%d%s%s"):format(base_url:getProto(), base_url:getHost(), base_url:getPort(), base_url:getDir(), rel_url)
        end
      end
    end
  end,

  -- Gets the depth of the link, relative to our base url eg.
  -- base_url = http://www.cqure.net/wp/
  -- url = http://www.cqure.net/wp/                           - depth: 0
  -- url = http://www.cqure.net/wp/index.php                  - depth: 0
  -- url = http://www.cqure.net/wp/2011/index.php             - depth: 1
  -- url = http://www.cqure.net/index.html                    - depth: -1
  --
  -- @param url instance of URL
  -- @return depth number containing the depth relative to the base_url,
  --         -1 when the link is outside the base directory
  getDepth = function(self, url)
    local base_dir, url_dir = self.options.base_url:getDir(), url:getDir()
    if ( url_dir and base_dir ) then
      -- NOTE(review): base_dir is used as a Lua pattern here, so magic
      -- characters in the directory (e.g. '-') are not taken literally --
      -- longstanding behaviour, left unchanged
      local m = url_dir:match(base_dir.."(.*)")
      if ( not(m) ) then
        return -1
      else
        -- depth equals the number of slashes below the base directory
        local _, depth = m:gsub("/", "/")
        return depth
      end
    end
    -- BUGFIX: either directory was missing; treat the link as out of scope
    -- instead of returning nil, which crashed the numeric comparison in
    -- validate_link
    return -1
  end,

  -- Validates a link against all configured filters.
  -- Precedence: maxdepth, withindomain, withinhost, blacklists; a whitelist
  -- is only consulted when at least one whitelist function is registered.
  -- @param url URL instance to validate
  -- @return valid true if the link passes all filters
  validate_link = function(self, url)
    local valid = true

    -- if our url is nil, abort, this could be due to a number of
    -- reasons such as unsupported protocols: javascript, mail ... or
    -- that the URL failed to parse for some reason
    if ( url == nil or tostring(url) == nil ) then
      return false
    end

    -- linkdepth trumps whitelisting
    if ( self.options.maxdepth and self.options.maxdepth >= 0 ) then
      local depth = self:getDepth( url )
      if ( -1 == depth or depth > self.options.maxdepth ) then
        stdnse.debug3("%s: Skipping link depth: %d; b_url=%s; url=%s", LIBRARY_NAME, depth, tostring(self.options.base_url), tostring(url))
        return false
      end
    end

    -- withindomain trumps any whitelisting
    if ( self.options.withindomain ) then
      if ( not(self.options.withindomain(url)) ) then
        stdnse.debug2("%s: Link is not within domain: %s", LIBRARY_NAME, tostring(url))
        return false
      end
    end

    -- withinhost trumps any whitelisting
    if ( self.options.withinhost ) then
      if ( not(self.options.withinhost(url)) ) then
        stdnse.debug2("%s: Link is not within host: %s", LIBRARY_NAME, tostring(url))
        return false
      end
    end

    -- run through all blacklists
    if ( #self.options.blacklist > 0 ) then
      for _, func in ipairs(self.options.blacklist) do
        if ( func(url) ) then
          stdnse.debug2("%s: Blacklist match: %s", LIBRARY_NAME, tostring(url))
          valid = false
          break
        end
      end
    end

    -- check the url against our whitelist
    if ( #self.options.whitelist > 0 ) then
      valid = false
      for _, func in ipairs(self.options.whitelist) do
        if ( func(url) ) then
          stdnse.debug2("%s: Whitelist match: %s", LIBRARY_NAME, tostring(url))
          valid = true
          break
        end
      end
    end
    return valid
  end,

  -- Parses a HTML response and extracts all links it can find
  -- The function currently supports href, src and action links
  -- Also all behaviour options, such as depth, white- and black-list are
  -- processed in here.
  parse = function(self)
    local links = {}
    local patterns = {
      '<[^>]+[hH][rR][eE][fF]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
      '<[^>]+[hH][rR][eE][fF]%s*=%s*([^\'\"][^%s>]+)',
      '<[^>]+[sS][rR][cC]%s*=%s*[\'"]%s*([^"^\']-)%s*[\'"]',
      '<[^>]+[sS][rR][cC]%s*=%s*([^\'\"][^%s>]+)',
      '<[^>]+[aA][cC][tT][iI][oO][nN]%s*=%s*[\'"]%s*([^"^\']+%s*)[\'"]',
    }

    -- a <base href="..."> tag, if present, overrides the page's directory
    -- when resolving relative links
    local base_hrefs = {
      '<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*[\'"](%s*[^"^\']+%s*)[\'"]',
      '<[^>]+[Bb][Aa][Ss][Ee]%s*[Hh][Rr][Ee][Ff]%s*=%s*([^\'\"][^%s>]+)'
    }

    local base_href
    for _, pattern in ipairs(base_hrefs) do
      base_href = self.html:match(pattern)
      if ( base_href ) then
        break
      end
    end

    for _, pattern in ipairs(patterns) do
      for l in self.html:gmatch(pattern) do
        local link = l
        if ( not(LinkExtractor.isAbsolute(l)) ) then
          link = LinkExtractor.createAbsolute(self.url, l, base_href)
        end

        -- renamed from 'url' to avoid shadowing the url module
        local u = URL:new(link)

        local valid = self:validate_link(u)

        if ( valid ) then
          stdnse.debug3("%s: Adding link: %s", LIBRARY_NAME, tostring(u))
          -- keyed by string representation to de-duplicate links
          links[tostring(u)] = true
        elseif ( tostring(u) ) then
          stdnse.debug3("%s: Skipping url: %s", LIBRARY_NAME, link)
        end
      end
    end

    for link in pairs(links) do
      table.insert(self.links, link)
    end

  end,

  -- Gets a table containing all of the retrieved URLs, after filtering
  -- has been applied.
  -- @return links table of link strings
  getLinks = function(self) return self.links end,


}

-- The URL class, containing code to process URLS
-- This class is heavily inspired by the Java URL class
URL = {

  -- Creates a new instance of URL
  -- @param url string containing the text representation of a URL
  -- @return o instance of URL, in case of parsing being successful
  --         nil in case parsing fails
  new = function(self, url)
    local o = setmetatable({ raw = url }, self)
    self.__index = self
    -- only hand back the object when the raw string parsed cleanly
    if ( o:parse() ) then
      return o
    end
  end,

  -- Parses the string representation of the URL and splits it into its
  -- components: proto, host, port, file, path, dir and domain
  -- @return status true on success, false on failure
  parse = function(self)
    local parts = url.parse(self.raw)

    -- only http and https URLs are supported
    if ( not(parts.scheme) or not(parts.scheme:match("^https?$")) ) then
      return false
    end

    self.proto = parts.scheme
    self.host = parts.ascii_host or parts.host
    self.port = tonumber(parts.port) or url.get_default_port(self.proto)

    -- "file" is the path, params, and query, but not the fragment
    local file = { parts.path }
    if ( parts.params ) then
      file[#file + 1] = ";"
      file[#file + 1] = parts.params
    end
    if ( parts.query ) then
      file[#file + 1] = "?"
      file[#file + 1] = parts.query
    end

    -- Normalize the values; removes dot and dot-dot path segments
    self.file = url.absolute("", table.concat(file))
    self.path = url.absolute("", parts.path)
    self.dir = self.path:match("^(.+%/)") or "/"
    -- TODO: Use public suffix list to extract domain
    self.domain = self.host:match("^[^%.]-%.(.*)")
    return true
  end,

  -- Gets the host portion of the URL
  -- @return host string containing the hostname
  getHost = function(self) return self.host end,

  -- Gets the protocol representation of the URL
  -- @return proto string containing the protocol (ie. http, https)
  getProto = function(self) return self.proto end,

  -- Returns the filename component of the URL.
  -- @return file string containing the path and query components of the url
  getFile = function(self) return self.file end,

  -- Gets the port component of the URL
  -- @return port number containing the port of the URL
  getPort = function(self) return self.port end,

  -- Gets the path component of the URL
  -- @return the full path and filename of the URL
  getPath = function(self) return self.path end,

  -- Gets the directory component of the URL
  -- @return directory string containing the directory part of the URL
  getDir = function(self) return self.dir end,

  -- Gets the domain component of the URL; falls back to the host when no
  -- domain could be extracted (e.g. for single-label hostnames)
  -- @return domain string containing the hosts domain
  getDomain = function(self)
    return self.domain or self.host
  end,

  -- Converts the URL to a string
  -- @return url string containing the string representation of the url
  __tostring = function(self)
    return ("%s://%s:%s%s"):format(self.proto, self.host, self.port, self.file)
  end,
}

-- An UrlQueue holding the pending URLs to be crawled (FIFO order)
UrlQueue = {

  -- creates a new instance of UrlQueue
  -- @param options table containing options
  -- @return o new instance of UrlQueue
  new = function(self, options)
    local o = {
      urls = {},
      options = options
    }
    setmetatable(o, self)
    self.__index = self
    return o
  end,

  -- gets the next available url in the queue
  -- @return the oldest queued URL, or nil when the queue is empty
  getNext = function(self)
    return table.remove(self.urls,1)
  end,

  -- adds a new url to the queue
  -- @param url can be either a string or a URL or a table of URLs
  add = function(self, url)
    assert( type(url) == 'string' or type(url) == 'table', "url was neither a string or table")

    local urls = url
    if ( 'string' == type(url) ) then
      urls = URL:new(url)
      if ( not(urls) ) then
        -- BUGFIX: an unparseable string previously fell through and was
        -- handed to ipairs below, raising a runtime error
        stdnse.debug1("ERROR: Invalid URL: %s", url)
        return
      end
    end

    -- BUGFIX: detect a single URL instance by its 'raw' field on the
    -- *converted* value, not the original argument. Previously a plain
    -- string was converted to a URL object whose array part is empty, so
    -- the ipairs loop below silently added nothing.
    if ( 'table' == type(urls) and urls.raw ) then
      urls = { urls }
    end

    for _, u in ipairs(urls) do
      -- array entries may themselves be strings; convert them too
      u = ( 'string' == type(u) ) and URL:new(u) or u
      if ( u ) then
        table.insert(self.urls, u)
      else
        stdnse.debug1("ERROR: Invalid URL: %s", url)
      end
    end
  end,

  -- dumps the contents of the UrlQueue
  dump = function(self)
    for _, url in ipairs(self.urls) do
      print("url:", url)
    end
  end,

}

-- The Crawler class
Crawler = {

  options = {},

  -- strips a leading "www." prefix from a hostname (used when comparing hosts)
  removewww = function(url) return string.gsub(url, "^www%.", "") end,

  -- A utility for use when defining closures. Checks if the resource exists
  -- within the host: the candidate must match the base_url's port (when the
  -- base uses a non-standard port), otherwise its scheme and hostname
  -- (ignoring a leading "www.") must agree with the base_url.
  -- @param u URL that points to the resource we want to check.
  -- @return true if u is within the same host as options.base_url
  iswithinhost = function(self, u)
    local parsed_u = url.parse(tostring(u))
    local host = parsed_u.ascii_host or parsed_u.host
    -- non-standard base port: require the exact same port on the candidate
    if ( self.options.base_url:getPort() ~= 80 and self.options.base_url:getPort() ~= 443 ) then
      if ( parsed_u.port ~= tonumber(self.options.base_url:getPort()) ) then
        return false
      end
    elseif ( parsed_u.scheme ~= self.options.base_url:getProto() ) then
      return false
      -- if urls don't match only on the "www" prefix, then they are probably the same
    elseif ( host == nil or self.removewww(host:lower()) ~= self.removewww(self.options.base_url:getHost():lower()) ) then
      return false
    end
    return true
  end,

  -- A utility for use when defining closures. Checks if the resource exists
  -- within the domain: the candidate must match the base_url's port (when
  -- the base uses a non-standard port), otherwise its scheme must match and
  -- its hostname must end with the base_url's domain suffix.
  -- @param u URL that points to the resource we want to check.
  -- @return true if u is within the same domain as options.base_url
  iswithindomain = function(self, u)
    local parsed_u = url.parse(tostring(u))
    local host = parsed_u.ascii_host or parsed_u.host
    -- non-standard base port: require the exact same port on the candidate
    if ( self.options.base_url:getPort() ~= 80 and self.options.base_url:getPort() ~= 443 ) then
      if ( parsed_u.port ~= tonumber(self.options.base_url:getPort()) ) then
        return false
      end
    elseif ( parsed_u.scheme ~= self.options.base_url:getProto() ) then
      return false
      -- compare only the trailing portion of the host against the domain
    elseif ( host == nil or host:sub(-#self.options.base_url:getDomain()):lower() ~= self.options.base_url:getDomain():lower() ) then
      return false
    end
    return true
  end,

  -- A utility for use when defining closures. Checks the type of the resource.
  -- @param u URL that points to the resource we want to check.
  -- @param ext the extension of the resource.
  -- @param signs table of signs that may exist after the extension of the
  --        resource (default: { "#", "?" }).
  -- @return truthy when u references a resource with the given extension
  isresource = function(self, u, ext, signs)
    u = tostring(u)

    -- the resource ends in the extension, e.g. "/script.js"
    -- NOTE(review): the '.' before ext is an unescaped pattern dot (matches
    -- any character); longstanding behaviour, left unchanged
    if string.match(u, "." .. ext .. "$") then
      return true
    end

    local signstring = ""
    if signs then
      -- BUGFIX: iterate with ipairs; the generic 'for _, s in signs do'
      -- attempted to call the table as an iterator and raised at runtime
      for _, s in ipairs(signs) do
        signstring = signstring .. s
      end
      -- BUGFIX: gsub returns a new string (Lua strings are immutable); the
      -- result was previously discarded. Escape '?' so it is taken literally
      -- inside the character class below.
      signstring = signstring:gsub("%?", "%%?")
    else
      signstring = "#%?"
    end

    -- match extension followed by one of the signs, e.g. "x.php?a=1" or "x.php#frag"
    return string.match(u, "." .. ext .. "[" .. signstring .. "]" .. "[^.]*$")

  end,

  -- creates a new instance of the Crawler instance
  -- @param host table as received by the action method
  -- @param port table as received by the action method
  -- @param url string containing the relative URL
  -- @param options table of options:
  --        <code>noblacklist</code> - do not load default blacklist
  --        <code>base_url</code> - start url to crawl
  --        <code>timeout</code> - timeout for the http request
  --        <code>maxdepth</code> - the maximum directory depth to crawl
  --        <code>maxpagecount</code> - the maximum amount of pages to retrieve
  --        <code>withinhost</code> - stay within the host of the base_url
  --        <code>withindomain</code> - stay within the base_url domain
  --        <code>doscraping</code> - Permit scraping
  --        <code>scriptname</code> - should be set to SCRIPT_NAME to enable
  --                                  script specific arguments.
  --        <code>redirect_ok</code> - redirect_ok closure to pass to http.get function
  --        <code>no_cache</code> -  no_cache option to pass to http.get function
  -- @return o new instance of Crawler or nil on failure
  new = function(self, host, port, url, options)
    local o = {
      host = host,
      port = port,
      url = url,
      options = options or {},
      basethread = stdnse.base(),
    }

    setmetatable(o, self)
    self.__index = self

    -- BUGFIX: removed 'self.options = o' here; it overwrote the shared
    -- class-level Crawler.options table with the instance itself, aliasing
    -- class state to whichever crawler happened to be created last.

    -- merge in script-, library- and default arguments (defined later in file)
    o:loadScriptArguments()
    o:loadLibraryArguments()
    o:loadDefaultArguments()

    -- probe the web server root; abort construction when it is unreachable
    local response = http.get(o.host, o.port, '/', { timeout = o.options.timeout, redirect_ok = o.options.redirect_ok, no_cache = o.options.no_cache } )

    if ( not(response) or 'table' ~= type(response) ) then
      return
    end

    -- strip any leading slash; it is re-added when the base url is built below
    o.url = o.url:match("/?(.*)")

    -- prefer the host's name over its IP when building the base url
    local u_host = o.host.targetname or o.host.name
    if ( not(u_host) or 0 == #u_host ) then
      u_host = o.host.ip
    end
    local u = ("%s://%s:%d/%s"):format(response.ssl and "https" or "http", u_host, o.port.number, o.url)
    o.options.base_url = URL:new(u)
    o.options = Options:new(o.options)
    o.urlqueue = UrlQueue:new(o.options)
    o.urlqueue:add(o.options.base_url)

    o.options.timeout = o.options.timeout or 10000
    -- set of urls that have already been retrieved
    o.processed = {}

    -- script arguments have precedence
    if ( not(o.options.maxdepth) ) then
      o.options.maxdepth = tonumber(stdnse.get_script_args("httpspider.maxdepth"))
    end

    -- script arguments have precedence
    if ( not(o.options.maxpagecount) ) then
      o.options.maxpagecount = tonumber(stdnse.get_script_args("httpspider.maxpagecount"))
    end

    if ( not(o.options.noblacklist) ) then
      o:addDefaultBlacklist()
    end

    if ( o.options.useheadfornonwebfiles ) then
      -- Load web files extensions from a file in nselib/data folder.
      -- For more information on individual file formats, see
      -- http://en.wikipedia.org/wiki/List_of_file_formats.
      o.web_files_extensions = {}
      local f = nmap.fetchfile("nselib/data/http-web-files-extensions.lst")
      if f then
        for l in io.lines(f) do
          table.insert(o.web_files_extensions, l)
        end
      end
    end

    stdnse.debug2("%s: %s", LIBRARY_NAME, o:getLimitations())

    return o
  end,

  -- Sets the timeout used by the http library
  -- @param timeout number containing the timeout in ms.
  set_timeout = function(self, timeout)
    -- stored on the shared options table so every subsequent request uses it
    local opts = self.options
    opts.timeout = timeout
  end,

  -- Gets the amount of pages that has been retrieved
  -- @return count number of pages retrieved by the instance
  getPageCount = function(self)
    -- start at one so the page currently being fetched is included
    local total = 1
    for _ in pairs(self.processed) do
      total = total + 1
    end
    return total
  end,

  -- Adds a default blacklist blocking binary files such as images,
  -- compressed archives and executable files
  addDefaultBlacklist = function(self)
    -- References:
    --[[
      Image file formats: https://en.wikipedia.org/wiki/Image_file_formats
      Video file formats: https://en.wikipedia.org/wiki/Video_file_format
      Audio file formats: https://en.wikipedia.org/wiki/Audio_file_format
      Doc file extension: https://en.wikipedia.org/wiki/List_of_Microsoft_Office_filename_extensions
      Archive formats: https://en.wikipedia.org/wiki/List_of_archive_formats ,
                       https://en.wikipedia.org/wiki/Category:Archive_formats
    ]]
    local extensions = {
      image_extensions = {"png", "jpg", "jpeg", "gif", "bmp", "jfif", "exif",
      "tiff", "bmp", "ppm", "pgm", "pbm", "pnm", "webp", "heif", "bpg",
      "cgm", "svg"},
      video_extensions = {"avi", "flv", "ogg", "mp4", "m4p", "m4v", "wmv",
      "vob", "ogv", "mng", "mov", "rmvb", "asf", "nsv", "f4v", "f4p",
      "amv", "webm", "mkv", "mpg", "mp2", "mpeg", "mpv", "svi", "3gp",
      "3g2", "mxf", "roq"},
      audio_extensions = {"aac", "m4a", "mp3", "wav", "aa", "aax", "act", "aiff",
      "amr", "ape", "au", "awb", "dct", "dss", "dvf", "flac", "gsm", "iklax",
      "ivs", "m4a", "m4b", "m4p", "mmf", "mpc", "msc", "ogg", "oga", "mogg",
      "oups", "ra", "raw", "sln", "tta", "vox", "wma", "wv", "webm"},
      doc_extensions = {"pdf", "doc", "docx", "docm", "xla", "xls", "xlsx",
      "xlsm", "ppt", "pptx", "pptm", "odf", "ods", "odp", "ps", "xps", "dot",
      "wbk", "dotx", "dotm", "docb", "xlt", "xlm", "xltx", "xltm", "xlsb",
      "xlam", "xll", "xlw", "pot", "pps", "potx", "potm", "ppam", "ppsx", "ppsm", "pub"},
      archive_extensions = {"zip", "tar.gz", "gz", "rar", "7z", "sit", "sitx",
      "tgz", "tar.bz", "tar", "iso", "a", "ar", "cpio", "shar", "lbr", "iso",
      "mar", "sbx", "bz2", "lz", "lzma", "lzo", "rz", "sz", "s7z", "ace", "afa",
      "alz", "apk", "tar.bz2", "tar.Z", "tar.lzma", "tlz", "tbz2", "xp3", "zz",
      "bzip", "lzip", "lzop", "rzip"},
      exe_extensions = {"exe", "com", "msi", "bin","dmg"}
    }
    local blacklist = {}
    for _, cat in pairs(extensions) do
      for _, ext in ipairs(cat) do
        -- Escape literal dots: in a Lua pattern an unescaped "." matches ANY
        -- character, so ".gz$" would also blacklist a path merely ending in
        -- "gz". Internal dots ("tar.gz") are escaped too.
        table.insert(blacklist, "%." .. ext:gsub("%.", "%%.") .. "$")
      end
    end

    self.options:addBlacklist( function(url)
        local p = url:getPath():lower()
        for _, pat in ipairs(blacklist) do
          if ( p:match(pat) ) then
            return true
          end
        end
      end )
  end,

  -- does the heavy crawling
  --
  -- The crawler may exit due to a number of different reasons, including
  -- invalid options, reaching max count or simply running out of links
  -- We return a false status for all of these and in case the error was
  -- unexpected or requires attention we set the error property accordingly.
  -- This way the script can alert the user of the details by calling
  -- getError()
  crawl_thread = function(self, response_queue)
    -- Worker thread body: pulls URLs off the queue, fetches them and pushes
    -- { status, result } pairs onto response_queue for crawl() to consume;
    -- producer/consumer hand-off is synchronized via the condvar below.
    local condvar = nmap.condvar(response_queue)

    if ( false ~= self.options.withinhost and false ~= self.options.withindomain ) then
      table.insert(response_queue, { false, { err = true, reason = "Invalid options: withinhost and withindomain can't both be true" } })
      condvar "signal"
      return
    end

    while(true) do

      -- stop when stop() was requested or the owning script thread has died;
      -- the final condvar "signal" after the loop wakes any waiting consumer
      if ( self.quit or coroutine.status(self.basethread) == 'dead'  ) then
        table.insert(response_queue, {false, { err = false, msg = "Quit signalled by crawler" } })
        break
      end

      -- in case the user set a max page count to retrieve check how many
      -- pages we have retrieved so far
      local count = self:getPageCount()
      if ( self.options.maxpagecount and
          ( self.options.maxpagecount > 0 ) and
          ( count > self.options.maxpagecount ) ) then
        table.insert(response_queue, { false, { err = false, msg = "Reached max page count" } })
        condvar "signal"
        return
      end

      -- pull links from the queue until we get a valid one
      local url
      repeat
        url = self.urlqueue:getNext()
      until( not(url) or not(self.processed[tostring(url)]) )

      -- if no url could be retrieved from the queue, abort ...
      if ( not(url) ) then
        table.insert(response_queue, { false, { err = false, msg = "No more urls" } })
        condvar "signal"
        return
      end

      if ( self.options.maxpagecount ) then
        stdnse.debug2("%s: Fetching url [%d of %d]: %s", LIBRARY_NAME, count, self.options.maxpagecount, tostring(url))
      else
        stdnse.debug2("%s: Fetching url: %s", LIBRARY_NAME, tostring(url))
      end

      local scrape = true

      -- doscraping defaults to false (see loadDefaultArguments) and may be a
      -- user supplied filter function; only call it when one was provided.
      -- Calling it unconditionally crashed with "attempt to call a boolean"
      -- on every default crawl.
      if self.options.doscraping and not (self.options.doscraping(url)) then
        stdnse.debug2("%s: Scraping is not allowed for url: %s", LIBRARY_NAME, tostring(url))
        scrape = false
      end

      local response
      -- in case we want to use HEAD rather than GET for files with certain extensions
      if ( self.options.useheadfornonwebfiles ) then
        local is_web_file = false
        local file = url:getPath():lower()
        -- check if we are at a URL with 'no extension', for example: nmap.org/6
        if string.match(file,".*(/[^/%.]*)$") or string.match(file, "/$") then is_web_file = true end
        if not is_web_file then
          for _,v in pairs(self.web_files_extensions) do
            if string.match(file, "%."..v.."$") then
              is_web_file = true
              break
            end
          end
        end
        if is_web_file then
          stdnse.debug2("%s: Using GET: %s", LIBRARY_NAME, file)
          response = http.get(url:getHost(), url:getPort(), url:getFile(), { timeout = self.options.timeout, redirect_ok = self.options.redirect_ok, no_cache = self.options.no_cache } )
        else
          stdnse.debug2("%s: Using HEAD: %s", LIBRARY_NAME, file)
          response = http.head(url:getHost(), url:getPort(), url:getFile())
        end
      else
        -- fetch the url, and then push it to the processed table
        response = http.get(url:getHost(), url:getPort(), url:getFile(), { timeout = self.options.timeout, redirect_ok = self.options.redirect_ok, no_cache = self.options.no_cache } )
      end

      self.processed[tostring(url)] = true

      if ( response ) then
        -- were we redirected?
        if ( response.location ) then
          -- response.location holds the redirect chain; the last entry is
          -- where we actually ended up
          local link = response.location[#response.location]
          if ( link:match("^http") ) then
            url = URL:new(link)
            -- guess not
          else
            url.path = link
          end
        end
        -- if we have a response, proceed scraping it
        if ( response.body ) and scrape then
          local links = LinkExtractor:new(url, response.body, self.options):getLinks()
          self.urlqueue:add(links)
        end
      else
        response = { body = "", headers = {} }
      end
      table.insert(response_queue, { true, { url = url, response = response } } )
      -- backpressure: never let more than PREFETCH_SIZE unconsumed responses
      -- pile up; wait for the consumer to drain the queue first
      while ( PREFETCH_SIZE < #response_queue ) do
        stdnse.debug2("%s: Response queue full, waiting ...", LIBRARY_NAME)
        condvar "wait"
      end
      condvar "signal"
    end
    condvar "signal"
  end,

  -- Loads the argument set on a script level
  loadScriptArguments = function(self)
    -- Pulls <scriptname>.<option> arguments for every option that is still
    -- unset; numeric options are converted with tonumber.
    local sn = self.options.scriptname
    if not sn then
      stdnse.debug1("%s: WARNING: Script argument could not be loaded as scriptname was not set", LIBRARY_NAME)
      return
    end

    local function fetch(name)
      return stdnse.get_script_args(sn .. "." .. name)
    end

    for _, name in ipairs({ "maxdepth", "maxpagecount" }) do
      if self.options[name] == nil then
        self.options[name] = tonumber(fetch(name))
      end
    end

    -- the start URL lives on the crawler itself, not in the options table
    if self.url == nil then
      self.url = fetch("url")
    end

    for _, name in ipairs({ "withinhost", "withindomain", "noblacklist",
        "useheadfornonwebfiles", "doscraping" }) do
      if self.options[name] == nil then
        self.options[name] = fetch(name)
      end
    end
  end,

  -- Loads the argument on a library level
  loadLibraryArguments = function(self)
    -- Pulls library-wide httpspider.<option> arguments for every option
    -- that is still unset; numeric options are converted with tonumber.
    local ln = LIBRARY_NAME

    local function fetch(name)
      return stdnse.get_script_args(ln .. "." .. name)
    end

    for _, name in ipairs({ "maxdepth", "maxpagecount" }) do
      if self.options[name] == nil then
        self.options[name] = tonumber(fetch(name))
      end
    end

    -- the start URL lives on the crawler itself, not in the options table
    if self.url == nil then
      self.url = fetch("url")
    end

    for _, name in ipairs({ "withinhost", "withindomain", "noblacklist",
        "useheadfornonwebfiles", "doscraping" }) do
      if self.options[name] == nil then
        self.options[name] = fetch(name)
      end
    end
  end,

  -- Loads any defaults for arguments that were not set
  loadDefaultArguments = function(self)
    -- Coerces a string/number flag to a boolean; nil passes through as nil,
    -- "true" and 1 become true, any other string/number becomes false, and
    -- booleans are returned unchanged.
    local function tobool(b)
      if b == nil then
        return
      end
      local t = type(b)
      assert("string" == t or "boolean" == t or "number" == t, "httpspider: tobool failed, unsupported type")
      if t == "string" then
        return ("true" == b)
      elseif t == "number" then
        return (1 == b)
      end
      return b
    end

    -- a numeric 0 means "disabled" for the scope options
    if self.options.withinhost == 0 then
      self.options.withinhost = false
    end
    if self.options.withindomain == 0 then
      self.options.withindomain = false
    end

    -- fixup some booleans to make sure they're actually booleans
    self.options.noblacklist = tobool(self.options.noblacklist)
    self.options.useheadfornonwebfiles = tobool(self.options.useheadfornonwebfiles)

    -- spidering stays within the host by default, unless domain-wide
    -- crawling was explicitly requested instead
    if self.options.withinhost == nil then
      self.options.withinhost = (self.options.withindomain ~= true)
    end
    if self.options.withindomain == nil then
      self.options.withindomain = false
    end

    -- doscraping is only meaningful as a filter function; anything else
    -- (including argument strings) disables the feature
    if type(self.options.doscraping) ~= "function" then
      self.options.doscraping = false
    end

    self.options.maxdepth = tonumber(self.options.maxdepth) or 3
    self.options.maxpagecount = tonumber(self.options.maxpagecount) or 20
    self.url = self.url or '/'
  end,

  -- gets a string of limitations imposed on the crawl
  getLimitations = function(self)
    -- Summarizes the active crawl restrictions as a single readable string,
    -- or returns nil when nothing limits the crawl.
    local o = self.options
    local limits = {}

    if o.maxdepth > 0 then
      limits[#limits + 1] = ("maxdepth=%d"):format(o.maxdepth)
    end
    if o.maxpagecount > 0 then
      limits[#limits + 1] = ("maxpagecount=%d"):format(o.maxpagecount)
    end
    if o.withindomain then
      limits[#limits + 1] = ("withindomain=%s"):format(o.base_url:getDomain() or o.base_url:getHost())
    end
    if o.withinhost then
      limits[#limits + 1] = ("withinhost=%s"):format(o.base_url:getHost())
    end

    if #limits > 0 then
      return ("Spidering limited to: %s"):format(table.concat(limits, "; "))
    end
  end,

  -- does the crawling
  crawl = function(self)
    -- Fetches the next result produced by the background crawl thread,
    -- starting that thread lazily on the first call.
    -- @return status true on success, false when the crawl stopped/failed
    -- @return result table with url and response fields on success, or a
    --         table with err/msg (or err/reason) fields otherwise
    self.response_queue = self.response_queue or {}
    local condvar = nmap.condvar(self.response_queue)
    if ( not(self.thread) ) then
      self.thread = stdnse.new_thread(self.crawl_thread, self, self.response_queue)
    end

    -- block until the worker has queued at least one response, unless it is
    -- already dead (in which case nothing more will ever be queued)
    if ( #self.response_queue == 0 and coroutine.status(self.thread) ~= 'dead') then
      condvar "wait"
    end
    -- wake the worker in case it is waiting on a full queue (backpressure)
    condvar "signal"
    if ( #self.response_queue == 0 ) then
      return false, { err = false, msg = "No more urls" }
    else
      -- queue entries are { status, result } pairs; unpack to two returns
      return table.unpack(table.remove(self.response_queue, 1))
    end
  end,

  -- signals the crawler to stop
  stop = function(self)
    -- Signals the crawl thread to quit (checked at the top of its loop) and
    -- wakes it; then waits for it to acknowledge, unless it already died.
    local condvar = nmap.condvar(self.response_queue)
    self.quit = true
    condvar "signal"
    if ( coroutine.status(self.thread) == "dead" ) then
      return
    end
    condvar "wait"
  end
}

return _ENV;

-- NOTE(review): stray non-Lua text after the module's "return _ENV;" (an
-- injected defacement signature); commented out so the file parses again.
-- Recommend deleting these lines entirely.
-- Youez - 2016 - github.com/yon3zu
-- LinuXploit