Logo Search packages:      
Sourcecode: paros version File versions  Download package

Collector.java

/*
*
* Paros and its related class files.
* 
* Paros is an HTTP/HTTPS proxy for assessing web application security.
* Copyright (C) 2003-2004 Chinotec Technologies Company
* 
* This program is free software; you can redistribute it and/or
* modify it under the terms of the Clarified Artistic License
* as published by the Free Software Foundation.
* 
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* Clarified Artistic License for more details.
* 
* You should have received a copy of the Clarified Artistic License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
package org.parosproxy.paros.core.spider;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Vector;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.parosproxy.paros.network.HttpBody;
import org.parosproxy.paros.network.HttpHeader;
import org.parosproxy.paros.network.HttpMalformedHeaderException;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpRequestHeader;


/**
 *
 * To change the template for this generated type comment go to
 * Window - Preferences - Java - Code Generation - Code and Comments
 */
00041 public class Collector {

    private SpiderThread parent = null;
    
    Collector(SpiderThread parent) {
        this.parent = parent;
    }
        
      
    /**
     * Build URI given a base HTML.  Keep absolute if it is.
     * @param html
     * @param link
     * @return
     * @throws URIException
     * @throws HttpMalformedHeaderException
     */
00058     private HttpMessage buildMsg(URI base, String link) throws URIException, HttpMalformedHeaderException {

        URI uri = null;        
        uri = new URI(base, link, true);
        HttpMessage msg = new HttpMessage(new HttpRequestHeader(HttpRequestHeader.GET, uri, HttpHeader.HTTP11));
        return msg;
    }

    private boolean isDuplicateInSameHtml(Vector list, HttpMessage msg) {
        
        if (list.contains(msg)) {
            return true;
        } else {
            list.add(msg);            
        }
        return false;
    }
    
      void collect(Html html, int currentDepth) {
          Vector previousFoundList = new Vector();

          URI uri = null;
          A[] as = html.getAs();
          Frame[] frames = html.getFrames();
//        Img[] imgs = html.getImgs();
          Hyperlink[] hlinks = html.getHyperlinks();
          Meta[] metas = html.getMetas();
          
          HttpMessage msg = null;
          
          // process ANCHOR
          for (int i=0; i<as.length; i++) {
              String href = as[i].getHref().replaceAll("&amp;","&");
              try {
                  msg = buildMsg(html.getURI(), href);
                  
                  if (!isDuplicateInSameHtml(previousFoundList, msg)) {
                        parent.foundURI(msg, html.getURI().toString(), currentDepth);

                  }
              } catch (Exception e) {
                  //System.out.println("A - invalid uri:" + href);
                  //e.printStackTrace();
              }
              
          }
          
          // process frame
          for (int i=0; i<frames.length; i++) {
              String src = frames[i].getSrc();
              try {
                  msg = buildMsg(html.getURI(), src);
                  if (!isDuplicateInSameHtml(previousFoundList, msg)) {
                      parent.foundURI(msg, html.getURI().toString(), currentDepth);
                  }
              } catch (Exception e) {
                  //System.out.println("Frame - invalid uri:" + src);
                  //e.printStackTrace();
              }
          }
          
          // process imgs
//          suppressed extracting URLs from image         
//        for (int i=0; i<imgs.length; i++) {
//            String src = imgs[i].getSrc();
//            try {
//                msg = buildMsg(html.getURI(), src);
//                parent.foundURI(msg, html.getURI().toString(), currentDepth);
//
//            } catch (Exception e) {
//                //System.out.println("Img - invalid uri:" + src);
//                //e.printStackTrace();
//            }
//        }
          
          // process forms
        
          Vector formQueryList = getFormsQuery(html);
          for (int i=0; i<formQueryList.size(); i++) {
              msg = (HttpMessage) formQueryList.get(i);
              try {
                  parent.foundURI(msg, html.getURI().toString(), currentDepth);
              } catch (URIException e) {
                  //e.printStackTrace();
              }
          }
          
          // process general hyperlinks (eg in javascript/elsewhere)
          for (int i=0; i<hlinks.length; i++) {
              String link = hlinks[i].getLink().replaceAll("&amp;","&");
              try {
                  msg = buildMsg(html.getURI(), link);
                  if (!isDuplicateInSameHtml(previousFoundList, msg)) {
                      parent.foundURI(msg, html.getURI().toString(), currentDepth);
                  }
              } catch (Exception e) {
                  //e.printStackTrace();
              }
          }
          
          // process metas
          for (int i=0; i<metas.length; i++) {
              String url = metas[i].getURL();
              try {
                  msg = buildMsg(html.getURI(), url);
                  if (!isDuplicateInSameHtml(previousFoundList, msg)) {
                      parent.foundURI(msg, html.getURI().toString(), currentDepth);
                  }
              } catch (Exception e) {
                  //System.out.println("Frame - invalid uri:" + src);
                  //e.printStackTrace();
              }
          }
          
          
          
      }
      
      public Vector getFormsQuery(Html html) {
            Vector qryList = new Vector();
            Form[] forms = html.getForms();
            for (int i=0; i<forms.length; i++) {
                  Form form = forms[i];
                  Vector oneForm = getFormQuery(form, html.getURI());
                  qryList.addAll(oneForm);
            }
            return qryList;
      }

      private Vector getFormQuery(Form form, URI baseURI) {
            Vector qryStrList = new Vector();
            Vector qryList          = new Vector();
            String queryString = "";
            HttpRequestHeader reqHeader = null;
            HttpBody reqBody = null;
            
            if (form.getAction()==null) {
                return qryList;
            }
            
            int combinationCount = 1;
            for (int i=0; i<form.getSelect().length; i++) {
                // restrict max # of options to 2
                  combinationCount *= (form.getSelect()[i].getOption().length > 2) ? 2: form.getSelect()[i].getOption().length;
            }

            if (combinationCount > 512) {
                  return qryList;
            }

            try {
                  // build all queryString using input tags
                  for (int i=0; i<form.getInput().length; i++) {
                        Input input = form.getInput()[i];
                if (input.getName()== null || input.getName().length() == 0) {
                    continue;
                }
                        if (input.getType() != null) {
                              if (//input.getType().equalsIgnoreCase(Input.SUBMIT) ||
                            
                        // submit field should also be sent for better crawling.
                            
                                    input.getType().equalsIgnoreCase(Input.PASSWORD) ||
                                    input.getType().equalsIgnoreCase(Input.CHECKBOX) ||
                                    input.getType().equalsIgnoreCase(Input.RESET)) {
                                    continue;
                              }
                        }
                        String value = input.getValue();
                        if (input.getType().equalsIgnoreCase(Input.TEXT) && value.equals("")) {
                            // arbitrary fill a "1" for displayable fields.
                            value = "1";
                        }
                queryString = buildPostQueryString(queryString, input.getName(), value);
                  }

            // build all queryString using textarea tags
            for (int i=0; i<form.getTextArea().length; i++) {
                TextArea textarea = form.getTextArea()[i];
                if (textarea.getName()== null || textarea.getName().length() == 0) {
                    continue;
                }
                String value = textarea.getValue();
                queryString = buildPostQueryString(queryString, textarea.getName(), value);
            }
            
                  // build all queryString using SELECT and OPTION tags
                  qryStrList.addElement(queryString);
                  for (int i=0; i<form.getSelect().length;i++) {
                        Select select = form.getSelect()[i];
                if (select.getName()== null || select.getName().length() == 0) {
                    continue;
                }
                        qryStrList = addSelectField(qryStrList, select);
                  }

                  for (int i=0; i<qryStrList.size(); i++) {
                      HttpMessage msg = null;
                        URI uri = null;
                        String qryStr = (String) qryStrList.elementAt(i);
                        if (form.getMethod().equalsIgnoreCase(Form.GET)) {
                            String action = (form.getAction().indexOf(QUESTION) <0) ? form.getAction()+QUESTION+qryStr : form.getAction()+AMPERSAND+qryStr;                       
                              uri = new URI(baseURI, action, true);
                              reqHeader = new HttpRequestHeader(form.getMethod().trim().toUpperCase(), uri, HttpHeader.HTTP11);
                              msg = new HttpMessage(reqHeader);
                        } else if (form.getMethod().equalsIgnoreCase(Form.POST)) {
                    
                    if (!parent.getParent().getSpiderParam().isPostForm()) {
                        continue;
                    }
                            uri = new URI(baseURI, form.getAction(), true);
                            reqHeader = new HttpRequestHeader(form.getMethod().trim().toUpperCase(), uri, HttpHeader.HTTP11);
                            reqBody = new HttpBody(qryStr);
                            reqHeader.setContentLength(reqBody.length());
                            msg = new HttpMessage(reqHeader, reqBody);
                        } else {
                            continue;
                        }
                        msg.getRequestHeader().setContentLength(msg.getRequestBody().length());
                        qryList.add(msg);
                  }
            } catch (Exception e) {
            }

            return qryList;

      }

      private Vector addSelectField(Vector qry, Select select) {
            Vector newQryList = new Vector();
            String queryString = null;
            if (select.getOption() == null) {
                  return newQryList;
            }

            for (int i=0; i<select.getOption().length && i<2; i++) {
                // only select at most 2 option to avoid too much combinations
            
            // if 2nd option exist, don't use first option because first option is usually not valid option
            if (i==0 && select.getOption().length > 1) {
                continue;
            }
            
                  String value = select.getOption()[i].getValue();
            try {
                        if (qry.isEmpty()) {
                              queryString = buildPostQueryString("", select.getName(), value);
                              newQryList.addElement(queryString);
                        } else {
                              for (int j=0; j<qry.size(); j++) {
                                    queryString = (String) qry.elementAt(j);
                                    queryString = buildPostQueryString(queryString, select.getName(), value);
                                    newQryList.addElement(queryString);
                              }
                        }
                  } catch (Exception e) {
                  }
            }
            return newQryList;
      }

      private static final String EQUAL         = "=";
      private static final String AMPERSAND     = "&";
      private static final String QUESTION      = "?";
      private String buildPostQueryString(String oldQuery, String newField, String newValue) {
            StringBuffer result = new StringBuffer(oldQuery);
            if (oldQuery.length() > 0) {
                  result.append(AMPERSAND);
            }
            result.append(newField);
            result.append(EQUAL);
            try {
            result.append(URLEncoder.encode(newValue, "UTF8"));
        } catch (UnsupportedEncodingException e) {
        }
            return result.toString();
      }
      
}

Generated by  Doxygen 1.6.0   Back to index