OT: Hi...Help making searching program for linux...

From: fito <fito@dont-contact.us>
Date: Sat, 11 Sep 1999 13:07:51 -0500 (CDT)

        Hi...my name is Rodolfo Conde i'm from mexico hope i'm not
bothering you with this mail but the fact that i'm writting to you is
because of this:

        I want to make a program for linux like copernic99 for
windows...if you haven't seen it it's a program that receives some words
related to something and then it consults many search engines with these
words and receives all the results from the sites and puts them all
together in a html file for you to browse it....it can consult http,
newsgroups and a lot of search engines....as i guess there's lot of people
well aware of HTTP and CGI i thought that someone here could if got the
time lend a
hand at this as i've tried to do a text version of my program but only
altavista responds from all the server i ask results...the other didn't
even give me a HTTP 404 :(.....i'm trying to make my program in C....

        Hope you can help me...if you'd want to see copernic99 in action
by yourself download it at www.copernic.com...Anything would be helpfull,
guide, references, url's, what must exactly be send to the server so they
answer the search query ?? i tried altavista, yahoo, infoseek, lycos,
etc...Of course answer me in my e-mail address as this is not what the
mailing list is about...if any interested in help i'm sending my text
version of the program....

        thanks for reading....again hope i'm not bothering you....

----- begin prubSearch.c ----

/*
  Rodolfo Conde Martinez
  Buscador
  19990822
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <netdb.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <netinet/in.h>

/*
  Estructura de los datos de cada server
*/
typedef struct {
  char servname[33];
  char fileName[20];
  char queryString[500];
  struct sockaddr_in sa;
  struct hostent *server;
} server;

main(int argc, char **argv)
{
  // the servers
    server servs[] = {{"www.altavista.com", "altavista-news.html",
                       "POST /cgi-bin/query?pg=q&what=news&q=%s+ HTTP/1.0"},
                      {"www.altavista.com", "altavista.html",
                       "POST /cgi-bin/query?pg=q&kl=XX&stype=stext&q=%s HTTP/1.0"},
                      {"search.yahoo.com", "yahoo.html", "POST /bin/query?p=%s&hc=0&hs=0 HTTP/1.1"},
                      {"infoseek.go.com", "infoseek.html",
                       "POST /Titles?qt=%s+&col=WW&su=IS&lk=noframes&sux=home_searchbox HTTP/1.1"},
                      {"astalavista4.box.sk", "astalavista.html",
                       "POST cgi-bin/robot?srch=%s&submit=+search+&project=robot&gfx=robot HTTP/1.0"},
                      {"search.exite.com", "exite.html", "POST /search.gw?search=%s&Submit=Search HTTP/1.1"},
                      {"www.hotbot.com", "hotbot.html",
                       "POST /MT=%s&SM=MC&DV=O&LG=any&DC=10&DE=2&BT=L&submit.x=36&submit.y=9 HTTP/1.0"}
                      , {"www.looksmart.com", "looksmart.html", "POST /r_search?search=&key=%s&x=3&y=15 HTTP/1.0"},
                      {"www.lycos.com", "lycos.html", "POST /cgi-bin/pursuit?cat=dir&query=%s&x=58&y=8 HTTP/1.0"},
                      {"www.snap.com", "snap.html",
                       "POST /search/directory/results/1,61,home-0,00.html?tag=st.sn.fdsb&keyword=%s HTTP/1.0"},
                      {"www.google.com", "google.html", "POST /search?q=%s HTTP/1.0"}};
    int i, j, fd, sockserv;
    char keywords[BUFSIZ + 1] = {'\0'}, *strtmp, *substr, strtmp1[100], buffer[BUFSIZ + 1] = {'\0'};

    if (argc == 1)
      printf("Usage: %s word1 [word2 word3 ... ]\n", *argv);
    else {
      // make the query string with '+' instead of ' '
      strtmp = keywords;
      for (i = 1; i < argc; i++) {
           strcat(strtmp, argv[i]);

       if (argc - 1 != i)
         strcat(strtmp, "+");
      }

      // Begin main loop...
      for (i = 0; i < 11; i++) {
       
         if ((servs[i].server = gethostbyname(servs[i].servname)) == NULL)
           fprintf(stderr, "%s: Host desconocido, pasando al siguiente\n", servs[i].servname);
         else {
           //memcpy(&(servs[i].sa.sin_addr.s_addr), (servs[i].server)->h_addr, servs[i].server->h_length);
           bcopy((char *) servs[i].server->h_addr, (char *) &servs[i].sa.sin_addr.s_addr, servs[i].server->h_length);
           servs[i].sa.sin_family = AF_INET;
           servs[i].sa.sin_port = htons(80);
           // servs[i].server->h_name = servs[i].servname;

           
           if ((sockserv = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
             fputs("No se puede abrir el socket", stderr);
             exit(EXIT_FAILURE);
           }

           printf("Connecting to %s on port 80.....", servs[i].servname);

           // Nos conectamos...
           if (connect(sockserv, &(servs[i].sa), sizeof(servs[i].sa)) == -1)
             fprintf(stderr, "\nCan't connect to %s...skipping server\n", servs[i].servname);
           else {
             puts("Connected !!");
             // prepare all the query string to be send
             strtmp = substr = servs[i].queryString;
             while (*++substr != '%')
               ;
             strtmp = substr - 1;
             substr += 2; // jump '%' y 's'
             strcpy(strtmp1, substr);
             *(strtmp + 1) = '\0';
             strcat(strtmp, keywords);
             strcat(strtmp, strtmp1);

             // prepare the file for the results...
             if ((fd = creat(servs[i].fileName, S_IRUSR | S_IWUSR)) == -1) {
               fprintf(stderr, "Can't open file %s..Exiting\n", servs[i].fileName);
               exit(EXIT_FAILURE);
             }

             // Send query to server....
             write(sockserv, servs[i].queryString, strlen(servs[i].queryString));
             printf("Request %s send, waiting for answer...", servs[i].queryString);
             // wait and write answer to file
             while ((j = read(sockserv, buffer, BUFSIZ)) > 0)
               write(fd, buffer, j);

             puts("Done...closing connection....");
             close(sockserv);
             close(fd);
             sleep(5);
           }
         }
       }
    }
}

fito.

--
fito@crosswinds.net
ICQ 14757500
Received on Sat Sep 11 1999 - 12:12:22 MDT

This archive was generated by hypermail pre-2.1.9 : Tue Dec 09 2003 - 16:48:24 MST