// Job: Jay's Own Browser, a web browser written in Java by Jay Skeer // Copyright (C) 1996 Jay Skeer, Jay Prime Positive // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. // /** Subclass of PushbackInputStream which returns HTML tokens */ import Jdb; import java.io.*; import HtmlToken; public class HtmlTokenStream extends PushbackInputStream { int angle_level= 0; HtmlToken unread_token= null; boolean eof= true; public HtmlTokenStream (String file_path) throws IOException { super(new FileInputStream(file_path)); eof= false; } public HtmlTokenStream (File file_path) throws IOException { super(new FileInputStream(file_path)); eof= false; } public void close() throws IOException { eof= true; super.close(); } public boolean eof() { return ((null == unread_token) && eof); } static final String IDOKSTR= "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; boolean isIdChar(int c) { return (0 <= (IDOKSTR.indexOf(c))); } public void skipEmptySpace() throws IOException { int bytes_left= this.available(); if (0 < bytes_left) { int read_byte= this.read(); bytes_left= this.available(); while ((0 < bytes_left) && (Character.isSpace((char)read_byte))) { read_byte= this.read(); bytes_left= this.available(); } if (! Character.isSpace((char)read_byte)) { this.unread(read_byte); /* put back non-space */ } } } /** Parse the folowing bytes in this stream, along with the first_char, as an Id token. */ public HtmlToken readIdToken(int first_char) throws IOException { String value= String.valueOf((char)first_char); int bytes_left= this.available(); int read_byte; boolean working= (0 < bytes_left); while (working) { read_byte= this.read(); if (isIdChar(read_byte)) { value= value + String.valueOf((char)read_byte); bytes_left= this.available(); working= (0 < bytes_left); } else { this.unread(read_byte); working= false; } } HtmlToken rv= new HtmlToken(value); rv.value= value; if (HtmlToken.OTHER == rv.id) { rv.id= HtmlToken.ID; } return rv; } /** Parse the folowing bytes in this stream, along with the first_char, as a "..." type token. */ public HtmlToken readStringToken(int first_char) throws IOException { String value= new String(); int read_byte; int bytes_left= this.available(); boolean working= (0 < bytes_left); while (working) { read_byte= this.read(); if ('"' == read_byte) { HtmlToken rv= new HtmlToken(HtmlToken.STRING); rv.value= value; /* check for reserved words? */ return rv; } else { value= value + String.valueOf((char)read_byte); bytes_left= this.available(); working= (0 < bytes_left); } } /* ran out of characters -- an error! */ HtmlToken rv= new HtmlToken(HtmlToken.STRING); rv.value= value; /* check for reserved words? */ return rv; } /** Parse the folowing bytes in this stream, along with the first_char, as a TEXT type token. */ public HtmlToken readTextToken(int first_char) { //Jdb.enter("HTS.readTextToken()"); StringBuffer value= new StringBuffer(); value.append((char)first_char); HtmlToken rv; try { int bytes_left= this.available(); int read_byte; boolean working= (0 < bytes_left); while (working) { read_byte= this.read(); if ((0 < angle_level) ?('>' == read_byte) :('<' == read_byte)) { this.unread(read_byte); working= false; } else { value.append((char)read_byte); bytes_left= this.available(); working= (0 < bytes_left); } } rv= new HtmlToken(HtmlToken.TEXT); rv.value= value.toString(); } catch (IOException x) { // file io problem??? rv= new HtmlToken(HtmlToken.OTHER, "ioerror"); } //Jdb.exit("HTS.readTextToken()"); return rv; } /** Parse the bytes in a file into (and return) a HtmlComponent which captures the essence of the html file. */ public HtmlToken readToken() { // Jdb.enter("HTS.readToken()"); HtmlToken rv; if (null != unread_token) { rv= unread_token; unread_token= null; // Jdb.trace("HTS.readToken() ... rereads old token"); } else { try { int bytes_left= this.available(); if (bytes_left <= 0) { //Jdb.trace("see EOF condition"); rv= new HtmlToken(HtmlToken.EOF, ""); eof= true; } else { int read_byte; read_byte= this.read(); if ('<' == read_byte) { angle_level++; //Jdb.trace("see OPEN"); rv= new HtmlToken(HtmlToken.OPEN, "<"); } else if ('>' == read_byte) { angle_level--; //Jdb.trace("see CLOSE"); rv= new HtmlToken(HtmlToken.CLOSE, ">"); } else if (-1 == read_byte) { //Jdb.trace("see EOF char"); rv= new HtmlToken(HtmlToken.EOF, ""); eof= true; } else if (0 < angle_level) { if ('/' == read_byte) { //Jdb.trace("see END"); rv= new HtmlToken(HtmlToken.END, "/"); } else if ('=' == read_byte) { rv= new HtmlToken(HtmlToken.EQUAL, "="); //Jdb.trace("see EQUAL"); } else if ('"' == read_byte) { //Jdb.trace("see DblQt"); rv= this.readStringToken(read_byte); } else { Jdb.trace("try id"); unread(read_byte); skipEmptySpace(); read_byte= read(); rv= this.readIdToken(read_byte); } } else { Jdb.trace("try text"); rv= this.readTextToken(read_byte); } } } catch (IOException x) { // file io problem???; rv= new HtmlToken(HtmlToken.OTHER, "ioerror"); } } // Jdb.exit( Jdb.trace( "HTS.readToken() " + Integer.toString(angle_level) + " => "+rv.toString()); return rv; } public HtmlToken readCdata() { HtmlToken rv; try { int read_byte= this.read(); if ('"' == read_byte) { rv= readStringToken(read_byte); } else { rv= readTextToken(read_byte); } } catch (IOException x) { // file io problem??? rv= new HtmlToken(HtmlToken.OTHER, "ioerror"); } return rv; } public HtmlToken readNdata() { HtmlToken rv; try { int read_byte= this.read(); rv= readIdToken(read_byte); } catch (IOException x) { // file io problem??? rv= new HtmlToken(HtmlToken.OTHER, "ioerror"); } return rv; } /** read the token which folows foo= in a tag */ public HtmlToken readArgValueToken(String tag, String name) { //Jdb.enter("HTS.readArgValueToken()"); HtmlToken rv; if ((tag.equalsIgnoreCase("img")) || (tag.equalsIgnoreCase("applet"))) { if ((name.equalsIgnoreCase("width")) || (name.equalsIgnoreCase("height"))) { rv= readNdata(); } else { rv= readCdata(); } } else { rv= readCdata(); } //Jdb.exit("HTS.readArgValueToken()"); return rv; } /** Put the given token (back) as the next token to be read */ public void unreadToken(HtmlToken t) { // Jdb.enter("HTS.unreadToken("+t.toString()+")"); unread_token= t; // Jdb.exit("HTS.unreadToken("+t.toString()+")"); } /** Look at the next token with out removing it from the stream */ public HtmlToken peekToken() { HtmlToken t= readToken(); unreadToken(t); return t; } }