############################################################################
#
#  File:     html.icn
#
#  Subject:  Procedures for parsing HTML
#
#  Author:   Gregg M. Townsend
#
#  Date:     April 26, 2005
#
############################################################################
#
#  This file is in the public domain.
#
############################################################################
#
#  These procedures parse HTML files:
#
#    htchunks(f)         generates the basic chunks -- tags and text --
#                        that compose an HTML file.
#
#    htrefs(f)           generates the tagname/keyword/value combinations
#                        that reference other files.
#
#  These procedures process strings from HTML files:
#
#    httag(s)            extracts the name of a tag.
#
#    htvals(s)           generates the keyword/value pairs from a tag.
#
#    urlmerge(base,new)  interprets a new URL in the context of a base.
#
#    canpath(s)          puts a path in canonical form.
#
############################################################################
#
#  htchunks(f) generates the HTML chunks from file f.
#  It returns strings beginning with
#
# ") + 3) then fail # normal case: discard comment s ||:= tab(0) &subject := (read(f) || "\n") | break } &subject := s # rescan unclosed comment return "