############################################################################ # # File: chkhtml.icn # # Subject: Program to check HTML files # # Author: Robert J. Alexander # # Date: November 15, 1994 # ############################################################################ # # This file is in the public domain. # ############################################################################ # # Program to check an HTML file. # # Errors detected: # # - Reference to undefined anchor name. # - Duplicated anchor name. # - Warning for unreferenced anchor name. # - Unknown tag. # - Badly formed tag. # - Improper tag nesting. # - Unescaped <, >, ", or &. # - Bad escape string. # - Improper embedding of attributes. # - Bad (non-ascii) characters # # Advises on: # - Use of , tags. # procedure Usage(s) write(&errout,\s) stop( "Usage: ChkHTML -options file..._ \n -u supress warnings for unreferenced anchor names_ \n -q supress errors for \"\\\"\" (quote) character in open text_ \n -g supress errors for \">\" character in open text_ \n -l n level of HTML (default 2)") end global SupressUnrefNames,SupressOpenQuot,SupressOpenGT,HTMLLevel procedure Init(arg) local opt,f ListTypes := ["UL","OL","MENU","DIR"] opt := options(arg,"uqgl+",Usage) if *arg = 0 then Usage() SupressUnrefNames := opt["u"] SupressOpenQuot := opt["q"] SupressOpenGT := opt["g"] HTMLLevel := \opt["l"] | 2 return opt end link options global FileName,LineNbr,TagStack,HRefList,NameSet,NameRefSet,ErrorCount, SeenSet,PlainText,Tagless,Msg,ListTypes procedure main(arg) SetMsg() Init(arg) every CheckHTML(!arg) end procedure CheckHTML(fn) local f,line,c static badChars,scanChars initial { badChars := ~(&cset[33:128] ++ '\t') scanChars := '<>"&' ++ badChars } # # Open the input file. # f := open(fn) | { write(&errout,"Can't open \"",fn,"\"") fail } FileName := fn write(&errout) Msg("Checking HTML format...") ErrorCount := 0 LineNbr := 0 TagStack := [] NameSet := set() NameRefSet := set() HRefList := [] SeenSet := set() PlainText := &null while line := read(f) do line ? { LineNbr +:= 1 while tab(upto(scanChars)) do { case c := move(1) of { "<": ProcessTag(f) | break ">": if /Tagless & /SupressOpenGT then Error("\">\" in open text") "\"": if /Tagless & /SupressOpenQuot then Error("\"\\\"\" (quote) in open text") "&": if /Tagless then ProcessEscape() | Error("\"&\" in open text") default: Error("Bad character: ",image(c)) } } } close(f) CheckStack() CheckHRefs() FileName := fn LineNbr := &null GiveAdvice() Msg((if ErrorCount > 0 then string(ErrorCount) else "No") ," error",(if ErrorCount = 1 then "" else "s"), " detected") return end procedure CheckHRefs() local x every x := !HRefList do { if not member(NameSet,x.value) then { FileName := x.fileName LineNbr := x.lineNbr Error("Anchor name referenced but not defined: ",image(x.value)) } } if /SupressUnrefNames then { LineNbr := &null every x := !(NameSet -- NameRefSet) do { Msg("Warning: Anchor name not referenced: ",image(x)) } } return end procedure CheckStack() local tag every tag := pop(TagStack) do Error(pop(TagStack),"Unterminated tag: <",tag,">") return end procedure ProcessTag(f) local tag,subLine,upTag,endFlag,popCount,tagLines,listType # # Scan to the end of the tag (which might be multiple lines). # tag := "" tagLines := 0 if ="!--" then { # # Comment tag. # until tab(find("-->") + 3) do { &subject := read(f) | { Error("Unclosed HTML comment (\"