############################################################################ # # File: codeobj.icn # # Subject: Procedures to encode and decode Icon data # # Author: Ralph E. Griswold # # Date: March 25, 2002 # ############################################################################ # # This file is in the public domain. # ############################################################################ # # These procedures provide a way of storing Icon values as strings and # retrieving them. The procedure encode(x) converts x to a string s that # can be converted back to x by decode(s). These procedures handle all # kinds of values, including structures of arbitrary complexity and even # loops. For "scalar" types -- null, integer, real, cset, and string -- # # decode(encode(x)) === x # # For structures types -- list, set, table, and record types -- # decode(encode(x)) is, for course, not identical to x, but it has the # same "shape" and its elements bear the same relation to the original # as if they were encoded and decode individually. # # No much can be done with files, functions and procedures, and # co-expressions except to preserve type and identification. # # The encoding of strings and csets handles all characters in a way # that it is safe to write the encoding to a file and read it back. # # No particular effort was made to use an encoding of value that # minimizes the length of the resulting string. Note, however, that # as of Version 7 of Icon, there are no limits on the length of strings # that can be written out or read in. # ############################################################################ # # The encoding of a value consists of four parts: a tag, a length, # a type code, and a string of the specified length that encodes the value # itself. # # The tag is omitted for scalar values that are self-defining. # For other values, the tag serves as a unique identification. If such a # value appears more than once, only its tag appears after the first encoding. # There is, therefore, a type code that distinguishes a label for a previously # encoded value from other encodings. Tags are strings of lowercase # letters. Since the tag is followed by a digit that starts the length, the # two can be distinguished. # # The length is simply the length of the encoded value that follows. # # The type codes consist of single letters taken from the first character # of the type name, with lower- and uppercase used to avoid ambiguities. # # Where a structure contains several elements, the encodings of the # elements are concatenated. Note that the form of the encoding contains # the information needed to separate consecutive elements. # # Here are some examples of values and their encodings: # # x encode(x) # ------------------------------------------------------- # # 1 "1i1" # 2.0 "3r2.0" # &null "0n" # "\377" "4s\\377" # '\376\377' "8c\\376\\377" # procedure main "a4pmain" # co-expression #1 (0) "b0C" # [] "c0L" # set() "d0S" # table("a") "e3T1sa" # L1 := ["hi","there"] "f11L2shi5sthere" # # A loop is illustrated by # # L2 := [] # put(L2,L2) # # for which # # x encode(x) # ------------------------------------------------------- # # L2 "g3L1lg" # # Of course, you don't have to know all this to use encode and decode. # ############################################################################ # # Links: escape, gener, procname, typecode # ############################################################################ # # Requires: co-expressions # ############################################################################ invocable all link escape, gener, procname, typecode global outlab, inlab record triple(type,value,tag) # Encode an arbitary value as a string. # procedure encode(x,level) local str, tag, Type static label initial label := create "l" || star(string(&lcase)) if /level then outlab := table() # table is global, but reset at # each root call. tag := "" Type := typecode(x) if Type == !"ri" then str := string(x) # first the scalars else if Type == !"cs" then str := image(string(x))[2:-1] # remove quotes else if Type == "n" then str := "" else if Type == !"LSRTfpC" then # next the structures and other types if str := \outlab[x] then # if the object has been processed, Type := "l" # use its label and type it as label. else { tag := outlab[x] := @label # else make a label for it. str := "" if Type == !"LSRT" then { # structures every str ||:= encode( # generate, recurse, and concatenate case Type of { !"LS": !x # elements "T": x[[]] | !sort(x,3) # default, then elements "R": type(x) | !x # type then elements } ,1) # indicate internal call } else str ||:= case Type of { # other things "f": image(x) "C": "" "p": procname(x) } } else stop("unsupported type in encode: ",image(x)) return tag || *str || Type || str end # Generate decoded results. At the top level, there is only one, # but for structures, it is called recursively and generates the # the decoded elements. # procedure decode(s,level) local p if /level then inlab := table() # global but reset every p := separ(s) do { suspend case p.type of { "l": inlab[p.value] # label for an object "i": integer(p.value) "s": escape(p.value) "c": cset(escape(p.value)) "r": real(p.value) "n": &null "L": delist(p.value,p.tag) "R": derecord(p.value,p.tag) "S": deset(p.value,p.tag) "T": detable(p.value,p.tag) "f": defile(p.value) "C": inlab[p.tag] := create &fail # can't hurt much to fail "p": inlab[p.tag] := (proc(p.value) | stop("encoded procedure not found")) \ 1 default: stop("unexpected type in decode: ",p.type) } } end # Generate triples for the encoded values in concatenation. # procedure separ(s) local p, size while *s ~= 0 do { p := triple() s ?:= { p.tag := tab(many(&lcase)) size := tab(many(&digits)) | break p.type := move(1) p.value := move(size) tab(0) } suspend p } end # Decode a list. The newly constructed list is added to the table that # relates tags to structure values. # procedure delist(s,tag) local a inlab[tag] := a := [] # insert object for label every put(a,decode(s,1)) return a end # Decode a set. Compare to delist above. # procedure deset(s,tag) local S inlab[tag] := S := set() every insert(S,decode(s,1)) return S end # Decode a record. # procedure derecord(s,tag) local R, e e := create decode(s,1) # note use of co-expressions to control # generation, since record must be constructed # before fields are produced. inlab[tag] := R := proc(@e)() | stop("error in decoding record") every !R := @e return R end # Decode a table. # procedure detable(s,tag) local t, e e := create decode(s,1) # see derecord above; here it's the default # value that motivates co-expressions. inlab[tag] := t := table(@e) while t[@e] := @e return t end # Decode a file. # procedure defile(s, tag) return inlab[tag] := case s of { # files aren't so simple ... "&input": &input "&output": &output "&errout": &errout default: s ? { ="file(" # open for reading to play it safe open(tab(upto(')'))) | stop("cannot open encoded file") } } end