############################################################################ # # File: mtf3.icn # # Subject: Program to map tar file # # Author: Richard Goerwitz # # Date: June 3, 1991 # ############################################################################ # # This file is in the public domain. # ############################################################################ # # Version: 3.4 # ############################################################################ # # PURPOSE: Maps 15+ char. filenames in a tar archive to 14 chars. # Handles both header blocks and the archive itself. Mtf is intended # to facilitate installation of tar'd archives on systems subject to # the System V 14-character filename limit. # # USAGE: mtf inputfile [-r reportfile] [-e .extensions] [-x exceptions] # # "Inputfile" is a tar archive. "Reportfile" is file containing a # list of files already mapped by mtf in a previous run (used to # avoid clashes with filenames in use outside the current archive). # The -e switch precedes a list of filename .extensions which mtf is # supposed to leave unscathed by the mapping process # (single-character extensions such as .c and .o are automatically # preserved; -e allows the user to specify additional extensions, # such as .pxl, .cpi, and .icn). The final switch, -x, precedes a # list of strings which should not be mapped at all. Use this switch # if, say, you have a C file with a structure.field combination such # as "thisisveryverybig.hashptr" in an archive that contains a file # called "thisisveryverybig.h," and you want to avoid mapping that # portion of the struct name which matches the name of the overlong # file (to wit, "mtf inputfile -x thisisveryverybig.hashptr"). To # prevent mapping of any string (including overlong filenames) begin- # ning, say, with "thisisvery," use "mtf inputfile -x thisisvery." # Be careful with this option, or you might end up defeating the # whole point of using mtf in the first place. # # OUTPUT FORMAT: Mtf writes a mapped tar archive to the stdout. # When finished, it leaves a file called "map.report" in the current # directory which records what filenames were mapped and how. Rename # and save this file, and use it as the "reportfile" argument to any # subsequent runs of mtf in this same directory. Even if you don't # plan to run mtf again, this file should still be examined, just to # be sure that the new filenames are acceptable, and to see if # perhaps additional .extensions and/or exceptions should be # specified. # # BUGS: Mtf only maps filenames found in the main tar headers. # Because of this, mtf cannot accept nested tar archives. If you try # to map a tar archive within a tar file, mtf will abort with a nasty # message about screwing up your files. Please note that, unless you # give mtf a "reportfile" to consider, it knows nothing about files # existing outside the archive. Hence, if an input archive refers to # an overlong filename in another archive, mtf naturally will not # know to shorten it. Mtf will, in fact, have no way of knowing that # it is a filename, and not, say, an identifier in a C program. # Final word of caution: Try not to use mtf on binaries. It cannot # possibly preserve the correct format and alignment of strings in an # executable. Same goes for compressed files. Mtf can't map # filenames that it can't read! # ############################################################################ global filenametbl, chunkset, short_chunkset # see procedure mappiece(s) global extensions, no_nos # ditto record hblock(name,junk,size,mtime,chksum, # tar header struct; linkflag,linkname,therest) # see readtarhdr(s) procedure main(a) local usage, intext, i, current_list usage := "usage: mtf inputfile [-r reportfile] " || "[-e .extensions] [-x exceptions]" *a = 0 & stop(usage) intext := open_input_file(a[1]) & pop(a) i := 0 extensions := []; no_nos := [] while (i +:= 1) <= *a do { case a[i] of { "-r" : readin_old_map_report(a[i+:=1]) "-e" : current_list := extensions "-x" : current_list := no_nos default : put(current_list,a[i]) } } every !extensions ?:= (=".", tab(0)) # Run through all the headers in the input file, filling # (global) filenametbl with the names of overlong files; # make_table_of_filenames fails if there are no such files. make_table_of_filenames(intext) | { write(&errout,"mtf: no overlong path names to map") a[1] ? (tab(find(".tar")+4), pos(0)) | write(&errout,"(Is ",a[1]," even a tar archive?)") exit(1) } # Now that a table of overlong filenames exists, go back # through the text, remapping all occurrences of these names # to new, 14-char values; also, reset header checksums, and # reformat text into correctly padded 512-byte blocks. Ter- # minate output with 512 nulls. seek(intext,1) every writes(output_mapped_headers_and_texts(intext)) close(intext) write_report() # Record mapped file and dir names for future ref. exit(0) end procedure open_input_file(s) local intext intext := open("" ~== s,"r") | stop("mtf: can't open ",s) find("UNIX",&features) | stop("mtf: I'm not tested on non-UNIX systems.") s[-2:0] == ".Z" & stop("mtf: sorry, can't accept compressed files") return intext end procedure readin_old_map_report(s) local mapfile, line, chunk, lchunk initial { filenametbl := table() chunkset := set() short_chunkset := set() } mapfile := open_input_file(s) while line := read(mapfile) do { line ? { if chunk := tab(many(~' \t')) & tab(upto(~' \t')) & lchunk := move(14) & pos(0) then { filenametbl[chunk] := lchunk insert(chunkset,chunk) insert(short_chunkset,chunk[1:16]) } if /chunk | /lchunk then stop("mtf: report file, ",s," seems mangled.") } } end procedure make_table_of_filenames(intext) local header # chunkset is global # search headers for overlong filenames; for now # ignore everything else while header := readtarhdr(reads(intext,512)) do { # tab upto the next header block tab_nxt_hdr(intext,trim_str(header.size),1) # record overlong filenames in several global tables, sets fixpath(trim_str(header.name)) } *\chunkset ~= 0 | fail return &null end procedure output_mapped_headers_and_texts(intext) # Remember that filenametbl, chunkset, and short_chunkset # (which are used by various procedures below) are global. local header, newtext, full_block, block, lastblock # Read in headers, one at a time. while header := readtarhdr(reads(intext,512)) do { # Replace overlong filenames with shorter ones, according to # the conversions specified in the global hash table filenametbl # (which were generated by fixpath() on the first pass). header.name := left(map_filenams(header.name),100,"\x00") header.linkname := left(map_filenams(header.linkname),100,"\x00") # Use header.size field to determine the size of the subsequent text. # Read in the text as one string. Map overlong filenames found in it # to shorter names as specified in the global hash table filenamtbl. newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size))) # Now, find the length of newtext, and insert it into the size field. header.size := right(exbase10(*newtext,8) || " ",12," ") # Calculate the checksum of the newly retouched header. header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ") # Finally, join all the header fields into a new block and write it out full_block := ""; every full_block ||:= !header suspend left(full_block,512,"\x00") # Now we're ready to write out the text, padding the final block # out to an even 512 bytes if necessary; the next header must start # right at the beginning of a 512-byte block. newtext ? { while block := move(512) do suspend block pos(0) & next lastblock := left(tab(0),512,"\x00") suspend lastblock } } # Write out a final null-filled block. Some tar programs will write # out 1024 nulls at the end. Dunno why. return repl("\x00",512) end procedure trim_str(s) # Knock out spaces, nulls from those crazy tar header # block fields (some of which end in a space and a null, # some just a space, and some just a null [anyone know # why?]). return s ? { (tab(many(' ')) | &null) & trim(tab(find("\x00")|0)) } end procedure tab_nxt_hdr(f,size_str,firstpass) # Tab upto the next header block. Return the bypassed text # as a string if not the first pass. local hs, next_header_offset hs := integer("8r" || size_str) next_header_offset := (hs / 512) * 512 hs % 512 ~= 0 & next_header_offset +:= 512 if 0 = next_header_offset then return "" else { # if this is pass no. 1 don't bother returning a value; we're # just collecting long filenames; if \firstpass then { seek(f,where(f)+next_header_offset) return } else { return reads(f,next_header_offset)[1:hs+1] | stop("mtf: error reading in ", string(next_header_offset)," bytes.") } } end procedure fixpath(s) local s2, piece # Fixpath is a misnomer of sorts, since it is used on # the first pass only, and merely examines each filename # in a path, using the procedure mappiece to record any # overlong ones in the global table filenametbl and in # the global sets chunkset and short_chunkset; no fixing # is actually done here. s2 := "" s ? { while piece := tab(find("/")+1) do s2 ||:= mappiece(piece) s2 ||:= mappiece(tab(0)) } return s2 end procedure mappiece(s) local chunk, i, lchunk # Check s (the name of a file or dir as recorded in the tar header # being examined) to see if it is over 14 chars long. If so, # generate a unique 14-char version of the name, and store # both values in the global hashtable filenametbl. Also store # the original (overlong) file name in chunkset. Store the # first fifteen chars of the original file name in short_chunkset. # Sorry about all of the tables and sets. It actually makes for # a reasonably efficient program. Doing away with both sets, # while possible, causes a tenfold drop in execution speed! # global filenametbl, chunkset, short_chunkset, extensions local j, ending initial { /filenametbl := table() /chunkset := set() /short_chunkset := set() } chunk := trim(s,'/') if chunk ? (tab(find(".tar")+4), pos(0)) then { write(&errout, "mtf: Sorry, I can't let you do this.\n", " You've nested a tar archive within\n", " another tar archive, which makes it\n", " likely I'll f your filenames ubar.") exit(2) } if *chunk > 14 then { i := 0 if /filenametbl[chunk] then { # if we have not seen this file, then... repeat { # ...find a new unique 14-character name for it; # preserve important suffixes like ".Z," ".c," etc. # First, check to see if the original filename (chunk) # ends in an important extension... if chunk ? (tab(find(".")), ending := move(1) || tab(match(!extensions)|any(&ascii)), pos(0) ) # ...If so, then leave the extension alone; mess with the # middle part of the filename (e.g. file.with.extension.c -> # file.with001.c). then { j := (15 - *ending - 3) lchunk:= chunk[1:j] || right(string(i+:=1),3,"0") || ending } # If no important extension is present, then reformat the # end of the file (e.g. too.long.file.name -> too.long.fi01). else lchunk := chunk[1:13] || right(string(i+:=1),2,"0") # If the resulting shorter file name has already been used... if lchunk == !filenametbl # ...then go back and find another (i.e. increment i & try # again; else break from the repeat loop, and... then next else break } # ...record both the old filename (chunk) and its new, # mapped name (lchunk) in filenametbl. Also record the # mapped names in chunkset and short_chunkset. filenametbl[chunk] := lchunk insert(chunkset,chunk) insert(short_chunkset,chunk[1:16]) } } # If the filename is overlong, return lchunk (the shortened # name), else return the original name (chunk). If the name, # as passed to the current function, contained a trailing / # (i.e. if s[-1]=="/"), then put the / back. This could be # done more elegantly. return (\lchunk | chunk) || ((s[-1] == "/") | "") end procedure readtarhdr(s) local this_block # Read the silly tar header into a record. Note that, as was # complained about above, some of the fields end in a null, some # in a space, and some in a space and a null. The procedure # trim_str() may (and in fact often _is_) used to remove this # extra garbage. this_block := hblock() s ? { this_block.name := move(100) # <- to be looked at later this_block.junk := move(8+8+8) # skip the permissions, uid, etc. this_block.size := move(12) # <- to be looked at later this_block.mtime := move(12) this_block.chksum := move(8) # <- to be looked at later this_block.linkflag := move(1) this_block.linkname := move(100) # <- to be looked at later this_block.therest := tab(0) } integer(this_block.size) | fail # If it's not an integer, we've hit # the final (null-filled) block. return this_block end procedure map_filenams(s) local el, ch # Chunkset is global, and contains all the overlong filenames # found in the first pass through the input file; here the aim # is to map these filenames to the shortened variants as stored # in filenametbl (GLOBAL). local s2, tmp_chunk_tbl, tmp_lst static new_chunklist initial { # Make sure filenames are sorted, longest first. Say we # have a file called long_file_name_here.1 and one called # long_file_name_here.1a. We want to check for the longer # one first. Otherwise the portion of the second file which # matches the first file will get remapped. tmp_chunk_tbl := table() every el := !chunkset do insert(tmp_chunk_tbl,el,*el) tmp_lst := sort(tmp_chunk_tbl,4) new_chunklist := list() every put(new_chunklist,tmp_lst[*tmp_lst-1 to 1 by -2]) } s2 := "" s ? { until pos(0) do { # first narrow the possibilities, using short_chunkset if member(short_chunkset,&subject[&pos:&pos+15]) # then try to map from a long to a shorter 14-char filename then { if match(ch := !new_chunklist) & not match(!no_nos) then s2 ||:= filenametbl[=ch] else s2 ||:= move(1) } else s2 ||:= move(1) } } return s2 end # From the IPL. Thanks, Ralph - # Author: Ralph E. Griswold # Date: June 10, 1988 # exbase10(i,j) convert base-10 integer i to base j # The maximum base allowed is 36. procedure exbase10(i,j) static digits local s, d, sign initial digits := &digits || &lcase if i = 0 then return 0 if i < 0 then { sign := "-" i := -i } else sign := "" s := "" while i > 0 do { d := i % j if d > 9 then d := digits[d + 1] s := d || s i /:= j } return sign || s end # end IPL material procedure get_checksum(r) local sum, field # Calculates the new value of the checksum field for the # current header block. Note that the specification say # that, when calculating this value, the chksum field must # be blank-filled. sum := 0 r.chksum := " " every field := !r do every sum +:= ord(!field) return sum end procedure write_report() # This procedure writes out a list of filenames which were # remapped (because they exceeded the SysV 14-char limit), # and then notifies the user of the existence of this file. local outtext, stbl, i, j, mapfile_name # Get a unique name for the map.report (thereby preventing # us from overwriting an older one). mapfile_name := "map.report"; j := 1 until not close(open(mapfile_name,"r")) do mapfile_name := (mapfile_name[1:11] || string(j+:=1)) (outtext := open(mapfile_name,"w")) | open(mapfile_name := "/tmp/map.report","w") | stop("mtf: Can't find a place to put map.report!") stbl := sort(filenametbl,3) every i := 1 to *stbl -1 by 2 do { match(!no_nos,stbl[i]) | write(outtext,left(stbl[i],35," ")," ",stbl[i+1]) } write(&errout,"\nmtf: ",mapfile_name," contains the list of changes.") write(&errout," Please save this list!") close(outtext) return &null end