############################################################################ # # File: gedcom.icn # # Subject: Procedures for reading GEDCOM files # # Author: Gregg M. Townsend # # Date: March 25, 2002 # ############################################################################ # # This file is in the public domain. # ############################################################################ # # These procedures read and interpret GEDCOM files, a standard # format for genealogy databases. # ############################################################################ # # gedload(f) loads GEDCOM data from file f and returns a gedcom # record containing the following fields: # tree root of tree of gednode records # id table of labeled nodes, indexed by @ID@ # fam list of FAM nodes (marriages) # ind list of INDI nodes (individuals) # # The tree is composed of gednode records R containing these fields: # level level # id ID (label), including @...@ delimiters # tag tag # data data # lnum line number # parent parent node in tree # ref referenced node, if any # sub sub-entry list # hcode unique hashcode, if INDI node # # gedwalk(tree) generates the nodes of the tree in preorder. # # Three procedures find descendants of a node based on a sequence # of identifying tag strings: # gedsub(R, tag...) generates subnodes specified by tag sequence # gedval(R, tag...) generates data values of those subnodes # gedref(R, tag...) generates nodes referenced by those subnodes # # Three procedures extract a person's name from an INDI record: # gedfnf(R) produces "John Quincy Adams" form # gedlnf(R) produces "Adams, John Quincy" form # gednmf(R,f) produces an arbitrary format, substituting # prefix, firstname, lastname, suffix for # "P", "F", "L", "S" (respectively) in f # # geddate(R) finds the DATE subnode of a node and returns a string # of at least 12 characters in a standard form such as "11 Jul 1767" # or "abt 1810". It is assumed that the input is in English. # # gedyear(R) returns the year from the DATE subnode of a node. # # gedfind(g,s) generates the individuals under gedcom record g # that are named by s, a string of whitespace-separated words. # gedfind() generates each INDI node for which every word of s # is matched by either a word of the individual's name or by # the birth year. Matching is case-insensitive. # ############################################################################ record gedcom( tree, # tree of data records id, # table of labeled nodes, indexed by @ID@ fam, # list of FAM nodes ind # list of INDI nodes ) record gednode( level, # level id, # ID (label), including @...@ delimiters tag, # tag data, # data lnum, # line number parent, # parent node in tree ref, # referenced node, if any sub, # sub-entry list hcode # hashcode, if INDI node ) $define WHITESPACE ' \t\n\r' # gedload(f) -- load GEDCOM data from file f, returning gedcom record. procedure gedload(f) #: load GEDCOM data from file f local line, lnum, r, curr local root, id, fam, ind local hset, h1, h2, c lnum := 0 root := curr := gednode(-1, , "ROOT", "", lnum, , , []) id := table() fam := [] ind := [] while line := read(f) do { lnum +:= 1 if *line = 0 then next if not (r := gedscan(line)) then { write(&errout, "ERR, line ", lnum, ": ", line) next } r.lnum := lnum r.sub := [] if r.tag == "CONC" then { # continuation line (no \n) curr.data ||:= r.data next } if r.tag == "CONT" then { # continuation line (with \n) curr.data ||:= "\n" || r.data next } while curr.level >= r.level do curr := curr.parent put(curr.sub, r) r.parent := curr curr := r id[\r.id] := r case r.tag of { "FAM": put(fam, r) "INDI": put(ind, r) } } every r := gedwalk(root) do r.ref := id[r.data] hset := set() every r := !ind do { h1 := h2 := gedhi(r) every c := !"123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" do if member(hset, h2) then h2 := h1 || c # add disambiguating suffix if needed else break insert(hset, r.hcode := h2) } return gedcom(root, id, fam, ind) end # gedscan(f) -- scan one line of a GEDCOM record, returning gednode record procedure gedscan(s) # (internal procedure) local level, id, tag, data static alnum initial alnum := &letters ++ &digits ++ '_' s ? { tab(many(WHITESPACE)) level := tab(many(&digits)) | fail tab(many(WHITESPACE)) if id := (="@" || tab(upto('@') + 1)) then tab(many(WHITESPACE)) tag := tab(many(alnum)) | fail tab(many(WHITESPACE)) data := tab(0) return gednode(level, id, tag, data) } end # gedwalk(r) -- walk GEDCOM tree, generating nodes in preorder procedure gedwalk(r) #: generate GEDCOM tree nodes in preorder suspend r | gedwalk(!r.sub) fail end # gedsub(r, field...) -- generate subrecords with given tags # gedval(r, field...) -- generate values of subrecords with given tags # gedref(r, field...) -- generate nodes referenced by given tags procedure gedsub(r, f[]) #: find subrecords local tag, x tag := get(f) | fail every x := !r.sub do { if x.tag == tag then if *f > 0 then suspend gedsub ! push(f, x) else suspend x } end procedure gedval(a[]) #: find subrecord values suspend (gedsub ! a).data end procedure gedref(a[]) #: find referenced nodes suspend \(gedsub ! a).ref end # gedfnf(r) -- get name from individual record, first name first procedure gedfnf(r) #: get first name first return gednmf(r, "P F L S") end # gedlnf(r) -- get name from individual record, last name first procedure gedlnf(r) #: get last name first local s s := gednmf(r, "L, P F S") s ? { =", " return tab(0) } end # gednmf(r, f) -- general name formatter # # substitutes the first name, last name, prefix, and suffix # for the letters F, L, P, S respectively in string f. # multiple spaces are suppressed. procedure gednmf(r, f) #: format name local c, s, prefix, first, last, suffix prefix := gedval(r, "TITL" | "NPFX") | gedval(r, "NAME", "NPFX") s := gedval(r, "NAME") | fail s ? { first := trim(tab(upto('/') | 0)) ="/" last := trim(tab(upto('/') | 0)) ="/" suffix := gedval(r, "NSFX") | ("" ~== tab(0)) } s := "" f ? { while s ||:= tab(upto('PFLS ')) do { while c := tab(any('PFLS ')) do { s ||:= case c of { "P": \prefix "F": \first "L": \last "S": \suffix " ": s[-1] ~== " " } } } s ||:= tab(0) } return trim(s) end # geddate(r) -- get date from record in standard form procedure geddate(r) #: get canonical date local s, t, w static ftab initial { ftab := table() ftab["JAN"] := "Jan"; ftab["FEB"] := "Feb"; ftab["MAR"] := "Mar" ftab["APR"] := "Apr"; ftab["MAY"] := "May"; ftab["JUN"] := "Jun" ftab["JUL"] := "Jul"; ftab["AUG"] := "Aug"; ftab["SEP"] := "Sep" ftab["OCT"] := "Oct"; ftab["NOV"] := "Nov"; ftab["DEC"] := "Dec" ftab["ABT"] := "abt"; ftab["BEF"] := "bef"; ftab["AFT"] := "aft" ftab["CAL"] := "cal"; ftab["EST"] := "est" } s := trim(gedval(r, "DATE"), WHITESPACE) | fail t := "" s ? while not pos(0) do { tab(many(WHITESPACE)) w := tab(upto(WHITESPACE) | 0) t ||:= " " || (\ftab[w] | w) } if *t > 13 then return t[2:0] else return right(t, 12) end # gedyear(r) -- get year from event record procedure gedyear(r) #: get year local d, y d := gedval(r, "DATE") | fail d ? while tab(upto(&digits)) do if (y := tab(many(&digits)) \ 1) >= 1000 then return y end # gedhi -- generate hashcode for individual record # # The hashcode uses two initials, final digits of birth year, # and a 3-letter hashing of the full name and birthdate fields. procedure gedhi(r) # (internal procedure) local s, name, bdate, bd static lc, uc initial { uc := string(&ucase) lc := string(&lcase) } s := "" name := gedval(r, "NAME") | "" name ? { # prefer initial of nickname; else skip unused firstname in parens tab(upto('"') + 1) | (="(" & tab(upto(')') + 1)) tab(any(' \t')) s ||:= tab(any(&letters)) | "X" # first initial tab(upto('/') + 1) tab(any(' \t')) s ||:= tab(any(&letters)) | "X" # second initial } bdate := geddate(gedsub(r, "BIRT")) | "" bd := bdate[-2:0] | "00" if not (bd ? (tab(many(&digits)) & pos(0))) then bd := "99" s ||:= bd || gedh3a(name || bdate) return map(s, lc, uc) end # gedh3a(s) -- hash arbitrary string into three alphabetic characters procedure gedh3a(s) # (internal procedure) local n, d1, d2, d3, c n := 0 every c := !map(s) do if not upto(' \t\f\r\n', c) then n := 37 * n + ord(c) - 32 d1 := 97 + (n / 676) % 26 d2 := 97 + (n / 26) % 26 d3 := 97 + n % 26 return char(d1) || char(d2) || char(d3) end # gedfind(g, s) -- find records by name from gedcom record # # g is a gedcom record; s is a string of whitespace-separated words. # gedfind() generates each INDI node for which every word of s # is matched by either a word of the individual's name or by # the birth year. Matching is case-insensitive. procedure gedfind(g, s) #: find individual by name local r every r := !g.ind do if gedmatch(r, s) then suspend r end # gedmatch(r, s) -- match record against name # # s is a string of words to match name field and/or birth year. # Matching is case sensitive. procedure gedmatch(r, s) # (internal procedure) local w every w := gedlcw(s) do (w == (gedlcw(gedval(r, "NAME")) | gedyear(gedsub(r, "BIRT")))) | fail return r end # gedlcw(s, c) -- generate words from string s separated by chars from c # # words are mapped to lower-case to allow case-insensitive comparisons procedure gedlcw(s, c) # (internal procedure) /c := '/ \t\r\n\v\f' map(s) ? { tab(many(c)) while not pos(0) do { suspend tab(upto(c) | 0) \ 1 tab(many(c)) } } fail end