<!-- @version CVS $Id: omstd20.xml,v 1.134 2004/05/13 11:01:05 openmath Exp $ -->



<!DOCTYPE book 
SYSTEM "docbook/docbookx.dtd"
[
<!-- 
 docbook customisations:
    add MathML
    allow sidebar in figures (used for change log)
    add author attribute to sidebar
    add xml:space (to correct for IE bug, dropping spaces)
-->

<!ENTITY % local.para.char.mix "|xs:schema|comment|string|grammar|CD|CDSignatures|CDGroup">
<!ENTITY % inlineobj.char.class "math">
<!ATTLIST book xml:space (default|preserve) #IMPLIED >
<!--
  MathML DTD (somewhat simplified)
-->

<!ELEMENT math (mrow|mn|mi|mo|msub|msup|mtext|mspace|mfrac|mfenced)+>
<!--
  IE Bug doesn't allow this, so switch to mml namespace via stylesheet
  xmlns CDATA #FIXED "http://www.w3.org/1998/Math/MathML" 
-->
<!ATTLIST math
  id ID #IMPLIED
  display CDATA #IMPLIED
  class CDATA #IMPLIED
  revisionflag CDATA #IMPLIED
>


<!ELEMENT mfenced (mrow|mn|mi|mo|msub|msup|mtext|mspace|mfrac|mfenced)+>
<!ELEMENT mrow (mrow|mn|mi|mo|msub|msup|mtext|mspace|mfrac|mfenced)+>
<!ELEMENT mn (#PCDATA)>
<!ELEMENT mi (#PCDATA)>
<!ATTLIST mi
  mathcolor CDATA #IMPLIED
  mathvariant (bold) #IMPLIED>
<!ELEMENT mo (#PCDATA)>
<!ATTLIST mo
  fence CDATA #IMPLIED
  separator CDATA #IMPLIED>
<!ELEMENT msup ((mn|mi|mo|mrow),(mn|mi|mo|mrow))>
<!ELEMENT msub ((mn|mi|mo|mrow),(mn|mi|mo|mrow))>
<!ELEMENT mfrac ((mn|mi|mo|mrow),(mn|mi|mo|mrow))>
<!ELEMENT mtext (#PCDATA)>
<!ELEMENT mspace EMPTY>
<!ATTLIST mspace width CDATA #IMPLIED
                 linebreak CDATA #IMPLIED>



<!ELEMENT comment (#PCDATA|token)*>
<!ELEMENT string (#PCDATA|token)*>

<!ELEMENT xs:any ANY>
<!ATTLIST xs:any
    processContents CDATA #IMPLIED
    namespace CDATA #IMPLIED
>
<!ELEMENT xs:enumeration ANY>
<!ATTLIST xs:enumeration
    value CDATA #IMPLIED
>

<!ELEMENT xs:attributeGroup ANY>
<!ATTLIST xs:attributeGroup
    ref CDATA #IMPLIED
    name CDATA #IMPLIED
>
<!ELEMENT xs:schema ANY>
<!ATTLIST xs:schema
    elementFormDefault CDATA #IMPLIED
    targetNamespace CDATA #IMPLIED
    xmlns CDATA #IMPLIED
    xmlns:xs CDATA #IMPLIED
    xmlns:om CDATA #IMPLIED
>

<!ELEMENT xs:choice ANY>
<!ATTLIST xs:choice
    maxOccurs CDATA #IMPLIED
    minOccurs CDATA #IMPLIED
>
<!ELEMENT xs:sequence ANY>
<!ATTLIST xs:sequence
    maxOccurs CDATA #IMPLIED
    minOccurs CDATA #IMPLIED
>
<!ELEMENT xs:group ANY>
<!ATTLIST xs:group
    ref CDATA #IMPLIED
    name CDATA #IMPLIED
    maxOccurs CDATA #IMPLIED
    minOccurs CDATA #IMPLIED
>
<!ELEMENT xs:complexType ANY>
<!ATTLIST xs:complexType
  mixed CDATA #IMPLIED
>
<!ELEMENT xs:import ANY>
<!ATTLIST xs:import
    namespace CDATA #IMPLIED
    schemaLocation CDATA #IMPLIED
>
<!ELEMENT xs:pattern ANY>
<!ATTLIST xs:pattern
  value CDATA #IMPLIED
>
<!ELEMENT xs:simpleType ANY>
<!ELEMENT xs:simpleContent ANY>
<!ELEMENT xs:restriction ANY>
<!ATTLIST xs:restriction
  base CDATA #IMPLIED
>
<!ELEMENT xs:extension ANY>
<!ATTLIST xs:extension
  base CDATA #IMPLIED
>
<!ELEMENT xs:element ANY>
<!ATTLIST xs:element
    ref CDATA #IMPLIED
    name CDATA #IMPLIED
>
<!ELEMENT xs:attribute ANY>
<!ATTLIST xs:attribute
    ref CDATA #IMPLIED
    type CDATA #IMPLIED
    use CDATA #IMPLIED
    name CDATA #IMPLIED
>

<!ELEMENT grammar ANY>
<!ATTLIST grammar
    datatypeLibrary CDATA #IMPLIED
    name CDATA #IMPLIED
    xmlns CDATA #IMPLIED
    ns CDATA #IMPLIED>
<!ELEMENT define ANY>
<!ATTLIST define
    name CDATA #IMPLIED
>
<!ELEMENT choice ANY>
<!ELEMENT ref EMPTY>
<!ATTLIST ref
    name CDATA #IMPLIED
>
<!ELEMENT anyName ANY>
<!ELEMENT zeroOrMore ANY>
<!ELEMENT oneOrMore ANY>
<!ELEMENT nsName ANY>
<!ELEMENT value ANY>
<!ELEMENT start ANY>

<!ELEMENT except ANY>
<!ELEMENT text ANY>
<!ELEMENT param ANY>
<!ATTLIST param
    name CDATA #IMPLIED
>
<!ELEMENT data ANY>
<!ATTLIST data
    type CDATA #IMPLIED
>
<!ELEMENT element ANY>
<!ATTLIST element
    name CDATA #IMPLIED
>
<!ELEMENT attribute ANY>
<!ATTLIST attribute
    name CDATA #IMPLIED
>

<!ENTITY % group.module "IGNORE">
<!ELEMENT group ANY>
<!ENTITY % optional.module "IGNORE">
<!ELEMENT optional ANY>
<!ATTLIST optional
    name CDATA #IMPLIED>


<!--
  Abbreviations used in this document
-->
<!ENTITY OM "<emphasis>OpenMath</emphasis>">
<!ENTITY exml "<acronym>xml</acronym>">


<!ENTITY digits "0-9">
<!ENTITY exadigits "0-9A-F">
<!ENTITY lcalpha "a-z">
<!ENTITY ucalpha "A-Z">
<!ENTITY sign "[+-]"><!-- dpc: Correct regexp for + or - -->
<!ENTITY zsp "">
<!ENTITY longrightarrow "<mo>&#8594;</mo>"><!--dpc: short, actually -->

<!ENTITY varnamechar "+=(),-./:?!#$&#37;*;@[]^_`{|}"><!-- dpc: remove `TeX error?  -->

<!ENTITY omrnc SYSTEM "openmath2rnc.xml">
<!ENTITY cdrnc SYSTEM "omcd2rnc.xml">
<!ENTITY sigrnc SYSTEM "omcdsig2rnc.xml">
<!ENTITY cdgrouprnc SYSTEM "omcdgroup2rnc.xml">
<!ENTITY mathmlcdg SYSTEM "mathml.cdg">

<!ENTITY omxsd SYSTEM "openmath2.xsd">
<!ENTITY omdtd SYSTEM "openmath2dtd.xml">
<!ENTITY omrng SYSTEM "openmath2.rng">
<!ENTITY cdrng SYSTEM "omcd2.rng">
<!ENTITY sigrng SYSTEM "omcdsig2.rng">

<!ENTITY % openmath2 SYSTEM "openmath2.dtd">
%openmath2;
<!ENTITY % omdtd "">
<!ENTITY % omcd2 SYSTEM "omcd2.dtd">
%omcd2;
<!ENTITY % omcdsig2 SYSTEM "omcdsig2.dtd">
%omcdsig2;

<!--
Can't include this as name clashes with CD dtd (as DTD not namespace
aware) so include subset of the DTD inline below.
<!ENTITY % omcdgroup2 SYSTEM "omcdgroup2.dtd">
%omcdgroup2;
-->
<!ELEMENT CDGroupName (#PCDATA)>
<!ELEMENT CDGroupVersion (#PCDATA)>
<!ELEMENT CDGroupRevision (#PCDATA)>
<!ELEMENT CDGroupURL (#PCDATA)>
<!ELEMENT CDGroupDescription (#PCDATA)>
<!ELEMENT CDGroupMember (CDComment?,CDName,CDVersion?,CDURL?)>
<!ELEMENT CDGroup (CDGroupName,CDGroupVersion,CDGroupRevision?,
                   CDGroupURL,CDGroupDescription,
                   (CDGroupMember|CDComment)*)>
<!ATTLIST CDGroup
  xmlns CDATA #FIXED 'http://www.openmath.org/OpenMathCDG'>


<!ENTITY metacd SYSTEM "meta.ocd">
<!ENTITY arith1cd SYSTEM "arith1.ocd">
<!ENTITY arith1sts SYSTEM "arith1.sts">
<!ENTITY errorcd SYSTEM "error.ocd">
]>
<book xml:space="preserve">
<title>The &OM; Standard</title>
<bookinfo>
<releaseinfo>2.0 Public Draft 6c (11 May 2004)</releaseinfo>
<author><firstname>The &OM; Society</firstname></author>


<editor><firstname>S.</firstname><surname>Buswell</surname></editor>
<editor><firstname>O.</firstname><surname>Caprotti</surname></editor>
<editor><firstname>D.</firstname><othername>P.</othername><surname>Carlisle</surname></editor>
<editor><firstname>M.</firstname><othername>C.</othername><surname>Dewar</surname></editor>
<editor><firstname>M.</firstname><surname>Gaetano</surname></editor>
<editor><firstname>M.</firstname><surname>Kohlhase</surname></editor>
<date>May 2004</date>

<copyright>
<year>2000&#8211;2004</year>
<holder>The OpenMath Society</holder>
</copyright>

<abstract>
<para revisionflag="deleted">This document proposes &OM; as a standard for the communication of
  semantically rich mathematical objects. This draft of the &OM; 
  standard comprises the following: a description of &OM; objects, the
  grammar of &exml; and of the binary encoding of objects, a
  description of Content Dictionaries and an &exml; document type
  definition for validating Content Dictionaries. The non-normative
  <xref linkend="cha_his"/> of this document briefly overviews the history
  of &OM;.</para>

<para revisionflag="added">This document describes version 2 of
&OM;: a standard for
the representation and communication of mathematical objects.  &OM;
allows the <emphasis>meaning</emphasis> of an object to be encoded
rather than just a visual representation.  It is designed to allow the
free exchange of mathematical objects between software systems and human
beings.  On the worldwide web it is designed to allow mathematical
expressions embedded in web pages to be manipulated and computed with in
a meaningful and correct way.  It is designed to be machine-generatable
and machine-readable, rather than written by hand.
</para>

<para revisionflag="added">The &OM; Standard is the official reference for
the &OM; language and has been approved by the &OM; Society.  It is not
intended as an introductory document or a user's guide; for the latest
available material of this nature please consult the &OM; web-site at
<ulink url="http://www.openmath.org">http://www.openmath.org</ulink>.</para>

<para revisionflag="added">This document includes an overview of the
&OM; architecture, an abstract description of &OM; objects and two
mechanisms for producing concrete encodings of such objects.  The first,
in &exml;, is designed primarily for use on the web, in documents, and
for applications which want to mix &OM; as a content representation with
MathML as a presentation format.  The second, a binary format, is
designed for applications which wish to exchange very large objects, or
a lot of data as efficiently as possible.  This document also includes a
description of Content Dictionaries &#8212; the mechanism by which the meaning
of a symbol in the &OM; language is encoded, as well as an XML encoding
for them.  Finally it includes guidelines for the development of
&OM;-compliant applications.</para>

</abstract>
</bookinfo>
  
<toc/>
<lot><title>List of Figures</title></lot>

<chapter id="cha_his" revisionflag="deleted">
<title>&OM; Movement</title>

<para>This chapter is a historical account of &OM; and should be regarded
as non-normative.</para>

<para>&OM; is a standard for representing mathematical objects,
allowing them to be exchanged between computer programs, stored in
databases, or published on the worldwide web.  While the original
designers were mainly developers of computer algebra systems, it
<phrase revisionflag="deleted">is now attracting</phrase>
<phrase revisionflag="added">has since attracted</phrase>
interest from other areas of scientific computation and
from many publishers of electronic documents with a significant
mathematical content.  There is a strong relationship to the MathML
recommendation <citation>MathML_2003</citation> from the Worldwide Web
Consortium, and a large overlap between the two developer communities.
MathML deals principally with the <emphasis>presentation</emphasis> of
mathematical objects, while &OM; is solely concerned with their
semantic meaning or <emphasis>content</emphasis>.  While MathML does
have some limited facilities for dealing with content, it also allows
semantic information encoded in &OM; to be embedded inside a MathML
structure.  Thus the two technologies may be seen as highly
complementary.</para>

<section id="sec_hist">
<title>History</title>

<para>&OM; was originally developed through a series of workshops held
in Zurich (1993 and 1996), Oxford (1994), Amsterdam (1995), Copenhagen
(1995), Bath (1996), Dublin (1996), Nice (1997), Yorktown Heights
(1997), Berlin (1998), and Tallahassee (1998).  The participants in
these workshops formed a global &OM; community which was coordinated
by a Steering Committee and operated through electronic mailing groups
and ad-hoc working parties.  This loose arrangement has been
formalised through the establishment of an &OM; Society.  Up until the
end of 1996 much of the work of the community was funded through a
grant from the Human Capital and Mobility program of the European
Union, 
<phrase revisionflag="added"> and by</phrase>
the contributions of several institutions and individuals.  A
document outlining the objectives and basic design of &OM; was
produced (later published as
<citation>Abbott_Leeuwen_Strotmann_98</citation>).  By the end of 1996
a simplified specification had been agreed upon and some prototype
implementations had come about
<citation>Dalmas_Gaetano_Watt_97</citation>.</para>

<para>In 1996 a group of European participants in &OM; decided to bid
for funding under the European Union's Fourth Framework Programme for
strategic research in information technology.  This bid was successful
and the project started in late 1997.  The principal aims of the
project were to formalise &OM; as a standard and to develop it
further through industrial applications; this process led to the
OpenMath 1.0 and 1.1 standards which were endorsed at
workshops in Tallahassee (November 1998) and Eindhoven (June
1999).</para>

<para revisionflag="added">In November 1998 the &OM; Society was established to coordinate
all &OM; activities. The society is based in Helsinki, Finland and is
coordinated by the executive committee whose members are elected by the
society. The official web page of the society is
<ulink url="http://www.openmath.org">http://www.openmath.org</ulink>.</para>

<para revisionflag="added">In 2001 the European Union agreed to fund a Thematic Network
under its Fifth Framework programme to coordinate further work on
&OM; and MathML, and in particular to support a further series of workshops.  
This document is one outcome of that project, and seeks to update &OM;
in the light of recent developments in XML and of the &OM; community's
collective experience working with the old standard.</para>
</section>

<section id="sec_omsoc">
<title>&OM; Society</title>

<para revisionflag="deleted">In November 1998 the &OM; Society has been established to coordinate
all &OM; activities. The society is based in Helsinki, Finland and is
steered by the executive committee whose members are elected by the
society. The official web page of the society is
<ulink url="http://www.openmath.org">http://www.openmath.org</ulink>.</para>

<para revisionflag="added">The &OM; Society continues to provide
long-term coordination of &OM; activities.  Membership is open to
anybody who is active in &OM;; for further details see the web-site
at 
<ulink url="http://www.openmath.org/society/index.html">
http://www.openmath.org/society/index.html</ulink>.</para>

</section>

</chapter>

<chapter id="cha_int">
<title>Introduction to &OM;</title>




<para>This chapter briefly introduces &OM; concepts and notions that are
referred to in the rest of this document.</para>

<section id="sec_om-arch">
<title>&OM; Architecture</title>


<figure id="fig_om">
    <title>The &OM; Architecture</title>
    <graphic fileref="om-arch" depth="500" width="700"/>
</figure>

<para>The architecture of &OM; is described in <xref
linkend="fig_om"/> and summarizes the interactions among the different
&OM; components.  There are three layers of representation of a
mathematical object <citation revisionflag="deleted">OM_98</citation>. The first is
a  private layer that
is the internal representation used by an application.  The second is
an abstract layer that is the representation as an &OM; object.
<phrase revisionflag="added">Note that these
two layers may, in some cases, be the same.</phrase>
The third is a
communication layer that translates the &OM; object representation into
a stream of bytes. An application dependent program manipulates the
mathematical objects using its internal representation; it can convert
them to &OM; objects and communicate them by using the byte stream
representation of &OM; objects.</para>
</section>

<section id="sec_intro-obj">
<title>&OM; Objects and Encodings</title>


<para>&OM; objects are representations of mathematical entities that
can be communicated among various software applications in a
meaningful way, that is, preserving their
<quote>semantics</quote>.</para>

<para>&OM; objects and encodings are described in detail in <xref
linkend="cha_obj"/> and <xref linkend="cha_enco"/>.</para>


<para>The standard endorses two encodings, in &exml; and in binary
format.
<phrase revisionflag="added">At the time of writing, these are the encodings
supported by most existing &OM; tools and applications,</phrase>
<phrase revisionflag="deleted"> These are the encodings supported by 
the official &OM; libraries</phrase>
however they are not the only possible encodings of &OM;
objects. Users who wish to define their own encoding
<phrase revisionflag="deleted">using some other
specific language (e.g. Lisp) may</phrase>
<phrase revisionflag="added">are free to</phrase>
do so provided that there is
<phrase revisionflag="deleted">an
effective translation from this encoding to an official one
</phrase>
<phrase revisionflag="added">a well-defined correspondence
between the new encoding and the abstract model defined in <xref
linkend="cha_obj"/>. </phrase>
</para>

</section>

<section id="sec_intro-cd">
<title>Content Dictionaries</title>


<para>Content Dictionaries (CDs) are used to assign informal and formal
semantics to all symbols used in the &OM; objects. They define the
symbols used to represent concepts arising in a particular area of
mathematics.</para>

<para>The Content Dictionaries are public; they represent the actual
common knowledge among &OM; applications.  Content Dictionaries fix
the <quote>meaning</quote> of objects independently of the
application.  The application receiving the object may then recognize
whether or not, according to the semantics of the symbols defined in
the Content Dictionaries, the object can be transformed to the
corresponding internal representation used by the application.</para>
</section>

<section id="sec_addnfiles">
<title>Additional Files</title> 
<para>Several
additional files are related to Content Dictionaries.  Signature files
contain the signatures of symbols defined in some &OM; Content
Dictionary and their format is endorsed by this standard.</para>

<para>Furthermore, the standard fixes how to define a specific
set of Content Dictionaries as a CDGroup.</para>

<para>Auxiliary files that define presentation and rendering or that
are used for manipulating and processing Content Dictionaries are not
discussed by the standard.</para>

</section>
<section id="sec_phrasebooks">
<title>Phrasebooks</title>



<para>The conversion of an &OM; object to/from the internal
representation in a software application is performed by an interface
program called a <emphasis>Phrasebook</emphasis>. The translation is
governed by the Content Dictionaries and the specifics of the
application. It is envisioned that a software application dealing with
a specific area of mathematics declares which Content Dictionaries it
understands. As a consequence, it is expected that the Phrasebook of
the application is able to translate &OM; objects built using symbols
from these Content Dictionaries to/from the internal mathematical
objects of the application.
</para>

 <para>&OM; objects do not
specify any computational behaviour; they merely represent mathematical
expressions.  Part of the &OM; philosophy is to leave it to the
application to decide what it does with an object once it has received
it.  &OM; is not a query or programming language. Because of this,
&OM; does not prescribe a way of forcing <quote>evaluation</quote> or
<quote>simplification</quote> of objects like
<math><mn>2</mn><mo>+</mo><mn>3</mn></math> or
<math><mi>sin</mi><mo>(</mo><mi>&#960;</mi><mo>)</mo></math>. Thus,
the same object <math><mn>2</mn><mo>+</mo><mn>3</mn></math> could be
transformed to <math><mn>5</mn></math> by a computer algebra system,
or displayed as <math><mn>2</mn><mo>+</mo><mn>3</mn></math> by a
typesetting tool.</para>
</section>
</chapter>

<chapter id="cha_obj">
<title>&OM; Objects</title>



<para>In this chapter we provide a self-contained description of &OM;
objects. We first do so by means of an abstract grammar
description (<xref linkend="sec_omabs"/>)
and then give a more informal description (<xref
linkend="sec_omin"/>).</para>


<section id="sec_omabs">
<title>Formal Definition of &OM; Objects</title>


<para>&OM; represents mathematical objects as terms or as labelled
trees that are called &OM; objects or &OM; expressions. The definition
of an abstract &OM; object is then the following.</para>


<section id="sec_basic">
<title>Basic &OM; objects</title> <para>The Basic &OM; Objects form
the leaves of the &OM; Object tree.  A Basic &OM; Object is one of
the following.</para> 
<itemizedlist>
<listitem><para><phrase>(i)</phrase> Integer.</para> <para>Integers in
  the mathematical sense, with no predefined range.  They are
  <quote>infinite precision</quote> integers (also called
  <quote>bignums</quote> in computer algebra).</para>

</listitem>
<listitem><para><phrase>(ii)</phrase> <acronym>ieee</acronym> floating point
    number.</para> <para>Double precision floating-point numbers
    following the <acronym>ieee</acronym> 754-1985
    standard&#160;<citation>ieee754_85</citation>.</para>

</listitem>
<listitem><para><phrase>(iii)</phrase> Character string.</para>

 <para>A Unicode Character string. This also corresponds to <quote>characters</quote> in
  &exml;.</para>

</listitem>
<listitem><para><phrase>(iv)</phrase> Bytearray.</para>

 <para>A sequence of bytes.</para>

</listitem>
<listitem><para><phrase>(v)</phrase> Symbol.</para>
<para revisionflag="deleted">A Symbol
encodes two fields of information, a <emphasis>name</emphasis> and a
<emphasis>Content Dictionary</emphasis>. Each is a sequence of
characters matching a regular expression, as described below.</para>

    <para revisionflag="added">A Symbol encodes three fields of
    information, a <emphasis>symbol name</emphasis>, a <emphasis>Content
    Dictionary name</emphasis>, and (optionally) a <emphasis>Content
    Dictionary base URI</emphasis>.  The name of a symbol is a sequence
    of characters matching the regular expression described in <xref
    linkend="sec_names"/>.  The Content Dictionary is the location of
    the definition of the symbol, consisting of a name (a sequence of
    characters matching the regular expression described in <xref
    linkend="sec_names"/>) and, optionally, a unique prefix called a
    <emphasis>cdbase</emphasis> which is used to disambiguate multiple
    Content Dictionaries of the same name.  There are other properties
    of the symbol that are not explicit in these fields but whose
    values may be obtained by inspecting the Content Dictionary
    specified. These include the symbol definition, formal properties
    and examples and, optionally, a <emphasis>Role</emphasis> which is
    a restriction on where the symbol may appear in an &OM; object.  The
    possible roles are described in <xref linkend="sec_roles"/>.
    </para>

</listitem>
<listitem><para><phrase>(vi)</phrase> Variable.</para>


<para>A Variable <phrase revisionflag="deleted">consists of</phrase>
<phrase revisionflag="added">must have</phrase> a
<emphasis>name</emphasis> which is a sequence of characters matching a
regular expression, as described in <xref linkend="sec_names"/>.
</para>

</listitem>
</itemizedlist>
</section>

<section id="sec_derived" revisionflag="added">
<title>Derived &OM; Objects</title>

<para>Derived &OM; objects are currently used as a way by which non-&OM;
data is embedded inside an &OM; object.
A derived &OM; object is built as follows: 
<itemizedlist>
<listitem><para><phrase>(i)</phrase> If <math><mi>A</mi></math> is
<emphasis>not</emphasis> an &OM; object, then <math><mi
mathvariant="bold">foreign</mi><mfenced><mi>A</mi></mfenced></math> is an &OM;
<emphasis>foreign object</emphasis>.  An &OM; foreign object may optionally
have an <emphasis>encoding</emphasis> field which describes how its contents
should be interpreted.</para> 
</listitem>
</itemizedlist>
</para>
</section>

<section id="sec_compound">
<title><phrase revisionflag="deleted">Compound </phrase>&OM; Objects</title>
  
<para>&OM; objects are built recursively as follows.
<itemizedlist>
<listitem><para><phrase>(i)</phrase> Basic &OM; objects are &OM; objects.
<phrase revisionflag="added">(Note that derived &OM; objects are
<emphasis>not</emphasis> &OM; objects, but are used to construct &OM;
objects as described below.)</phrase></para>
</listitem>

<listitem>
  <para>
    <phrase>(ii)</phrase> If
    <math><msub><mi>A</mi><mn>1</mn></msub></math>,
    <phrase>&#8230;</phrase>,
    <math><msub><mi>A</mi><mi>n</mi></msub></math>
    <math><mo>(</mo><mi>n</mi><mo>&gt;</mo><mn>0</mn><mo>)</mo></math>
    are &OM; objects, then
  <math display="block">
  <mi mathvariant="bold">application</mi><mo>(</mo><msub><mi>A</mi><mn>1</mn></msub><mo>,</mo> <mi>&#8230;</mi><mo>,</mo> <msub><mi>A</mi><mi>n</mi></msub><mo>)</mo>
  </math>
  is an &OM; <emphasis>application object</emphasis>.</para>
    
 </listitem> <listitem><para><phrase>(iii)</phrase> If
  <math><msub><mi>S</mi><mn>1</mn></msub><mo>,</mo>
  <mi>&#8230;</mi><mo>,</mo> <msub><mi>S</mi><mi>n</mi></msub></math>
  are &OM; symbols, and
<phrase revisionflag="deleted">
<math><mi>A</mi></math>,
  <math><msub><mi>A</mi><mn>1</mn></msub></math>,
  <phrase>&#8230;</phrase>, <math><msub><mi>A</mi><mi>n</mi></msub></math>, <math><mo>(</mo><mi>n</mi><mo>&gt;</mo><mn>0</mn><mo>)</mo></math> are &OM; objects, then
</phrase>
<phrase revisionflag="added">
<math><mi>A</mi></math> is an &OM; object, and
  <math><msub><mi>A</mi><mn>1</mn></msub></math>,
  <phrase>&#8230;</phrase>, <math><msub><mi>A</mi><mi>n</mi></msub></math> <math><mo>(</mo><mi>n</mi><mo>&gt;</mo><mn>0</mn><mo>)</mo></math> are &OM; objects or &OM; derived objects, then
</phrase>

  <math display="block"><mi mathvariant="bold">attribution</mi>
  <mo>(</mo><mi>A</mi><mo>,</mo> <msub><mi>S</mi><mn>1</mn></msub>
  <mspace width=".3em"/> <msub><mi>A</mi><mn>1</mn></msub><mo>,</mo>
  <mspace width=".3em"/> <mi>&#8230;</mi> <mspace width=".3em"/>
  <mo>,</mo> <msub><mi>S</mi><mi>n</mi></msub> <mspace width=".3em"/>
  <msub><mi>A</mi><mi>n</mi></msub><mo>)</mo></math> is an &OM;
  <emphasis>attribution object</emphasis>.
  </para> 

  <para>
  <phrase revisionflag="deleted">and</phrase> <math><mi>A</mi></math>
  is the object <emphasis>stripped of attributions</emphasis>. <phrase
  revisionflag="added">
  <math><msub><mi>S</mi><mn>1</mn></msub><mo>,</mo>
  <mi>&#8230;</mi><mo>,</mo> <msub><mi>S</mi><mi>n</mi></msub></math>
  are referred to as <emphasis>keys</emphasis> and
  <math><msub><mi>A</mi><mn>1</mn></msub></math>,
  <phrase>&#8230;</phrase>,
  <math><msub><mi>A</mi><mi>n</mi></msub></math> as their associated
  <emphasis>values</emphasis></phrase>.
  <phrase revisionflag="deleted">The operation of recursively
  applying stripping to the stripped object is called
  <emphasis>flattening of the attribution</emphasis>.

                  When the stripped object after flattening is a
  variable, the attributed object is called <emphasis>attributed
  variable</emphasis>.</phrase>
  <phrase revisionflag="added">If, after  recursively
  applying stripping to remove attributions, the resulting
  un-attributed object is a 
  variable, the original attributed object is called an <emphasis>attributed
  variable</emphasis>.</phrase>
  </para>
</listitem>

<listitem><para><phrase>(iv)</phrase> If <math><mi>B</mi></math> and
  <math><mi>C</mi></math> are &OM; objects, and
  <math><msub><mi>v</mi><mn>1</mn></msub></math>,
  <math><mi>&#8230;</mi></math>,
  <math><msub><mi>v</mi><mi>n</mi></msub></math>
  <math><mo>(</mo><mi>n</mi> <mo>&#8805;</mo>
  <mn>0</mn><mo>)</mo></math> are &OM; variables or attributed
  variables, then
  <math display="block">
  <mi mathvariant="bold">binding</mi> <mo>(</mo><mi>B</mi><mo>,</mo> <msub><mi>v</mi><mn>1</mn></msub><mo>,</mo> <mi>&#8230;</mi><mo>,</mo> <msub><mi>v</mi><mi>n</mi></msub><mo>,</mo> <mi>C</mi><mo>)</mo>
  </math>
is an &OM; <emphasis>binding object</emphasis>.</para>

</listitem>
<listitem><para><phrase>(v)</phrase> If <math><mi>S</mi></math> is an
&OM; symbol and <math><msub><mi>A</mi><mn>1</mn></msub></math>,
<phrase>&#8230;</phrase>,
<math><msub><mi>A</mi><mi>n</mi></msub></math>
<math><mo>(</mo><mi>n</mi> <mo>&#8805;</mo>
<mn>0</mn><mo>)</mo></math> are &OM; objects <phrase revisionflag="added">or
&OM; derived objects</phrase>, then <math
display="block"><mi mathvariant="bold">error</mi>
<mo>(</mo><mi>S</mi><mo>,</mo>
<msub><mi>A</mi><mn>1</mn></msub><mo>,</mo><mi>&#8230;</mi><mo>,</mo><msub><mi>A</mi><mi>n</mi></msub><mo>)</mo>
  </math>
  is an &OM; <emphasis>error object</emphasis>.</para>
</listitem>
</itemizedlist>
</para>
</section>

<section id="sec_roles" revisionflag="added">
<title>&OM; Symbol Roles</title>

<para>
We say that an &OM; symbol is used to <emphasis>construct</emphasis>
an &OM; object if it is the first child of an &OM; application,
binding or error object, or an even-indexed child of an &OM;
attribution object (i.e. the <emphasis>key</emphasis> in a
<emphasis>(key, value)</emphasis> pair).
The <emphasis>role</emphasis> of an &OM; symbol is a restriction
on how it may be used to construct a compound &OM; object and, in the
case of the key in an attribution object, a clarification of how that
attribution should be interpreted.  Possible roles are:
<orderedlist numeration="lowerroman">

<listitem><para><emphasis>binder</emphasis> The symbol may 
appear as the first child of an &OM; binding object.
</para></listitem>

<listitem><para> <emphasis>attribution</emphasis> The symbol may 
be used as key in an &OM; attribution object, i.e. as the first
element of a (key, value) pair, or in an equivalent context (for example
to refer to the value of an attribution).  This form of attribution
may be ignored by an application, so should be used for information
which does not change the meaning of the attributed &OM; object.
</para></listitem> 

<listitem><para> <emphasis>semantic-attribution</emphasis> This is the
same as <emphasis>attribution</emphasis> except that it modifies the
meaning of the attributed &OM; object and thus cannot be ignored by an
application.  </para></listitem> 

<listitem><para> <emphasis>error</emphasis> The symbol can appear
as the first child of an &OM; error object.  </para></listitem>

<listitem><para> <emphasis>application</emphasis> The symbol can appear
as the first child of an &OM; application object.  </para></listitem>

<listitem><para> <emphasis>constant</emphasis> The symbol cannot be
used to construct an &OM; compound object.

</para></listitem>

</orderedlist>

A symbol cannot have more than one role and 
cannot be used to construct a compound &OM; object in a way
which requires a different role (using the definition of construct given
earlier in this section).
This means that one cannot use a symbol which binds some variables to
construct, say, an application object.  However it does not prevent
the use of that symbol as an <emphasis>argument</emphasis> in an
application object (where by argument we mean a child with index
greater than 1). 
</para>

<para> 
If no role is indicated then the symbol can be used anywhere.  Note
that this is not the same as saying that the symbol's role is
<emphasis>constant</emphasis>.
</para>


</section>

</section>
<section id="sec_omin">
<title>Further Description of &OM; Objects</title>



  

<para>Informally, an &OM; <phrase role="sl">object</phrase> can be
viewed as a tree and is also referred to as a term.  The objects at
the leaves of &OM; trees are called <phrase role="sl">basic
objects</phrase>.  The basic objects supported by &OM; are:
<variablelist>
<varlistentry><term>Integer</term><listitem><para>Arbitrary Precision
integers.</para> </listitem></varlistentry>
<varlistentry><term>Float</term><listitem> <para>&OM; floats are
<acronym>ieee</acronym> 754 Double precision floating-point
numbers. Other types of floating point number may be encoded in &OM;
by the use of suitable content dictionaries.</para>
  
</listitem>
</varlistentry>
<varlistentry><term>Character strings</term><listitem><para>are
  sequences of characters. These characters come from the Unicode
  standard&#160;<citation>UNICODE</citation>.</para>
  
</listitem></varlistentry>
<varlistentry><term>Bytearrays</term><listitem><para>are sequences of
bytes. There is no <quote>byte</quote> in &OM; as an object of its
own. However, a single byte can of course be represented by a
bytearray of length 1.  The difference between strings and bytearrays
is the following: a character string is a sequence of bytes with a
fixed interpretation (as characters, Unicode texts may require several
bytes to code one character), whereas a bytearray is an uninterpreted
sequence of bytes with no intrinsic meaning.  Bytearrays could be used
inside &OM; errors to provide information to, for example, a debugger;
they could also contain intermediate results of calculations, or
<quote>handles</quote> into computations or databases.</para>
</listitem>
</varlistentry>
<varlistentry><term>Symbols</term><listitem>
  
  
  <para>
 are uniquely defined by the Content Dictionary in which they occur
  and by a name.
<phrase revisionflag="deleted">In the definition in <xref linkend="sec_omabs"/> we have
  left this information implicit. However, it should be kept in mind
  that all symbols appearing in an &OM; object are defined in a
  Content Dictionary.
</phrase>The form of these definitions is explained in
  <xref linkend="cha_cd"/>.  Each symbol has no more than one
  definition in a Content Dictionary. Many Content Dictionaries may
  define differently a symbol with the same name (e.g. the symbol
  <systemitem>union</systemitem> is defined as
  associative-commutative set theoretic union in a Content Dictionary
  <systemitem>set1</systemitem> but another Content Dictionary,
  <systemitem>multiset1</systemitem> might define a symbol
  <systemitem>union</systemitem> as the union of multi-sets).
<phrase revisionflag="deleted">The name
  of a symbol can only contain alphanumeric characters and
  underscores.  More precisely, a symbol name matches the following
  regular expression:</phrase>
<blockquote revisionflag="deleted"><para>
  [<systemitem>A</systemitem>-<systemitem>Z</systemitem><systemitem>a</systemitem>-<systemitem>z</systemitem>]
  [<systemitem>A</systemitem>-<systemitem>Z</systemitem><systemitem>a</systemitem>-<systemitem>z</systemitem><systemitem>0</systemitem>-<systemitem>9</systemitem><systemitem>_</systemitem>]*
  </para></blockquote>
  <phrase revisionflag="deleted">Notice that these symbol names are case sensitive.  &OM;
  <emphasis>recommends</emphasis> that symbol names should be no longer than
  100 characters.</phrase>
</para>
  
 
  </listitem>
</varlistentry>
<varlistentry><term>Variables</term><listitem><para>are meant to
  denote parameters, variables or indeterminates (such as bound
  variables of function definitions, variables in summations and
  integrals, independent variables of derivatives).  <phrase
  revisionflag="deleted">Plain variable names are restricted to use a
  subset of the printable ASCII characters.  Formally the names must
  match the regular expression:</phrase> <blockquote   revisionflag="deleted"><para>
  [A-Za-z0-9=+(),-./:?!#$%*;=@[]^_`{|}]+ </para></blockquote>
</para>
</listitem>
</varlistentry>
</variablelist> </para>


<para revisionflag="added">Derived &OM; objects are constructed from
non-&OM; data.  They differ from bytearrays in that they can have any
structure.  Currently there is only one way of making a derived &OM;
object.</para>

<variablelist revisionflag="added">
<varlistentry><term>Foreign</term><listitem><para>is used to import a
non-&OM; object into an &OM; attribution.  Examples of its use could
be to annotate a formula with a visual or aural rendering, an
animation etc.  Foreign objects may also appear in &OM; error objects, for
example to allow an application to report an error in processing such
an object.  
</para>
</listitem>
</varlistentry>
</variablelist>


<para>The four following constructs can be used to make compound
 &OM; objects <phrase revisionflag="added"> out of basic or derived &OM;
objects</phrase>.</para>
<variablelist>
<varlistentry><term>Application</term><listitem><para>constructs an
  &OM; object from a sequence of one or more &OM; objects. The first
  object of an application is referred to as its <quote>head</quote> while
  the remaining objects are called its <quote>arguments</quote>.  An &OM;
  application object can be used to convey the mathematical notion of
  application of a function to a set of arguments.  For instance,
  suppose that the &OM; symbol <math><mi>sin</mi></math> is defined in
  a <phrase revisionflag="added">suitable</phrase> Content Dictionary,
  <phrase revisionflag="deleted">for trigonometry</phrase> then <math><mi
  mathvariant="bold">application</mi><mo>(</mo><mi>sin</mi><mo>,</mo>
  <mi>x</mi> <mo>)</mo></math> is the abstract &OM; object
  corresponding to <math><mi>sin</mi> <mo>(</mo><mi>x</mi>
  <mo>)</mo></math>.  More generally, an &OM; application object can
  be used as a constructor to convey a mathematical object built from
  other objects such as a polynomial constructed from a set of
  monomials.  Constructors build inhabitants of some symbolic type,
  for instance the type of rational numbers or the type of
  polynomials.  The rational number, usually denoted as
  <math><mn>1</mn><mo>/</mo><mn>2</mn></math>, is represented by the
  &OM; application object <math><mi
  mathvariant="bold">application</mi><mo>(</mo><mi>Rational</mi><mo>,</mo>
  <mn>1</mn><mo>,</mo> <mn>2</mn><mo>)</mo></math>. The symbol
  <math><mi>Rational</mi></math> must be defined, by a Content
  Dictionary, as a constructor symbol for the rational numbers.</para>
   
<figure id="fig_obj">
    <title>The &OM; application and binding objects for
<math><mi>sin</mi> <mo>(</mo><mi>x</mi> <mo>)</mo></math> and
<math><mi>&#955;</mi> <mi>x</mi><mo>.</mo><mi>x</mi> <mo>+</mo>
<mn>2</mn></math> in tree-like notation.</title>  <graphic fileref="lambda"
width="600" depth="190"/>
</figure>

  
</listitem>
</varlistentry>
<varlistentry><term>Binding</term><listitem><para>objects are
  constructed from an &OM; object, and from a sequence of zero or more
  variables followed by another &OM; object.  The first &OM; object is
  the <quote>binder</quote> object. Arguments 2 to
  <math><mi>n</mi><mo>-</mo><mn>1</mn></math> are always variables to
  be bound in the <quote>body</quote> which is the
  <math><msup><mi>n</mi><mi>th</mi></msup></math> argument object. It
  is allowed to have no bound variables, but the binder object and the
  body should be present. Binding can be used to express functions or
  logical statements.  The function <math><mi>&#955;</mi>
  <mi>x</mi><mo>.</mo><mi>x</mi> <mo>+</mo><mn>2</mn></math>, in which
  the variable <math><mi>x</mi></math> is bound by
  <math><mi>&#955;</mi></math>, corresponds to a binding object having
  as binder the &OM; symbol <math><mi>lambda</mi></math>: <math
  display="block"><mi
  mathvariant="bold">binding</mi><mo>(</mo><mi>lambda</mi><mo>,</mo>
  <mi>x</mi> <mo>,</mo> <mi
  mathvariant="bold">application</mi><mo>(</mo><mi>plus</mi><mo>,</mo>
  <mi>x</mi> <mo>,</mo>
  <mn>2</mn><mo>)</mo><mo>)</mo><mtext>.</mtext></math></para>
  
  
  


<para>Phrasebooks are allowed to use <math><mi>&#945;</mi></math>
  conversion in order to avoid clashes of variable names. Suppose an
  object <math><mi>&#937;</mi></math> contains an occurrence of the
  object <math><mi mathvariant="bold">binding</mi>
  <mo>(</mo><mi>B</mi> <mo>,</mo> <mi>v</mi> <mo>,</mo> <mi>C</mi>
  <mo>)</mo></math>.  This object <math><mi
  mathvariant="bold">binding</mi> <mo>(</mo><mi>B</mi> <mo>,</mo>
  <mi>v</mi> <mo>,</mo> <mi>C</mi> <mo>)</mo></math> can be replaced
  in <math><mi>&#937;</mi></math> by <math><mi
  mathvariant="bold">binding</mi> <mo>(</mo><mi>B</mi> <mo>,</mo>
  <mi>z</mi> <mo>,</mo> <mi>C'</mi><mo>)</mo></math> where
  <math><mi>z</mi></math> is a variable not occurring free in
  <math><mi>C</mi></math> and <math><mi>C'</mi></math> is obtained
  from <math><mi>C</mi></math> by replacing each free (i.e., not bound
  by any intermediate <varname>binding</varname> construct) occurrence
  of <math><mi>v</mi></math> by <math><mi>z</mi></math>.  This
  operation preserves the semantics of the object
  <math><mi>&#937;</mi></math>. In the above example, a phrasebook is
  thus allowed to transform the object to, e.g.  <math revisionflag="deleted"
  display="block"><mi mathvariant="bold">binding</mi>
  <mo>(</mo><mi>lambda</mi><mo>,</mo> <mi>v</mi> <mo>,</mo> <mi
  mathvariant="bold">binding</mi> <mo>(</mo><mi>lambda</mi><mo>,</mo>
  <mi>z</mi> <mo>,</mo><mi mathvariant="bold">application</mi>
  <mo>(</mo><mi>times</mi><mo>,</mo><mi>z</mi>
  <mo>,</mo><mi>z</mi><mo>)</mo><mo>)</mo><mo>)</mo><mtext>.</mtext></math>
  <math revisionflag="added" display="block"><mi
  mathvariant="bold">binding</mi><mo>(</mo><mi>lambda</mi><mo>,</mo>
  <mi>z</mi> <mo>,</mo> <mi
  mathvariant="bold">application</mi><mo>(</mo><mi>plus</mi><mo>,</mo>
  <mi>z</mi> <mo>,</mo>
  <mn>2</mn><mo>)</mo><mo>)</mo><mtext>.</mtext></math>
</para>
<para>Repeated occurrences of the same variable in a binding operator
  are allowed. An &OM; application should treat a binding with
  multiple occurrences of the same variable as equivalent to the
  binding in which all but the last occurrence of each variable is
  replaced by a new variable which does not occur free in the body of
  the binding.  <math display="block"><mi
  mathvariant="bold">binding</mi> <mo>(</mo><mi>lambda</mi><mo>,</mo>
  <mi>v</mi> <mo>,</mo> <mi>v</mi> <mo>,</mo><mi
  mathvariant="bold">application</mi>
  <mo>(</mo><mi>times</mi><mo>,</mo><mi>v</mi>
  <mo>,</mo><mi>v</mi><mo>)</mo> <mo>)</mo></math> is semantically
  equivalent to: <math display="block"><mi
  mathvariant="bold">binding</mi> <mo>(</mo><mi>lambda</mi> <mo>,</mo>
  <msup><mi>v</mi><mo>'</mo></msup> <mo>,</mo> <mi>v</mi>
  <mo>,</mo><mi mathvariant="bold">application</mi>
  <mo>(</mo><mi>times</mi><mo>,</mo><mi>v</mi>
  <mo>,</mo><mi>v</mi><mo>)</mo> <mo>)</mo></math> so that the
  resulting function is actually a constant in its first argument
  (<math><msup><mi>v</mi><mo>'</mo></msup></math> does not occur free
  in the body <math><mi mathvariant="bold">application</mi>
  <mo>(</mo><mi>times</mi><mo>,</mo><mi>v</mi>
  <mo>,</mo><mi>v</mi><mo>)</mo></math>).</para>

  
</listitem>
</varlistentry>
<varlistentry><term>Attribution</term><listitem><para>decorates an
  object with a sequence of one or more pairs made up of an &OM;
  symbol, the <quote>attribute</quote>, and an associated <phrase
  revisionflag="deleted">&OM;</phrase> object, the <quote>value of the
  attribute</quote>.  The value of the attribute can be an <phrase
  revisionflag="added">&OM;</phrase> attribution object itself. As an
  example of this, consider the &OM; objects representing groups,
  automorphism groups, and group dimensions. It is then possible to
  attribute an &OM; object representing a group by its automorphism
  group, itself attributed by its dimension.</para>

<para revisionflag="added">
&OM; objects can be attributed with &OM; foreign objects, which are
containers for non-&OM; structures.  For example a mathematical
expression could be attributed with its spoken or visual rendering.
</para>

<para>Composition of attributions, as in
  <math display="block">
<mi mathvariant="bold">attribution</mi><mo>(</mo><mi
  mathvariant="bold">attribution</mi><mo>(</mo><mi>A</mi><mo>,</mo>
  <msub><mi>S</mi><mn>1</mn></msub> <mspace width=".3em"/>
  <msub><mi>A</mi><mn>1</mn></msub><mo>,</mo><mi>&#8230;</mi><mo>,</mo><msub><mi>S</mi><mi>h</mi></msub>
  <mspace width=".3em"/>
  <msub><mi>A</mi><mi>h</mi></msub><mo>)</mo><mo>,</mo>
  <msub><mi>S</mi><mrow><mi>h</mi><mo>+</mo><mn>1</mn></mrow></msub>
  <mspace width=".3em"/>
  <msub><mi>A</mi><mrow><mi>h</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>,</mo>
  <mi>&#8230;</mi><mo>,</mo> <msub><mi>S</mi><mi>n</mi></msub> <mspace
  width=".3em"/> <msub><mi>A</mi><mi>n</mi></msub><mo>)</mo></math> is
  semantically equivalent to a single attribution, that is <math
  display="block"><mi
  mathvariant="bold">attribution</mi><mo>(</mo><mi>A</mi><mo>,</mo>
  <msub><mi>S</mi><mn>1</mn></msub> <mspace width=".3em"/>
  <msub><mi>A</mi><mn>1</mn></msub><mo>,</mo>
  <mi>&#8230;</mi><mo>,</mo> <msub><mi>S</mi><mi>h</mi></msub> <mspace
  width=".3em"/> <msub><mi>A</mi><mi>h</mi></msub><mo>,</mo>
  <msub><mi>S</mi><mrow><mi>h</mi><mo>+</mo><mn>1</mn></mrow></msub>
  <mspace width=".3em"/>
  <msub><mi>A</mi><mrow><mi>h</mi><mo>+</mo><mn>1</mn></mrow></msub><mo>,</mo>
  <mi>&#8230;</mi><mo>,</mo> <msub><mi>S</mi><mi>n</mi></msub> <mspace
  width=".3em"/>
  <msub><mi>A</mi><mi>n</mi></msub><mo>)</mo><mtext>.</mtext></math>
  The operation that produces an object with a single layer of
  attribution is called <emphasis>flattening</emphasis>.</para>

<para>Multiple attributes with the same name are allowed.  While the
  order of the given attributes does not imply any notion of priority,
  potentially it could be significant. For instance, consider the case
  in which <math><msub><mi>S</mi><mi>h</mi></msub> <mo>=</mo>
  <msub><mi>S</mi><mi>n</mi></msub></math> (<math><mi>h</mi>
  <mo>&lt;</mo> <mi>n</mi></math>) in the example above. Then, the
  object is to be interpreted as if the value
  <math><msub><mi>A</mi><mi>n</mi></msub></math> overwrites the value
  <math><msub><mi>A</mi><mi>h</mi></msub></math>.  (&OM; however does
  not mandate that an application preserves the attributes or their
  order.)</para>

<para revisionflag="added">Attribution acts as either adornment
  annotation or as semantical annotation. When the key has role
  <emphasis>attribution</emphasis>, then replacement of the
  attributed object by the object itself is not harmful and preserves
  the semantics. When the key has role
  <emphasis>semantic-attribution</emphasis> then the attributed
  object is modified by the attribution and cannot be viewed as
  semantically equivalent to the stripped object. If the attribute
  lacks the role specification then attribution is acting as adornment
  annotation.
  </para>


<para>Objects can be decorated in a multitude of ways.
<phrase revisionflag="deleted">In&#160;<citation>OMD132b</citation>, typing of &OM; objects is
expressed by using an attribution.
</phrase>
<phrase revisionflag="added">An example of the use of an adornment attribution
would be to indicate the colour in which an &OM; object should be
displayed, for example <math><mi
mathvariant="bold">attribution</mi><mo>(</mo><mi>A</mi><mo>,</mo>
<mi>colour</mi> <mspace width=".3em"/> <mi>red</mi> <mo>)</mo></math>.
Note that both <math><mi>A</mi></math> and <math><mi>red</mi></math> are &OM;
objects.
An example of the use of a semantic attribution would be to indicate the
type of an object.  For example</phrase>
the object <math><mi
mathvariant="bold">attribution</mi><mo>(</mo><mi>A</mi><mo>,</mo>
<mi>type</mi> <mspace width=".3em"/> <mi>t</mi> <mo>)</mo></math>
represents the judgment stating that object <math><mi>A</mi></math>
has type <math><mi>t</mi></math>. Note that both
<math><mi>A</mi></math> and <math><mi>t</mi></math> are &OM;
objects.</para>


<para revisionflag="deleted">Attribution can act as either annotation,
  in the sense of adornment, or as modifier. In the former case,
  replacement of the adorned object by the object itself is probably
  not harmful (preserves the semantics). In the latter case however,
  it may very well be.  Therefore, attribution in general should by
  default be treated as a construct rather than as adornment. Only
  when the CD definitions of the attributes make it clear that they
  are adornments, can the attributed object be viewed as semantically
  equivalent to the stripped object.</para>

            
  
  
</listitem>
</varlistentry>
<varlistentry><term>Error</term><listitem><para>is made up of an &OM;
  symbol and a sequence of zero or more &OM; objects. This object has
  no direct mathematical meaning.  Errors occur as the result of some
  treatment on an &OM; object and are thus of real interest only when
  some sort of communication is taking place. Errors may occur inside
  other objects and also inside other errors.  Error objects might
  consist only of a symbol as in the object: <math><mi
  mathvariant="bold">error</mi> <mo>(</mo><mi>S</mi>
  <mo>)</mo></math>.</para> 
</listitem>
</varlistentry>
</variablelist> 
</section>

<section id="sec_names" revisionflag="added">
<title>Names</title>

<para>The names of symbols, variables and content dictionaries must
conform to the production <systemitem>Name</systemitem> specified in the following
grammar
(which is identical to that for &exml; names in XML 1.1,
<citation>xml_04</citation>). Informally speaking, a name is a sequence
of Unicode <citation>UNICODE</citation>
characters which begins with a letter and cannot contain
certain punctuation and combining  characters.  The notation
<systemitem>#x...</systemitem> represents the hexadecimal value of 
the encoding of a Unicode character. 
Some of the character values or <emphasis>code points</emphasis> in the
following productions are currently unassigned, but this is
likely to change in the future as Unicode evolves<footnote id="xml1">
<para>
We note that in XML 1.0 the name production explicitly listed
the characters that were allowed, so all the characters added in
versions of Unicode after 2.0 (which amounted to tens of thousands of
characters) were not allowed in names.
</para>
</footnote>.

</para>

<blockquote>
<informaltable>
<tgroup cols="3">
<tbody>
<row>
<entry>Name </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> NameStartChar (NameChar)* </entry>
</row>
<row>
<entry>NameStartChar</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>  ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] |</entry></row>
<row><entry/><entry/><entry>[#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] |</entry></row>
<row><entry/><entry/><entry>[#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |</entry></row>
<row><entry/><entry/><entry>[#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |</entry></row>
<row><entry/><entry/><entry>[#x10000-#xEFFFF] 
</entry>
</row>
<row>
<entry>NameChar</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>  NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] |</entry></row>
<row><entry/><entry/><entry>[#x203F-#x2040] </entry>
</row>
</tbody>
</tgroup>
</informaltable>
</blockquote>


<formalpara><title>CD Base</title>

<para>A cdbase must conform to the grammar for URIs described in
<citation>IETF2396</citation>.  Note that if non-ASCII characters are
used in a CD or symbol name then when a URI for that symbol is
constructed it will be necessary to map the non-ASCII characters to a
sequence of octets.  The precise mechanism for doing this depends on
the URI scheme.</para>


</formalpara>

<formalpara><title>Note on content dictionary names</title>
<para>
It is a common convention to store a Content Dictionary in a file of
the same name, which can cause difficulties on many file systems.  If
this convention is to be followed then &OM;
<emphasis>recommends</emphasis> that the name be restricted to the
subset of the above grammar which is a legal POSIX
<citation>POSIX</citation> filename, namely:
<blockquote>
<informaltable>
<tgroup cols="3">
<tbody>
<row>
<entry>Name </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> (PosixLetter | '_') (Char)*
</entry>
</row>
<row>
<entry>Char</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> PosixLetter | Digit | '.' | '-' | '_' 
</entry>
</row>
<row>
<entry>PosixLetter</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> 
'a' | 'b' | ... | 'z' | 'A' | 'B' | ... | 'Z'
</entry>
</row>
</tbody>
</tgroup>
</informaltable>
</blockquote>
</para>
</formalpara>

<formalpara><title>Canonical URIs for Symbols</title>
<para>
To facilitate the use of &OM; within a URI-based framework (such as RDF
<citation>rdf</citation> or OWL <citation>owl</citation>), we provide the
following scheme for constructing a canonical URI
for an &OM; Symbol:
<blockquote>
  <para><systemitem>URI = cdbase-value + '/' + cd-value + '#' + name-value</systemitem></para>
</blockquote>
So for example the URI for the symbol with cdbase
<systemitem>http://www.openmath.org/cd</systemitem>, cd
<systemitem>transc1</systemitem> and name <systemitem>sin</systemitem>
is:
<blockquote>
  <para><systemitem>http://www.openmath.org/cd/transc1#sin</systemitem></para>
</blockquote>
In particular, this now allows us to refer uniquely to an &OM; symbol from a
MathML document <citation>MathML_2003</citation>:
<literallayout>
&lt;mathml:csymbol xmlns:mathml="http://www.w3.org/1998/Math/MathML"
                definitionURL="http://www.openmath.org/cd/transc1#sin"&gt;
  &lt;mathml:mo&gt; sin &lt;/mathml:mo&gt;
&lt;/mathml:csymbol&gt;
</literallayout>
</para>
</formalpara>

</section>

<section id="sec_summary">
<title>Summary</title>

<itemizedlist>
<listitem> <para>&OM; supports basic objects like integers, symbols,
  floating-point numbers, character strings, bytearrays, and
  variables.</para>
</listitem>
<listitem> <para>&OM; compound objects are of four kinds:
  applications, bindings, errors, and attributions.</para>
</listitem>
<listitem revisionflag="added"> <para>&OM; objects may be attributed
with non-&OM; objects via the use of foreign &OM; objects.
  </para>
</listitem>
<listitem> <para>&OM; objects have the expressive power to cover all
  areas of computational mathematics.</para>
</listitem>
</itemizedlist>

 <para>Observe that an &OM;
application object is viewed as a <quote>tree</quote> by software
applications that do not understand Content Dictionaries, whereas a
Phrasebook that understands the semantics of the symbols, as defined
in the Content Dictionaries, should interpret the object as functional
application, constructor, or binding accordingly. Thus, for example,
for some applications, the &OM; object corresponding to
<math><mn>2</mn><mo>+</mo><mn>5</mn></math> may result in a command
that writes <math><mn>7</mn></math>.</para>
</section>
</chapter>

<chapter id="cha_enco">
<title>&OM; Encodings</title>


<para>In this chapter, two encodings are defined that map between &OM;
objects and byte streams.  These byte streams constitute a low level
representation that can be easily exchanged between processes (via
almost any communication method) or stored and retrieved from
files.</para>


<para revisionflag="deleted">The first encoding uses ISO 646:1983
characters&#160;<citation>iso646_83</citation> (also known as
<acronym>ascii</acronym> characters) and is an &exml;
application. Although the &exml; markup of the encoding uses only
<acronym>ascii</acronym> characters, &OM; strings may use arbitrary
Unicode/ISO 10646:1988 characters&#160;<citation>UNICODE</citation>.
It can be used, for example, to send &OM; objects via e-mail, news,
cut-and-paste, etc. The texts produced by this encoding can be part of
&exml; documents.</para>

<para revisionflag="added">The first encoding is a character-based
encoding in &exml; format.  In previous versions of the &OM; Standard
this encoding was a restricted subset of the full legal &exml; syntax.
In this version, however, we have removed all these restrictions so that
the earlier encoding is a strict subset of the existing one.  The
&exml; encoding can be used, for example, to send &OM; objects via
e-mail, cut-and-paste, etc. and to embed &OM; objects in &exml;
documents or to have &OM; objects processed by &exml;-aware
applications.</para>

<para>The second encoding is a binary encoding that is meant to be
used when the compactness of the encoding is important (inter-process
communications over a network is an example).</para>

<para>Note that these two encodings are sufficiently different for
auto-detection to be effective: an application reading the bytes can
very easily determine which encoding is used.</para>

<section id="sec_xml">
<title>The &exml; Encoding</title>

<para>This encoding has been designed with two main goals in mind:
<orderedlist>
<listitem><para>to provide an encoding that uses common character sets
  (so that it can easily be included in most documents and transport
  protocols) and that is both readable and writable by a human.</para>
</listitem>
<listitem><para>to provide an encoding that can be included (embedded) in
  &exml; documents or processed by &exml;-aware applications.</para>
</listitem>
</orderedlist> 
</para>

<section id="ssec_xml">
<title>A <phrase
revisionflag="deleted">Grammar</phrase><phrase revisionflag="added">Schema</phrase> for the &exml; Encoding</title>




<para revisionflag="added">The &exml; encoding of an &OM; object is
defined by the Relax NG schema <citation>RELAX</citation> given below.
Relax NG has a number of advantages over the older XSD Schema format
<citation>XSD</citation>, in particular it allows for tighter control
of attributes and has a modular, extensible structure.  Although we
have made the &exml; form, which is given in <xref
linkend="app_openmath.rng"/>, normative, it is generated from the
 compact syntax given below.  It is also very easy to restrict the schema to allow
a limited set of &OM; symbols as described in <xref
linkend="app_relaxrestricted"/>.  </para>

<para revisionflag="added"> Standard tools exist for generating a DTD
or an XSD schema from a Relax NG Schema.  Examples of such documents
are given in <xref linkend="app_dtd"/> and <xref linkend="app_xsd"/>
respectively.</para>

<literallayout revisionflag="added">
&omrnc;
</literallayout>
<para><phrase revisionflag="deleted">The &exml; encoding of an
&OM; object is defined by the dtd given in Figure 4.1 below with the
following additional rules not implied by the &exml;
<acronym>dtd</acronym>.</phrase></para>

<itemizedlist revisionflag="deleted">
<listitem><para revisionflag="deleted" >Comments are permitted only between
elements, not within element character data.</para>
</listitem>

<listitem><para revisionflag="deleted">Processing Instructions are
 only allowed before the <acronym>OMOBJ</acronym> element.</para>
</listitem>

<listitem><para revisionflag="deleted">The content of an
<acronym>OMB</acronym> element is valid base64-encoded text.</para>
</listitem>

<listitem><para revisionflag="deleted">The character data forming
element content and attribute values matches the regular expressions
of <xref linkend="fig_xml"/>.</para>
</listitem>
</itemizedlist>



<para id="fig_objdtd" revisionflag="deleted">
    <phrase>DTD for the &OM; &exml; encoding of objects.</phrase>
<literallayout revisionflag="deleted"><![CDATA[
<!-- DTD for OM Objects - sb 29.10.98 -->
<!-- sb 3.2.99 -->

<!--
     general list of embeddable elements
      : excludes OMATP as this is only embeddable in OMATTR
      : excludes OMBVAR as this is only embeddable in OMBIND
-->

<!ENTITY % omel "OMS | OMV | OMI | OMB | OMSTR
                                | OMF | OMA | OMBIND | OME
                                | OMATTR | ]]><![CDATA[">


<!-- things which can be variables -->

<!ENTITY % omvar "OMV | OMATTR" >

]]><![CDATA[

<!-- symbol -->
<!ELEMENT OMS EMPTY>

  <!ATTLIST OMS ]]><![CDATA[
              name CDATA #REQUIRED
              cd CDATA #REQUIRED >

<!-- variable -->
<!ELEMENT OMV EMPTY>
<!ATTLIST OMV  ]]><![CDATA[
              name CDATA #REQUIRED >

<!-- integer -->
<!ELEMENT OMI (#PCDATA) >
]]><![CDATA[

<!-- byte array -->
<!ELEMENT OMB (#PCDATA) >
]]><![CDATA[

<!-- string -->
<!ELEMENT OMSTR (#PCDATA) >
]]><![CDATA[

<!-- floating point -->
<!ELEMENT OMF EMPTY>
<!ATTLIST OMF  ]]><![CDATA[
              dec CDATA #IMPLIED
               hex CDATA #IMPLIED>

<!-- apply constructor -->
<!ELEMENT OMA (%omel;)+ >
]]><![CDATA[


<!-- binding constructor & bound variables -->
<!ELEMENT OMBIND ((%omel;), OMBVAR, (%omel;)) >
]]><![CDATA[

<!ELEMENT OMBVAR (%omvar;)+ >
]]><![CDATA[

<!-- error -->
<!ELEMENT OME (OMS, (%omel;)* ) >
]]><![CDATA[

<!-- attribution constructor & attribute pair constructor -->
<!ELEMENT OMATTR (OMATP, (%omel;)) >
]]><![CDATA[

<!ELEMENT OMATP (OMS, (%omel;))+ >
]]><![CDATA[

]]><![CDATA[

<!-- OM object constructor -->
<!ELEMENT OMOBJ (%omel;) >
<!ATTLIST OMOBJ ]]><![CDATA[
                xmlns:xlink CDATA #FIXED 'http://www.w3.org/1999/xlink'>]]>
</literallayout>
</para>


<para revisionflag="deleted">In addition, if the &exml; document
encoding the &OM; object is linearised into the &exml; concrete
syntax, the following further constraints apply, which ensure that the
encoding may be read by &OM; applications that may not include a full
&exml; parser.</para> 




<itemizedlist revisionflag="deleted" >
<listitem>
<para revisionflag="deleted">The document should use <acronym>utf-8</acronym> encoding.</para>

</listitem>
<listitem>
<para revisionflag="deleted">A <systemitem>&lt;!DOCTYPE</systemitem> declaration should not be used.</para>

</listitem>

<listitem>
<para  revisionflag="deleted">Character references should not be used. As
<systemitem>&lt;!DOCTYPE</systemitem> is not used, the only entity
references that are allowed are the five predefined entity references:
<systemitem>&amp;apos;</systemitem> (&apos;),
<systemitem>&amp;quot;</systemitem> (&quot;),
<systemitem>&amp;lt;</systemitem> (&lt;),
<systemitem>&amp;gt;</systemitem> (&gt;),
<systemitem>&amp;amp;</systemitem> (&amp;).
</para>
</listitem>

<listitem>
 

<para  revisionflag="deleted">The &exml; empty element form
<systemitem>&lt;&#8230;/&gt;</systemitem> should
always be used to encode elements such as <acronym>omf</acronym> which
are specified in the <acronym>dtd</acronym> as being
<acronym>empty</acronym>. It should never be used for elements that
may sometimes be empty, such as <acronym>omstr</acronym>.</para>

</listitem>
</itemizedlist>

<para revisionflag="deleted">Such a linearisation of an &exml; encoded &OM; Object would
match the character-based grammar given in <xref
linkend="fig_xml"/>.</para>

<para revisionflag="deleted">The notation used in this section and in
<xref linkend="fig_xml"/> should be quite straightforward (+ meaning
<quote>one or more</quote>, ? meaning <quote>zero or one</quote>, and | meaning
<quote>or</quote>).  The start symbol of the grammar is
<quote>start</quote>, <quote>space</quote> stands for the space
character, <quote>cr</quote> for the carriage return character,
<quote>nl</quote> for the line feed character and <quote>tab</quote>
for the horizontal tabulation character.</para>

<para revisionflag="deleted" id="fig_xml">
<!---->
    <phrase revisionflag="deleted">Grammar for the &exml; encoding of &OM; objects.</phrase>


<informaltable revisionflag="deleted">
<tgroup cols="3">
<tbody>
<row>
<entry>S           </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> (space | tab | cr | nl)+  
</entry>
</row>

<row>
<entry>integer     </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> 
        (<systemitem>-</systemitem> S?)? [&digits;]+ (S [&digits;]+)*  |
        (<systemitem>-</systemitem> S?)? <systemitem>x</systemitem> S? [&exadigits;]+ (S [&exadigits;]+)* 
</entry>
</row>
<row>
<entry> 

cdname      </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>  [&lcalpha;][&lcalpha;&digits;<systemitem>_</systemitem>]*
</entry>
</row>

<row>
<entry>symbname    </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> [&ucalpha;&lcalpha;][&ucalpha;&lcalpha;&digits;<systemitem>_</systemitem>]*
</entry>
</row>

<row>
<entry>fpdec       </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>  
    (<systemitem>-</systemitem>?)([&digits;]+)?(<systemitem>.</systemitem>[&digits;]+)?(<systemitem>e</systemitem>(&sign;?)[&digits;]+)?
</entry>
</row>

<row>
<entry>fphex       </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>  [&digits;ABCDEF]+ 
</entry>
</row>

<row>
<entry>varname        </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> ([&ucalpha;&lcalpha;&digits;&varnamechar;])+ 
</entry>
</row>

<row>
<entry>base64      </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> ([&ucalpha;&lcalpha;&digits; +/=] | S)+ 
</entry>
</row>

<row>
<entry>char  </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> <emphasis>&exml; Character Data</emphasis>
</entry>
</row>
</tbody>
</tgroup>
</informaltable>
<!---->



<informaltable revisionflag="deleted">
<tgroup cols="3">
<tbody>
<row>
<entry>symbnameatt</entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> 
    <systemitem>name</systemitem> S? = S? (<systemitem>"</systemitem> symbname <systemitem>"</systemitem> <systemitem>|</systemitem> <systemitem>'</systemitem> symbname <systemitem>'</systemitem>) 
</entry>
</row>

<row>
<entry>cdnameatt</entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> <systemitem>cd</systemitem> S? = S? (<systemitem>"</systemitem> cdname <systemitem>"</systemitem> <systemitem>|</systemitem> <systemitem>'</systemitem> cdname <systemitem>'</systemitem>) 
</entry>
</row>

<row>
<entry>varnameatt</entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> <systemitem>name</systemitem> S? = S? (<systemitem>"</systemitem> varname <systemitem>"</systemitem> <systemitem>|</systemitem> <systemitem>'</systemitem> varname <systemitem>'</systemitem>) 
</entry>
</row>

<row>
<entry>fpdecatt</entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> <systemitem>dec</systemitem> S? = S? (<systemitem>"</systemitem> fpdec <systemitem>"</systemitem> <systemitem>|</systemitem> <systemitem>'</systemitem> fpdec <systemitem>'</systemitem>) 
</entry>
</row>

<row>
 <entry>fphexatt</entry>
 <entry> <math>&longrightarrow;</math></entry>
 <entry><systemitem>hex</systemitem> S? = S? (<systemitem>"</systemitem> fphex <systemitem>"</systemitem> <systemitem>|</systemitem> <systemitem>'</systemitem> fphex <systemitem>'</systemitem>) 
</entry>
</row>


<row>
<entry>PI </entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> &lt;<systemitem>?</systemitem> char <systemitem>?</systemitem><systemitem>&gt;</systemitem></entry>
</row>

<row>
<entry>comment</entry>
<entry> <math>&longrightarrow;</math></entry>
<entry> &lt;<systemitem>!-&zsp;-</systemitem> char <systemitem>-&zsp;-</systemitem><systemitem>&gt;</systemitem>
</entry>
</row>

<row>
<entry>SC</entry>
<entry><math>&longrightarrow;</math></entry>
<entry> S+ <systemitem>|</systemitem> (comment S)+
</entry>
</row>

<row>
<entry>start  </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> 
 (SC <systemitem>|</systemitem> PI)* <systemitem>&lt;OMOBJ</systemitem> S?<systemitem>&gt;</systemitem> S? object S? <systemitem>&lt;/OMOBJ</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row>
<entry>symbol</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> 
  <systemitem>&lt;OMS</systemitem> [[(S  symbnameatt) (S cdnameatt) ]] S? <systemitem>/&gt;</systemitem>
</entry>
</row>


<row>
<entry>variable</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry><systemitem>&lt;OMV</systemitem>  varnameatt S? <systemitem>/&gt;</systemitem>
</entry>
</row>

<row><entry/>
<entry>|</entry>
<entry> <systemitem>&lt;OMATTR</systemitem> S?<systemitem>&gt;</systemitem> SC? omatp SC? variable SC? <systemitem>&lt;/OMATTR</systemitem> S?<systemitem>&gt;</systemitem>
</entry>
</row>

<row>
<entry>omatp </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry>     <systemitem>&lt;OMATP</systemitem>  S?<systemitem>&gt;</systemitem> SC? attrs SC? <systemitem>&lt;/OMATP</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row>
<entry>object </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> symbol</entry>
</row>

<row>
<entry/>
<entry>|</entry><entry>variable</entry>
</row>

<row><entry/>
<entry>|</entry>
<entry><systemitem>&lt;OMI</systemitem> S?<systemitem>&gt;</systemitem> S? integer S? <systemitem>&lt;/OMI</systemitem> S?<systemitem>&gt;</systemitem>
</entry>
</row>

<row><entry/>
<entry>|</entry><entry> <systemitem>&lt;OMF</systemitem> S fpdecatt  S?<systemitem>/&gt;</systemitem>
</entry>
</row>

<row>
  <entry/>
  <entry>|</entry><entry> <systemitem>&lt;OMF</systemitem> S fphexatt  S?<systemitem>/&gt;</systemitem>
</entry>
</row>

<row>
  <entry/>  
  <entry>|</entry><entry> <systemitem>&lt;OMSTR</systemitem>  S?<systemitem>&gt;</systemitem> char <systemitem>&lt;/OMSTR</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row>
  <entry/>  
  <entry>|</entry><entry> <systemitem>&lt;OMB</systemitem>  S?<systemitem>&gt;</systemitem> base64  <systemitem>&lt;/OMB</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> <systemitem>&lt;OMA</systemitem>  S?<systemitem>&gt;</systemitem> SC? object SC? objects SC? <systemitem>&lt;/OMA</systemitem> S?<systemitem>&gt;</systemitem>
</entry>
</row>

<row><entry/>
<entry>|</entry><entry> <systemitem>&lt;OMBIND</systemitem>  S?<systemitem>&gt;</systemitem> SC? object SC? 
</entry>
</row>

<row><entry/>
<entry></entry>
<entry> <systemitem>&lt;OMBVAR</systemitem>  S?<systemitem>&gt;</systemitem> SC? variables SC? <systemitem>&lt;/OMBVAR</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row><entry/>
<entry></entry>
<entry> SC? object SC? <systemitem>&lt;/OMBIND</systemitem> S?<systemitem>&gt;</systemitem>
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> <systemitem>&lt;OME</systemitem>  S?<systemitem>&gt;</systemitem> SC? symbol SC? objects SC? <systemitem>&lt;/OME</systemitem> S?<systemitem>&gt;</systemitem> 
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> <systemitem>&lt;OMATTR</systemitem>  S?<systemitem>&gt;</systemitem> SC?  <systemitem>&lt;OMATP</systemitem> S?<systemitem>&gt;</systemitem> SC? attrs SC? <systemitem>&lt;/OMATP</systemitem> S?<systemitem>&gt;</systemitem>   
</entry>
</row>

<row><entry/>
<entry></entry>
<entry>SC? object SC? <systemitem>&lt;/OMATTR</systemitem> S?<systemitem>&gt;</systemitem>  
</entry>
</row>

<row>
<entry>attrs</entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> symbol S? object   
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> symbol S? object S? attrs 
</entry>
</row>

<row>
<entry>objects </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> SC?     
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> object SC? objects  
</entry>
</row>

<row>
<entry>variables </entry>
<entry> <math>&longrightarrow;</math> </entry>
<entry> SC?   
</entry>
</row>

<row>
  <entry/>
<entry>|</entry>
<entry> variable SC? variables  
</entry>
</row>

</tbody>
</tgroup>
</informaltable>


</para>

<para><emphasis role="bold">Note:</emphasis> This schema 
specifies names as being of the <systemitem>xsd:NCName</systemitem>
type. At the time of writing, W3C Schema types are defined in terms of
XML 1 <citation>xml_98</citation>.  This limits the characters allowed
in a name to a subset of the characters available in Unicode 2.0, which
is far more restrictive than the definition for an
&OM; name given in <xref linkend="sec_names"/>.
It is expected that W3C Schema types will be augmented to match the
new XML 1.1 recommendation <citation>xml_04</citation>, but for portability
reasons applications
should avoid  using the new XML 1.1 name characters unless they are
absolutely required.
The XML 1.1 specification has a useful appendix giving
advice on good strategies to use when naming identifiers.</para>

</section>

<section id="sec_xml-desc">
<title><phrase revisionflag="added">Informal</phrase> description of
the <phrase revisionflag="deleted">Grammar</phrase><phrase
revisionflag="added">&exml; Encoding</phrase></title>

<para>An encoded &OM; object is placed inside an <systemitem>OMOBJ</systemitem> element.  This 
element can contain the elements (and integers) described above.
<phrase revisionflag="added"> It can take an optional
<systemitem>version</systemitem> (&exml;) attribute which indicates to
which version of the &OM; standard it conforms.  In previous versions of
this standard this attribute did not exist, so any &OM; object without
such an attribute must conform to version 1 (or equivalently 1.1) of the
&OM; standard.  Objects which conform to the description given in this
document should have <systemitem>version="2.0"</systemitem>.
</phrase></para>

<para>We briefly discuss the &exml; encoding for each type of &OM; object
starting from the basic objects.</para>

<variablelist>
<varlistentry><term>Integers</term>
<listitem>
 <para>are encoded using the
<systemitem>OMI</systemitem> element around the sequence of their
digits in base 10 or 16 (most significant digit first).  White space
may be inserted between the characters of the integer representation,
this will be ignored.  After ignoring white space, integers written in
base 10 match the regular expression
<systemitem>-?[0-9]+</systemitem>.  Integers written in base 16 match
<systemitem>-?x[0-9A-F]+</systemitem>.  The integer 10 can be thus
encoded as <systemitem>&lt;OMI> 10 &lt;/OMI> </systemitem> or as
<systemitem>&lt;OMI> xA &lt;/OMI> </systemitem> but neither
<systemitem>&lt;OMI> +10 &lt;/OMI></systemitem> nor
<systemitem>&lt;OMI> +xA &lt;/OMI></systemitem> can be used.</para>

<para>The negative integer <math><mn>-120</mn></math> can be encoded
       either as decimal <systemitem>&lt;OMI> -120
       &lt;/OMI></systemitem> or as hexadecimal <systemitem>&lt;OMI>
       -x78 &lt;/OMI></systemitem>.</para>

  
</listitem>
</varlistentry>
<varlistentry><term>Symbols</term><listitem><para>are encoded using
  the <systemitem>OMS</systemitem> element. This element has
  <phrase revisionflag="deleted">two</phrase>
  <phrase revisionflag="added">three</phrase>
  (&exml;) attributes <systemitem>cd</systemitem>,
  <systemitem>name</systemitem><phrase revisionflag="added">,  and
    <systemitem>cdbase</systemitem></phrase>. The value of
  <systemitem>cd</systemitem> is the name of the Content Dictionary in
  which the symbol is defined and the value of
  <systemitem>name</systemitem> is the name of the symbol.
  <phrase revisionflag="added">The optional <systemitem>cdbase</systemitem>
    attribute is a URI that can be used to disambiguate between two  content
    dictionaries with the same name.
  If a symbol does not have an explicit <systemitem>cdbase</systemitem>
attribute, then it inherits its <systemitem>cdbase</systemitem> from the
first ancestor in the &exml; tree with one, should such an element
exist.  In this document we have tended to omit the
<systemitem>cdbase</systemitem> for clarity.
  </phrase>
<phrase revisionflag="deleted">
  The name
  of the Content Dictionary is compulsory, but a future revision of
  the &OM; standard might introduce a defaulting mechanism.
</phrase> For
  example:
<blockquote revisionflag="deleted"><para><systemitem>&lt;OMS
 cd="transc" name="sin"/></systemitem></para>
</blockquote>
<blockquote revisionflag="added"><para><systemitem>&lt;OMS
 cdbase="http://www.openmath.org/cd" cd="transc1" name="sin"/></systemitem></para>
</blockquote>
  is the encoding of the symbol named <systemitem>sin</systemitem> in
  the Content Dictionary named <systemitem>transc1</systemitem>,
<phrase revisionflag="added">which is part of the collection
maintained by the &OM; Society</phrase>.</para>

<para revisionflag="added">As described in <xref linkend="sec_names"/>,
the three attributes of the
  <systemitem>OMS</systemitem> can be used to build a URI reference for the symbol,
for use in contexts where URI-based referencing mechanisms are used.
For example the URI for the above symbol is
 <systemitem>http://www.openmath.org/cd/transc1#sin</systemitem>.
</para>
<para revisionflag="added">
Note that the role attribute described in <xref
linkend="sec_roles"/> is contained in the Content Dictionary and is not
part of the encoding of a symbol, also the <systemitem>cdbase</systemitem> attribute need not
be explicit on each <systemitem>OMS</systemitem> as it is inherited
from any ancestor element.</para>
</listitem>
</varlistentry>
<varlistentry><term>Variables</term><listitem><para>are encoded using
  the <systemitem>OMV</systemitem> element, with only one
  (&exml;) attribute, <systemitem>name</systemitem>, whose value is the
  variable name. <phrase revisionflag="deleted">
The variable name is a subset of the printable
  <acronym>ascii</acronym> set of characters.  In particular, neither
  spaces nor double-quote <systemitem>&quot;</systemitem> are allowed
  in variable names.</phrase>  For instance, the encoding of the object
  representing the variable <math><mi>x</mi></math> is:
  <systemitem>&lt;OMV name="x"/></systemitem></para>

 
</listitem>
</varlistentry>
<varlistentry><term>Floating-point numbers</term><listitem><para>are
  encoded using the <systemitem>OMF</systemitem> element that has
  either the (&exml;) attribute <systemitem>dec</systemitem> or the
  (&exml;) attribute <systemitem>hex</systemitem>. The two
  (&exml;) attributes cannot be present simultaneously. The value of
  <systemitem>dec</systemitem> is the floating-point number expressed
  in base 10, using the common syntax:</para>
  
  <blockquote><para>
  <systemitem revisionflag="changed">(-?)([0-9]+)?("."[0-9]+)?([eE](-?)[0-9]+)?</systemitem>.
  </para>
  <para revisionflag="added">or one of the special values: INF, -INF or
  NaN.</para>
</blockquote>
  <para revisionflag="changed">The value of
  <systemitem>hex</systemitem> is a base 16 representation of the 
 64 bits of the <acronym>ieee</acronym> Double.
 Thus the number represents mantissa, exponent, and sign from lowest
  to highest bits using a  least significant byte ordering.
 This consists of a string of 16 digits <systemitem>0</systemitem>-<systemitem>9</systemitem>, <systemitem>A</systemitem>-<systemitem>F</systemitem>.
  </para>
  <para revisionflag="changed">For example, both <systemitem>&lt;OMF
    dec="1.0e-10"/&gt;</systemitem> and 
   <systemitem>&lt;OMF hex="3DDB7CDFD9D7BDBB"/&gt;</systemitem>
  are valid representations of the floating point number
  <math><mn>1</mn><mo>&#215;</mo>
<msup><mn>10</mn><mn>-10</mn></msup></math>.</para>
 
 <para revisionflag="added"> The symbols <systemitem>INF</systemitem>,
<systemitem>-INF</systemitem> and <systemitem>NaN</systemitem> represent
positive and negative infinity, and <emphasis>not a number</emphasis> as
defined in <citation>ieee754_85</citation>.  Note that while infinities
have a unique representation, it is possible for NaNs to contain extra
information about how they were generated and if this information is to
be preserved then the hexadecimal representation must be used.  For
example
<systemitem>&lt;OMF hex="FFF8000000000000"/&gt;</systemitem> and
<systemitem>&lt;OMF hex="FFF8000000000001"/&gt;</systemitem> are both
hexadecimal representations of NaNs.
</para>


</listitem>
</varlistentry>
<varlistentry><term>Character strings</term><listitem><para>are encoded using the <systemitem>OMSTR</systemitem> element.
  Its content is  a Unicode text <phrase revisionflag="deleted">(The default encoding 
  is <acronym>utf-8</acronym><citation>utf8</citation>, although &exml; encoded &OM; may be embedded
 in a containing &exml; document that specifies alternative encoding in
  the &exml; declaration)</phrase>. Note that as always in &exml; the
  characters <systemitem>&lt;</systemitem> and <systemitem>&amp;</systemitem>  need to be represented by the
  entity references <systemitem>&amp;lt;</systemitem> and
<systemitem>&amp;amp;</systemitem> respectively.</para>
  
</listitem>
</varlistentry>
<varlistentry><term>Bytearrays</term><listitem><para>are encoded using the <systemitem>OMB</systemitem> element. Its content
  is a sequence of characters that is a base64 encoding of the data.
  The base64 encoding is defined in <acronym>rfc</acronym>
<phrase revisionflag="deleted">1521 <citation>rfc1521</citation></phrase>
<phrase revisionflag="added">2045 <citation>rfc2045</citation></phrase>.
  Basically, it represents an arbitrary sequence of octets using 64
  <quote>digits</quote> (<systemitem>A</systemitem> through <systemitem>Z</systemitem>, <systemitem>a</systemitem> through <systemitem>z</systemitem>, <systemitem>0</systemitem> through <systemitem>9</systemitem>, <systemitem>+</systemitem> and <systemitem>/</systemitem>, in order of increasing
  value). Three octets are represented as four digits (the <systemitem>=</systemitem>
  character is used for padding at the end of the data). All line
  breaks and carriage return, space, form feed and horizontal
  tabulation characters are ignored. The reader is referred to
  <citation revisionflag="deleted">rfc1521</citation>
  <citation revisionflag="added">rfc2045</citation>
for more detailed information.</para>

</listitem>
</varlistentry>
</variablelist>
 
<para revisionflag="deleted">In detail the encoding of an &OM; object is described below.</para>

<variablelist>
<varlistentry><term>Applications</term><listitem><para>are encoded using the <systemitem>OMA</systemitem> element. The
  application whose head is the &OM; object <math><msub><mi>e</mi><mn>0</mn></msub></math> and whose arguments
  are the &OM; objects <math><msub><mi>e</mi><mn>1</mn></msub></math>, <phrase>&#8230;</phrase>, <math><msub><mi>e</mi><mi>n</mi></msub></math> is encoded as <systemitem>&lt;OMA></systemitem>
  <math><msub><mi>C</mi><mn>0</mn></msub></math> <math><msub><mi>C</mi><mn>1</mn></msub></math><phrase>&#8230;</phrase> <math><msub><mi>C</mi><mi>n</mi></msub></math> <systemitem>&lt;/OMA></systemitem> where <math><msub><mi>C</mi><mi>i</mi></msub></math> is the encoding of
  <math><msub><mi>e</mi><mi>i</mi></msub></math>.</para>

<para>For example, <math><mi mathvariant="bold">application</mi><mo>(</mo><mi>sin</mi><mo>,</mo><mi>x</mi> <mo>)</mo></math> is encoded as:
<literallayout><![CDATA[<OMA>  
  <OMS cd="transc1" name="sin"/> 
  <OMV name="x"/>  
</OMA>]]></literallayout>
  provided that the symbol <systemitem>sin</systemitem> is defined to be a function
  symbol in a Content Dictionary named <systemitem>transc1</systemitem>.</para>

  
</listitem>
</varlistentry>
<varlistentry><term>Binding</term><listitem><para>is encoded using the <systemitem>OMBIND</systemitem> element.  The binding
  by the &OM; object <math><mi>b</mi></math> of the &OM; variables <math><msub><mi>x</mi><mn>1</mn></msub></math>, <math><msub><mi>x</mi><mn>2</mn></msub></math>,
  <math><mi>&#8230;</mi></math>, <math><msub><mi>x</mi><mi>n</mi></msub></math> in the object <math><mi>c</mi></math> is encoded as <systemitem>&lt;OMBIND></systemitem> <math><mi>B</mi></math>
  <systemitem>&lt;OMBVAR></systemitem> <math><msub><mi>X</mi><mn>1</mn></msub></math> <math><mi>&#8230;</mi></math> <math><msub><mi>X</mi><mi>n</mi></msub></math> <systemitem>&lt;/OMBVAR></systemitem> <math><mi>C</mi></math> <systemitem>&lt;/OMBIND></systemitem> where <math><mi>B</mi></math>, <math><mi>C</mi></math>, and <math><msub><mi>X</mi><mi>i</mi></msub></math> are the encodings of <math><mi>b</mi></math>, <math><mi>c</mi></math>
  and <math><msub><mi>x</mi><mi>i</mi></msub></math>, respectively.</para>

<para>For instance the encoding of
  <math><mi mathvariant="bold">binding</mi>
       <mo>(</mo><mi>lambda</mi><mo>,</mo>
  <mi>x</mi><mo>,</mo><mi mathvariant="bold">application</mi>
     <mo>(</mo><mi>sin</mi><mo>,</mo> <mi>x</mi><mo>)</mo><mo>)</mo></math> is:
<literallayout><![CDATA[<OMBIND>
  <OMS cd="fns1" name="lambda"/>  
  <OMBVAR><OMV name="x"/></OMBVAR>  
  <OMA>
    <OMS cd="transc1" name="sin"/> 
    <OMV name="x"/>  
  </OMA>
</OMBIND>]]></literallayout></para>
  
<para>Binders are defined in  Content Dictionaries, in particular,
  the symbol <systemitem>lambda</systemitem> is defined in the Content Dictionary
  <systemitem>fns1</systemitem> for functions over functions.</para>
  
</listitem>
</varlistentry>
<varlistentry><term>Attributions</term><listitem><para>are encoded using the <systemitem>OMATTR</systemitem> element.  If
  the &OM; object <math><mi>e</mi></math> is attributed with (<math><msub><mi>s</mi><mn>1</mn></msub></math>, <math><msub><mi>e</mi><mn>1</mn></msub></math>), <phrase>&#8230;</phrase>, 
  (<math><msub><mi>s</mi><mi>n</mi></msub></math>, <math><msub><mi>e</mi><mi>n</mi></msub></math>) pairs (where <math><msub><mi>s</mi><mi>i</mi></msub></math> are the attributes), it is encoded
  as <systemitem>&lt;OMATTR></systemitem> <systemitem>&lt;OMATP></systemitem> <math><msub><mi>S</mi><mn>1</mn></msub></math> <math><msub><mi>C</mi><mn>1</mn></msub></math> <phrase>&#8230;</phrase> <math><msub><mi>S</mi><mi>n</mi></msub></math> <math><msub><mi>C</mi><mi>n</mi></msub></math> <systemitem>&lt;/OMATP></systemitem> <math><mi>E</mi></math> <systemitem>&lt;/OMATTR></systemitem> where <math><msub><mi>S</mi><mi>i</mi></msub></math> is the encoding of the
  symbol <math><msub><mi>s</mi><mi>i</mi></msub></math>, <math><msub><mi>C</mi><mi>i</mi></msub></math> of the object <math><msub><mi>e</mi><mi>i</mi></msub></math> and <math><mi>E</mi></math> is the encoding of
  <math><mi>e</mi></math>.</para>

<para>Examples are the use of attribution to decorate a group by its
  automorphism group:
<literallayout><![CDATA[<OMATTR>    
  <OMATP>
    <OMS cd="groups" name="automorphism_group" />  
    [..group-encoding..] 
  </OMATP>  
  [..group-encoding..] 
</OMATTR>]]></literallayout>
or to express the type of a variable:
<literallayout><![CDATA[<OMATTR>    
  <OMATP>
    <OMS cd="ecc" name="type" /> 
    <OMS cd="ecc" name="real" />
  </OMATP> 
  <OMV name="x" />
</OMATTR>]]></literallayout></para>

  
<para revisionflag="added">
A special use of attributions is to associate non-&OM; data with an
&OM; object.  This is done using the
<systemitem>OMFOREIGN</systemitem> element.  The children of this
element must be well-formed &exml;.  For example the attribution of the
&OM; object 
  <math>
     <mi>sin</mi><mfenced><mi>x</mi></mfenced></math> with its
representation in Presentation MathML is:
<literallayout><![CDATA[<OMATTR>
  <OMATP>
    <OMS cd="annotations1" name="presentation-form"/>  
    <OMFOREIGN encoding="MathML-Presentation">
      <math xmlns="http://www.w3.org/1998/Math/MathML">
        <mi>sin</mi><mfenced><mi>x</mi></mfenced>
      </math>
    </OMFOREIGN>  
  </OMATP>
  <OMA>
   <OMS cd="transc1" name="sin"/> 
   <OMV name="x"/>  
  </OMA>
</OMATTR>]]></literallayout>
Of course not everything has a natural &exml; encoding in this way and
often the contents of an <systemitem>OMFOREIGN</systemitem> will just
be data or some kind of encoded string.  For example the attribution
of the previous object with its <phrase>LaTeX</phrase> representation could be achieved
as follows:
<literallayout><![CDATA[<OMATTR>
  <OMATP>
    <OMS cd="annotations1" name="presentation-form"/>  
    <OMFOREIGN encoding="text/x-latex">\sin(x)</OMFOREIGN>  
  </OMATP>
  <OMA>
    <OMS cd="transc1" name="sin"/> 
    <OMV name="x"/>  
  </OMA>
</OMATTR>]]></literallayout>
For a discussion on the use of the <systemitem>encoding</systemitem>
attribute see <xref linkend="sec_compl_omforeign"/>.
</para>
</listitem>

</varlistentry>

<varlistentry>
 <term>Errors</term> 
 <listitem><para>are encoded using the <systemitem>OME</systemitem> element. The error whose
  symbol is <math><mi>s</mi></math> and whose arguments are the &OM; objects
<phrase revisionflag="added">or &OM; derived objects</phrase>
 <math><msub><mi>e</mi><mn>1</mn></msub></math>,
  <phrase>&#8230;</phrase>, <math><msub><mi>e</mi><mi>n</mi></msub></math> is encoded as <systemitem>&lt;OME></systemitem> <math><msub><mi>C</mi><mi>s</mi></msub></math> <math><msub><mi>C</mi><mn>1</mn></msub></math><phrase>&#8230;</phrase> <math><msub><mi>C</mi><mi>n</mi></msub></math> <systemitem>&lt;/OME></systemitem> where <math><msub><mi>C</mi><mi>s</mi></msub></math> is the encoding of <math><mi>s</mi></math> and <math><msub><mi>C</mi><mi>i</mi></msub></math> the encoding
  of <math><msub><mi>e</mi><mi>i</mi></msub></math>.</para>

<para>If an <systemitem>aritherror</systemitem> Content Dictionary contained a
  <systemitem>DivisionByZero</systemitem> symbol, then the object
  <math><mi mathvariant="bold">error</mi><mo>(</mo><mi>DivisionByZero</mi><mo>,</mo> <mi mathvariant="bold">application</mi>
  <mo>(</mo><mi>divide</mi><mo>,</mo> 
  <mi>x</mi><mo>,</mo> <mn>0</mn><mo>)</mo><mo>)</mo></math> would be encoded as follows:

<literallayout><![CDATA[<OME>
  <OMS cd="aritherror" name="DivisionByZero"/>  
  <OMA>
    <OMS cd="arith1" name="divide" />
    <OMV name="x"/>  
    <OMI> 0 </OMI>
  </OMA> 
 </OME>]]></literallayout></para>
  
<para revisionflag="added">
If a <systemitem>mathml</systemitem> Content Dictionary contained an
  <systemitem>unhandled_csymbol</systemitem> symbol, then an &OM; to
MathML translator might return an error such as:
<literallayout><![CDATA[<OME>
  <OMS cd="mathml" name="unhandled_csymbol"/>  
  <OMFOREIGN encoding="MathML-Content">
    <mathml:csymbol xmlns:mathml="http://www.w3.org/1998/Math/MathML"
                    definitionURL="http://www.nag.co.uk/Airy#A">
      <mathml:mo>Ai</mathml:mo>
    </mathml:csymbol>
  </OMFOREIGN> 
 </OME>]]></literallayout></para>

<para revisionflag="added"> Note that it is possible to embed fragments
of valid &OM; inside an <systemitem>OMFOREIGN</systemitem> element but that it
cannot contain invalid &OM;.  In addition, the arguments to an
<systemitem>OME</systemitem> must be well-formed &exml;.  If an
application wishes to signal that the &OM; it has received is invalid or
is not well-formed then the offending data must be encoded as a string.
For example:
<literallayout><![CDATA[<OME>
  <OMS cd="parser" name="invalid_XML"/>  
  <OMSTR>
    &lt;OMA&gt; &lt;OMS name="cos" cd="transc1"&gt;
      &lt;OMV name="v"&gt; &lt;/OMA&gt;
  </OMSTR> 
 </OME>]]></literallayout>
Note that the `&lt;' and `&gt;' characters have been escaped as is usual in
an &exml; document.
</para>

</listitem>
</varlistentry>

<varlistentry revisionflag="added">
 <term>References</term>
 <listitem><para>
 &OM; integers, floating point numbers, character strings,
 bytearrays, applications, binding, attributions can also be encoded
 as an empty <systemitem>OMR</systemitem> element with an <systemitem>href</systemitem>
 attribute whose value is the value of a URI referencing an id
 attribute of an &OM; object of that type.
The &OM; element represented by this <systemitem>OMR</systemitem>
 reference is a copy of the &OM; element referenced by the
 <systemitem>href</systemitem> attribute. Note that this copy
 is <emphasis>structurally equal</emphasis>, but not identical
 to the element referenced. </para>

 <para>For instance, the &OM; object

 <math id="nestedap" display="block">
   <mrow>
     <mi mathvariant="bold">application</mi>
     <mrow>
       <mo fence="true">(</mo>
       <mrow>
         <mi>f</mi>
         <mo separator="true">,</mo>
         <mi mathvariant="bold">application</mi>
         <mrow>
           <mo fence="true">(</mo>
           <mrow>
             <mi>f</mi>
             <mo separator="true">,</mo>
             <mi mathvariant="bold">application</mi>
             <mrow>
               <mo fence="true">(</mo>
               <mrow><mi>f</mi><mo separator="true">,</mo><mi>a</mi><mo separator="true">,</mo><mi>a</mi></mrow>
               <mo fence="true">)</mo>
             </mrow>
             <mo separator="true">,</mo>
             <mi mathvariant="bold">application</mi>
             <mrow>
               <mo fence="true">(</mo>
               <mrow><mi>f</mi><mo separator="true">,</mo><mi>a</mi><mo separator="true">,</mo><mi>a</mi></mrow>
               <mo fence="true">)</mo>
             </mrow>
             <mo fence="true">)</mo>
           </mrow>
           <mo separator="true">,</mo>
           <mi mathvariant="bold">application</mi>
           <mrow>
             <mo fence="true">(</mo>
             <mrow>
               <mi>f</mi>
               <mo separator="true">,</mo>
               <mi mathvariant="bold">application</mi>
               <mrow>
                 <mo fence="true">(</mo>
                 <mrow><mi>f</mi><mo separator="true">,</mo><mi>a</mi><mo separator="true">,</mo><mi>a</mi></mrow>
                 <mo fence="true">)</mo>
               </mrow>
               <mo separator="true">,</mo>
               <mi mathvariant="bold">application</mi>
               <mrow>
                 <mo fence="true">(</mo>
                 <mrow><mi>f</mi><mo separator="true">,</mo><mi>a</mi><mo separator="true">,</mo><mi>a</mi></mrow>
                 <mo fence="true">)</mo>
               </mrow>
               <mo fence="true">)</mo>
             </mrow>
           </mrow>
           <mo fence="true">)</mo>
         </mrow>
       </mrow>
     </mrow>
   </mrow>
 </math>
</para>
<para>can be encoded in the &exml; encoding as either one of the
&exml; encodings given in <xref linkend="fig_shared_vs_unshared"/>
(and some intermediate versions as well).</para>
</listitem> </varlistentry> </variablelist>

<figure id="fig_shared_vs_unshared">
    <title>Shared vs. unshared representations</title>
    
 <literallayout><![CDATA[<OMOBJ version="2.0">         <OMOBJ version="2.0">
  <OMA>                         <OMA>
    <OMV name="f"/>               <OMV name="f"/> 
    <OMA>                         <OMA id="t1">
      <OMV name="f"/>               <OMV name="f"/>
      <OMA>                         <OMA id="t11">
        <OMV name="f"/>               <OMV name="f"/>
        <OMV name="a"/>               <OMV name="a"/>
        <OMV name="a"/>               <OMV name="a"/>
      </OMA>                        </OMA>
      <OMA>                         <OMR href="#t11"/>
        <OMV name="f"/>
        <OMV name="a"/> 
        <OMV name="a"/>
      </OMA>                                
    </OMA>                        </OMA>
    <OMA>                         <OMR href="#t1"/>
      <OMV name="f"/>
      <OMA>
        <OMV name="f"/>
        <OMV name="a"/>
        <OMV name="a"/>
      </OMA>
      <OMA>
        <OMV name="f"/>
        <OMV name="a"/>
        <OMV name="a"/>
      </OMA>
    </OMA>
  </OMA>                        </OMA>
</OMOBJ>                     </OMOBJ>]]>
</literallayout>
</figure>
</section>

<section id="sec_references" revisionflag="added">
<title>Some Notes on References</title>

<para>We say that an &OM; element dominates all its children and all elements
they dominate. An <systemitem>OMR</systemitem> element dominates its target,
i.e. the element that carries the <systemitem>id</systemitem> attribute pointed to
by the <systemitem>href</systemitem> attribute. For instance in the representation
in <xref linkend="fig_shared_vs_unshared"/>, the
<systemitem>OMA</systemitem> element with <systemitem>id="t1"</systemitem> and
also the second <systemitem>OMR</systemitem> dominate the
<systemitem>OMA</systemitem> element with <systemitem>id="t11"</systemitem>.
</para>

<section id="sec_acyclicity" revisionflag="added">
<title>An Acyclicity Constraint</title>

<para revisionflag="added">The occurrences of the <systemitem>OMR</systemitem> element must obey the following global
<emphasis>acyclicity constraint</emphasis>: An &OM; element may not dominate itself.</para>

<para revisionflag="added">Consider for instance the following (illegal) &exml; representation
<literallayout><![CDATA[<OMOBJ version="2.0">
  <OMA id="foo">
    <OMS cd="arith1" name="divide"/>
    <OMI>1</OMI>
    <OMA>
       <OMS cd="arith1" name="plus"/>
       <OMI>1</OMI>
       <OMR href="#foo"/>
    </OMA> 
  </OMA>
</OMOBJ>]]>
</literallayout>
</para>

<para revisionflag="added">Here, the <systemitem>OMA</systemitem> element with
<systemitem>id="foo"</systemitem> dominates its third child, which dominates the
<systemitem>OMR</systemitem> element, which dominates its target: the element with
<systemitem>id="foo"</systemitem>. So by transitivity, this element dominates itself, and
by the acyclicity constraint, it is not the &exml; representation of an &OM;
element. Even though it could be given the interpretation of the continued fraction
<math display="block">
 <mfrac>
   <mn>1</mn>
   <mrow>
     <mn>1</mn>
     <mo>+</mo>
     <mfrac>
       <mn>1</mn>
       <mrow>
         <mn>1</mn>
         <mo>+</mo>
         <mfrac><mn>1</mn><mi>...</mi></mfrac>
       </mrow>
     </mfrac>
   </mrow>
 </mfrac>
</math> this would correspond to an infinite tree of applications,
which is not admitted by the structure of &OM; objects described
in <xref linkend="cha_obj"/>.</para>

<para revisionflag="added">Note that the acyclicity constraint is not restricted
to such simple cases, as the example in <xref linkend="fig_sharing_between"/>
shows.</para>

<figure id="fig_sharing_between">
    <title>Sharing between &OM; objects (A cycle of order <math><mn>2</mn></math>).</title>
<literallayout><![CDATA[<OMOBJ version="2.0">                   <OMOBJ version="2.0">
  <OMA id="bar">                         <OMA id="baz">
    <OMS cd="arith1" name="plus"/>         <OMS cd="arith1" name="plus"/>
    <OMI>1</OMI>                           <OMI>1</OMI>
    <OMR xref="baz"/>                      <OMR xref="bar"/>
  </OMA>                                 </OMA>
</OMOBJ>                               </OMOBJ>]]>
</literallayout></figure>

<para revisionflag="added"> Here, the <systemitem>OMA</systemitem> with
<systemitem>id="bar"</systemitem> dominates its third child, the
<systemitem>OMR</systemitem> with <systemitem>xref="baz"</systemitem>,
which dominates its target <systemitem>OMA</systemitem> with
<systemitem>id="baz"</systemitem>, which in turn dominates its third
child, the <systemitem>OMR</systemitem> with
<systemitem>xref="bar"</systemitem>, which finally dominates its
target, the original <systemitem>OMA</systemitem> element with
<systemitem>id="bar"</systemitem>. So this pair of &OM; objects
violates the acyclicity constraint and is not the &exml;
representation of an &OM; object.</para>
</section>


<section id="sec_sharing_bvars" revisionflag="added">
<title>Sharing and Bound Variables</title>

<para>Note that the <systemitem>OMR</systemitem> element is a
<emphasis>syntactic</emphasis> referencing mechanism: an
<systemitem>OMR</systemitem> element stands for the exact &exml;
element it points to. In particular, referencing does not interact
with binding in a semantically intuitive way, since it allows for
variable capture. Consider for instance the following &exml;
representation: <literallayout><![CDATA[<OMBIND id="outer">
  <OMS cd="fns1" name="lambda"/>
  <OMBVAR><OMV name="X"/></OMBVAR>
  <OMA>
    <OMV name="f"/>
    <OMBIND id="inner">
      <OMS cd="fns1" name="lambda"/>
      <OMBVAR><OMV name="X"/></OMBVAR>
      <OMR id="copy" href="#orig"/>
    </OMBIND>
    <OMA id="orig"><OMV name="g"/><OMV name="X"/></OMA>
  </OMA>
</OMBIND>]]>
</literallayout>
it represents the &OM; object
<math display="block">
  <mi mathvariant="bold">binding</mi>
  <mrow>
    <mo fence="true">(</mo>
    <mo>&#x003BB;</mo>
      <mo separator="true">,</mo>
    <mi>X</mi>
    <mo separator="true">,</mo>
    <mrow>
      <mi mathvariant="bold">application</mi>
      <mo fence="true">(</mo>
      <mi>f</mi>
      <mo separator="true">,</mo>
      <mi mathvariant="bold">binding</mi>
      <mrow>
        <mo fence="true">(</mo>
        <mo>&#x003BB;</mo>
        <mo separator="true">,</mo>
        <mi>X</mi>
        <mo separator="true">,</mo>
        <mrow>
          <mi mathvariant="bold">application</mi>
          <mo fence="true">(</mo>
          <mi>g</mi>
          <mo separator="true">,</mo>
          <mi>X</mi>
          <mo fence="true">)</mo>
        </mrow>
        <mo fence="true">)</mo>
      </mrow>
      <mo separator="true">,</mo>
      <mrow>
        <mi mathvariant="bold">application</mi>
        <mo fence="true">(</mo>
        <mi>g</mi>
        <mo separator="true">,</mo>
        <mi>X</mi>
        <mo fence="true">)</mo>
      </mrow>
      <mo fence="true">)</mo>
    </mrow>
    <mo fence="true">)</mo>
  </mrow>
</math> which has two sub-terms of the form
<math>
  <mi mathvariant="bold">application</mi>
  <mo fence="true">(</mo>
  <mi>g</mi>
  <mo separator="true">,</mo>
  <mi>X</mi>
  <mo fence="true">)
  </mo> </math>, one with <systemitem>id="orig"</systemitem> (the one explicitly
represented) and one with <systemitem>id="copy"</systemitem>, represented by the
<systemitem>OMR</systemitem> element. In the original, the variable
<math><mi>X</mi></math> is bound by the <emphasis>outer</emphasis>
<systemitem>OMBIND</systemitem> element, and in the copy, the variable
<math><mi>X</mi></math> is bound by the <emphasis>inner</emphasis>
<systemitem>OMBIND</systemitem> element. We say that the inner
<systemitem>OMBIND</systemitem> has captured the variable <math><mi>X</mi></math>.
</para>

<para>It is well-known that variable capture does not conserve semantics. For
  instance, we could use <math><mi>&#x003B1;</mi></math>-conversion to rename the inner occurrence of
  <math><mi>X</mi></math> into, say, 
  <math><mi>Y</mi></math> arriving at the (same) object
<math display="block">
  <mi mathvariant="bold">binding</mi>
  <mrow>
    <mo fence="true">(</mo>
    <mo>&#x003BB;</mo>
      <mo separator="true">,</mo>
    <mi>X</mi>
    <mo separator="true">,</mo>
    <mrow>
      <mi mathvariant="bold">application</mi>
      <mo fence="true">(</mo>
      <mi>f</mi>
      <mo separator="true">,</mo>
      <mi mathvariant="bold">binding</mi>
      <mrow>
        <mo fence="true">(</mo>
        <mo>&#x003BB;</mo>
        <mo separator="true">,</mo>
        <mi mathcolor="red">Y</mi>
        <mo separator="true">,</mo>
        <mrow>
          <mi mathvariant="bold">application</mi>
          <mo fence="true">(</mo>
          <mi>g</mi>
          <mo separator="true">,</mo>
          <mi mathcolor="red">Y</mi>
          <mo fence="true">)</mo>
        </mrow>
        <mo fence="true">)</mo>
      </mrow>
      <mo separator="true">,</mo>
      <mrow>
        <mi mathvariant="bold">application</mi>
        <mo fence="true">(</mo>
        <mi>g</mi>
        <mo separator="true">,</mo>
        <mi>X</mi>
        <mo fence="true">)</mo>
      </mrow>
      <mo fence="true">)</mo>
    </mrow>
    <mo fence="true">)</mo>
  </mrow>
</math>
 Using references that
capture variables in this way can easily lead to representation errors, and is not
  recommended.
</para>
</section>
</section>

<section id="xmldoc">
<title>Embedding &OM; in &exml; Documents</title>

     
<para>The above encoding of &exml; encoded &OM; specifies the grammar to be
used in files that encode a single &OM; object, and specifies the
character streams that a conforming &OM; application should be able
to accept or produce.</para>

<para>When embedding &exml; encoded &OM; objects into a larger &exml; document
one may wish, or need, to use other &exml; features. For example, one may use
extra &exml; attributes to declare &exml; Namespaces&#160;<citation>xmlns</citation>
or <systemitem>xml:lang</systemitem> attributes to specify the language used in
strings&#160;<citation>xml_04</citation>. 
<phrase revisionflag="deleted">
Also, the encoding used in the larger document may not be
<acronym>utf-8</acronym>.</phrase>
</para>

 <para revisionflag="deleted">In particular, if &OM; is used with applications that use the
&exml; Namespace Recommendation &#160;<citation>xmlns</citation> then they should ensure
that &OM; elements are in the <phrase revisionflag="added">namespace
</phrase> <phrase role="tt">http://www.openmath.org/OpenMath</phrase>.
This is most conveniently achieved by adding the namespace declaration <literallayout>
xmlns="http://www.openmath.org/OpenMath" </literallayout> as an attribute to each
<systemitem>OMOBJ</systemitem> element in the document.</para>

<!--
<para revisionflag="added">
 Furthermore, for any &OM; object that contains the <systemitem>OMR</systemitem> element,
 we have to add the <systemitem>XLink</systemitem> namespace declaration
  <phrase role="tt">xmlns:xlink="http://www.w3.org/1999/xlink"</phrase>.
</para>
-->

<para>If such &exml; features are used then the &exml; application controlling the
document must, if passing the &OM; fragment to an &OM; application,
remove any such extra attributes and must ensure that the
fragment is encoded according to the grammar specified above.</para>
</section>
</section>

<section id="sec_binary">
<title>The Binary Encoding</title>

<para>The binary encoding was essentially designed to be more compact than
the &exml; encodings, so that it can be more efficient if large
amounts of data are involved. For the current encoding, we tried to
keep the right balance between compactness, speed of encoding and
decoding and simplicity (to allow a simple specification and easy
implementations).</para>

<section id="sec_binary_grammar">
<title>A Grammar for the Binary Encoding</title>

     

<figure id="fig_bin-enc">
    <title>Grammar of the binary encoding of &OM; objects.</title>
    
    <informaltable>
      <tgroup cols="6">
        <tbody>
          <row>
            <entry>start </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [24] object [25] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added">
	      [24+64]
	      [<math><mi>m</mi></math>]
              [<math><mi>n</mi></math>]
	      object [25]</entry>
          </row>
          
          <row>
            <entry>object </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> integer </entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> float</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> variable</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> symbol</entry>
          </row>

          <row>
            <entry/>
            <entry>|</entry>
            <entry>cdbase</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> string</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> bytearray</entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry>foreign</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> construct</entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry>internal_reference</entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry>external_reference</entry>
          </row>
          
          <row>
            <entry>integer </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [1] [_] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [1+64]
              [<math><mi>n</mi></math>]
              id:<math><mi>n</mi></math>
              [_]
            </entry>
          </row>

          <row revisionflag="added">
            <entry/>
            <entry><math><mo>|</mo></math></entry>
            <entry> [1+32] [_] </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [1+128] {_} </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [1+64+128]
              {<math><mi>n</mi></math>}
              id:<math><mi>n</mi></math>
              {_}
            </entry>
          </row>

          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [1+32+128] {_} </entry>
            <entry/>
            <entry/>
          </row>

          <row>
            <entry/>
            <entry>|</entry>
            <entry> [2]
              [<math><mi>n</mi></math>]
              [_] digits:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [2+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              [_] digits:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>

          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [2+32]
              [<math><mi>n</mi></math>]
              [_] digits:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [2+128]
              {<math><mi>n</mi></math>}
              [_] digits:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [2+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              [_]
              digits:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [2+32+128]
              {<math><mi>n</mi></math>}
              [_] digits:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry>float </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [3] {_}{_} </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [3+64]
              [<math><mi>n</mi></math>]
              id:<math><mi>n</mi></math>
              {_}{_}</entry>
          </row>
          
          <row>
            <entry>variable </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [5]
              [<math><mi>n</mi></math>]
              varname:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [5+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              varname:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [5+128]
              {<math><mi>n</mi></math>}
              varname:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [5+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              varname:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row>
            <entry>symbol</entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [8]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              cdname:<math><mi>n</mi></math>
              symbname:<math><mi>m</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [8+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              [<math><mi>k</mi></math>]
              cdname:<math><mi>n</mi></math>
              symbname:<math><mi>m</mi></math>
              id:<math><mi>k</mi></math>
            </entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [8+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              cdname:<math><mi>n</mi></math>
              symbname:<math><mi>m</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [8+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              {<math><mi>k</mi></math>}
              cdname:<math><mi>n</mi></math>
              symbname:<math><mi>m</mi></math>
              id:<math><mi>k</mi></math></entry>
          </row>
          
          <row>
            <entry>string </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [6]
              [<math><mi>n</mi></math>]
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [6+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry><math><mo>|</mo></math></entry>
            <entry> [6+32]
              [<math><mi>n</mi></math>]
              <phrase>bytes</phrase>:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [6+128]
              {<math><mi>n</mi></math>}
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [6+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [6+32+128]
              {<math><mi>n</mi></math>}
              <phrase>bytes</phrase>:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [7]
              [<math><mi>n</mi></math>]
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
            </entry>
            <entry  revisionflag="added">|</entry>
            <entry revisionflag="added"> [7+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [7+32]
              [<math><mi>n</mi></math>]
              <phrase>bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [7+128]
              {<math><mi>n</mi></math>}
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [7+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              <phrase revisionflag="deleted">chars</phrase>
              <phrase revisionflag="added">bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [7+32+128]
              {<math><mi>n</mi></math>}
              <phrase>bytes</phrase>:<math><mn>2</mn><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry>bytearray </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [4]
              [<math><mi>n</mi></math>]
              bytes:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [4+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              bytes:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry><math><mo>|</mo></math></entry>
            <entry> [4+32]
              [<math><mi>n</mi></math>]
              bytes:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [4+128]
              {<math><mi>n</mi></math>}
              bytes:<math><mi>n</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [4+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              bytes:<math><mi>n</mi></math>
              id:<math><mi>m</mi></math>
            </entry>
          </row>
          
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [4+32+128]
              {<math><mi>n</mi></math>}
              bytes:<math><mi>n</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row revisionflag="added">
            <entry>cdbase</entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [9]
              [<math><mi>n</mi></math>]
              uri:<math><mi>n</mi></math>
	      object
            </entry>
	  </row>

	  <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [9+128]
              {<math><mi>n</mi></math>}
              uri:<math><mi>n</mi></math>
	      object
            </entry>
          </row>

          <row revisionflag="added">
            <entry>foreign</entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [12]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [12+64]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              [<math><mi>k</mi></math>]
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
              id:<math><mi>k</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry><math><mo>|</mo></math></entry>
            <entry> [12+32]
              [<math><mi>n</mi></math>]
              [<math><mi>m</mi></math>]
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [12+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
            </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [12+64+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              {<math><mi>k</mi></math>}
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
              id:<math><mi>k</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [12+32+128]
              {<math><mi>n</mi></math>}
              {<math><mi>m</mi></math>}
              bytes:<math><mi>n</mi></math>
              bytes:<math><mi>m</mi></math>
            </entry>
            <entry/>
            <entry/>
          </row>
          
          <row>
            <entry>construct </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [16] object objects [17] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [16+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              object objects [17]
            </entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [22] symbol objects [23] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [22+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              symbol objects [23]</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [18] attrpairs object [19] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [18+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              attrpairs object [19]
            </entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [26] object bvars object [27] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [26+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              object bvars object [27]
            </entry>
          </row>
          
          <row>
            <entry>attrpairs </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [20] pairs [21] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [20+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              pairs [21]
            </entry>
          </row>
          
          <row>
            <entry>pairs </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> symbol object</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> symbol object pairs</entry>
          </row>
          
          <row>
            <entry>bvars </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [28] vars [29] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [28+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              vars [29]
            </entry>
          </row>
          
          <row>
            <entry>vars </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> attrvar</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> attrvar vars</entry>
          </row>
          
          <row>
            <entry>attrvar </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> variable</entry>
          </row>
          
          <row>
            <entry/>
            <entry>|</entry>
            <entry> [18] attrpairs attrvar [19] </entry>
            <entry revisionflag="added">|</entry>
            <entry revisionflag="added"> [18+64]
              {<math><mi>m</mi></math>}
              id:<math><mi>m</mi></math>
              attrpairs attrvar [19]
            </entry>
          </row>
          
          <row>
            <entry>objects </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> object objects</entry>
          </row>

          <row>
            <entry/>
            <entry>|</entry>
            <entry> (empty)</entry>
          </row>
          
          <row revisionflag="added">
            <entry>internal_reference </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [30] [_] </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [30+128] {_}</entry>
          </row>
          
          <row revisionflag="added">
            <entry>external_reference </entry>
            <entry><math>&longrightarrow;</math></entry>
            <entry> [31]
              [<math><mi>n</mi></math>]
              uri:<math><mi>n</mi></math>
            </entry>
          </row>
          
          <row revisionflag="added">
            <entry/>
            <entry>|</entry>
            <entry> [31+128]
              {<math><mi>n</mi></math>}
              uri:<math><mi>n</mi></math>
            </entry>
          </row>
        </tbody>
      </tgroup>
    </informaltable>
  </figure>
  
  <para><xref linkend="fig_bin-enc"/> gives a grammar for the binary
    encoding<phrase revisionflag="added"> (<quote>start</quote> is the start
      symbol)</phrase>.</para>
  <para>The following conventions are used in this section:
    [<math><mi>n</mi></math>] denotes a byte whose value is the integer
    <math><mi>n</mi></math> (<math><mi>n</mi></math> can range from 0 to 255),
    {<math><mi>m</mi></math>} denotes four bytes representing the (unsigned) integer
    <math><mi>m</mi></math> in network byte order, [_] denotes an arbitrary byte, {_}
    denotes an arbitrary sequence of four bytes.
    <phrase revisionflag="deleted">
      <emphasis>name</emphasis>:<math><mi>n</mi></math> denotes a sequence of
      <math><mi>n</mi></math> bytes named <emphasis>name</emphasis>.
      <emphasis>name</emphasis>:2<math><mi>n</mi></math> denotes a sequence of
      <math><mn>2</mn><mi>n</mi></math> bytes.  <quote>start</quote> is the start symbol of the
      grammar.</phrase></para>
  
  <para revisionflag="added"><emphasis>xxxx</emphasis>:<math><mi>n</mi></math>,
    where <emphasis>xxxx</emphasis> is one of <emphasis>symbname</emphasis>,
    <emphasis>cdname</emphasis>, <emphasis>varname</emphasis>,
    <emphasis>uri</emphasis>, <emphasis>id</emphasis>, <emphasis>digits</emphasis>, or
    <emphasis>bytes</emphasis> denotes a sequence of <math><mi>n</mi></math> bytes
    that conforms to the constraints on <emphasis>xxxx</emphasis> strings. For
    instance, for <emphasis>symbname</emphasis>, <emphasis>varname</emphasis>, or
    <emphasis>cdname</emphasis> this is the regular expression described in
    <xref linkend="sec_names"/>, for <emphasis>uri</emphasis> it is the grammar for
    URIs in <citation>IETF2396</citation>.</para>
</section>

<section id="sec_bin-desc">
  <title>Description of the Grammar</title>
  
<para>An &OM; object is encoded as a sequence of bytes starting with the begin object tag
(<phrase revisionflag="deleted">value&#160;24</phrase>
<phrase revisionflag="added">value&#160;24 or&#160;88</phrase>) and ending with the end
object tag (value&#160;25). These are similar to
the <systemitem>&lt;OMOBJ></systemitem> and <systemitem>&lt;/OMOBJ></systemitem> tags of
the &exml; encoding. <phrase revisionflag="added">Objects with start token [88]
  have two additional bytes <math><mi>m</mi></math> and <math><mi>n</mi></math>
that characterize the version
(<math><mrow><mi>m</mi><mo>.</mo><mi>n</mi></mrow></math>) of the encoding
directly after the start token. This is similar to <systemitem>&lt;OMOBJ
  version="m.n"></systemitem>.</phrase></para> 

<para>The encoding of each kind of &OM; object begins with a tag that is a single byte,
holding a <phrase role="sl">token identifier</phrase>
<phrase revisionflag="added"> that describes the kind of object</phrase> and two flags, the <phrase
role="sl">long</phrase> flag and the <phrase role="sl">sharing</phrase> flag. The
identifier is stored in the first 6 bits (1 to 6). The long flag is the eighth bit
<phrase revisionflag="added">and the sharing flag is the seventh bit. If the long
  flag is set, this signifies that the names, strings, and data fields in the
  encoded &OM; object are longer than 255 bytes or characters. The sharing flag
  indicates that the encoded object may be shared in another (part of an) object
  somewhere else (see <xref linkend="sec_sharing_references"/>). Note that if the sharing
  flag is set (in the right column of the grammar in 
  <xref linkend="fig_bin-enc"/>), then the encoding includes a representation of
  an identifier that serves as the target of a reference (internal with token
  identifier 30 or external with token identifier 31).</phrase>
</para>

<para revisionflag="added">The concept of structure sharing in &OM; encodings and
  in particular the sharing bit in the binary encoding has been
  introduced in &OM;&#160;2 (see section <xref linkend="sec_sharing_references"/> for
  details). The binary encoding in &OM;&#160;2 leaves the tokens with sharing flag 0
  unchanged to ensure &OM;&#160;1 compatibility. To make use of functionality like
  the version attribute on the &OM; object
  introduced in &OM;&#160;2, the tokens with sharing flag 1 should be used.</para>

<para revisionflag="added">To facilitate the streaming of &OM; objects, some basic
  objects (integers, strings, bytearrays, and foreign objects) have variant token
  identifiers with the fifth bit set. The idea behind this is that these basic
  objects can be split into packets. If the fifth bit is not set, this packet is
  the final packet of the basic object. If the bit is set, then more packets of
  the basic object will follow directly after this one. Note that all packets
  making up a basic object must have the same token identifier (up to the fifth
  bit). In <xref linkend="fig_bin-enc_stream"/> we have represented an integer
  that is split up into three packets.  </para>

<para>Here is a description of the binary encodings of every kind of &OM; object:

<variablelist>
<varlistentry>
  <term>Integers</term><listitem><para>are encoded depending on how large they
      are. There are four possible formats.  Integers between -128 and 127 are
      encoded as the small integer tag (<phrase revisionflag="added">token identifier</phrase> 1) followed by a single byte that is the
      value of the integer (interpreted as a signed character). For
      example 16 is encoded as <systemitem>0x01 0x10</systemitem>.  Integers between
      <math>
        <msup>
          <mn>-2</mn>
          <mn>31</mn>
        </msup>
      </math