diff options
| author | Lennart Poettering <lennart@poettering.net> | 2006-04-14 23:47:33 +0000 | 
|---|---|---|
| committer | Lennart Poettering <lennart@poettering.net> | 2006-04-14 23:47:33 +0000 | 
| commit | 9522b4484206ef3a99fb5586831a27fdfea0c373 (patch) | |
| tree | a7c0b2fb633c51b3d170f0538df0dbac1eae4d61 | |
| parent | 86ad60185ae90520269d384d90569d2402c25dd1 (diff) | |
add an RTP sender module
git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@712 fefdeb5f-60dc-0310-8127-8f9354f1896f
| -rw-r--r-- | src/modules/rtp/Makefile | 13 | ||||
| -rw-r--r-- | src/modules/rtp/module-rtp-monitor.c | 340 | ||||
| -rw-r--r-- | src/modules/rtp/rfc2327.txt | 2355 | ||||
| -rw-r--r-- | src/modules/rtp/rfc2974.txt | 1011 | ||||
| -rw-r--r-- | src/modules/rtp/rfc3550.txt | 5827 | ||||
| -rw-r--r-- | src/modules/rtp/rfc3551.txt | 2467 | ||||
| -rw-r--r-- | src/modules/rtp/rtp.c | 193 | ||||
| -rw-r--r-- | src/modules/rtp/rtp.h | 51 | ||||
| -rw-r--r-- | src/modules/rtp/sap.c | 107 | ||||
| -rw-r--r-- | src/modules/rtp/sap.h | 43 | ||||
| -rw-r--r-- | src/modules/rtp/sdp.c | 87 | ||||
| -rw-r--r-- | src/modules/rtp/sdp.h | 33 | 
12 files changed, 12527 insertions, 0 deletions
diff --git a/src/modules/rtp/Makefile b/src/modules/rtp/Makefile new file mode 100644 index 00000000..316beb72 --- /dev/null +++ b/src/modules/rtp/Makefile @@ -0,0 +1,13 @@ +# This is a dirty trick just to ease compilation with emacs +# +# This file is not intended to be distributed or anything +# +# So: don't touch it, even better ignore it! + +all: +	$(MAKE) -C ../.. + +clean: +	$(MAKE) -C ../.. clean + +.PHONY: all clean diff --git a/src/modules/rtp/module-rtp-monitor.c b/src/modules/rtp/module-rtp-monitor.c new file mode 100644 index 00000000..66332093 --- /dev/null +++ b/src/modules/rtp/module-rtp-monitor.c @@ -0,0 +1,340 @@ + +/*** +  This file is part of polypaudio. +  +  polypaudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2 of the License, +  or (at your option) any later version. +  +  polypaudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. +  +  You should have received a copy of the GNU Lesser General Public License +  along with polypaudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <assert.h> +#include <stdio.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> + +#include <polypcore/module.h> +#include <polypcore/llist.h> +#include <polypcore/source.h> +#include <polypcore/source-output.h> +#include <polypcore/memblockq.h> +#include <polypcore/log.h> +#include <polypcore/util.h> +#include <polypcore/xmalloc.h> +#include <polypcore/modargs.h> +#include <polypcore/namereg.h> + +#include "module-rtp-monitor-symdef.h" + +#include "rtp.h" +#include "sdp.h" +#include "sap.h" + +PA_MODULE_AUTHOR("Lennart Poettering") +PA_MODULE_DESCRIPTION("Read data from source and send it to the network via RTP") +PA_MODULE_VERSION(PACKAGE_VERSION) +PA_MODULE_USAGE( +        "source=<name for the source> " +        "format=<sample format> " +        "channels=<number of channels> " +        "rate=<sample rate> " +        "destination=<destination IP address> " +        "port=<port number> " +        "mtu=<maximum transfer unit> " +) + +/* DEFAULT_PORT and DEFAULT_DESTINATION apply when port= / destination= are +   omitted; DEFAULT_MTU is rounded down to a whole number of frames in +   pa__init(). SAP_PORT is the port the SAP socket is connected to. +   SAP_INTERVAL is handed to pa_timeval_add(), presumably in microseconds +   (i.e. 5 s) -- confirm against pa_timeval_add(). */ +#define DEFAULT_PORT 5666 +#define SAP_PORT 9875 +#define DEFAULT_DESTINATION "224.0.0.252" +#define MEMBLOCKQ_MAXLENGTH (1024*170) +#define DEFAULT_MTU 1024 +#define SAP_INTERVAL 5000000 + +/* Keys handed to pa_modargs_new(). Every argument advertised in +   PA_MODULE_USAGE and read in pa__init() has to be listed here, "mtu" +   included. */ +static const char* const valid_modargs[] = { +    "source", +    "format", +    "channels", +    "rate", +    "destination", +    "port", +    "mtu", +    NULL +}; + +/* Per-instance state; pa__init() stores it in both m->userdata and +   o->userdata so the callbacks and pa__done() can reach it. */ +struct userdata { +    pa_module *module; +    pa_core *core; + +    pa_source_output *source_output; +    pa_memblockq *memblockq; + +    pa_rtp_context rtp_context; +    pa_sap_context sap_context; +    size_t mtu; + +    pa_time_event *sap_event; +}; + +/* Push callback of the source output: queue the incoming chunk, then hand +   the queue to pa_rtp_send() together with the configured mtu. */ +static void source_output_push(pa_source_output *o, const pa_memchunk *chunk) { +    struct userdata *u; +    assert(o); +    u = o->userdata; + +    if (pa_memblockq_push(u->memblockq, chunk) < 0) { +        pa_log(__FILE__": Failed to push chunk into memblockq."); +        return; +    
} +     +    pa_rtp_send(&u->rtp_context, u->mtu, u->memblockq); +} + +/* Kill callback of the source output: request unloading of the owning +   module, then disconnect and drop our reference. The pointer is cleared so +   that pa__done() does not release the source output a second time. */ +static void source_output_kill(pa_source_output* o) { +    struct userdata *u; +    assert(o); +    u = o->userdata; + +    pa_module_unload_request(u->module); + +    pa_source_output_disconnect(u->source_output); +    pa_source_output_unref(u->source_output); +    u->source_output = NULL; +} + +/* Latency callback: the amount of data still queued in the memblockq, +   converted from bytes to time with the source output's sample spec. */ +static pa_usec_t source_output_get_latency (pa_source_output *o) { +    struct userdata *u; +    assert(o); +    u = o->userdata; + +    return pa_bytes_to_usec(pa_memblockq_get_length(u->memblockq), &o->sample_spec); +} + +/* Timer callback: send another SAP packet and re-arm the timer +   SAP_INTERVAL after the current time. */ +static void sap_event(pa_mainloop_api *m, pa_time_event *t, const struct timeval *tv, void *userdata) { +    struct userdata *u = userdata; +    struct timeval next; +     +    assert(m); +    assert(t); +    assert(tv); +    assert(u); + +    pa_sap_send(&u->sap_context, 0); + +    pa_log("SAP update"); +    pa_gettimeofday(&next); +    pa_timeval_add(&next, SAP_INTERVAL); +    m->time_restart(t, &next); +} + +/* Module entry point: parse the module arguments, connect one UDP socket +   for RTP and one for SAP to the destination, attach a source output to the +   requested source and start the periodic SAP timer. +   Returns 0 on success; on failure returns -1 after releasing the modargs, +   both sockets and the source output if they were already acquired. */ +int pa__init(pa_core *c, pa_module*m) { +    struct userdata *u; +    pa_modargs *ma = NULL; +    const char *dest; +    uint32_t port = DEFAULT_PORT, mtu; +    int af, fd = -1, sap_fd = -1; +    pa_source *s; +    pa_sample_spec ss; +    pa_channel_map cm; +    struct sockaddr_in sa4, sap_sa4; +    struct sockaddr_in6 sa6, sap_sa6; +    struct sockaddr_storage sa_dst; +    pa_source_output *o = NULL; +    uint8_t payload; +    char *p; +    socklen_t k; +    struct timeval tv; +     +    assert(c); +    assert(m); + +    /* connect() is handed the whole structure, so clear the members that +       are not assigned below (sin_zero, sin6_flowinfo, sin6_scope_id) +       instead of passing stack garbage. sap_sa4/sap_sa6 are copies and +       inherit the zeroed members. */ +    memset(&sa4, 0, sizeof(sa4)); +    memset(&sa6, 0, sizeof(sa6)); + +    if (!(ma = pa_modargs_new(m->argument, valid_modargs))) { +        pa_log(__FILE__": failed to parse module arguments"); +        goto fail; +    } + +    if (!(s = pa_namereg_get(m->core, pa_modargs_get_value(ma, "source", NULL), PA_NAMEREG_SOURCE, 1))) { +        pa_log(__FILE__": source does not exist."); +        goto fail; +    } + +    /* Start from the source's own sample spec and channel map; the module +       arguments may override the sample spec afterwards. */ +    ss = s->sample_spec; +    pa_rtp_sample_spec_fixup(&ss); +    cm = s->channel_map; +    if 
(pa_modargs_get_sample_spec(ma, &ss) < 0) { +        pa_log(__FILE__": failed to parse sample specification"); +        goto fail; +    } + +    if (!pa_rtp_sample_spec_valid(&ss)) { +        pa_log(__FILE__": specified sample type not compatible with RTP"); +        goto fail; +    } + +    if (ss.channels != cm.channels) +        pa_channel_map_init_auto(&cm, ss.channels); + +    payload = pa_rtp_payload_type(&ss); + +    /* Default: DEFAULT_MTU rounded down to a whole number of frames */ +    mtu = (DEFAULT_MTU/pa_frame_size(&ss))*pa_frame_size(&ss); +     +    if (pa_modargs_get_value_u32(ma, "mtu", &mtu) < 0 || mtu < 1 || mtu % pa_frame_size(&ss) != 0) { +        pa_log(__FILE__": invalid mtu."); +        goto fail; +    } +     +    if (pa_modargs_get_value_u32(ma, "port", &port) < 0 || port < 1 || port > 0xFFFF) { +        pa_log(__FILE__": port= expects a numerical argument between 1 and 65535."); +        goto fail; +    } + +    /* Try the destination as an IPv6 literal first, then as IPv4. The SAP +       address is the same host with SAP_PORT as port. */ +    if ((dest = pa_modargs_get_value(ma, "destination", DEFAULT_DESTINATION))) { +        if (inet_pton(AF_INET6, dest, &sa6.sin6_addr) > 0) { +            sa6.sin6_family = af = AF_INET6; +            sa6.sin6_port = htons(port); +            sap_sa6 = sa6; +            sap_sa6.sin6_port = htons(SAP_PORT); +        } else if (inet_pton(AF_INET, dest, &sa4.sin_addr) > 0) { +            sa4.sin_family = af = AF_INET; +            sa4.sin_port = htons(port); +            sap_sa4 = sa4; +            sap_sa4.sin_port = htons(SAP_PORT); +        } else { +            pa_log(__FILE__": invalid destination '%s'", dest); +            goto fail; +        } +    } +     +    if ((fd = socket(af, SOCK_DGRAM, 0)) < 0) { +        pa_log(__FILE__": socket() failed: %s", strerror(errno)); +        goto fail; +    } + +    if (connect(fd, af == AF_INET ? (struct sockaddr*) &sa4 : (struct sockaddr*) &sa6, af == AF_INET ? 
sizeof(sa4) : sizeof(sa6)) < 0) { +        pa_log(__FILE__": connect() failed: %s", strerror(errno)); +        goto fail; +    } + +    if ((sap_fd = socket(af, SOCK_DGRAM, 0)) < 0) { +        pa_log(__FILE__": socket() failed: %s", strerror(errno)); +        goto fail; +    } + +    if (connect(sap_fd, af == AF_INET ? (struct sockaddr*) &sap_sa4 : (struct sockaddr*) &sap_sa6, af == AF_INET ? sizeof(sap_sa4) : sizeof(sap_sa6)) < 0) { +        pa_log(__FILE__": connect() failed: %s", strerror(errno)); +        goto fail; +    } + +    /* Query the local address of the connected RTP socket; it is passed to +       pa_sdp_build() below. This is done before any module state exists so +       that a failure can take the common fail path instead of relying on +       an assert() that vanishes under NDEBUG. */ +    k = sizeof(sa_dst); +    if (getsockname(fd, (struct sockaddr*) &sa_dst, &k) < 0) { +        pa_log(__FILE__": getsockname() failed: %s", strerror(errno)); +        goto fail; +    } + +    if (!(o = pa_source_output_new(s, __FILE__, "RTP Monitor Stream", &ss, &cm, PA_RESAMPLER_INVALID))) { +        pa_log(__FILE__": failed to create source output."); +        goto fail; +    } + +    o->push = source_output_push; +    o->kill = source_output_kill; +    o->get_latency = source_output_get_latency; +    o->owner = m; +     +    /* No failure path exists past this point */ +    u = pa_xnew(struct userdata, 1); +    m->userdata = u; +    o->userdata = u; + +    u->module = m; +    u->core = c; +    u->source_output = o; +     +    u->memblockq = pa_memblockq_new( +            0, +            MEMBLOCKQ_MAXLENGTH, +            MEMBLOCKQ_MAXLENGTH, +            pa_frame_size(&ss), +            1, +            0, +            NULL, +            c->memblock_stat); + +    u->mtu = mtu; +     +    p = pa_sdp_build(af, +                     af == AF_INET ? (void*) &((struct sockaddr_in*) &sa_dst)->sin_addr : (void*) &((struct sockaddr_in6*) &sa_dst)->sin6_addr, +                     af == AF_INET ? 
(void*) &sa4.sin_addr : (void*) &sa6.sin6_addr, +                     "Polypaudio RTP Stream", port, payload, &ss); +     +    /* NOTE(review): the sockets and the SDP string are handed to the +       contexts here; presumably the contexts own them from now on -- +       confirm in rtp.c / sap.c. */ +    pa_rtp_context_init_send(&u->rtp_context, fd, 0, payload); +    pa_sap_context_init_send(&u->sap_context, sap_fd, p); + +    pa_log_info("RTP stream initialized with mtu %u on %s:%u, SSRC=0x%08x, payload=%u, initial sequence #%u", mtu, dest, port, u->rtp_context.ssrc, payload, u->rtp_context.sequence); +    pa_log_info("SDP-Data:\n%s\nEOF", p); +     +    /* Announce once right away, then periodically from sap_event() */ +    pa_sap_send(&u->sap_context, 0); + +    pa_gettimeofday(&tv); +    pa_timeval_add(&tv, SAP_INTERVAL); +    u->sap_event = c->mainloop->time_new(c->mainloop, &tv, sap_event, u); + +    pa_modargs_free(ma); + +    return 0; + +fail: +    if (ma) +        pa_modargs_free(ma); + +    if (fd >= 0) +        close(fd); +     +    if (sap_fd >= 0) +        close(sap_fd); + +    if (o) { +        pa_source_output_disconnect(o); +        pa_source_output_unref(o); +    } +         +    return -1; +} + +/* Module teardown: stop the SAP timer and release everything pa__init() +   set up. Does nothing when m->userdata is unset. */ +void pa__done(pa_core *c, pa_module*m) { +    struct userdata *u; +    assert(c); +    assert(m); + +    if (!(u = m->userdata)) +        return; + +    c->mainloop->time_free(u->sap_event); +     +    /* Already NULL if source_output_kill() ran */ +    if (u->source_output) { +        pa_source_output_disconnect(u->source_output); +        pa_source_output_unref(u->source_output); +    } + +    pa_rtp_context_destroy(&u->rtp_context); + +    /* The second argument is 1 only here; presumably it marks the final +       SAP packet that withdraws the announcement -- confirm in sap.c. */ +    pa_sap_send(&u->sap_context, 1); +    pa_sap_context_destroy(&u->sap_context); + +    pa_memblockq_free(u->memblockq); +     +    pa_xfree(u); +} diff --git a/src/modules/rtp/rfc2327.txt b/src/modules/rtp/rfc2327.txt new file mode 100644 index 00000000..ce77de61 --- /dev/null +++ b/src/modules/rtp/rfc2327.txt @@ -0,0 +1,2355 @@ + + + + + + +Network Working Group                                           M. Handley +Request for Comments: 2327                                     V. 
Jacobson +Category: Standards Track                                         ISI/LBNL +                                                                April 1998 + + +                   SDP: Session Description Protocol + +Status of this Memo + +   This document specifies an Internet standards track protocol for the +   Internet community, and requests discussion and suggestions for +   improvements.  Please refer to the current edition of the "Internet +   Official Protocol Standards" (STD 1) for the standardization state +   and status of this protocol.  Distribution of this memo is unlimited. + +Copyright Notice + +   Copyright (C) The Internet Society (1998).  All Rights Reserved. + +Abstract + +   This document defines the Session Description Protocol, SDP.  SDP is +   intended for describing multimedia sessions for the purposes of +   session announcement, session invitation, and other forms of +   multimedia session initiation. + +   This document is a product of the Multiparty Multimedia Session +   Control (MMUSIC) working group of the Internet Engineering Task +   Force. Comments are solicited and should be addressed to the working +   group's mailing list at confctrl@isi.edu and/or the authors. + +1.  Introduction + +   On the Internet multicast backbone (Mbone), a session directory tool +   is used to advertise multimedia conferences and communicate the +   conference addresses and conference tool-specific information +   necessary for participation.  This document defines a session +   description protocol for this purpose, and for general real-time +   multimedia session description purposes. This memo does not describe +   multicast address allocation or the distribution of SDP messages in +   detail.  These are described in accompanying memos.  SDP is not +   intended for negotiation of media encodings. 
+ + + + + + + + +Handley & Jacobson          Standards Track                     [Page 1] + +RFC 2327                          SDP                         April 1998 + + +2.  Background + +   The Mbone is the part of the internet that supports IP multicast, and +   thus permits efficient many-to-many communication.  It is used +   extensively for multimedia conferencing.  Such conferences usually +   have the property that tight coordination of conference membership is +   not necessary; to receive a conference, a user at an Mbone site only +   has to know the conference's multicast group address and the UDP +   ports for the conference data streams. + +   Session directories assist the advertisement of conference sessions +   and communicate the relevant conference setup information to +   prospective participants.  SDP is designed to convey such information +   to recipients.  SDP is purely a format for session description - it +   does not incorporate a transport protocol, and is intended to use +   different transport protocols as appropriate including the Session +   Announcement Protocol [4], Session Initiation Protocol [11], Real- +   Time Streaming Protocol [12], electronic mail using the MIME +   extensions, and the Hypertext Transport Protocol. + +   SDP is intended to be general purpose so that it can be used for a +   wider range of network environments and applications than just +   multicast session directories.  However, it is not intended to +   support negotiation of session content or media encodings - this is +   viewed as outside the scope of session description. + +3.  Glossary of Terms + +   The following terms are used in this document, and have specific +   meaning within the context of this document. + +   Conference +     A multimedia conference is a set of two or more communicating users +     along with the software they are using to communicate. 
+ +   Session +     A multimedia session is a set of multimedia senders and receivers +     and the data streams flowing from senders to receivers.  A +     multimedia conference is an example of a multimedia session. + +   Session Advertisement +     See session announcement. + +   Session Announcement +     A session announcement is a mechanism by which a session +     description is conveyed to users in a proactive fashion, i.e., the +     session description was not explicitly requested by the user. + + + + +Handley & Jacobson          Standards Track                     [Page 2] + +RFC 2327                          SDP                         April 1998 + + +   Session Description +     A well defined format for conveying sufficient information to +     discover and participate in a multimedia session. + +3.1.  Terminology + +   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", +   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this +   document are to be interpreted as described in RFC 2119. + +4.  SDP Usage + +4.1.  Multicast Announcements + +   SDP is a session description protocol for multimedia sessions. A +   common mode of usage is for a client to announce a conference session +   by periodically multicasting an announcement packet to a well known +   multicast address and port using the Session Announcement Protocol +   (SAP). + +   SAP packets are UDP packets with the following format: + +         |--------------------| +         | SAP header         | +         |--------------------| +         | text payload       | +         |////////// + + +   The header is the Session Announcement Protocol header.  SAP is +   described in more detail in a companion memo [4] + +   The text payload is an SDP session description, as described in this +   memo.  The text payload should be no greater than 1 Kbyte in length. +   If announced by SAP, only one session announcement is permitted in a +   single packet. + +4.2.  
Email and WWW Announcements + +   Alternative means of conveying session descriptions include +   electronic mail and the World Wide Web. For both email and WWW +   distribution, the use of the MIME content type "application/sdp" +   should be used.  This enables the automatic launching of applications +   for participation in the session from the WWW client or mail reader +   in a standard manner. + + + + + + +Handley & Jacobson          Standards Track                     [Page 3] + +RFC 2327                          SDP                         April 1998 + + +   Note that announcements of multicast sessions made only via email or +   the World Wide Web (WWW) do not have the property that the receiver +   of a session announcement can necessarily receive the session because +   the multicast sessions may be restricted in scope, and access to the +   WWW server or reception of email is possible outside this scope.  SAP +   announcements do not suffer from this mismatch. + +5.  Requirements and Recommendations + +   The purpose of SDP is to convey information about media streams in +   multimedia sessions to allow the recipients of a session description +   to participate in the session.  SDP is primarily intended for use in +   an internetwork, although it is sufficiently general that it can +   describe conferences in other network environments. + +   A multimedia session, for these purposes, is defined as a set of +   media streams that exist for some duration of time.  Media streams +   can be many-to-many.  The times during which the session is active +   need not be continuous. + +   Thus far, multicast based sessions on the Internet have differed from +   many other forms of conferencing in that anyone receiving the traffic +   can join the session (unless the session traffic is encrypted).  In +   such an environment, SDP serves two primary purposes.  
It is a means +   to communicate the existence of a session, and is a means to convey +   sufficient information to enable joining and participating in the +   session.  In a unicast environment, only the latter purpose is likely +   to be relevant. + +   Thus SDP includes: + +   o Session name and purpose + +   o Time(s) the session is active + +   o The media comprising the session + +   o Information to receive those media (addresses, ports, formats and +     so on) + +   As resources necessary to participate in a session may be limited, +   some additional information may also be desirable: + +   o Information about the bandwidth to be used by the conference + +   o Contact information for the person responsible for the session + + + + + +Handley & Jacobson          Standards Track                     [Page 4] + +RFC 2327                          SDP                         April 1998 + + +   In general, SDP must convey sufficient information to be able to join +   a session (with the possible exception of encryption keys) and to +   announce the resources to be used to non-participants that may need +   to know. + +5.1.  Media Information + +   SDP includes: + +   o The type of media (video, audio, etc) + +   o The transport protocol (RTP/UDP/IP, H.320, etc) + +   o The format of the media (H.261 video, MPEG video, etc) + +   For an IP multicast session, the following are also conveyed: + +   o Multicast address for media + +   o Transport Port for media + +   This address and port are the destination address and destination +   port of the multicast stream, whether being sent, received, or both. + +   For an IP unicast session, the following are conveyed: + +   o Remote address for media + +   o Transport port for contact address + +   The semantics of this address and port depend on the media and +   transport protocol defined.  
By default, this is the remote address +   and remote port to which data is sent, and the remote address and +   local port on which to receive data.  However, some media may define +   to use these to establish a control channel for the actual media +   flow. + +5.2.  Timing Information + +   Sessions may either be bounded or unbounded in time. Whether or not +   they are bounded, they may be only active at specific times. + +   SDP can convey: + +   o An arbitrary list of start and stop times bounding the session + +   o For each bound, repeat times such as "every Wednesday at 10am for +     one hour" + + + +Handley & Jacobson          Standards Track                     [Page 5] + +RFC 2327                          SDP                         April 1998 + + +   This timing information is globally consistent, irrespective of local +   time zone or daylight saving time. + +5.3.  Private Sessions + +   It is possible to create both public sessions and private sessions. +   Private sessions will typically be conveyed by encrypting the session +   description to distribute it.  The details of how encryption is +   performed are dependent on the mechanism used to convey SDP - see [4] +   for how this is done for session announcements. + +   If a session announcement is private it is possible to use that +   private announcement to convey encryption keys necessary to decode +   each of the media in a conference, including enough information to +   know which encryption scheme is used for each media. + +5.4.  Obtaining Further Information about a Session + +   A session description should convey enough information to decide +   whether or not to participate in a session.  SDP may include +   additional pointers in the form of Universal Resources Identifiers +   (URIs) for more information about the session. + +5.5.  
Categorisation + +   When many session descriptions are being distributed by SAP or any +   other advertisement mechanism, it may be desirable to filter +   announcements that are of interest from those that are not.  SDP +   supports a categorisation mechanism for sessions that is capable of +   being automated. + +5.6.  Internationalization + +   The SDP specification recommends the use of the ISO 10646 character +   sets in the UTF-8 encoding (RFC 2044) to allow many different +   languages to be represented.  However, to assist in compact +   representations, SDP also allows other character sets such as ISO +   8859-1 to be used when desired.  Internationalization only applies to +   free-text fields (session name and background information), and not +   to SDP as a whole. + +6.  SDP Specification + +   SDP session descriptions are entirely textual using the ISO 10646 +   character set in UTF-8 encoding. SDP field names and attributes names +   use only the US-ASCII subset of UTF-8, but textual fields and +   attribute values may use the full ISO 10646 character set.  The +   textual form, as opposed to a binary encoding such as ASN/1 or XDR, + + + +Handley & Jacobson          Standards Track                     [Page 6] + +RFC 2327                          SDP                         April 1998 + + +   was chosen to enhance portability, to enable a variety of transports +   to be used (e.g, session description in a MIME email message) and to +   allow flexible, text-based toolkits (e.g., Tcl/Tk ) to be used to +   generate and to process session descriptions.  However, since the +   total bandwidth allocated to all SAP announcements is strictly +   limited, the encoding is deliberately compact.  
Also, since +   announcements may be transported via very unreliable means (e.g., +   email) or damaged by an intermediate caching server, the encoding was +   designed with strict order and formatting rules so that most errors +   would result in malformed announcements which could be detected +   easily and discarded. This also allows rapid discarding of encrypted +   announcements for which a receiver does not have the correct key. + +   An SDP session description consists of a number of lines of text of +   the form <type>=<value> <type> is always exactly one character and is +   case-significant.  <value> is a structured text string whose format +   depends on <type>.  It also will be case-significant unless a +   specific field defines otherwise.  Whitespace is not permitted either +   side of the `=' sign. In general <value> is either a number of fields +   delimited by a single space character or a free format string. + +   A session description consists of a session-level description +   (details that apply to the whole session and all media streams) and +   optionally several media-level descriptions (details that apply onto +   to a single media stream). + +   An announcement consists of a session-level section followed by zero +   or more media-level sections.  The session-level part starts with a +   `v=' line and continues to the first media-level section.  The media +   description starts with an `m=' line and continues to the next media +   description or end of the whole session description.  In general, +   session-level values are the default for all media unless overridden +   by an equivalent media-level value. + +   When SDP is conveyed by SAP, only one session description is allowed +   per packet.  When SDP is conveyed by other means, many SDP session +   descriptions may be concatenated together (the `v=' line indicating +   the start of a session description terminates the previous +   description).  
Some lines in each description are required and some +   are optional but all must appear in exactly the order given here (the +   fixed order greatly enhances error detection and allows for a simple +   parser). Optional items are marked with a `*'. + +Session description +        v=  (protocol version) +        o=  (owner/creator and session identifier). +        s=  (session name) +        i=* (session information) + + + +Handley & Jacobson          Standards Track                     [Page 7] + +RFC 2327                          SDP                         April 1998 + + +        u=* (URI of description) +        e=* (email address) +        p=* (phone number) +        c=* (connection information - not required if included in all media) +        b=* (bandwidth information) +        One or more time descriptions (see below) +        z=* (time zone adjustments) +        k=* (encryption key) +        a=* (zero or more session attribute lines) +        Zero or more media descriptions (see below) + +Time description +        t=  (time the session is active) +        r=* (zero or more repeat times) + +Media description +        m=  (media name and transport address) +        i=* (media title) +        c=* (connection information - optional if included at session-level) +        b=* (bandwidth information) +        k=* (encryption key) +        a=* (zero or more media attribute lines) + +   The set of `type' letters is deliberately small and not intended to +   be extensible -- SDP parsers must completely ignore any announcement +   that contains a `type' letter that it does not understand. The +   `attribute' mechanism ("a=" described below) is the primary means for +   extending SDP and tailoring it to particular applications or media. +   Some attributes (the ones listed in this document) have a defined +   meaning but others may be added on an application-, media- or +   session-specific basis.  
A session directory must ignore any +   attribute it doesn't understand. + +   The connection (`c=') and attribute (`a=') information in the +   session-level section applies to all the media of that session unless +   overridden by connection information or an attribute of the same name +   in the media description.  For instance, in the example below, each +   media behaves as if it were given a `recvonly' attribute. + +   An example SDP description is: + +        v=0 +        o=mhandley 2890844526 2890842807 IN IP4 126.16.64.4 +        s=SDP Seminar +        i=A Seminar on the session description protocol +        u=http://www.cs.ucl.ac.uk/staff/M.Handley/sdp.03.ps +        e=mjh@isi.edu (Mark Handley) +        c=IN IP4 224.2.17.12/127 + + + +Handley & Jacobson          Standards Track                     [Page 8] + +RFC 2327                          SDP                         April 1998 + + +        t=2873397496 2873404696 +        a=recvonly +        m=audio 49170 RTP/AVP 0 +        m=video 51372 RTP/AVP 31 +        m=application 32416 udp wb +        a=orient:portrait + +   Text records such as the session name and information are bytes +   strings which may contain any byte with the exceptions of 0x00 (Nul), +   0x0a (ASCII newline) and 0x0d (ASCII carriage return).  The sequence +   CRLF (0x0d0a) is used to end a record, although parsers should be +   tolerant and also accept records terminated with a single newline +   character.  By default these byte strings contain ISO-10646 +   characters in UTF-8 encoding, but this default may be changed using +   the `charset' attribute. + +   Protocol Version + +   v=0 + +   The "v=" field gives the version of the Session Description Protocol. +   There is no minor version number. 
+ +   Origin + +   o=<username> <session id> <version> <network type> <address type> +   <address> + +   The "o=" field gives the originator of the session (their username +   and the address of the user's host) plus a session id and session +   version number. + +   <username> is the user's login on the originating host, or it is "-" +   if the originating host does not support the concept of user ids. +   <username> must not contain spaces.  <session id> is a numeric string +   such that the tuple of <username>, <session id>, <network type>, +   <address type> and <address> form a globally unique identifier for +   the session. + +   The method of <session id> allocation is up to the creating tool, but +   it has been suggested that a Network Time Protocol (NTP) timestamp be +   used to ensure uniqueness [1]. + +   <version> is a version number for this announcement.  It is needed +   for proxy announcements to detect which of several announcements for +   the same session is the most recent.  Again its usage is up to the + + + + + +Handley & Jacobson          Standards Track                     [Page 9] + +RFC 2327                          SDP                         April 1998 + + +   creating tool, so long as <version> is increased when a modification +   is made to the session data.  Again, it is recommended (but not +   mandatory) that an NTP timestamp is used. + +   <network type> is a text string giving the type of network. +   Initially "IN" is defined to have the meaning "Internet".  <address +   type> is a text string giving the type of the address that follows. +   Initially "IP4" and "IP6" are defined.  <address> is the globally +   unique address of the machine from which the session was created. +   For an address type of IP4, this is either the fully-qualified domain +   name of the machine, or the dotted-decimal representation of the IP +   version 4 address of the machine.  
For an address type of IP6, this +   is either the fully-qualified domain name of the machine, or the +   compressed textual representation of the IP version 6 address of the +   machine.  For both IP4 and IP6, the fully-qualified domain name is +   the form that SHOULD be given unless this is unavailable, in which +   case the globally unique address may be substituted.  A local IP +   address MUST NOT be used in any context where the SDP description +   might leave the scope in which the address is meaningful. + +   In general, the "o=" field serves as a globally unique identifier for +   this version of this session description, and the subfields excepting +   the version taken together identify the session irrespective of any +   modifications. + +   Session Name + +   s=<session name> + +   The "s=" field is the session name.  There must be one and only one +   "s=" field per session description, and it must contain ISO 10646 +   characters (but see also the `charset' attribute below). + +   Session and Media Information + +   i=<session description> + +   The "i=" field is information about the session.  There may be at +   most one session-level "i=" field per session description, and at +   most one "i=" field per media. Although it may be omitted, this is +   discouraged for session announcements, and user interfaces for +   composing sessions should require text to be entered.  If it is +   present it must contain ISO 10646 characters (but see also the +   `charset' attribute below). + +   A single "i=" field can also be used for each media definition.  In +   media definitions, "i=" fields are primarily intended for labeling +   media streams. As such, they are most likely to be useful when a + + + +Handley & Jacobson          Standards Track                    [Page 10] + +RFC 2327                          SDP                         April 1998 + + +   single session has more than one distinct media stream of the same +   media type.  
An example would be two different whiteboards, one for +   slides and one for feedback and questions. + +   URI + +   u=<URI> + +   o A URI is a Universal Resource Identifier as used by WWW clients + +   o The URI should be a pointer to additional information about the +     conference + +   o This field is optional, but if it is present it should be specified +     before the first media field + +   o No more than one URI field is allowed per session description + + +   Email Address and Phone Number + +   e=<email address> +   p=<phone number> + +   o These specify contact information for the person responsible for +     the conference.  This is not necessarily the same person that +     created the conference announcement. + +   o Either an email field or a phone field must be specified. +     Additional email and phone fields are allowed. + +   o If these are present, they should be specified before the first +     media field. + +   o More than one email or phone field can be given for a session +     description. + +   o Phone numbers should be given in the conventional international + +     format - preceded by a "+" and the international country code. +     There must be a space or a hyphen ("-") between the country code +     and the rest of the phone number.  Spaces and hyphens may be used +     to split up a phone field to aid readability if desired. For +     example: + +                   p=+44-171-380-7777 or p=+1 617 253 6011 + + + + + +Handley & Jacobson          Standards Track                    [Page 11] + +RFC 2327                          SDP                         April 1998 + + +   o Both email addresses and phone numbers can have an optional free +     text string associated with them, normally giving the name of the +     person who may be contacted.  This should be enclosed in +     parentheses if it is present.  
For example: + +                        e=mjh@isi.edu (Mark Handley) + +     The alternative RFC822 name quoting convention is also allowed for +     both email addresses and phone numbers.  For example, + +                        e=Mark Handley <mjh@isi.edu> + +     The free text string should be in the ISO-10646 character set with +     UTF-8 encoding, or alternatively in ISO-8859-1 or other encodings +     if the appropriate charset session-level attribute is set. + +   Connection Data + +   c=<network type> <address type> <connection address> + +   The "c=" field contains connection data. + +   A session announcement must contain one "c=" field in each media +   description (see below) or a "c=" field at the session-level.  It may +   contain a session-level "c=" field and one additional "c=" field per +   media description, in which case the per-media values override the +   session-level settings for the relevant media. + +   The first sub-field is the network type, which is a text string +   giving the type of network.  Initially "IN" is defined to have the +   meaning "Internet". + +   The second sub-field is the address type.  This allows SDP to be used +   for sessions that are not IP based.  Currently only IP4 is defined. + +   The third sub-field is the connection address.  Optional extra +   subfields may be added after the connection address depending on the +   value of the <address type> field. + +   For IP4 addresses, the connection address is defined as follows: + +   o Typically the connection address will be a class-D IP multicast + +     group address.  If the session is not multicast, then the +     connection address contains the fully-qualified domain name or the +     unicast IP address of the expected data source or data relay or +     data sink as determined by additional attribute fields. 
It is not +     expected that fully-qualified domain names or unicast addresses + + + +Handley & Jacobson          Standards Track                    [Page 12] + +RFC 2327                          SDP                         April 1998 + + +     will be given in a session description that is communicated by a +     multicast announcement, though this is not prohibited.  If a +     unicast data stream is to pass through a network address +     translator, the use of a fully-qualified domain name rather than an +     unicast IP address is RECOMMENDED.  In other cases, the use of an +     IP address to specify a particular interface on a multi-homed host +     might be required.  Thus this specification leaves the decision as +     to which to use up to the individual application, but all +     applications MUST be able to cope with receiving both formats. + +   o Conferences using an IP multicast connection address must also have +     a time to live (TTL) value present in addition to the multicast +     address.  The TTL and the address together define the scope with +     which multicast packets sent in this conference will be sent. TTL +     values must be in the range 0-255. + +     The TTL for the session is appended to the address using a slash as +     a separator.  An example is: + +                           c=IN IP4 224.2.1.1/127 + +     Hierarchical or layered encoding schemes are data streams where the +     encoding from a single media source is split into a number of +     layers.  The receiver can choose the desired quality (and hence +     bandwidth) by only subscribing to a subset of these layers.  Such +     layered encodings are normally transmitted in multiple multicast +     groups to allow multicast pruning.  This technique keeps unwanted +     traffic from sites only requiring certain levels of the hierarchy. 
+     For applications requiring multiple multicast groups, we allow the +     following notation to be used for the connection address: + +            <base multicast address>/<ttl>/<number of addresses> + +     If the number of addresses is not given it is assumed to be one. +     Multicast addresses so assigned are contiguously allocated above +     the base address, so that, for example: + +                          c=IN IP4 224.2.1.1/127/3 + +     would state that addresses 224.2.1.1, 224.2.1.2 and 224.2.1.3 are +     to be used at a ttl of 127.  This is semantically identical to +     including multiple "c=" lines in a media description: + +                           c=IN IP4 224.2.1.1/127 +                           c=IN IP4 224.2.1.2/127 +                           c=IN IP4 224.2.1.3/127 + + + + + +Handley & Jacobson          Standards Track                    [Page 13] + +RFC 2327                          SDP                         April 1998 + + +     Multiple addresses or "c=" lines can only be specified on a per- +     media basis, and not for a session-level "c=" field. + +     It is illegal for the slash notation described above to be used for +     IP unicast addresses. + +   Bandwidth + +   b=<modifier>:<bandwidth-value> + +   o This specifies the proposed bandwidth to be used by the session or +     media, and is optional. + +   o <bandwidth-value> is in kilobits per second + +   o <modifier> is a single alphanumeric word giving the meaning of the +     bandwidth figure. + +   o Two modifiers are initially defined: + +   CT Conference Total: An implicit maximum bandwidth is associated with +     each TTL on the Mbone or within a particular multicast +     administrative scope region (the Mbone bandwidth vs. TTL limits are +     given in the MBone FAQ). If the bandwidth of a session or media in +     a session is different from the bandwidth implicit from the scope, +     a `b=CT:...' 
line should be supplied for the session giving the +     proposed upper limit to the bandwidth used. The primary purpose of +     this is to give an approximate idea as to whether two or more +     conferences can co-exist simultaneously. + +   AS Application-Specific Maximum: The bandwidth is interpreted to be +     application-specific, i.e., will be the application's concept of +     maximum bandwidth.  Normally this will coincide with what is set on +     the application's "maximum bandwidth" control if applicable. + +     Note that CT gives a total bandwidth figure for all the media at +     all sites.  AS gives a bandwidth figure for a single media at a +     single site, although there may be many sites sending +     simultaneously. + +   o Extension Mechanism: Tool writers can define experimental bandwidth +     modifiers by prefixing their modifier with "X-". For example: + +                                 b=X-YZ:128 + +     SDP parsers should ignore bandwidth fields with unknown modifiers. +     Modifiers should be alpha-numeric and, although no length limit is +     given, they are recommended to be short. + + + +Handley & Jacobson          Standards Track                    [Page 14] + +RFC 2327                          SDP                         April 1998 + + +   Times, Repeat Times and Time Zones + +   t=<start time>  <stop time> + +   o "t=" fields specify the start and stop times for a conference +     session.  Multiple "t=" fields may be used if a session is active +     at multiple irregularly spaced times; each additional "t=" field +     specifies an additional period of time for which the session will +     be active.  If the session is active at regular times, an "r=" +     field (see below) should be used in addition to and following a +     "t=" field - in which case the "t=" field specifies the start and +     stop times of the repeat sequence. 
+ +   o The first and second sub-fields give the start and stop times for +     the conference respectively.  These values are the decimal +     representation of Network Time Protocol (NTP) time values in +     seconds [1].  To convert these values to UNIX time, subtract +     decimal 2208988800. + +   o If the stop-time is set to zero, then the session is not bounded, +     though it will not become active until after the start-time.  If +     the start-time is also zero, the session is regarded as permanent. + +     User interfaces should strongly discourage the creation of +     unbounded and permanent sessions as they give no information about +     when the session is actually going to terminate, and so make +     scheduling difficult. + +     The general assumption may be made, when displaying unbounded +     sessions that have not timed out to the user, that an unbounded +     session will only be active until half an hour from the current +     time or the session start time, whichever is the later.  If +     behaviour other than this is required, an end-time should be given +     and modified as appropriate when new information becomes available +     about when the session should really end. + +     Permanent sessions may be shown to the user as never being active +     unless there are associated repeat times which state precisely when +     the session will be active.  In general, permanent sessions should +     not be created for any session expected to have a duration of less +     than 2 months, and should be discouraged for sessions expected to +     have a duration of less than 6 months. + +     r=<repeat interval> <active duration> <list of offsets from start- +     time> + +   o "r=" fields specify repeat times for a session.  
For example, if +     a session is active at 10am on Monday and 11am on Tuesday for one + + + +Handley & Jacobson          Standards Track                    [Page 15] + +RFC 2327                          SDP                         April 1998 + + +     hour each week for three months, then the <start time> in the +     corresponding "t=" field would be the NTP representation of 10am on +     the first Monday, the <repeat interval> would be 1 week, the +     <active duration> would be 1 hour, and the offsets would be zero +     and 25 hours. The corresponding "t=" field stop time would be the +     NTP representation of the end of the last session three months +     later. By default all fields are in seconds, so the "r=" and "t=" +     fields might be: + +                           t=3034423619 3042462419 +                            r=604800 3600 0 90000 + +    To make announcements more compact, times may also be given in units +    of days, hours or minutes. The syntax for these is a number +    immediately followed by a single case-sensitive character. +    Fractional units are not allowed - a smaller unit should be used +    instead.  The following unit specification characters are allowed: + +                         d - days (86400 seconds) +                         h - hours (3600 seconds) +                         m - minutes (60 seconds) +         s - seconds (allowed for completeness but not recommended) + +   Thus, the above announcement could also have been written: + +                               r=7d 1h 0 25h + +     Monthly and yearly repeats cannot currently be directly specified +     with a single SDP repeat time - instead separate "t" fields should +     be used to explicitly list the session times. + +        z=<adjustment time> <offset> <adjustment time> <offset> .... 
+ +   o To schedule a repeated session which spans a change from daylight- +     saving time to standard time or vice-versa, it is necessary to +     specify offsets from the base repeat times. This is required +     because different time zones change time at different times of day, +     different countries change to or from daylight time on different +     dates, and some countries do not have daylight saving time at all. + +     Thus in order to schedule a session that is at the same time winter +     and summer, it must be possible to specify unambiguously by whose +     time zone a session is scheduled.  To simplify this task for +     receivers, we allow the sender to specify the NTP time that a time +     zone adjustment happens and the offset from the time when the +     session was first scheduled.  The "z" field allows the sender to +     specify a list of these adjustment times and offsets from the base +     time. + + + +Handley & Jacobson          Standards Track                    [Page 16] + +RFC 2327                          SDP                         April 1998 + + +     An example might be: + +                        z=2882844526 -1h 2898848070 0 + +     This specifies that at time 2882844526 the time base by which the +     session's repeat times are calculated is shifted back by 1 hour, +     and that at time 2898848070 the session's original time base is +     restored. Adjustments are always relative to the specified start +     time - they are not cumulative. + +   o    If a session is likely to last several years, it is  expected +   that +     the session announcement will be modified periodically rather than +     transmit several years worth of adjustments in one announcement. + +   Encryption Keys + +   k=<method> +   k=<method>:<encryption key> + +   o The session description protocol may be used to convey encryption +     keys.  
A key field is permitted before the first media entry (in +     which case it applies to all media in the session), or for each +     media entry as required. + +   o The format of keys and their usage is outside the scope of this +     document, but see [3]. + +   o The method indicates the mechanism to be used to obtain a usable +     key by external means, or from the encoded encryption key given. + +     The following methods are defined: + +      k=clear:<encryption key> +        The encryption key (as described in [3] for  RTP  media  streams +        under  the  AV  profile)  is  included untransformed in this key +        field. + +      k=base64:<encoded encryption key> +        The encryption key (as described in [3] for RTP media streams +        under the AV profile) is included in this key field but has been +        base64 encoded because it includes characters that are +        prohibited in SDP. + +      k=uri:<URI to obtain key> +        A Universal Resource Identifier as used by WWW clients is +        included in this key field.  The URI refers to the data +        containing the key, and may require additional authentication + + + +Handley & Jacobson          Standards Track                    [Page 17] + +RFC 2327                          SDP                         April 1998 + + +        before the key can be returned.  When a request is made to the +        given URI, the MIME content-type of the reply specifies the +        encoding for the key in the reply.  The key should not be +        obtained until the user wishes to join the session to reduce +        synchronisation of requests to the WWW server(s). + +      k=prompt +        No key is included in this SDP description, but the session or +        media stream referred to by this key field is encrypted.  The +        user should be prompted for the key when attempting to join the +        session, and this user-supplied key should then be used to +        decrypt the media streams. 
+ +   Attributes + +   a=<attribute> +   a=<attribute>:<value> + +   Attributes are the primary means for extending SDP.  Attributes may +   be defined to be used as "session-level" attributes, "media-level" +   attributes, or both. + +   A media description may have any number of attributes ("a=" fields) +   which are media specific.  These are referred to as "media-level" +   attributes and add information about the media stream.  Attribute +   fields can also be added before the first media field; these +   "session-level" attributes convey additional information that applies +   to the conference as a whole rather than to individual media; an +   example might be the conference's floor control policy. + +   Attribute fields may be of two forms: + +   o property attributes.  A property attribute is simply of the form +     "a=<flag>".  These are binary attributes, and the presence of the +     attribute conveys that the attribute is a property of the session. +     An example might be "a=recvonly". + +   o value attributes.  A value attribute is of the form +     "a=<attribute>:<value>".  An example might be that a whiteboard +     could have the value attribute "a=orient:landscape" + +   Attribute interpretation depends on the media tool being invoked. +   Thus receivers of session descriptions should be configurable in +   their interpretation of announcements in general and of attributes in +   particular. + +   Attribute names must be in the US-ASCII subset of ISO-10646/UTF-8. + + + + +Handley & Jacobson          Standards Track                    [Page 18] + +RFC 2327                          SDP                         April 1998 + + +   Attribute values are byte strings, and MAY use any byte value except +   0x00 (Nul), 0x0A (LF), and 0x0D (CR). By default, attribute values +   are to be interpreted as in ISO-10646 character set with UTF-8 +   encoding.  
Unlike other text fields, attribute values are NOT +   normally affected by the `charset' attribute as this would make +   comparisons against known values problematic.  However, when an +   attribute is defined, it can be defined to be charset-dependent, in +   which case its value should be interpreted in the session charset +   rather than in ISO-10646. + +   Attributes that will be commonly used can be registered with IANA +   (see Appendix B).  Unregistered attributes should begin with "X-" to +   prevent inadvertent collision with registered attributes.  In either +   case, if an attribute is received that is not understood, it should +   simply be ignored by the receiver. + +   Media Announcements + +   m=<media> <port> <transport> <fmt list> + +   A session description may contain a number of media descriptions. +   Each media description starts with an "m=" field, and is terminated +   by either the next "m=" field or by the end of the session +   description.  A media field also has several sub-fields: + +   o The first sub-field is the media type.  Currently defined media are +     "audio", "video", "application", "data" and "control", though this +     list may be extended as new communication modalities emerge (e.g., +     telepresence).  The difference between "application" and "data" is +     that the former is a media flow such as whiteboard information, and +     the latter is bulk-data transfer such as multicasting of program +     executables which will not typically be displayed to the user. +     "control" is used to specify an additional conference control +     channel for the session. + +   o The second sub-field is the transport port to which the media +     stream will be sent.  The meaning of the transport port depends on +     the network being used as specified in the relevant "c" field and +     on the transport protocol defined in the third sub-field.  
Other +     ports used by the media application (such as the RTCP port, see +     [2]) should be derived algorithmically from the base media port. + +     Note: For transports based on UDP, the value should be in the range +     1024 to 65535 inclusive.  For RTP compliance it should be an even +     number. + + + + + + +Handley & Jacobson          Standards Track                    [Page 19] + +RFC 2327                          SDP                         April 1998 + + +     For applications where hierarchically encoded streams are being +     sent to a unicast address, it may be necessary to specify multiple +     transport ports.  This is done using a similar notation to that +     used for IP multicast addresses in the "c=" field: + +          m=<media> <port>/<number of ports> <transport> <fmt list> + +     In such a case, the ports used depend on the transport protocol. +     For RTP, only the even ports are used for data and the +     corresponding one-higher odd port is used for RTCP.  For example: + +                         m=video 49170/2 RTP/AVP 31 + +     would specify that ports 49170 and 49171 form one RTP/RTCP pair and +     49172 and 49173 form the second RTP/RTCP pair.  RTP/AVP is the +     transport protocol and 31 is the format (see below). + +     It is illegal for both multiple addresses to be specified in the +     "c=" field and for multiple ports to be specified in the "m=" field +     in the same session description. + +   o The third sub-field is the transport protocol.  The transport +     protocol values are dependent on the address-type field in the "c=" +     fields.  Thus a "c=" field of IP4 defines that the transport +     protocol runs over IP4.  For IP4, it is normally expected that most +     media traffic will be carried as RTP over UDP.  
The following +     transport protocols are preliminarily defined, but may be extended +     through registration of new protocols with IANA: + +     - RTP/AVP - the IETF's Realtime Transport Protocol using the +       Audio/Video profile carried over UDP. + +     - udp - User Datagram Protocol + +     If an application uses a single combined proprietary media format +     and transport protocol over UDP, then simply specifying the +     transport protocol as udp and using the format field to distinguish +     the combined protocol is recommended.  If a transport protocol is +     used over UDP to carry several distinct media types that need to be +     distinguished by a session directory, then specifying the transport +     protocol and media format separately is necessary. RTP is an +     example of a transport-protocol that carries multiple payload +     formats that must be distinguished by the session directory for it +     to know how to start appropriate tools, relays, mixers or +     recorders. + + + + + + +Handley & Jacobson          Standards Track                    [Page 20] + +RFC 2327                          SDP                         April 1998 + + +     The main reason to specify the transport-protocol in addition to +     the media format is that the same standard media formats may be +     carried over different transport protocols even when the network +     protocol is the same - a historical example is vat PCM audio and +     RTP PCM audio.  In addition, relays and monitoring tools that are +     transport-protocol-specific but format-independent are possible. + +     For RTP media streams operating under the RTP Audio/Video Profile +     [3], the protocol field is "RTP/AVP".  Should other RTP profiles be +     defined in the future, their profiles will be specified in the same +     way.  For example, the protocol field "RTP/XYZ" would specify RTP +     operating under a profile whose short name is "XYZ". 
+ +   o The fourth and subsequent sub-fields are media formats.  For audio +     and video, these will normally be a media payload type as defined +     in the RTP Audio/Video Profile. + +     When a list of payload formats is given, this implies that all of +     these formats may be used in the session, but the first of these +     formats is the default format for the session. + +     For media whose transport protocol is not RTP or UDP the format +     field is protocol specific.  Such formats should be defined in an +     additional specification document. + +     For media whose transport protocol is RTP, SDP can be used to +     provide a dynamic binding of media encoding to RTP payload type. +     The encoding names in the RTP AV Profile do not specify unique +     audio encodings (in terms of clock rate and number of audio +     channels), and so they are not used directly in SDP format fields. +     Instead, the payload type number should be used to specify the +     format for static payload types and the payload type number along +     with additional encoding information should be used for dynamically +     allocated payload types. + +     An example of a static payload type is u-law PCM coded single +     channel audio sampled at 8KHz.  This is completely defined in the +     RTP Audio/Video profile as payload type 0, so the media field for +     such a stream sent to UDP port 49232 is: + +                           m=audio 49232 RTP/AVP 0 + +     An example of a dynamic payload type is 16 bit linear encoded +     stereo audio sampled at 16KHz.  
If we wish to use dynamic RTP/AVP +     payload type 98 for such a stream, additional information is +     required to decode it: + +                          m=audio 49232 RTP/AVP 98 + + + +Handley & Jacobson          Standards Track                    [Page 21] + +RFC 2327                          SDP                         April 1998 + + +                           a=rtpmap:98 L16/16000/2 + +     The general form of an rtpmap attribute is: + +     a=rtpmap:<payload type> <encoding name>/<clock rate>[/<encoding +     parameters>] + +     For audio streams, <encoding parameters> may specify the number of +     audio channels.  This parameter may be omitted if the number of +     channels is one provided no additional parameters are needed.  For +     video streams, no encoding parameters are currently specified. + +     Additional parameters may be defined in the future, but +     codec-specific parameters should not be added.  Parameters added to +     an rtpmap attribute should only be those required for a session +     directory to make the choice of appropriate media tool to +     participate in a session.  Codec-specific parameters should be +     added in other attributes. + +     Up to one rtpmap attribute can be defined for each media format +     specified. Thus we might have: + +                       m=audio 49230 RTP/AVP 96 97 98 +                             a=rtpmap:96 L8/8000 +                            a=rtpmap:97 L16/8000 +                           a=rtpmap:98 L16/11025/2 + +     RTP profiles that specify the use of dynamic payload types must +     define the set of valid encoding names and/or a means to register +     encoding names if that profile is to be used with SDP. + +     Experimental encoding formats can also be specified using rtpmap. +     RTP formats that are not registered as standard format names must +     be preceded by "X-".  
Thus a new experimental redundant audio +     stream called GSMLPC using dynamic payload type 99 could be +     specified as: + +                          m=audio 49232 RTP/AVP 99 +                          a=rtpmap:99 X-GSMLPC/8000 + +     Such an experimental encoding requires that any site wishing to +     receive the media stream has relevant configured state in its +     session directory to know which tools are appropriate. + +     Note that RTP audio formats typically do not include information +     about the number of samples per packet.  If a non-default (as +     defined in the RTP Audio/Video Profile) packetisation is required, +     the "ptime" attribute is used as given below. + + + +Handley & Jacobson          Standards Track                    [Page 22] + +RFC 2327                          SDP                         April 1998 + + +     For more details on RTP audio and video formats, see [3]. + +   o Formats for non-RTP media should be registered as MIME content +     types as described in Appendix B.  For example, the LBL whiteboard +     application might be registered as MIME content-type application/wb +     with encoding considerations specifying that it operates over UDP, +     with no appropriate file format.  In SDP this would then be +     expressed using a combination of the "media" field and the "fmt" +     field, as follows: + +                         m=application 32416 udp wb + +   Suggested Attributes + +   The following attributes are suggested.  Since application writers +   may add new attributes as they are required, this list is not +   exhaustive. + +   a=cat:<category> +       This attribute gives the dot-separated hierarchical category of +       the session.  This is to enable a receiver to filter unwanted +       sessions by category.  It would probably have been a compulsory +       separate field, except for its experimental nature at this time. 
+       It is a session-level attribute, and is not dependent on charset. 
+ +   a=keywds:<keywords> +       Like the cat attribute, this is to assist identifying wanted +       sessions at the receiver.  This allows a receiver to select +       interesting session based on keywords describing the purpose of +       the session.  It is a session-level attribute. It is a charset +       dependent attribute, meaning that its value should be interpreted +       in the charset specified for the session description if one is +       specified, or by default in ISO 10646/UTF-8. + +   a=tool:<name and version of tool> +       This gives the name and version number of the tool used to create +       the session description.  It is a session-level attribute, and is +       not dependent on charset. + +   a=ptime:<packet time> +       This gives the length of time in milliseconds represented by the +       media in a packet. This is probably only meaningful for audio +       data.  It should not be necessary to know ptime to decode RTP or +       vat audio, and it is intended as a recommendation for the +       encoding/packetisation of audio.  It is a media attribute, and is +       not dependent on charset. + + + + + +Handley & Jacobson          Standards Track                    [Page 23] + +RFC 2327                          SDP                         April 1998 + + +   a=recvonly +       This specifies that the tools should be started in receive-only +       mode where applicable. It can be either a session or media +       attribute, and is not dependent on charset. + +   a=sendrecv +       This specifies that the tools should be started in send and +       receive mode.  This is necessary for interactive conferences with +       tools such as wb which defaults to receive only mode. It can be +       either a session or media attribute, and is not dependent on +       charset. + +   a=sendonly +       This specifies that the tools should be started in send-only +       mode.  
An example may be where a different unicast address is to +       be used for a traffic destination than for a traffic source. In +       such a case, two media descriptions may be used, one sendonly and +       one recvonly. It can be either a session or media attribute, but +       would normally only be used as a media attribute, and is not +       dependent on charset. + +   a=orient:<whiteboard orientation> +       Normally this is only used in a whiteboard media specification. +       It specifies the orientation of the whiteboard on the screen. +       It is a media attribute. Permitted values are `portrait', +       `landscape' and `seascape' (upside down landscape). It is not +       dependent on charset. + +   a=type:<conference type> +       This specifies the type of the conference.  Suggested values are +       `broadcast', `meeting', `moderated', `test' and `H332'. +       `recvonly' should be the default for `type:broadcast' sessions, +       `type:meeting' should imply `sendrecv' and `type:moderated' +       should indicate the use of a floor control tool and that the +       media tools are started so as to "mute" new sites joining the +       conference. + +       Specifying the attribute type:H332 indicates that this loosely +       coupled session is part of a H.332 session as defined in the ITU +       H.332 specification [10].  Media tools should be started +       `recvonly'. + +       Specifying the attribute type:test is suggested as a hint that, +       unless explicitly requested otherwise, receivers can safely avoid +       displaying this session description to users. + +       The type attribute is a session-level attribute, and is not +       dependent on charset. 
+ + + +Handley & Jacobson          Standards Track                    [Page 24] + +RFC 2327                          SDP                         April 1998 + + +   a=charset:<character set> +       This specifies the character set to be used to display the +       session name and information data.  By default, the ISO-10646 +       character set in UTF-8 encoding is used. If a more compact +       representation is required, other character sets may be used such +       as ISO-8859-1 for Northern European languages.  In particular, +       the ISO 8859-1 is specified with the following SDP attribute: + +                             a=charset:ISO-8859-1 + +       This is a session-level attribute; if this attribute is present, +       it must be before the first media field.  The charset specified +       MUST be one of those registered with IANA, such as ISO-8859-1. +       The character set identifier is a US-ASCII string and MUST be +       compared against the IANA identifiers using a case-insensitive +       comparison.  If the identifier is not recognised or not +       supported, all strings that are affected by it SHOULD be regarded +       as byte strings. + +       Note that a character set specified MUST still prohibit the use +       of bytes 0x00 (Nul), 0x0A (LF) and 0x0d (CR). Character sets +       requiring the use of these characters MUST define a quoting +       mechanism that prevents these bytes appearing within text fields. + +   a=sdplang:<language tag> +       This can be a session level attribute or a media level attribute. +       As a session level attribute, it specifies the language for the +       session description.  As a media level attribute, it specifies +       the language for any media-level SDP information field associated +       with that media.  
Multiple sdplang attributes can be provided +       either at session or media level if multiple languages in the +       session description or media use multiple languages, in which +       case the order of the attributes indicates the order of +       importance of the various languages in the session or media from +       most important to least important. + +       In general, sending session descriptions consisting of multiple +       languages should be discouraged.  Instead, multiple descriptions +       should be sent describing the session, one in each language. +       However this is not possible with all transport mechanisms, and +       so multiple sdplang attributes are allowed although not +       recommended. + +       The sdplang attribute value must be a single RFC 1766 language +       tag in US-ASCII.  It is not dependent on the charset attribute. +       An sdplang attribute SHOULD be specified when a session is of + + + + + +Handley & Jacobson          Standards Track                    [Page 25] + +RFC 2327                          SDP                         April 1998 + + +       sufficient scope to cross geographic boundaries where the +       language of recipients cannot be assumed, or where the session is +       in a different language from the locally assumed norm. + +   a=lang:<language tag> +       This can be a session level attribute or a media level attribute. +       As a session level attribute, it specifies the default language +       for the session being described.  As a media level attribute, it +       specifies the language for that media, overriding any session- +       level language specified.  
Multiple lang attributes can be +       provided either at session or media level if multiple languages +       in the session description or media use multiple languages, in +       which case the order of the attributes indicates the order of +       importance of the various languages in the session or media from +       most important to least important. + +       The lang attribute value must be a single RFC 1766 language tag +       in US-ASCII. It is not dependent on the charset attribute.  A +       lang attribute SHOULD be specified when a session is of +       sufficient scope to cross geographic boundaries where the +       language of recipients cannot be assumed, or where the session is +       in a different language from the locally assumed norm. + +   a=framerate:<frame rate> +       This gives the maximum video frame rate in frames/sec.  It is +       intended as a recommendation for the encoding of video data. +       Decimal representations of fractional values using the notation +       "<integer>.<fraction>" are allowed.  It is a media attribute, is +       only defined for video media, and is not dependent on charset. + +   a=quality:<quality> +       This gives a suggestion for the quality of the encoding as an +       integer value. + +       The intention of the quality attribute for video is to specify a +       non-default trade-off between frame-rate and still-image quality. +       For video, the value is in the range 0 to 10, with the following +       suggested meaning: + +       10 - the best still-image quality the compression scheme can +       give. + +       5 - the default behaviour given no quality suggestion. + +       0 - the worst still-image quality the codec designer thinks is +           still usable. + +       It is a media attribute, and is not dependent on charset. 
+ + + +Handley & Jacobson          Standards Track                    [Page 26] + +RFC 2327                          SDP                         April 1998 + + +   a=fmtp:<format> <format specific parameters> +       This attribute allows parameters that are specific to a +       particular format to be conveyed in a way that SDP doesn't have +       to understand them.  The format must be one of the formats +       specified for the media.  Format-specific parameters may be any +       set of parameters required to be conveyed by SDP and given +       unchanged to the media tool that will use this format. + +       It is a media attribute, and is not dependent on charset. + +6.1.  Communicating Conference Control Policy + +   There is some debate over the way conference control policy should be +   communicated.  In general, the authors believe that an implicit +   declarative style of specifying conference control is desirable where +   possible. + +   A simple declarative style uses a single conference attribute field +   before the first media field, possibly supplemented by properties +   such as `recvonly' for some of the media tools.  This conference +   attribute conveys the conference control policy. An example might be: + +                             a=type:moderated + +   In some cases, however, it is possible that this may be insufficient +   to communicate the details of an unusual conference control policy. +   If this is the case, then a conference attribute specifying external +   control might be set, and then one or more "media" fields might be +   used to specify the conference control tools and configuration data +   for those tools. 
An example is an ITU H.332 session: + +                c=IN IP4 224.5.6.7 +                a=type:H332 +                m=audio 49230 RTP/AVP 0 +                m=video 49232 RTP/AVP 31 +                m=application 12349 udp wb +                m=control 49234 H323 mc +                c=IN IP4 134.134.157.81 + +   In this example, a general conference attribute (type:H332) is +   specified stating that conference control will be provided by an +   external H.332 tool, and a contact address for the H.323 session +   multipoint controller is given. + +   In this document, only the declarative style of conference control +   declaration is specified.  Other forms of conference control should +   specify an appropriate type attribute, and should define the +   implications this has for control media. + + + +Handley & Jacobson          Standards Track                    [Page 27] + +RFC 2327                          SDP                         April 1998 + + +7.  Security Considerations + +   SDP is a session description format that describes multimedia +   sessions.  A session description should not be trusted unless it has +   been obtained by an authenticated transport protocol from a trusted +   source.  Many different transport protocols may be used to distribute +   session descriptions, and the nature of the authentication will differ +   from transport to transport. + +   One transport that will frequently be used to distribute session +   descriptions is the Session Announcement Protocol (SAP).  SAP +   provides both encryption and authentication mechanisms but due to the +   nature of session announcements it is likely that there are many +   occasions where the originator of a session announcement cannot be +   authenticated because they are previously unknown to the receiver of +   the announcement and because no common public key infrastructure is +   available. 
+ +   On receiving a session description over an unauthenticated transport +   mechanism or from an untrusted party, software parsing the session +   should take a few precautions. Session descriptions contain +   information required to start software on the receiver's system. +   Software that parses a session description MUST NOT be able to start +   other software except that which is specifically configured as +   appropriate software to participate in multimedia sessions.  It is +   normally considered INAPPROPRIATE for software parsing a session +   description to start, on a user's system, software that is +   appropriate to participate in multimedia sessions, without the user +   first being informed that such software will be started and giving +   their consent.  Thus a session description arriving by session +   announcement, email, session invitation, or WWW page SHOULD NOT +   deliver the user into an interactive multimedia session without +   the user being aware that this will happen.  As it is not always +   simple to tell whether a session is interactive or not, applications +   that are unsure should assume sessions are interactive. + +   In this specification, there are no attributes which would allow the +   recipient of a session description to be informed to start multimedia +   tools in a mode where they default to transmitting.  Under some +   circumstances it might be appropriate to define such attributes.  If +   this is done an application parsing a session description containing +   such attributes SHOULD either ignore them, or inform the user that +   joining this session will result in the automatic transmission of +   multimedia data.  The default behaviour for an unknown attribute is +   to ignore it. 
+ + + + + + +Handley & Jacobson          Standards Track                    [Page 28] + +RFC 2327                          SDP                         April 1998 + + +   Session descriptions may be parsed at intermediate systems such as +   firewalls for the purposes of opening a hole in the firewall to allow +   the participation in multimedia sessions.  It is considered +   INAPPROPRIATE for a firewall to open such holes for unicast data +   streams unless the session description comes in a request from inside +   the firewall. + +   For multicast sessions, it is likely that local administrators will +   apply their own policies, but the exclusive use of "local" or "site- +   local" administrative scope within the firewall and the refusal of +   the firewall to open a hole for such scopes will provide separation +   of global multicast sessions from local ones. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson          Standards Track                    [Page 29] + +RFC 2327                          SDP                         April 1998 + + +Appendix A: SDP Grammar + +   This appendix provides an Augmented BNF grammar for SDP. ABNF is +   defined in RFC 2234. 
+ + +   announcement =        proto-version +                         origin-field +                         session-name-field +                         information-field +                         uri-field +                         email-fields +                         phone-fields +                         connection-field +                         bandwidth-fields +                         time-fields +                         key-field +                         attribute-fields +                         media-descriptions + +   proto-version =       "v=" 1*DIGIT CRLF +                         ;this memo describes version 0 + +   origin-field =        "o=" username space +                         sess-id space sess-version space +                         nettype space addrtype space +                         addr CRLF + +   session-name-field =  "s=" text CRLF + +   information-field =   ["i=" text CRLF] + +   uri-field =           ["u=" uri CRLF] + +   email-fields =        *("e=" email-address CRLF) + +   phone-fields =        *("p=" phone-number CRLF) + + +   connection-field =    ["c=" nettype space addrtype space +                         connection-address CRLF] +                         ;a connection field must be present +                         ;in every media description or at the +                         ;session-level + + +   bandwidth-fields =    *("b=" bwtype ":" bandwidth CRLF) + + + + +Handley & Jacobson          Standards Track                    [Page 30] + +RFC 2327                          SDP                         April 1998 + + +   time-fields =         1*( "t=" start-time space stop-time +                         *(CRLF repeat-fields) CRLF) +                         [zone-adjustments CRLF] + + +   repeat-fields =       "r=" repeat-interval space typed-time +                         1*(space typed-time) + + +   zone-adjustments =    time space ["-"] typed-time +                         *(space time space ["-"] typed-time) + + +   
key-field =           ["k=" key-type CRLF] + + +   key-type =            "prompt" | +                         "clear:" key-data | +                         "base64:" key-data | +                         "uri:" uri + + +   key-data =            email-safe | "~" | "\" + + +   attribute-fields =    *("a=" attribute CRLF) + + +   media-descriptions =  *( media-field +                         information-field +                         *(connection-field) +                         bandwidth-fields +                         key-field +                         attribute-fields ) + + +   media-field =         "m=" media space port ["/" integer] +                         space proto 1*(space fmt) CRLF + + +   media =               1*(alpha-numeric) +                         ;typically "audio", "video", "application" +                         ;or "data" + +   fmt =                 1*(alpha-numeric) +                         ;typically an RTP payload type for audio +                         ;and video media + + + + +Handley & Jacobson          Standards Track                    [Page 31] + +RFC 2327                          SDP                         April 1998 + + +   proto =               1*(alpha-numeric) +                         ;typically "RTP/AVP" or "udp" for IP4 + + +   port =                1*(DIGIT) +                         ;should in the range "1024" to "65535" inclusive +                         ;for UDP based media + + +   attribute =           (att-field ":" att-value) | att-field + + +   att-field =           1*(alpha-numeric) + + +   att-value =           byte-string + + +   sess-id =             1*(DIGIT) +                         ;should be unique for this originating username/host + + +   sess-version =        1*(DIGIT) +                         ;0 is a new session + + +   connection-address =  multicast-address +                         | addr + + +   multicast-address =   3*(decimal-uchar ".") decimal-uchar "/" ttl +                         [ "/" integer 
] +                         ;multicast addresses may be in the range +                         ;224.0.0.0 to 239.255.255.255 + +   ttl =                 decimal-uchar + +   start-time =          time | "0" + +   stop-time =           time | "0" + +   time =                POS-DIGIT 9*(DIGIT) +                         ;sufficient for 2 more centuries + + +   repeat-interval =     typed-time + + + + + +Handley & Jacobson          Standards Track                    [Page 32] + +RFC 2327                          SDP                         April 1998 + + +   typed-time =          1*(DIGIT) [fixed-len-time-unit] + + +   fixed-len-time-unit = "d" | "h" | "m" | "s" + + +   bwtype =              1*(alpha-numeric) + +   bandwidth =           1*(DIGIT) + + +   username =            safe +                         ;pretty wide definition, but doesn't include space + + +   email-address =       email | email "(" email-safe ")" | +                         email-safe "<" email ">" + + +   email =               ;defined in RFC822 + + +   uri=                  ;defined in RFC1630 + + +   phone-number =        phone | phone "(" email-safe ")" | +                         email-safe "<" phone ">" + + +   phone =               "+" POS-DIGIT 1*(space | "-" | DIGIT) +                         ;there must be a space or hyphen between the +                         ;international code and the rest of the number. 
+ + +   nettype =             "IN" +                         ;list to be extended + + +   addrtype =            "IP4" | "IP6" +                         ;list to be extended + + +   addr =                FQDN | unicast-address + + +   FQDN =                4*(alpha-numeric|"-"|".") +                         ;fully qualified domain name as specified in RFC1035 + + + + +Handley & Jacobson          Standards Track                    [Page 33] + +RFC 2327                          SDP                         April 1998 + + +   unicast-address =     IP4-address | IP6-address + + +   IP4-address =         b1 "." decimal-uchar "." decimal-uchar "." b4 +   b1 =                  decimal-uchar +                         ;less than "224"; not "0" or "127" +   b4 =                  decimal-uchar +                         ;not "0" + +   IP6-address =         ;to be defined + + +   text =                byte-string +                         ;default is to interpret this as ISO-10646 UTF8 +                         ;ISO 8859-1 requires a "a=charset:ISO-8859-1" +                         ;session-level attribute to be used + + +   byte-string =         1*(0x01..0x09|0x0b|0x0c|0x0e..0xff) +                         ;any byte except NUL, CR or LF + + +   decimal-uchar =       DIGIT +                         | POS-DIGIT DIGIT +                         | ("1" 2*(DIGIT)) +                         | ("2" ("0"|"1"|"2"|"3"|"4") DIGIT) +                         | ("2" "5" ("0"|"1"|"2"|"3"|"4"|"5")) + + +   integer =             POS-DIGIT *(DIGIT) + + +   alpha-numeric =       ALPHA | DIGIT + + +   DIGIT =               "0" | POS-DIGIT + + +   POS-DIGIT =           "1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" + + +   ALPHA =               "a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"| +                         "l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"| +                         "w"|"x"|"y"|"z"|"A"|"B"|"C"|"D"|"E"|"F"|"G"| +                         "H"|"I"|"J"|"K"|"L"|"M"|"N"|"O"|"P"|"Q"|"R"| +          
               "S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z" + + + + + +Handley & Jacobson          Standards Track                    [Page 34] + +RFC 2327                          SDP                         April 1998 + + +   email-safe =          safe | space | tab + + +   safe =                alpha-numeric | +                         "'" | "'" | "-" | "." | "/" | ":" | "?" | """ | +                         "#" | "$" | "&" | "*" | ";" | "=" | "@" | "[" | +                         "]" | "^" | "_" | "`" | "{" | "|" | "}" | "+" | +                         "~" | " + + +   space =               %d32 +   tab =                 %d9 +   CRLF =                %d13.10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson          Standards Track                    [Page 35] + +RFC 2327                          SDP                         April 1998 + + +Appendix B: Guidelines for registering SDP names with IANA + +   There are seven field names that may be registered with IANA. Using +   the terminology in the SDP specification BNF, they are "media", +   "proto", "fmt", "att-field", "bwtype", "nettype" and "addrtype". + +   "media" (eg, audio, video, application, data). + +       Packetized media types, such as those used by RTP, share the +       namespace used by media types registry [RFC 2048] (i.e. "MIME +       types").  The list of valid media names is the set of top-level +       MIME content types.  The set of media is intended to be small and +       not to be extended except under rare circumstances.  (The MIME +       subtype corresponds to the "fmt" parameter below). + +   "proto" + +       In general this should be an IETF standards-track transport +       protocol identifier such as RTP/AVP (rfc 1889 under the rfc 1890 +       profile). + +       However, people will want to invent their own proprietary +       transport protocols.  
Some of these should be registered as a +       "fmt" using "udp" as the protocol and some of which probably +       can't be. + +       Where the protocol and the application are intimately linked, +       such as with the LBL whiteboard wb which used a proprietary and +       special purpose protocol over UDP, the protocol name should be +       "udp" and the format name that should be registered is "wb".  The +       rules for formats (see below) apply to such registrations. + +       Where the proprietary transport protocol really carries many +       different data formats, it is possible to register a new protocol +       name with IANA. In such a case, an RFC MUST be produced +       describing the protocol and referenced in the registration.  Such +       an RFC MAY be informational, although it is preferable if it is +       standards-track. + +   "fmt" + +       The format namespace is dependent on the context of the "proto" +       field, so a format cannot be registered without specifying one or +       more transport protocols that it applies to. + +       Formats cover all the possible encodings that might want to be +       transported in a multimedia session. + + + + +Handley & Jacobson          Standards Track                    [Page 36] + +RFC 2327                          SDP                         April 1998 + + +       For RTP formats that have been assigned static payload types, the +       payload type number is used.  For RTP formats using a dynamic +       payload type number, the dynamic payload type number is given as +       the format and an additional "rtpmap" attribute specifies the +       format and parameters. + +       For non-RTP formats, any unregistered format name may be +       registered through the MIME-type registration process [RFC 2048]. +       The type given here is the MIME subtype only (the top-level MIME +       content type is specified by the media parameter).  
The MIME type +       registration SHOULD reference a standards-track RFC which +       describes the transport protocol for this media type.  If there +       is an existing MIME type for this format, the MIME registration +       should be augmented to reference the transport specification for +       this media type.  If there is not an existing MIME type for this +       format, and there exists no appropriate file format, this should +       be noted in the encoding considerations as "no appropriate file +       format". + +   "att-field" (Attribute names) + +       Attribute field names MAY be registered with IANA, although this +       is not compulsory, and unknown attributes are simply ignored. + +       When an attribute is registered, it must be accompanied by a +       brief specification stating the following: + +       o contact name, email address and telephone number + +       o attribute-name (as it will appear in SDP) + +       o long-form attribute name in English + +       o type of attribute (session level, media level, or both) + +       o whether the attribute value is subject to the charset +       attribute. + +       o a one paragraph explanation of the purpose of the attribute. + +       o a specification of appropriate attribute values for this +         attribute. + +       IANA will not sanity check such attribute registrations except to +       ensure that they do not clash with existing registrations. + + + + + + +Handley & Jacobson          Standards Track                    [Page 37] + +RFC 2327                          SDP                         April 1998 + + +       Although the above is the minimum that IANA will accept, if the +       attribute is expected to see widespread use and interoperability +       is an issue, authors are encouraged to produce a standards-track +       RFC that specifies the attribute more precisely. 
+ +       Submitters of registrations should ensure that the specification +       is in the spirit of SDP attributes, most notably that the +       attribute is platform independent in the sense that it makes no +       implicit assumptions about operating systems and does not name +       specific pieces of software in a manner that might inhibit +       interoperability. + +   "bwtype" (bandwidth specifiers) + +       A proliferation of bandwidth specifiers is strongly discouraged. + +       New bandwidth specifiers may be registered with IANA.  The +       submission MUST reference a standards-track RFC specifying the +       semantics of the bandwidth specifier precisely, and indicating +       when it should be used, and why the existing registered bandwidth +       specifiers do not suffice. + +   "nettype" (Network Type) + +       New network types may be registered with IANA if SDP needs to be +       used in the context of non-internet environments. Whilst these +       are not normally the preserve of IANA, there may be circumstances +       when an Internet application needs to interoperate with a non- +       internet application, such as when gatewaying an internet +       telephony call into the PSTN.  The number of network types should +       be small and should be rarely extended.  A new network type +       cannot be registered without registering at least one address +       type to be used with that network type.  A new network type +       registration MUST reference an RFC which gives details of the +       network type and address type and specifies how and when they +       would be used.  Such an RFC MAY be Informational. + +   "addrtype" (Address Type) + +       New address types may be registered with IANA.  An address type +       is only meaningful in the context of a network type, and any +       registration of an address type MUST specify a registered network +       type, or be submitted along with a network type registration.  
A +       new address type registration MUST reference an RFC giving +       details of the syntax of the address type.  Such an RFC MAY be +       Informational.  Address types are not expected to be registered +       frequently. + + + + +Handley & Jacobson          Standards Track                    [Page 38] + +RFC 2327                          SDP                         April 1998 + + +   Registration Procedure + +   To register a name the above guidelines should be followed regarding +   the required  level  of  documentation  that  is required.  The +   registration itself should be sent to IANA.  Attribute registrations +   should  include the  information  given  above.   Other registrations +   should include the following additional information: + +   o contact name, email address and telephone number + +   o name being registered (as it will appear in SDP) + +   o long-form name in English + +   o type of name ("media", "proto", "fmt", "bwtype", "nettype", or +     "addrtype") + +   o a one paragraph explanation of the purpose of the registered name. + +   o a reference to the specification (eg RFC number) of the registered +     name. + +   IANA may refer any registration to the IESG or to any appropriate +   IETF working group for review, and may request revisions to be made +   before a registration will be made. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson          Standards Track                    [Page 39] + +RFC 2327                          SDP                         April 1998 + + +Appendix C: Authors' Addresses + +   Mark Handley +   Information Sciences Institute +   c/o MIT Laboratory for Computer Science +   545 Technology Square +   Cambridge, MA 02139 +   United States +   electronic mail: mjh@isi.edu + +   Van Jacobson +   MS 46a-1121 +   Lawrence Berkeley Laboratory +   Berkeley, CA 94720 +   United States +   electronic mail: van@ee.lbl.gov + +Acknowledgments + +   Many people in the IETF MMUSIC working group have made comments and +   suggestions contributing to this document.  In particular, we would +   like to thank Eve Schooler, Steve Casner, Bill Fenner, Allison +   Mankin, Ross Finlayson, Peter Parnes, Joerg Ott, Carsten Bormann, Rob +   Lanphier and Steve Hanna. + +References + +   [1] Mills, D., "Network Time Protocol (version 3) specification and +   implementation", RFC 1305, March 1992. + +   [2] Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson, "RTP: +   A Transport Protocol for Real-Time Applications", RFC 1889, January +   1996. + +   [3] Schulzrinne, H., "RTP Profile for Audio and Video Conferences +   with Minimal Control", RFC 1890, January 1996 + +   [4] Handley, M., "SAP - Session Announcement Protocol", Work in +   Progress. + +   [5] V. Jacobson, S. McCanne, "vat - X11-based audio teleconferencing +   tool" vat manual page, Lawrence Berkeley Laboratory, 1994. + +   [6] The Unicode Consortium, "The Unicode Standard -- Version 2.0", +   Addison-Wesley, 1996. + + + + + + +Handley & Jacobson          Standards Track                    [Page 40] + +RFC 2327                          SDP                         April 1998 + + +   [7] ISO/IEC 10646-1:1993. 
International Standard -- Information +   technology -- Universal Multiple-Octet Coded Character Set (UCS) -- +   Part 1: Architecture and Basic Multilingual Plane.  Five amendments +   and a technical corrigendum have been published up to now.  UTF-8 +   is described in Annex R, published as Amendment 2. + +   [8] Goldsmith, D., and M. Davis, "Using Unicode with MIME", RFC 1641, +   July 1994. + +   [9] Yergeau, F., "UTF-8, a transformation format of Unicode and ISO +   10646", RFC 2044, October 1996. + +   [10] ITU-T Recommendation H.332 (1998): "Multimedia Terminal for +   Receiving Internet-based H.323 Conferences", ITU, Geneva. + +   [11] Handley, M., Schooler, E., and H. Schulzrinne, "Session +   Initiation Protocol (SIP)", Work in Progress. + +   [12] Schulzrinne, H., Rao, A., and R. Lanphier, "Real Time Streaming +   Protocol (RTSP)", RFC 2326, April 1998. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson          Standards Track                    [Page 41] + +RFC 2327                          SDP                         April 1998 + + +Full Copyright Statement + +   Copyright (C) The Internet Society (1998).  All Rights Reserved. + +   This document and translations of it may be copied and furnished to +   others, and derivative works that comment on or otherwise explain it +   or assist in its implementation may be prepared, copied, published +   and distributed, in whole or in part, without restriction of any +   kind, provided that the above copyright notice and this paragraph are +   included on all such copies and derivative works.  
However, this +   document itself may not be modified in any way, such as by removing +   the copyright notice or references to the Internet Society or other +   Internet organizations, except as needed for the purpose of +   developing Internet standards in which case the procedures for +   copyrights defined in the Internet Standards process must be +   followed, or as required to translate it into languages other than +   English. + +   The limited permissions granted above are perpetual and will not be +   revoked by the Internet Society or its successors or assigns. + +   This document and the information contained herein is provided on an +   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING +   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING +   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION +   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson          Standards Track                    [Page 42] + diff --git a/src/modules/rtp/rfc2974.txt b/src/modules/rtp/rfc2974.txt new file mode 100644 index 00000000..4a5aa626 --- /dev/null +++ b/src/modules/rtp/rfc2974.txt @@ -0,0 +1,1011 @@ + + + + + + +Network Working Group                                         M. Handley +Request for Comments: 2974                                         ACIRI +Category: Experimental                                        C. Perkins +                                                                 USC/ISI +                                                               E. Whelan +                                                                     UCL +                                                            October 2000 + + +                     Session Announcement Protocol + +Status of this Memo + +   This memo defines an Experimental Protocol for the Internet +   community.  
It does not specify an Internet standard of any kind. +   Discussion and suggestions for improvement are requested. +   Distribution of this memo is unlimited. + +Copyright Notice + +   Copyright (C) The Internet Society (2000).  All Rights Reserved. + +Abstract + +   This document describes version 2 of the multicast session directory +   announcement protocol, Session Announcement Protocol (SAP), and the +   related issues affecting security and scalability that should be +   taken into account by implementors. + +1  Introduction + +   In order to assist the advertisement of multicast multimedia +   conferences and other multicast sessions, and to communicate the +   relevant session setup information to prospective participants, a +   distributed session directory may be used.  An instance of such a +   session directory periodically multicasts packets containing a +   description of the session, and these advertisements are received by +   other session directories such that potential remote participants can +   use the session description to start the tools required to +   participate in the session. + +   This memo describes the issues involved in the multicast announcement +   of session description information and defines an announcement +   protocol to be used.  Sessions are described using the session +   description protocol which is described in a companion memo [4]. + + + + + + +Handley, et al.               Experimental                      [Page 1] + +RFC 2974             Session Announcement Protocol          October 2000 + + +2  Terminology + +   A SAP announcer periodically multicasts an announcement packet to a +   well known multicast address and port.  The announcement is multicast +   with the same scope as the session it is announcing, ensuring that +   the recipients of the announcement are within the scope of the +   session the announcement describes (bandwidth and other such +   constraints permitting).  
This is also important for the scalability +   of the protocol, as it keeps local session announcements local. + +   A SAP listener learns of the multicast scopes it is within (for +   example, using the Multicast-Scope Zone Announcement Protocol [5]) +   and listens on the well known SAP address and port for those scopes. +   In this manner, it will eventually learn of all the sessions being +   announced, allowing those sessions to be joined. + +   The key words `MUST', `MUST NOT', `REQUIRED', `SHALL', `SHALL NOT', +   `SHOULD', `SHOULD NOT', `RECOMMENDED', `MAY', and `OPTIONAL' in this +   document are to be interpreted as described in [1]. + +3  Session Announcement + +   As noted previously, a SAP announcer periodically sends an +   announcement packet to a well known multicast address and port. +   There is no rendezvous mechanism - the SAP announcer is not aware of +   the presence or absence of any SAP listeners - and no additional +   reliability is provided over the standard best-effort UDP/IP +   semantics. + +   That announcement contains a session description and SHOULD contain +   an authentication header.  The session description MAY be encrypted +   although this is NOT RECOMMENDED (see section 7). + +   A SAP announcement is multicast with the same scope as the session it +   is announcing, ensuring that the recipients of the announcement are +   within the scope of the session the announcement describes. There are +   a number of possibilities: + +   IPv4 global scope sessions use multicast addresses in the range +      224.2.128.0 - 224.2.255.255 with SAP announcements being sent to +      224.2.127.254 (note that 224.2.127.255 is used by the obsolete +      SAPv0 and MUST NOT be used). + + + + + + + + + +Handley, et al.               
Experimental                      [Page 2] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   IPv4 administrative scope sessions using administratively scoped IP +      multicast as defined in [7].  The multicast address to be used for +      announcements is the highest multicast address in the relevant +      administrative scope zone.  For example, if the scope range is +      239.16.32.0 - 239.16.33.255, then 239.16.33.255 is used for SAP +      announcements. + +   IPv6 sessions are announced on the address FF0X:0:0:0:0:0:2:7FFE +      where X is the 4-bit scope value.  For example, an announcement +      for a link-local session assigned the address +      FF02:0:0:0:0:0:1234:5678, should be advertised on SAP address +      FF02:0:0:0:0:0:2:7FFE. + +   Ensuring that a description is not used by a potential participant +   outside the session scope is not addressed in this memo. + +   SAP announcements MUST be sent on port 9875 and SHOULD be sent with +   an IP time-to-live of 255 (the use of TTL scoping for multicast is +   discouraged [7]). + +   If a session uses addresses in multiple administrative scope ranges, +   it is necessary for the announcer to send identical copies of the +   announcement to each administrative scope range.  It is up to the +   listeners to parse such multiple announcements as the same session +   (as identified by the SDP origin field, for example).  The +   announcement rate for each administrative scope range MUST be +   calculated separately, as if the multiple announcements were +   separate. + +   Multiple announcers may announce a single session, as an aid to +   robustness in the face of packet loss and failure of one or more +   announcers.  The rate at which each announcer repeats its +   announcement MUST be scaled back such that the total announcement +   rate is equal to that which a single server would choose. +   Announcements made in this manner MUST be identical. 
+ +   If multiple announcements are being made for a session, then each +   announcement MUST carry an authentication header signed by the same +   key, or be treated as a completely separate announcement by +   listeners. + +   An IPv4 SAP listener SHOULD listen on the IPv4 global scope SAP +   address and on the SAP addresses for each IPv4 administrative scope +   zone it is within.  The discovery of administrative scope zones is +   outside the scope of this memo, but it is assumed that each SAP +   listener within a particular scope zone is aware of that scope zone. +   A SAP listener which supports IPv6 SHOULD also listen to the IPv6 SAP +   addresses. + + + +Handley, et al.               Experimental                      [Page 3] + +RFC 2974             Session Announcement Protocol          October 2000 + + +3.1 Announcement Interval + +   The time period between repetitions of an announcement is chosen such +   that the total bandwidth used by all announcements on a single SAP +   group remains below a preconfigured limit.  If not otherwise +   specified, the bandwidth limit SHOULD be assumed to be 4000 bits per +   second. + +   Each announcer is expected to listen to other announcements in order +   to determine the total number of sessions being announced on a +   particular group.  Sessions are uniquely identified by the +   combination of the message identifier hash and originating source +   fields of the SAP header (note that SAP v0 announcers always set the +   message identifier hash to zero, and if such an announcement is +   received the entire message MUST be compared to determine +   uniqueness). + +   Announcements are made by periodic multicast to the group.  The base +   interval between announcements is derived from the number of +   announcements being made in that group, the size of the announcement +   and the configured bandwidth limit.  The actual transmission time is +   derived from this base interval as follows: + +      1. 
The announcer initializes the variable tp to be the last time a +         particular announcement was transmitted (or the current time if +         this is the first time this announcement is to be made). + +      2. Given a configured bandwidth limit in bits/second and an +         announcement of ad_size bytes, the base announcement interval +         in seconds is + +                interval = max(300; (8*no_of_ads*ad_size)/limit) + +      3. An offset is calculated based on the base announcement interval + +                offset = rand(interval*2/3) - (interval/3) + +      4. The next transmission time for an announcement is derived as + +                tn = tp + interval + offset + +   The announcer then sets a timer to expire at tn and waits.  At time +   tn the announcer SHOULD recalculate the next transmission time.  If +   the new value of tn is before the current time, the announcement is +   sent immediately.  Otherwise the transmission is rescheduled for the +   new tn.  This reconsideration prevents transient packet bursts on +   startup and when a network partition heals. + + + + +Handley, et al.               Experimental                      [Page 4] + +RFC 2974             Session Announcement Protocol          October 2000 + + +4  Session Deletion + +   Sessions may be deleted in one of several ways: + +   Explicit Timeout The session description payload may contain +      timestamp information specifying the start- and end-times of the +      session.  If the current time is later than the end-time of the +      session, then the session SHOULD be deleted from the receiver's +      session cache. + +   Implicit Timeout A session announcement message should be received +      periodically for each session description in a receiver's session +      cache.  The announcement period can be predicted by the receiver +      from the set of sessions currently being announced.  
If a session +      announcement message has not been received for ten times the +      announcement period, or one hour, whichever is the greater, then +      the session is deleted from the receiver's session cache.  The one +      hour minimum is to allow for transient network partitionings. + +   Explicit Deletion A session deletion packet is received specifying +      the session to be deleted.  Session deletion packets SHOULD have a +      valid authentication header, matching that used to authenticate +      previous announcement packets.  If this authentication is missing, +      the deletion message SHOULD be ignored. + +5  Session Modification + +   A pre-announced session can be modified by simply announcing the +   modified session description.  In this case, the version hash in the +   SAP header MUST be changed to indicate to receivers that the packet +   contents should be parsed (or decrypted and parsed if it is +   encrypted).  The session itself, as distinct from the session +   announcement, is uniquely identified by the payload and not by the +   message identifier hash in the header. + +   The same rules apply for session modification as for session +   deletion: + +    o Either the modified announcement must contain an authentication +      header signed by the same key as the cached session announcement +      it is modifying, or: + +    o The cached session announcement must not contain an authentication +      header, and the session modification announcement must originate +      from the same host as the session it is modifying. + + + + + + +Handley, et al.               
Experimental                      [Page 5] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   If an announcement is received containing an authentication header +   and the cached announcement did not contain an authentication header, +   or it contained a different authentication header, then the modified +   announcement MUST be treated as a new and different announcement, and +   displayed in addition to the un-authenticated announcement.  The same +   should happen if a modified packet without an authentication header +   is received from a different source than the original announcement. + +   These rules prevent an announcement having an authentication header +   added by a malicious user and then being deleted using that header, +   and it also prevents a denial-of-service attack by someone putting +   out a spoof announcement which, due to packet loss, reaches some +   participants before the original announcement.  Note that under such +   circumstances, being able to authenticate the message originator is +   the only way to discover which session is the correct session. + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   | V=1 |A|R|T|E|C|   auth len    |         msg id hash           | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                                                               | +   :                originating source (32 or 128 bits)            : +   :                                                               : +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                    optional authentication data               | +   :                              ....                             
: +   *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* +   |                      optional payload type                    | +   +                                         +-+- - - - - - - - - -+ +   |                                         |0|                   | +   + - - - - - - - - - - - - - - - - - - - - +-+                   | +   |                                                               | +   :                            payload                            : +   |                                                               | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                     Figure 1: Packet format + +6  Packet Format + +   SAP data packets have the format described in figure 1. + +   V: Version Number. The version number field MUST be set to 1 (SAPv2 +      announcements which use only SAPv1 features are backwards +      compatible, those which use new features can be detected by other +      means, so the SAP version number doesn't need to change). + + + + +Handley, et al.               Experimental                      [Page 6] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   A: Address type. If the A bit is 0, the originating source field +      contains a 32-bit IPv4 address.  If the A bit is 1, the +      originating source contains a 128-bit IPv6 address. + +   R: Reserved. SAP announcers MUST set this to 0, SAP listeners MUST +      ignore the contents of this field. + +   T: Message Type. If the T field is set to 0 this is a session +      announcement packet, if 1 this is a session deletion packet. + +   E: Encryption Bit. If the encryption bit is set to 1, the payload of +      the SAP packet is encrypted.  If this bit is 0 the packet is not +      encrypted.  See section 7 for details of the encryption process. + +   C: Compressed bit. If the compressed bit is set to 1, the payload is +      compressed using the zlib compression algorithm [3].  
If the +      payload is to be compressed and encrypted, the compression MUST be +      performed first. + +   Authentication Length. An 8 bit unsigned quantity giving the number +      of 32 bit words following the main SAP header that contain +      authentication data.  If it is zero, no authentication header is +      present. + +   Authentication data containing a digital signature of the packet, +      with length as specified by the authentication length header +      field.  See section 8 for details of the authentication process. + +   Message Identifier Hash. A 16 bit quantity that, used in combination +      with the originating source, provides a globally unique identifier +      indicating the precise version of this announcement.  The choice +      of value for this field is not specified here, except that it MUST +      be unique for each session announced by a particular SAP announcer +      and it MUST be changed if the session description is modified (and +      a session deletion message SHOULD be sent for the old version of +      the session). + +      Earlier versions of SAP used a value of zero to mean that the hash +      should be ignored and the payload should always be parsed.  This +      had the unfortunate side-effect that SAP announcers had to study +      the payload data to determine how many unique sessions were being +      advertised, making the calculation of the announcement interval +      more complex than necessary.  In order to decouple the session +      announcement process from the contents of those announcements, SAP +      announcers SHOULD NOT set the message identifier hash to zero. + +      SAP listeners MAY silently discard messages if the message +      identifier hash is set to zero. + + + +Handley, et al.               Experimental                      [Page 7] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   Originating Source. 
This gives the IP address of the original source +      of the message.  This is an IPv4 address if the A field is set to +      zero, else it is an IPv6 address.  The address is stored in +      network byte order. + +      SAPv0 permitted the originating source to be zero if the message +      identifier hash was also zero.  This practise is no longer legal, +      and SAP announcers SHOULD NOT set the originating source to zero. +      SAP listeners MAY silently discard packets with the originating +      source set to zero. + +   The header is followed by an optional payload type field and the +   payload data itself.  If the E or C bits are set in the header both +   the payload type and payload are encrypted and/or compressed. + +   The payload type field is a MIME content type specifier, describing +   the format of the payload.  This is a variable length ASCII text +   string, followed by a single zero byte (ASCII NUL).  The payload type +   SHOULD be included in all packets.  If the payload type is +   `application/sdp' both the payload type and its terminating zero byte +   MAY be omitted, although this is intended for backwards compatibility +   with SAP v1 listeners only. + +   The absence of a payload type field may be noted since the payload +   section of such a packet will start with an SDP `v=0' field, which is +   not a legal MIME content type specifier. + +   All implementations MUST support payloads of type `application/sdp' +   [4].  Other formats MAY be supported although since there is no +   negotiation in SAP an announcer which chooses to use a session +   description format other than SDP cannot know that the listeners are +   able to understand the announcement.  A proliferation of payload +   types in announcements has the potential to lead to severe +   interoperability problems, and for this reason, the use of non-SDP +   payloads is NOT RECOMMENDED. 
+ +   If the packet is an announcement packet, the payload contains a +   session description. + +   If the packet is a session deletion packet, the payload contains a +   session deletion message.  If the payload format is `application/sdp' +   the deletion message is a single SDP line consisting of the origin +   field of the announcement to be deleted. + +   It is desirable for the payload to be sufficiently small that SAP +   packets do not get fragmented by the underlying network. +   Fragmentation has a loss multiplier effect, which is known to +   significantly affect the reliability of announcements.  It is + + + +Handley, et al.               Experimental                      [Page 8] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   RECOMMENDED that SAP packets are smaller than 1kByte in length, +   although if it is known that announcements will use a network with a +   smaller MTU than this, then that SHOULD be used as the maximum +   recommended packet size. + +7  Encrypted Announcements + +   An announcement is received by all listeners in the scope to which it +   is sent.  If an announcement is encrypted, and many of the receivers +   do not have the encryption key, there is a considerable waste of +   bandwidth since those receivers cannot use the announcement they have +   received.  For this reason, the use of encrypted SAP announcements is +   NOT RECOMMENDED on the global scope SAP group or on administrative +   scope groups which may have many receivers which cannot decrypt those +   announcements. + +   The opinion of the authors is that encrypted SAP is useful in special +   cases only, and that the vast majority of scenarios where encrypted +   SAP has been proposed may be better served by distributing session +   details using another mechanism.  There are, however, certain +   scenarios where encrypted announcements may be useful.  
For this +   reason, the encryption bit is included in the SAP header to allow +   experimentation with encrypted announcements. + +   This memo does not specify details of the encryption algorithm to be +   used or the means by which keys are generated and distributed.  An +   additional specification should define these, if it is desired to use +   encrypted SAP. + +   Note that if an encrypted announcement is being announced via a +   proxy, then there may be no way for the proxy to discover that the +   announcement has been superseded, and so it may continue to relay the +   old announcement in addition to the new announcement.  SAP provides +   no mechanism to chain modified encrypted announcements, so it is +   advisable to announce the unmodified session as deleted for a short +   time after the modification has occurred.  This does not guarantee +   that all proxies have deleted the session, and so receivers of +   encrypted sessions should be prepared to discard old versions of +   session announcements that they may receive.  In most cases however, +   the only stateful proxy will be local to (and known to) the sender, +   and an additional (local-area) protocol involving a handshake for +   such session modifications can be used to avoid this problem. + +   Session announcements that are encrypted with a symmetric algorithm +   may allow a degree of privacy in the announcement of a session, but +   it should be recognized that a user in possession of such a key can +   pass it on to other users who should not be in possession of such a +   key.  Thus announcements to such a group of key holders cannot be + + + +Handley, et al.               Experimental                      [Page 9] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   assumed to have come from an authorized key holder unless there is an +   appropriate authentication header signed by an authorized key holder. 
+   In addition the recipients of such encrypted announcements cannot be +   assumed to only be authorized key holders.  Such encrypted +   announcements do not provide any real security unless all of the +   authorized key holders are trusted to maintain security of such +   session directory keys.  This property is shared by the multicast +   session tools themselves, where it is possible for an un-trustworthy +   member of the session to pass on encryption keys to un-authorized +   users.  However it is likely that keys used for the session tools +   will be more short lived than those used for session directories. + +   Similar considerations should apply when session announcements are +   encrypted with an asymmetric algorithm, but then it is possible to +   restrict the possessor(s) of the private key, so that announcements +   to a key-holder group can not be made, even if one of the untrusted +   members of the group proves to be un-trustworthy. + +                        1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   | V=1 |P| Auth  |                                               | +   +-+-+-+-+-+-+-+-+                                               | +   |              Format  specific authentication subheader        | +   :                        ..................                     : +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +    Figure 2:  Format of the authentication data in the SAP header + +8  Authenticated Announcements + +   The authentication header can be used for two purposes: + +    o Verification that changes to a session description or deletion of +      a session are permitted. + +    o Authentication of the identity of the session creator. 
+ +   In some circumstances only verification is possible because a +   certificate signed by a mutually trusted person or authority is not +   available.  However, under such circumstances, the session originator +   may still be authenticated to be the same as the session originator +   of previous sessions claiming to be from the same person.  This may +   or may not be sufficient depending on the purpose of the session and +   the people involved. + + + + + + +Handley, et al.               Experimental                     [Page 10] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   Clearly the key used for the authentication should not be trusted to +   belong to the session originator unless it has been separately +   authenticated by some other means, such as being certified by a +   trusted third party.  Such certificates are not normally included in +   an SAP header because they take more space than can normally be +   afforded in an SAP packet, and such verification must therefore take +   place by some other mechanism.  However, as certified public keys are +   normally locally cached, authentication of a particular key only has +   to take place once, rather than every time the session directory +   retransmits the announcement. + +   SAP is not tied to any single authentication mechanism. +   Authentication data in the header is self-describing, but the precise +   format depends on the authentication mechanism in use.  The generic +   format of the authentication data is given in figure 2.  The +   structure of the format specific authentication subheader, using both +   the PGP and the CMS formats, is discussed in sections 8.1 and 8.2 +   respectively.  Additional formats may be added in future. + +   Version Number, V:  The version number of the authentication format +      specified by this memo is 1. 
+ +   Padding Bit, P:  If necessary the authentication data is padded to be +      a multiple of 32 bits and the padding bit is set.  In this case +      the last byte of the authentication data contains the number of +      padding bytes (including the last byte) that must be discarded. + +   Authentication Type, Auth: The authentication type is a  4 bit +      encoded field that denotes the authentication infrastructure the +      sender expects the recipients to use to check the authenticity and +      integrity of the information.  This defines the format of the +      authentication subheader and can take the values:  0 = PGP format, +      1 = CMS format.  All other values are undefined and SHOULD be +      ignored. + +   If a SAP packet is to be compressed or encrypted, this MUST be done +   before the authentication is added. + +   The digital signature in the authentication data MUST be calculated +   over the entire packet, including the header.  The authentication +   length MUST be set to zero and the authentication data excluded when +   calculating the digital signature. + +   It is to be expected that sessions may be announced by a number of +   different mechanisms, not only SAP.  For example, a session +   description may be placed on a web page, sent by email or conveyed in a + + + + + +Handley, et al.               Experimental                     [Page 11] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   session initiation protocol.  To ease interoperability with these +   other mechanisms, application level security is employed, rather than +   using IPsec authentication headers. + +8.1 PGP Authentication + +   A full description of the PGP protocol can be found in [2].  When +   using PGP for SAP authentication the basic format specific +   authentication subheader comprises a digital signature packet as +   described in [2].  
The signature type MUST be 0x01 which means the +   signature is that of a canonical text document. + +8.2 CMS Authentication + +   A full description of the Cryptographic Message Syntax can be found +   in [6].  The format specific authentication subheader will, in the +   CMS case, have an ASN.1 ContentInfo type with the ContentType being +   signedData. + +   Use is made of the option available in PKCS#7 to leave the content +   itself blank as the content which is signed is already present in the +   packet.  Inclusion of it within the SignedData type would duplicate +   this data and increase the packet length unnecessarily.  In addition +   this allows recipients with either no interest in the authentication, +   or with no mechanism for checking it, to more easily skip the +   authentication information. + +   There SHOULD be only one signerInfo and related fields corresponding +   to the originator of the SAP announcement.  The signingTime SHOULD be +   present as a signedAttribute.  However, due to the strict size +   limitations on the size of SAP packets, certificates and CRLs SHOULD +   NOT be included in the signedData structure.  It is expected that +   users of the protocol will have other methods for certificate and CRL +   distribution. + +9  Scalability and caching + +   SAP is intended to announce the existence of long-lived wide-area +   multicast sessions.  It is not an especially timely protocol: +   sessions are announced by periodic multicast with a repeat rate on +   the order of tens of minutes, and no enhanced reliability over UDP. +   This leads to a long startup delay before a complete set of +   announcements is heard by a listener.  This delay is clearly +   undesirable for interactive browsing of announced sessions. + +   In order to reduce the delays inherent in SAP, it is recommended that +   proxy caches are deployed.  
A SAP proxy cache is expected to listen +   to all SAP groups in its scope, and to maintain an up-to-date list of + + + +Handley, et al.               Experimental                     [Page 12] + +RFC 2974             Session Announcement Protocol          October 2000 + + +   all announced sessions along with the time each announcement was last +   received.  When a new SAP listener starts, it should contact its +   local proxy to download this information, which is then sufficient +   for it to process future announcements directly, as if it has been +   continually listening. + +   The protocol by which a SAP listener contacts its local proxy cache +   is not specified here. + +10 Security Considerations + +   SAP contains mechanisms for ensuring integrity of session +   announcements, for authenticating the origin of an announcement and +   for encrypting such announcements (sections 7 and 8). + +   As stated in section 5, if a session modification announcement is +   received that contains a valid authentication header, but which is +   not signed by the original creator of the session, then the session +   must be treated as a new session in addition to the original session +   with the same SDP origin information unless the originator of one of +   the session descriptions can be authenticated using a certificate +   signed by a trusted third party.  If this were not done, there would +   be a possible denial of service attack whereby a party listens for +   new announcements, strips off the original authentication header, +   modifies the session description, adds a new authentication header +   and re-announces the session.  If a rule was imposed that such spoof +   announcements were ignored, then if packet loss or late starting of a +   session directory instance caused the original announcement to fail +   to arrive at a site, but the spoof announcement did so, this would +   then prevent the original announcement from being accepted at that +   site. 
+ +   A similar denial-of-service attack is possible if a session +   announcement receiver relies completely on the originating source and +   hash fields to indicate change, and fails to parse the remainder of +   announcements for which it has seen the origin/hash combination +   before. + +   A denial of service attack is possible from a malicious site close to +   a legitimate site which is making a session announcement.  This can +   happen if the malicious site floods the legitimate site with huge +   numbers of (illegal) low TTL announcements describing high TTL +   sessions.  This may reduce the session announcement rate of the +   legitimate announcement to below a tenth of the rate expected at +   remote sites and therefore cause the session to time out.  Such an +   attack is likely to be easily detectable, and we do not provide any +   mechanism here to prevent it. + + + + +Handley, et al.               Experimental                     [Page 13] + +RFC 2974             Session Announcement Protocol          October 2000 + + +A. Summary of differences between SAPv0 and SAPv1 + +   For this purpose SAPv0 is defined as the protocol in use by version +   2.2 of the session directory tool, sdr.  SAPv1 is the protocol +   described in the 19 November 1996 version of this memo.  The packet +   headers of SAP messages are the same in V0 and V1 in that a V1 tool +   can parse a V0 announcement header but not vice-versa.  In SAPv0, the +   fields have the following values: + +     o Version Number:  0 + +     o Message Type:  0 (Announcement) + +     o Authentication Type:  0 (No Authentication) + +     o Encryption Bit:  0 (No Encryption) + +     o Compression Bit:  0 (No compression) + +     o Message Id Hash:  0 (No Hash Specified) + +     o Originating Source:  0 (No source specified, announcement has +       not been relayed) + +B. 
Summary of differences between SAPv1 and SAPv2 + +   The packet headers of SAP messages are the same in V1 and V2 in that +   a V2 tool can parse a V1 announcement header but not necessarily +   vice-versa. + +    o The A bit has been added to the SAP header, replacing one of the +      bits of the SAPv1 message type field.  If set to zero the +      announcement is of an IPv4 session, and the packet is backwards +      compatible with SAPv1.  If set to one the announcement is of an +      IPv6 session, and SAPv1 listeners (which do not support IPv6) will +      see this as an illegal message type (MT) field. + +    o The second bit of the message type field in SAPv1 has been +      replaced by a reserved, must-be-zero, bit.  This bit was unused in +      SAPv1, so this change just codifies existing usage. + +    o SAPv1 specified encryption of the payload.  SAPv2 includes the E +      bit in the SAP header to indicate that the payload is encrypted, +      but does not specify any details of the encryption. + +    o SAPv1 allowed the message identifier hash and originating source +      fields to be set to zero, for backwards compatibility.  This is no +      longer legal. + + + +Handley, et al.               Experimental                     [Page 14] + +RFC 2974             Session Announcement Protocol          October 2000 + + +    o SAPv1 specified gzip compression.  SAPv2 uses zlib (the only known +      implementation of SAP compression used zlib, and gzip compression +      was a mistake). + +    o SAPv2 provides a more complete specification for authentication. + +    o SAPv2 allows for non-SDP payloads to be transported.  SAPv1 +      required that the payload was SDP. + +    o SAPv1 included a timeout field for encrypted announcement, SAPv2 +      does not (and relies on explicit deletion messages or implicit +      timeouts). + +C. 
Acknowledgements + +   SAP and SDP were originally based on the protocol used by the sd +   session directory from Van Jacobson at LBNL.  Version 1 of SAP was +   designed by Mark Handley as part of the European Commission MICE +   (Esprit 7602) and MERCI (Telematics 1007) projects.  Version 2 +   includes authentication features developed by Edmund Whelan, Goli +   Montasser-Kohsari and Peter Kirstein as part of the European +   Commission ICE-TEL project (Telematics 1005), and support for IPv6 +   developed by Maryann P. Maher and Colin Perkins. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al.               Experimental                     [Page 15] + +RFC 2974             Session Announcement Protocol          October 2000 + + +D. Authors' Addresses + +   Mark Handley +   AT&T Center for Internet Research at ICSI, +   International Computer Science Institute, +   1947 Center Street, Suite 600, +   Berkeley, CA 94704, USA + +   EMail: mjh@aciri.org + + +   Colin Perkins +   USC Information Sciences Institute +   4350 N. Fairfax Drive, Suite 620 +   Arlington, VA 22203, USA + +   EMail: csp@isi.edu + + +   Edmund Whelan +   Department of Computer Science, +   University College London, +   Gower Street, +   London, WC1E 6BT, UK + +   EMail: e.whelan@cs.ucl.ac.uk + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al.               Experimental                     [Page 16] + +RFC 2974             Session Announcement Protocol          October 2000 + + +References + +   [1] Bradner, S., "Key words for use in RFCs to indicate requirement +       levels", BCP 14, RFC 2119, March 1997. + +   [2] Callas, J., Donnerhacke, L., Finney, H. and R. Thayer. "OpenPGP +       message format", RFC 2440, November 1998. + +   [3] Deutsch, P. and J.-L. Gailly, "Zlib compressed data format +       specification version 3.3", RFC 1950, May 1996. + +   [4] Handley, M. and V. Jacobson, "SDP: Session Description Protocol", +       RFC 2327, April 1998. 
+ +   [5] Handley, M., Thaler, D. and R. Kermode, "Multicast-scope zone +       announcement protocol (MZAP)", RFC 2776, February 2000. + +   [6] Housley, R., "Cryptographic message syntax", RFC 2630, June 1999. + +   [7] Mayer, D., "Administratively scoped IP multicast", RFC 2365, July +       1998. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al.               Experimental                     [Page 17] + +RFC 2974             Session Announcement Protocol          October 2000 + + +Full Copyright Statement + +   Copyright (C) The Internet Society (2000).  All Rights Reserved. + +   This document and translations of it may be copied and furnished to +   others, and derivative works that comment on or otherwise explain it +   or assist in its implementation may be prepared, copied, published +   and distributed, in whole or in part, without restriction of any +   kind, provided that the above copyright notice and this paragraph are +   included on all such copies and derivative works.  However, this +   document itself may not be modified in any way, such as by removing +   the copyright notice or references to the Internet Society or other +   Internet organizations, except as needed for the purpose of +   developing Internet standards in which case the procedures for +   copyrights defined in the Internet Standards process must be +   followed, or as required to translate it into languages other than +   English. + +   The limited permissions granted above are perpetual and will not be +   revoked by the Internet Society or its successors or assigns. 
+ +   This document and the information contained herein is provided on an +   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING +   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING +   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION +   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + +   Funding for the RFC Editor function is currently provided by the +   Internet Society. + + + + + + + + + + + + + + + + + + + +Handley, et al.               Experimental                     [Page 18] + diff --git a/src/modules/rtp/rfc3550.txt b/src/modules/rtp/rfc3550.txt new file mode 100644 index 00000000..165736cf --- /dev/null +++ b/src/modules/rtp/rfc3550.txt @@ -0,0 +1,5827 @@ + + + + + + +Network Working Group                                     H. Schulzrinne +Request for Comments: 3550                           Columbia University +Obsoletes: 1889                                               S.  Casner +Category: Standards Track                                  Packet Design +                                                            R. Frederick +                                                  Blue Coat Systems Inc. +                                                             V. Jacobson +                                                           Packet Design +                                                               July 2003 + + +          RTP: A Transport Protocol for Real-Time Applications + +Status of this Memo + +   This document specifies an Internet standards track protocol for the +   Internet community, and requests discussion and suggestions for +   improvements.  Please refer to the current edition of the "Internet +   Official Protocol Standards" (STD 1) for the standardization state +   and status of this protocol.  Distribution of this memo is unlimited. 
+ +Copyright Notice + +   Copyright (C) The Internet Society (2003).  All Rights Reserved. + +Abstract + +   This memorandum describes RTP, the real-time transport protocol.  RTP +   provides end-to-end network transport functions suitable for +   applications transmitting real-time data, such as audio, video or +   simulation data, over multicast or unicast network services.  RTP +   does not address resource reservation and does not guarantee +   quality-of-service for real-time services.  The data transport is +   augmented by a control protocol (RTCP) to allow monitoring of the +   data delivery in a manner scalable to large multicast networks, and +   to provide minimal control and identification functionality.  RTP and +   RTCP are designed to be independent of the underlying transport and +   network layers.  The protocol supports the use of RTP-level +   translators and mixers. + +   Most of the text in this memorandum is identical to RFC 1889 which it +   obsoletes.  There are no changes in the packet formats on the wire, +   only changes to the rules and algorithms governing how the protocol +   is used.  The biggest change is an enhancement to the scalable timer +   algorithm for calculating when to send RTCP packets in order to +   minimize transmission in excess of the intended rate when many +   participants join a session simultaneously. + + + + +Schulzrinne, et al.         Standards Track                     [Page 1] + +RFC 3550                          RTP                          July 2003 + + +Table of Contents + +   1.  Introduction ................................................   4 +       1.1  Terminology ............................................   5 +   2.  RTP Use Scenarios ...........................................   5 +       2.1  Simple Multicast Audio Conference ......................   6 +       2.2  Audio and Video Conference .............................   7 +       2.3  Mixers and Translators .................................   
7 +       2.4  Layered Encodings ......................................   8 +   3.  Definitions .................................................   8 +   4.  Byte Order, Alignment, and Time Format ......................  12 +   5.  RTP Data Transfer Protocol ..................................  13 +       5.1  RTP Fixed Header Fields ................................  13 +       5.2  Multiplexing RTP Sessions ..............................  16 +       5.3  Profile-Specific Modifications to the RTP Header .......  18 +            5.3.1  RTP Header Extension ............................  18 +   6.  RTP Control Protocol -- RTCP ................................  19 +       6.1  RTCP Packet Format .....................................  21 +       6.2  RTCP Transmission Interval .............................  24 +            6.2.1  Maintaining the Number of Session Members .......  28 +       6.3  RTCP Packet Send and Receive Rules .....................  28 +            6.3.1  Computing the RTCP Transmission Interval ........  29 +            6.3.2  Initialization ..................................  30 +            6.3.3  Receiving an RTP or Non-BYE RTCP Packet .........  31 +            6.3.4  Receiving an RTCP BYE Packet ....................  31 +            6.3.5  Timing Out an SSRC ..............................  32 +            6.3.6  Expiration of Transmission Timer ................  32 +            6.3.7  Transmitting a BYE Packet .......................  33 +            6.3.8  Updating we_sent ................................  34 +            6.3.9  Allocation of Source Description Bandwidth ......  34 +       6.4  Sender and Receiver Reports ............................  35 +            6.4.1  SR: Sender Report RTCP Packet ...................  36 +            6.4.2  RR: Receiver Report RTCP Packet .................  42 +            6.4.3  Extending the Sender and Receiver Reports .......  42 +            6.4.4  Analyzing Sender and Receiver Reports ...........  
43 +       6.5  SDES: Source Description RTCP Packet ...................  45 +            6.5.1  CNAME: Canonical End-Point Identifier SDES Item .  46 +            6.5.2  NAME: User Name SDES Item .......................  48 +            6.5.3  EMAIL: Electronic Mail Address SDES Item ........  48 +            6.5.4  PHONE: Phone Number SDES Item ...................  49 +            6.5.5  LOC: Geographic User Location SDES Item .........  49 +            6.5.6  TOOL: Application or Tool Name SDES Item ........  49 +            6.5.7  NOTE: Notice/Status SDES Item ...................  50 +            6.5.8  PRIV: Private Extensions SDES Item ..............  50 +       6.6  BYE: Goodbye RTCP Packet ...............................  51 +       6.7  APP: Application-Defined RTCP Packet ...................  52 +   7.  RTP Translators and Mixers ..................................  53 +       7.1  General Description ....................................  53 + + + +Schulzrinne, et al.         Standards Track                     [Page 2] + +RFC 3550                          RTP                          July 2003 + + +       7.2  RTCP Processing in Translators .........................  55 +       7.3  RTCP Processing in Mixers ..............................  57 +       7.4  Cascaded Mixers ........................................  58 +   8.  SSRC Identifier Allocation and Use ..........................  59 +       8.1  Probability of Collision ...............................  59 +       8.2  Collision Resolution and Loop Detection ................  60 +       8.3  Use with Layered Encodings .............................  64 +   9.  Security ....................................................  65 +       9.1  Confidentiality ........................................  65 +       9.2  Authentication and Message Integrity ...................  67 +   10. Congestion Control ..........................................  67 +   11. 
RTP over Network and Transport Protocols ....................  68 +   12. Summary of Protocol Constants ...............................  69 +       12.1 RTCP Packet Types ......................................  70 +       12.2 SDES Types .............................................  70 +   13. RTP Profiles and Payload Format Specifications ..............  71 +   14. Security Considerations .....................................  73 +   15. IANA Considerations .........................................  73 +   16. Intellectual Property Rights Statement ......................  74 +   17. Acknowledgments .............................................  74 +   Appendix A.   Algorithms ........................................  75 +   Appendix A.1  RTP Data Header Validity Checks ...................  78 +   Appendix A.2  RTCP Header Validity Checks .......................  82 +   Appendix A.3  Determining Number of Packets Expected and Lost ...  83 +   Appendix A.4  Generating RTCP SDES Packets ......................  84 +   Appendix A.5  Parsing RTCP SDES Packets .........................  85 +   Appendix A.6  Generating a Random 32-bit Identifier .............  85 +   Appendix A.7  Computing the RTCP Transmission Interval ..........  87 +   Appendix A.8  Estimating the Interarrival Jitter ................  94 +   Appendix B.   Changes from RFC 1889 .............................  95 +   References ...................................................... 100 +   Normative References ............................................ 100 +   Informative References .......................................... 100 +   Authors' Addresses .............................................. 103 +   Full Copyright Statement ........................................ 104 + + + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                     [Page 3] + +RFC 3550                          RTP                          July 2003 + + +1. 
Introduction + +   This memorandum specifies the real-time transport protocol (RTP), +   which provides end-to-end delivery services for data with real-time +   characteristics, such as interactive audio and video.  Those services +   include payload type identification, sequence numbering, timestamping +   and delivery monitoring.  Applications typically run RTP on top of +   UDP to make use of its multiplexing and checksum services; both +   protocols contribute parts of the transport protocol functionality. +   However, RTP may be used with other suitable underlying network or +   transport protocols (see Section 11).  RTP supports data transfer to +   multiple destinations using multicast distribution if provided by the +   underlying network. + +   Note that RTP itself does not provide any mechanism to ensure timely +   delivery or provide other quality-of-service guarantees, but relies +   on lower-layer services to do so.  It does not guarantee delivery or +   prevent out-of-order delivery, nor does it assume that the underlying +   network is reliable and delivers packets in sequence.  The sequence +   numbers included in RTP allow the receiver to reconstruct the +   sender's packet sequence, but sequence numbers might also be used to +   determine the proper location of a packet, for example in video +   decoding, without necessarily decoding packets in sequence. + +   While RTP is primarily designed to satisfy the needs of multi- +   participant multimedia conferences, it is not limited to that +   particular application.  Storage of continuous data, interactive +   distributed simulation, active badge, and control and measurement +   applications may also find RTP applicable. + +   This document defines RTP, consisting of two closely-linked parts: + +   o  the real-time transport protocol (RTP), to carry data that has +      real-time properties. 
+ +   o  the RTP control protocol (RTCP), to monitor the quality of service +      and to convey information about the participants in an on-going +      session.  The latter aspect of RTCP may be sufficient for "loosely +      controlled" sessions, i.e., where there is no explicit membership +      control and set-up, but it is not necessarily intended to support +      all of an application's control communication requirements.  This +      functionality may be fully or partially subsumed by a separate +      session control protocol, which is beyond the scope of this +      document. + +   RTP represents a new style of protocol following the principles of +   application level framing and integrated layer processing proposed by +   Clark and Tennenhouse [10].  That is, RTP is intended to be malleable + + + +Schulzrinne, et al.         Standards Track                     [Page 4] + +RFC 3550                          RTP                          July 2003 + + +   to provide the information required by a particular application and +   will often be integrated into the application processing rather than +   being implemented as a separate layer.  RTP is a protocol framework +   that is deliberately not complete.  This document specifies those +   functions expected to be common across all the applications for which +   RTP would be appropriate.  Unlike conventional protocols in which +   additional functions might be accommodated by making the protocol +   more general or by adding an option mechanism that would require +   parsing, RTP is intended to be tailored through modifications and/or +   additions to the headers as needed.  Examples are given in Sections +   5.3 and 6.4.3. 
+ +   Therefore, in addition to this document, a complete specification of +   RTP for a particular application will require one or more companion +   documents (see Section 13): + +   o  a profile specification document, which defines a set of payload +      type codes and their mapping to payload formats (e.g., media +      encodings).  A profile may also define extensions or modifications +      to RTP that are specific to a particular class of applications. +      Typically an application will operate under only one profile.  A +      profile for audio and video data may be found in the companion RFC +      3551 [1]. + +   o  payload format specification documents, which define how a +      particular payload, such as an audio or video encoding, is to be +      carried in RTP. + +   A discussion of real-time services and algorithms for their +   implementation as well as background discussion on some of the RTP +   design decisions can be found in [11]. + +1.1 Terminology + +   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", +   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this +   document are to be interpreted as described in BCP 14, RFC 2119 [2] +   and indicate requirement levels for compliant RTP implementations. + +2. RTP Use Scenarios + +   The following sections describe some aspects of the use of RTP.  The +   examples were chosen to illustrate the basic operation of +   applications using RTP, not to limit what RTP may be used for.  In +   these examples, RTP is carried on top of IP and UDP, and follows the +   conventions established by the profile for audio and video specified +   in the companion RFC 3551. + + + + +Schulzrinne, et al.         
Standards Track                     [Page 5] + +RFC 3550                          RTP                          July 2003 + + +2.1 Simple Multicast Audio Conference + +   A working group of the IETF meets to discuss the latest protocol +   document, using the IP multicast services of the Internet for voice +   communications.  Through some allocation mechanism the working group +   chair obtains a multicast group address and pair of ports.  One port +   is used for audio data, and the other is used for control (RTCP) +   packets.  This address and port information is distributed to the +   intended participants.  If privacy is desired, the data and control +   packets may be encrypted as specified in Section 9.1, in which case +   an encryption key must also be generated and distributed.  The exact +   details of these allocation and distribution mechanisms are beyond +   the scope of RTP. + +   The audio conferencing application used by each conference +   participant sends audio data in small chunks of, say, 20 ms duration. +   Each chunk of audio data is preceded by an RTP header; RTP header and +   data are in turn contained in a UDP packet.  The RTP header indicates +   what type of audio encoding (such as PCM, ADPCM or LPC) is contained +   in each packet so that senders can change the encoding during a +   conference, for example, to accommodate a new participant that is +   connected through a low-bandwidth link or react to indications of +   network congestion. + +   The Internet, like other packet networks, occasionally loses and +   reorders packets and delays them by variable amounts of time.  To +   cope with these impairments, the RTP header contains timing +   information and a sequence number that allow the receivers to +   reconstruct the timing produced by the source, so that in this +   example, chunks of audio are contiguously played out the speaker +   every 20 ms.  
This timing reconstruction is performed separately for +   each source of RTP packets in the conference.  The sequence number +   can also be used by the receiver to estimate how many packets are +   being lost. + +   Since members of the working group join and leave during the +   conference, it is useful to know who is participating at any moment +   and how well they are receiving the audio data.  For that purpose, +   each instance of the audio application in the conference periodically +   multicasts a reception report plus the name of its user on the RTCP +   (control) port.  The reception report indicates how well the current +   speaker is being received and may be used to control adaptive +   encodings.  In addition to the user name, other identifying +   information may also be included subject to control bandwidth limits. +   A site sends the RTCP BYE packet (Section 6.6) when it leaves the +   conference. + + + + + +Schulzrinne, et al.         Standards Track                     [Page 6] + +RFC 3550                          RTP                          July 2003 + + +2.2 Audio and Video Conference + +   If both audio and video media are used in a conference, they are +   transmitted as separate RTP sessions.  That is, separate RTP and RTCP +   packets are transmitted for each medium using two different UDP port +   pairs and/or multicast addresses.  There is no direct coupling at the +   RTP level between the audio and video sessions, except that a user +   participating in both sessions should use the same distinguished +   (canonical) name in the RTCP packets for both so that the sessions +   can be associated. + +   One motivation for this separation is to allow some participants in +   the conference to receive only one medium if they choose.  Further +   explanation is given in Section 5.2.  
Despite the separation, +   synchronized playback of a source's audio and video can be achieved +   using timing information carried in the RTCP packets for both +   sessions. + +2.3 Mixers and Translators + +   So far, we have assumed that all sites want to receive media data in +   the same format.  However, this may not always be appropriate. +   Consider the case where participants in one area are connected +   through a low-speed link to the majority of the conference +   participants who enjoy high-speed network access.  Instead of forcing +   everyone to use a lower-bandwidth, reduced-quality audio encoding, an +   RTP-level relay called a mixer may be placed near the low-bandwidth +   area.  This mixer resynchronizes incoming audio packets to +   reconstruct the constant 20 ms spacing generated by the sender, mixes +   these reconstructed audio streams into a single stream, translates +   the audio encoding to a lower-bandwidth one and forwards the lower- +   bandwidth packet stream across the low-speed link.  These packets +   might be unicast to a single recipient or multicast on a different +   address to multiple recipients.  The RTP header includes a means for +   mixers to identify the sources that contributed to a mixed packet so +   that correct talker indication can be provided at the receivers. + +   Some of the intended participants in the audio conference may be +   connected with high bandwidth links but might not be directly +   reachable via IP multicast.  For example, they might be behind an +   application-level firewall that will not let any IP packets pass. +   For these sites, mixing may not be necessary, in which case another +   type of RTP-level relay called a translator may be used.  Two +   translators are installed, one on either side of the firewall, with +   the outside one funneling all multicast packets received through a +   secure connection to the translator inside the firewall.  
The +   translator inside the firewall sends them again as multicast packets +   to a multicast group restricted to the site's internal network. + + + +Schulzrinne, et al.         Standards Track                     [Page 7] + +RFC 3550                          RTP                          July 2003 + + +   Mixers and translators may be designed for a variety of purposes.  An +   example is a video mixer that scales the images of individual people +   in separate video streams and composites them into one video stream +   to simulate a group scene.  Other examples of translation include the +   connection of a group of hosts speaking only IP/UDP to a group of +   hosts that understand only ST-II, or the packet-by-packet encoding +   translation of video streams from individual sources without +   resynchronization or mixing.  Details of the operation of mixers and +   translators are given in Section 7. + +2.4 Layered Encodings + +   Multimedia applications should be able to adjust the transmission +   rate to match the capacity of the receiver or to adapt to network +   congestion.  Many implementations place the responsibility of rate- +   adaptivity at the source.  This does not work well with multicast +   transmission because of the conflicting bandwidth requirements of +   heterogeneous receivers.  The result is often a least-common +   denominator scenario, where the smallest pipe in the network mesh +   dictates the quality and fidelity of the overall live multimedia +   "broadcast". + +   Instead, responsibility for rate-adaptation can be placed at the +   receivers by combining a layered encoding with a layered transmission +   system.  In the context of RTP over IP multicast, the source can +   stripe the progressive layers of a hierarchically represented signal +   across multiple RTP sessions each carried on its own multicast group. 
+   Receivers can then adapt to network heterogeneity and control their +   reception bandwidth by joining only the appropriate subset of the +   multicast groups. + +   Details of the use of RTP with layered encodings are given in +   Sections 6.3.9, 8.3 and 11. + +3. Definitions + +   RTP payload: The data transported by RTP in a packet, for +      example audio samples or compressed video data.  The payload +      format and interpretation are beyond the scope of this document. + +   RTP packet: A data packet consisting of the fixed RTP header, a +      possibly empty list of contributing sources (see below), and the +      payload data.  Some underlying protocols may require an +      encapsulation of the RTP packet to be defined.  Typically one +      packet of the underlying protocol contains a single RTP packet, +      but several RTP packets MAY be contained if permitted by the +      encapsulation method (see Section 11). + + + + +Schulzrinne, et al.         Standards Track                     [Page 8] + +RFC 3550                          RTP                          July 2003 + + +   RTCP packet: A control packet consisting of a fixed header part +      similar to that of RTP data packets, followed by structured +      elements that vary depending upon the RTCP packet type.  The +      formats are defined in Section 6.  Typically, multiple RTCP +      packets are sent together as a compound RTCP packet in a single +      packet of the underlying protocol; this is enabled by the length +      field in the fixed header of each RTCP packet. + +   Port: The "abstraction that transport protocols use to +      distinguish among multiple destinations within a given host +      computer.  TCP/IP protocols identify ports using small positive +      integers." [12] The transport selectors (TSEL) used by the OSI +      transport layer are equivalent to ports.  
RTP depends upon the +      lower-layer protocol to provide some mechanism such as ports to +      multiplex the RTP and RTCP packets of a session. + +   Transport address: The combination of a network address and port +      that identifies a transport-level endpoint, for example an IP +      address and a UDP port.  Packets are transmitted from a source +      transport address to a destination transport address. + +   RTP media type: An RTP media type is the collection of payload +      types which can be carried within a single RTP session.  The RTP +      Profile assigns RTP media types to RTP payload types. + +   Multimedia session: A set of concurrent RTP sessions among a +      common group of participants.  For example, a videoconference +      (which is a multimedia session) may contain an audio RTP session +      and a video RTP session. + +   RTP session: An association among a set of participants +      communicating with RTP.  A participant may be involved in multiple +      RTP sessions at the same time.  In a multimedia session, each +      medium is typically carried in a separate RTP session with its own +      RTCP packets unless the encoding itself multiplexes multiple +      media into a single data stream.  A participant distinguishes +      multiple RTP sessions by reception of different sessions using +      different pairs of destination transport addresses, where a pair +      of transport addresses comprises one network address plus a pair +      of ports for RTP and RTCP.  All participants in an RTP session may +      share a common destination transport address pair, as in the case +      of IP multicast, or the pairs may be different for each +      participant, as in the case of individual unicast network +      addresses and port pairs.  In the unicast case, a participant may +      receive from all other participants in the session using the same +      pair of ports, or may use a distinct pair of ports for each. 
+ + + + + +Schulzrinne, et al.         Standards Track                     [Page 9] + +RFC 3550                          RTP                          July 2003 + + +      The distinguishing feature of an RTP session is that each +      maintains a full, separate space of SSRC identifiers (defined +      next).  The set of participants included in one RTP session +      consists of those that can receive an SSRC identifier transmitted +      by any one of the participants either in RTP as the SSRC or a CSRC +      (also defined below) or in RTCP.  For example, consider a three- +      party conference implemented using unicast UDP with each +      participant receiving from the other two on separate port pairs. +      If each participant sends RTCP feedback about data received from +      one other participant only back to that participant, then the +      conference is composed of three separate point-to-point RTP +      sessions.  If each participant provides RTCP feedback about its +      reception of one other participant to both of the other +      participants, then the conference is composed of one multi-party +      RTP session.  The latter case simulates the behavior that would +      occur with IP multicast communication among the three +      participants. + +      The RTP framework allows the variations defined here, but a +      particular control protocol or application design will usually +      impose constraints on these variations. + +   Synchronization source (SSRC): The source of a stream of RTP +      packets, identified by a 32-bit numeric SSRC identifier carried in +      the RTP header so as not to be dependent upon the network address. +      All packets from a synchronization source form part of the same +      timing and sequence number space, so a receiver groups packets by +      synchronization source for playback.  
Examples of synchronization +      sources include the sender of a stream of packets derived from a +      signal source such as a microphone or a camera, or an RTP mixer +      (see below).  A synchronization source may change its data format, +      e.g., audio encoding, over time.  The SSRC identifier is a +      randomly chosen value meant to be globally unique within a +      particular RTP session (see Section 8).  A participant need not +      use the same SSRC identifier for all the RTP sessions in a +      multimedia session; the binding of the SSRC identifiers is +      provided through RTCP (see Section 6.5.1).  If a participant +      generates multiple streams in one RTP session, for example from +      separate video cameras, each MUST be identified as a different +      SSRC. + +   Contributing source (CSRC): A source of a stream of RTP packets +      that has contributed to the combined stream produced by an RTP +      mixer (see below).  The mixer inserts a list of the SSRC +      identifiers of the sources that contributed to the generation of a +      particular packet into the RTP header of that packet.  This list +      is called the CSRC list.  An example application is audio +      conferencing where a mixer indicates all the talkers whose speech + + + +Schulzrinne, et al.         Standards Track                    [Page 10] + +RFC 3550                          RTP                          July 2003 + + +      was combined to produce the outgoing packet, allowing the receiver +      to indicate the current talker, even though all the audio packets +      contain the same SSRC identifier (that of the mixer). + +   End system: An application that generates the content to be sent +      in RTP packets and/or consumes the content of received RTP +      packets.  An end system can act as one or more synchronization +      sources in a particular RTP session, but typically only one. 
+ +   Mixer: An intermediate system that receives RTP packets from one +      or more sources, possibly changes the data format, combines the +      packets in some manner and then forwards a new RTP packet.  Since +      the timing among multiple input sources will not generally be +      synchronized, the mixer will make timing adjustments among the +      streams and generate its own timing for the combined stream. +      Thus, all data packets originating from a mixer will be identified +      as having the mixer as their synchronization source. + +   Translator: An intermediate system that forwards RTP packets +      with their synchronization source identifier intact.  Examples of +      translators include devices that convert encodings without mixing, +      replicators from multicast to unicast, and application-level +      filters in firewalls. + +   Monitor: An application that receives RTCP packets sent by +      participants in an RTP session, in particular the reception +      reports, and estimates the current quality of service for +      distribution monitoring, fault diagnosis and long-term statistics. +      The monitor function is likely to be built into the application(s) +      participating in the session, but may also be a separate +      application that does not otherwise participate and does not send +      or receive the RTP data packets (since they are on a separate +      port).  These are called third-party monitors.  It is also +      acceptable for a third-party monitor to receive the RTP data +      packets but not send RTCP packets or otherwise be counted in the +      session. + +   Non-RTP means: Protocols and mechanisms that may be needed in +      addition to RTP to provide a usable service.  
In particular, for +      multimedia conferences, a control protocol may distribute +      multicast addresses and keys for encryption, negotiate the +      encryption algorithm to be used, and define dynamic mappings +      between RTP payload type values and the payload formats they +      represent for formats that do not have a predefined payload type +      value.  Examples of such protocols include the Session Initiation +      Protocol (SIP) (RFC 3261 [13]), ITU Recommendation H.323 [14] and +      applications using SDP (RFC 2327 [15]), such as RTSP (RFC 2326 +      [16]).  For simple + + + +Schulzrinne, et al.         Standards Track                    [Page 11] + +RFC 3550                          RTP                          July 2003 + + +      applications, electronic mail or a conference database may also be +      used.  The specification of such protocols and mechanisms is +      outside the scope of this document. + +4. Byte Order, Alignment, and Time Format + +   All integer fields are carried in network byte order, that is, most +   significant byte (octet) first.  This byte order is commonly known as +   big-endian.  The transmission order is described in detail in [3]. +   Unless otherwise noted, numeric constants are in decimal (base 10). + +   All header data is aligned to its natural length, i.e., 16-bit fields +   are aligned on even offsets, 32-bit fields are aligned at offsets +   divisible by four, etc.  Octets designated as padding have the value +   zero. + +   Wallclock time (absolute date and time) is represented using the +   timestamp format of the Network Time Protocol (NTP), which is in +   seconds relative to 0h UTC on 1 January 1900 [4].  The full +   resolution NTP timestamp is a 64-bit unsigned fixed-point number with +   the integer part in the first 32 bits and the fractional part in the +   last 32 bits.  
In some fields where a more compact representation is +   appropriate, only the middle 32 bits are used; that is, the low 16 +   bits of the integer part and the high 16 bits of the fractional part. +   The high 16 bits of the integer part must be determined +   independently. + +   An implementation is not required to run the Network Time Protocol in +   order to use RTP.  Other time sources, or none at all, may be used +   (see the description of the NTP timestamp field in Section 6.4.1). +   However, running NTP may be useful for synchronizing streams +   transmitted from separate hosts. + +   The NTP timestamp will wrap around to zero some time in the year +   2036, but for RTP purposes, only differences between pairs of NTP +   timestamps are used.  So long as the pairs of timestamps can be +   assumed to be within 68 years of each other, using modular arithmetic +   for subtractions and comparisons makes the wraparound irrelevant. + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 12] + +RFC 3550                          RTP                          July 2003 + + +5. RTP Data Transfer Protocol + +5.1 RTP Fixed Header Fields + +   The RTP header has the following format: + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |V=2|P|X|  CC   |M|     PT      |       sequence number         | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                           timestamp                           | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |           synchronization source (SSRC) identifier            | +   +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +   |            contributing source (CSRC) identifiers             | +   |                             ....                        
      | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The first twelve octets are present in every RTP packet, while the +   list of CSRC identifiers is present only when inserted by a mixer. +   The fields have the following meaning: + +   version (V): 2 bits +      This field identifies the version of RTP.  The version defined by +      this specification is two (2).  (The value 1 is used by the first +      draft version of RTP and the value 0 is used by the protocol +      initially implemented in the "vat" audio tool.) + +   padding (P): 1 bit +      If the padding bit is set, the packet contains one or more +      additional padding octets at the end which are not part of the +      payload.  The last octet of the padding contains a count of how +      many padding octets should be ignored, including itself.  Padding +      may be needed by some encryption algorithms with fixed block sizes +      or for carrying several RTP packets in a lower-layer protocol data +      unit. + +   extension (X): 1 bit +      If the extension bit is set, the fixed header MUST be followed by +      exactly one header extension, with a format defined in Section +      5.3.1. + +   CSRC count (CC): 4 bits +      The CSRC count contains the number of CSRC identifiers that follow +      the fixed header. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 13] + +RFC 3550                          RTP                          July 2003 + + +   marker (M): 1 bit +      The interpretation of the marker is defined by a profile.  It is +      intended to allow significant events such as frame boundaries to +      be marked in the packet stream.  A profile MAY define additional +      marker bits or specify that there is no marker bit by changing the +      number of bits in the payload type field (see Section 5.3). 
+ +   payload type (PT): 7 bits +      This field identifies the format of the RTP payload and determines +      its interpretation by the application.  A profile MAY specify a +      default static mapping of payload type codes to payload formats. +      Additional payload type codes MAY be defined dynamically through +      non-RTP means (see Section 3).  A set of default mappings for +      audio and video is specified in the companion RFC 3551 [1].  An +      RTP source MAY change the payload type during a session, but this +      field SHOULD NOT be used for multiplexing separate media streams +      (see Section 5.2). + +      A receiver MUST ignore packets with payload types that it does not +      understand. + +   sequence number: 16 bits +      The sequence number increments by one for each RTP data packet +      sent, and may be used by the receiver to detect packet loss and to +      restore packet sequence.  The initial value of the sequence number +      SHOULD be random (unpredictable) to make known-plaintext attacks +      on encryption more difficult, even if the source itself does not +      encrypt according to the method in Section 9.1, because the +      packets may flow through a translator that does.  Techniques for +      choosing unpredictable numbers are discussed in [17]. + +   timestamp: 32 bits +      The timestamp reflects the sampling instant of the first octet in +      the RTP data packet.  The sampling instant MUST be derived from a +      clock that increments monotonically and linearly in time to allow +      synchronization and jitter calculations (see Section 6.4.1).  The +      resolution of the clock MUST be sufficient for the desired +      synchronization accuracy and for measuring packet arrival jitter +      (one tick per video frame is typically not sufficient).  
The clock +      frequency is dependent on the format of data carried as payload +      and is specified statically in the profile or payload format +      specification that defines the format, or MAY be specified +      dynamically for payload formats defined through non-RTP means.  If +      RTP packets are generated periodically, the nominal sampling +      instant as determined from the sampling clock is to be used, not a +      reading of the system clock.  As an example, for fixed-rate audio +      the timestamp clock would likely increment by one for each +      sampling period.  If an audio application reads blocks covering + + + +Schulzrinne, et al.         Standards Track                    [Page 14] + +RFC 3550                          RTP                          July 2003 + + +      160 sampling periods from the input device, the timestamp would be +      increased by 160 for each such block, regardless of whether the +      block is transmitted in a packet or dropped as silent. + +      The initial value of the timestamp SHOULD be random, as for the +      sequence number.  Several consecutive RTP packets will have equal +      timestamps if they are (logically) generated at once, e.g., belong +      to the same video frame.  Consecutive RTP packets MAY contain +      timestamps that are not monotonic if the data is not transmitted +      in the order it was sampled, as in the case of MPEG interpolated +      video frames.  (The sequence numbers of the packets as transmitted +      will still be monotonic.) + +      RTP timestamps from different media streams may advance at +      different rates and usually have independent, random offsets. +      Therefore, although these timestamps are sufficient to reconstruct +      the timing of a single stream, directly comparing RTP timestamps +      from different media is not effective for synchronization. 
+      Instead, for each medium the RTP timestamp is related to the +      sampling instant by pairing it with a timestamp from a reference +      clock (wallclock) that represents the time when the data +      corresponding to the RTP timestamp was sampled.  The reference +      clock is shared by all media to be synchronized.  The timestamp +      pairs are not transmitted in every data packet, but at a lower +      rate in RTCP SR packets as described in Section 6.4. + +      The sampling instant is chosen as the point of reference for the +      RTP timestamp because it is known to the transmitting endpoint and +      has a common definition for all media, independent of encoding +      delays or other processing.  The purpose is to allow synchronized +      presentation of all media sampled at the same time. + +      Applications transmitting stored data rather than data sampled in +      real time typically use a virtual presentation timeline derived +      from wallclock time to determine when the next frame or other unit +      of each medium in the stored data should be presented.  In this +      case, the RTP timestamp would reflect the presentation time for +      each unit.  That is, the RTP timestamp for each unit would be +      related to the wallclock time at which the unit becomes current on +      the virtual presentation timeline.  Actual presentation occurs +      some time later as determined by the receiver. + +      An example describing live audio narration of prerecorded video +      illustrates the significance of choosing the sampling instant as +      the reference point.  In this scenario, the video would be +      presented locally for the narrator to view and would be +      simultaneously transmitted using RTP.  The "sampling instant" of a +      video frame transmitted in RTP would be established by referencing + + + +Schulzrinne, et al.         
Standards Track                    [Page 15] + +RFC 3550                          RTP                          July 2003 + + +      its timestamp to the wallclock time when that video frame was +      presented to the narrator.  The sampling instant for the audio RTP +      packets containing the narrator's speech would be established by +      referencing the same wallclock time when the audio was sampled. +      The audio and video may even be transmitted by different hosts if +      the reference clocks on the two hosts are synchronized by some +      means such as NTP.  A receiver can then synchronize presentation +      of the audio and video packets by relating their RTP timestamps +      using the timestamp pairs in RTCP SR packets. + +   SSRC: 32 bits +      The SSRC field identifies the synchronization source.  This +      identifier SHOULD be chosen randomly, with the intent that no two +      synchronization sources within the same RTP session will have the +      same SSRC identifier.  An example algorithm for generating a +      random identifier is presented in Appendix A.6.  Although the +      probability of multiple sources choosing the same identifier is +      low, all RTP implementations must be prepared to detect and +      resolve collisions.  Section 8 describes the probability of +      collision along with a mechanism for resolving collisions and +      detecting RTP-level forwarding loops based on the uniqueness of +      the SSRC identifier.  If a source changes its source transport +      address, it must also choose a new SSRC identifier to avoid being +      interpreted as a looped source (see Section 8.2). + +   CSRC list: 0 to 15 items, 32 bits each +      The CSRC list identifies the contributing sources for the payload +      contained in this packet.  The number of identifiers is given by +      the CC field.  If there are more than 15 contributing sources, +      only 15 can be identified.  
CSRC identifiers are inserted by +      mixers (see Section 7.1), using the SSRC identifiers of +      contributing sources.  For example, for audio packets the SSRC +      identifiers of all sources that were mixed together to create a +      packet are listed, allowing correct talker indication at the +      receiver. + +5.2 Multiplexing RTP Sessions + +   For efficient protocol processing, the number of multiplexing points +   should be minimized, as described in the integrated layer processing +   design principle [10].  In RTP, multiplexing is provided by the +   destination transport address (network address and port number) which +   is different for each RTP session.  For example, in a teleconference +   composed of audio and video media encoded separately, each medium +   SHOULD be carried in a separate RTP session with its own destination +   transport address. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 16] + +RFC 3550                          RTP                          July 2003 + + +   Separate audio and video streams SHOULD NOT be carried in a single +   RTP session and demultiplexed based on the payload type or SSRC +   fields.  Interleaving packets with different RTP media types but +   using the same SSRC would introduce several problems: + +   1. If, say, two audio streams shared the same RTP session and the +      same SSRC value, and one were to change encodings and thus acquire +      a different RTP payload type, there would be no general way of +      identifying which stream had changed encodings. + +   2. An SSRC is defined to identify a single timing and sequence number +      space.  Interleaving multiple payload types would require +      different timing spaces if the media clock rates differ and would +      require different sequence number spaces to tell which payload +      type suffered packet loss. + +   3. 
The RTCP sender and receiver reports (see Section 6.4) can only +      describe one timing and sequence number space per SSRC and do not +      carry a payload type field. + +   4. An RTP mixer would not be able to combine interleaved streams of +      incompatible media into one stream. + +   5. Carrying multiple media in one RTP session precludes: the use of +      different network paths or network resource allocations if +      appropriate; reception of a subset of the media if desired, for +      example just audio if video would exceed the available bandwidth; +      and receiver implementations that use separate processes for the +      different media, whereas using separate RTP sessions permits +      either single- or multiple-process implementations. + +   Using a different SSRC for each medium but sending them in the same +   RTP session would avoid the first three problems but not the last +   two. + +   On the other hand, multiplexing multiple related sources of the same +   medium in one RTP session using different SSRC values is the norm for +   multicast sessions.  The problems listed above don't apply: an RTP +   mixer can combine multiple audio sources, for example, and the same +   treatment is applicable for all of them.  It may also be appropriate +   to multiplex streams of the same medium using different SSRC values +   in other scenarios where the last two problems do not apply. + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 17] + +RFC 3550                          RTP                          July 2003 + + +5.3 Profile-Specific Modifications to the RTP Header + +   The existing RTP data packet header is believed to be complete for +   the set of functions required in common across all the application +   classes that RTP might support.  
However, in keeping with the ALF +   design principle, the header MAY be tailored through modifications or +   additions defined in a profile specification while still allowing +   profile-independent monitoring and recording tools to function. + +   o  The marker bit and payload type field carry profile-specific +      information, but they are allocated in the fixed header since many +      applications are expected to need them and might otherwise have to +      add another 32-bit word just to hold them.  The octet containing +      these fields MAY be redefined by a profile to suit different +      requirements, for example with more or fewer marker bits.  If +      there are any marker bits, one SHOULD be located in the most +      significant bit of the octet since profile-independent monitors +      may be able to observe a correlation between packet loss patterns +      and the marker bit. + +   o  Additional information that is required for a particular payload +      format, such as a video encoding, SHOULD be carried in the payload +      section of the packet.  This might be in a header that is always +      present at the start of the payload section, or might be indicated +      by a reserved value in the data pattern. + +   o  If a particular class of applications needs additional +      functionality independent of payload format, the profile under +      which those applications operate SHOULD define additional fixed +      fields to follow immediately after the SSRC field of the existing +      fixed header.  Those applications will be able to quickly and +      directly access the additional fields while profile-independent +      monitors or recorders can still process the RTP packets by +      interpreting only the first twelve octets. + +   If it turns out that additional functionality is needed in common +   across all profiles, then a new version of RTP should be defined to +   make a permanent change to the fixed header. 
+ +5.3.1 RTP Header Extension + +   An extension mechanism is provided to allow individual +   implementations to experiment with new payload-format-independent +   functions that require additional information to be carried in the +   RTP data packet header.  This mechanism is designed so that the +   header extension may be ignored by other interoperating +   implementations that have not been extended. + + + + +Schulzrinne, et al.         Standards Track                    [Page 18] + +RFC 3550                          RTP                          July 2003 + + +   Note that this header extension is intended only for limited use. +   Most potential uses of this mechanism would be better done another +   way, using the methods described in the previous section.  For +   example, a profile-specific extension to the fixed header is less +   expensive to process because it is not conditional nor in a variable +   location.  Additional information required for a particular payload +   format SHOULD NOT use this header extension, but SHOULD be carried in +   the payload section of the packet. + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |      defined by profile       |           length              | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                        header extension                       | +   |                             ....                              | + +   If the X bit in the RTP header is one, a variable-length header +   extension MUST be appended to the RTP header, following the CSRC list +   if present.  The header extension contains a 16-bit length field that +   counts the number of 32-bit words in the extension, excluding the +   four-octet extension header (therefore zero is a valid length).  
Only +   a single extension can be appended to the RTP data header.  To allow +   multiple interoperating implementations to each experiment +   independently with different header extensions, or to allow a +   particular implementation to experiment with more than one type of +   header extension, the first 16 bits of the header extension are left +   open for distinguishing identifiers or parameters.  The format of +   these 16 bits is to be defined by the profile specification under +   which the implementations are operating.  This RTP specification does +   not define any header extensions itself. + +6. RTP Control Protocol -- RTCP + +   The RTP control protocol (RTCP) is based on the periodic transmission +   of control packets to all participants in the session, using the same +   distribution mechanism as the data packets.  The underlying protocol +   MUST provide multiplexing of the data and control packets, for +   example using separate port numbers with UDP.  RTCP performs four +   functions: + +   1. The primary function is to provide feedback on the quality of the +      data distribution.  This is an integral part of the RTP's role as +      a transport protocol and is related to the flow and congestion +      control functions of other transport protocols (see Section 10 on +      the requirement for congestion control).  The feedback may be +      directly useful for control of adaptive encodings [18,19], but +      experiments with IP multicasting have shown that it is also + + + +Schulzrinne, et al.         Standards Track                    [Page 19] + +RFC 3550                          RTP                          July 2003 + + +      critical to get feedback from the receivers to diagnose faults in +      the distribution.  Sending reception feedback reports to all +      participants allows one who is observing problems to evaluate +      whether those problems are local or global.  
With a distribution +      mechanism like IP multicast, it is also possible for an entity +      such as a network service provider who is not otherwise involved +      in the session to receive the feedback information and act as a +      third-party monitor to diagnose network problems.  This feedback +      function is performed by the RTCP sender and receiver reports, +      described below in Section 6.4. + +   2. RTCP carries a persistent transport-level identifier for an RTP +      source called the canonical name or CNAME, Section 6.5.1.  Since +      the SSRC identifier may change if a conflict is discovered or a +      program is restarted, receivers require the CNAME to keep track of +      each participant.  Receivers may also require the CNAME to +      associate multiple data streams from a given participant in a set +      of related RTP sessions, for example to synchronize audio and +      video.  Inter-media synchronization also requires the NTP and RTP +      timestamps included in RTCP packets by data senders. + +   3. The first two functions require that all participants send RTCP +      packets, therefore the rate must be controlled in order for RTP to +      scale up to a large number of participants.  By having each +      participant send its control packets to all the others, each can +      independently observe the number of participants.  This number is +      used to calculate the rate at which the packets are sent, as +      explained in Section 6.2. + +   4. A fourth, OPTIONAL function is to convey minimal session control +      information, for example participant identification to be +      displayed in the user interface.  This is most likely to be useful +      in "loosely controlled" sessions where participants enter and +      leave without membership control or parameter negotiation.  
RTCP +      serves as a convenient channel to reach all the participants, but +      it is not necessarily expected to support all the control +      communication requirements of an application.  A higher-level +      session control protocol, which is beyond the scope of this +      document, may be needed. + +   Functions 1-3 SHOULD be used in all environments, but particularly in +   the IP multicast environment.  RTP application designers SHOULD avoid +   mechanisms that can only work in unicast mode and will not scale to +   larger numbers.  Transmission of RTCP MAY be controlled separately +   for senders and receivers, as described in Section 6.2, for cases +   such as unidirectional links where feedback from receivers is not +   possible. + + + + +Schulzrinne, et al.         Standards Track                    [Page 20] + +RFC 3550                          RTP                          July 2003 + + +   Non-normative note:  In the multicast routing approach +      called Source-Specific Multicast (SSM), there is only one sender +      per "channel" (a source address, group address pair), and +      receivers (except for the channel source) cannot use multicast to +      communicate directly with other channel members.  The +      recommendations here accommodate SSM only through Section 6.2's +      option of turning off receivers' RTCP entirely.  Future work will +      specify adaptation of RTCP for SSM so that feedback from receivers +      can be maintained. 
+ +6.1 RTCP Packet Format + +   This specification defines several RTCP packet types to carry a +   variety of control information: + +   SR:   Sender report, for transmission and reception statistics from +         participants that are active senders + +   RR:   Receiver report, for reception statistics from participants +         that are not active senders and in combination with SR for +         active senders reporting on more than 31 sources + +   SDES: Source description items, including CNAME + +   BYE:  Indicates end of participation + +   APP:  Application-specific functions + +   Each RTCP packet begins with a fixed part similar to that of RTP data +   packets, followed by structured elements that MAY be of variable +   length according to the packet type but MUST end on a 32-bit +   boundary.  The alignment requirement and a length field in the fixed +   part of each packet are included to make RTCP packets "stackable". +   Multiple RTCP packets can be concatenated without any intervening +   separators to form a compound RTCP packet that is sent in a single +   packet of the lower layer protocol, for example UDP.  There is no +   explicit count of individual RTCP packets in the compound packet +   since the lower layer protocols are expected to provide an overall +   length to determine the end of the compound packet. + +   Each individual RTCP packet in the compound packet may be processed +   independently with no requirements upon the order or combination of +   packets.  However, in order to perform the functions of the protocol, +   the following constraints are imposed: + + + + + + + +Schulzrinne, et al.         
Standards Track                    [Page 21] + +RFC 3550                          RTP                          July 2003 + + +   o  Reception statistics (in SR or RR) should be sent as often as +      bandwidth constraints will allow to maximize the resolution of the +      statistics, therefore each periodically transmitted compound RTCP +      packet MUST include a report packet. + +   o  New receivers need to receive the CNAME for a source as soon as +      possible to identify the source and to begin associating media for +      purposes such as lip-sync, so each compound RTCP packet MUST also +      include the SDES CNAME except when the compound RTCP packet is +      split for partial encryption as described in Section 9.1. + +   o  The number of packet types that may appear first in the compound +      packet needs to be limited to increase the number of constant bits +      in the first word and the probability of successfully validating +      RTCP packets against misaddressed RTP data packets or other +      unrelated packets. + +   Thus, all RTCP packets MUST be sent in a compound packet of at least +   two individual packets, with the following format: + +   Encryption prefix:  If and only if the compound packet is to be +      encrypted according to the method in Section 9.1, it MUST be +      prefixed by a random 32-bit quantity redrawn for every compound +      packet transmitted.  If padding is required for the encryption, it +      MUST be added to the last packet of the compound packet. + +   SR or RR:  The first RTCP packet in the compound packet MUST +      always be a report packet to facilitate header validation as +      described in Appendix A.2.  This is true even if no data has been +      sent or received, in which case an empty RR MUST be sent, and even +      if the only other RTCP packet in the compound packet is a BYE. 
+ +   Additional RRs:  If the number of sources for which reception +      statistics are being reported exceeds 31, the number that will fit +      into one SR or RR packet, then additional RR packets SHOULD follow +      the initial report packet. + +   SDES:  An SDES packet containing a CNAME item MUST be included +      in each compound RTCP packet, except as noted in Section 9.1. +      Other source description items MAY optionally be included if +      required by a particular application, subject to bandwidth +      constraints (see Section 6.3.9). + +   BYE or APP:  Other RTCP packet types, including those yet to be +      defined, MAY follow in any order, except that BYE SHOULD be the +      last packet sent with a given SSRC/CSRC.  Packet types MAY appear +      more than once. + + + + +Schulzrinne, et al.         Standards Track                    [Page 22] + +RFC 3550                          RTP                          July 2003 + + +   An individual RTP participant SHOULD send only one compound RTCP +   packet per report interval in order for the RTCP bandwidth per +   participant to be estimated correctly (see Section 6.2), except when +   the compound RTCP packet is split for partial encryption as described +   in Section 9.1.  If there are too many sources to fit all the +   necessary RR packets into one compound RTCP packet without exceeding +   the maximum transmission unit (MTU) of the network path, then only +   the subset that will fit into one MTU SHOULD be included in each +   interval.  The subsets SHOULD be selected round-robin across multiple +   intervals so that all sources are reported. + +   It is RECOMMENDED that translators and mixers combine individual RTCP +   packets from the multiple sources they are forwarding into one +   compound packet whenever feasible in order to amortize the packet +   overhead (see Section 7).  An example RTCP compound packet as might +   be produced by a mixer is shown in Fig. 1.  
If the overall length of +   a compound packet would exceed the MTU of the network path, it SHOULD +   be segmented into multiple shorter compound packets to be transmitted +   in separate packets of the underlying protocol.  This does not impair +   the RTCP bandwidth estimation because each compound packet represents +   at least one distinct participant.  Note that each of the compound +   packets MUST begin with an SR or RR packet. + +   An implementation SHOULD ignore incoming RTCP packets with types +   unknown to it.  Additional RTCP packet types may be registered with +   the Internet Assigned Numbers Authority (IANA) as described in +   Section 15. + +   if encrypted: random 32-bit integer +   | +   |[--------- packet --------][---------- packet ----------][-packet-] +   | +   |                receiver            chunk        chunk +   V                reports           item  item   item  item +   -------------------------------------------------------------------- +   R[SR #sendinfo #site1#site2][SDES #CNAME PHONE #CNAME LOC][BYE##why] +   -------------------------------------------------------------------- +   |                                                                  | +   |<-----------------------  compound packet ----------------------->| +   |<--------------------------  UDP packet ------------------------->| + +   #: SSRC/CSRC identifier + +              Figure 1: Example of an RTCP compound packet + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 23] + +RFC 3550                          RTP                          July 2003 + + +6.2 RTCP Transmission Interval + +   RTP is designed to allow an application to scale automatically over +   session sizes ranging from a few participants to thousands.  
For +   example, in an audio conference the data traffic is inherently self- +   limiting because only one or two people will speak at a time, so with +   multicast distribution the data rate on any given link remains +   relatively constant independent of the number of participants. +   However, the control traffic is not self-limiting.  If the reception +   reports from each participant were sent at a constant rate, the +   control traffic would grow linearly with the number of participants. +   Therefore, the rate must be scaled down by dynamically calculating +   the interval between RTCP packet transmissions. + +   For each session, it is assumed that the data traffic is subject to +   an aggregate limit called the "session bandwidth" to be divided among +   the participants.  This bandwidth might be reserved and the limit +   enforced by the network.  If there is no reservation, there may be +   other constraints, depending on the environment, that establish the +   "reasonable" maximum for the session to use, and that would be the +   session bandwidth.  The session bandwidth may be chosen based on some +   cost or a priori knowledge of the available network bandwidth for the +   session.  It is somewhat independent of the media encoding, but the +   encoding choice may be limited by the session bandwidth.  Often, the +   session bandwidth is the sum of the nominal bandwidths of the senders +   expected to be concurrently active.  For teleconference audio, this +   number would typically be one sender's bandwidth.  For layered +   encodings, each layer is a separate RTP session with its own session +   bandwidth parameter. + +   The session bandwidth parameter is expected to be supplied by a +   session management application when it invokes a media application, +   but media applications MAY set a default based on the single-sender +   data bandwidth for the encoding selected for the session.  
The +   application MAY also enforce bandwidth limits based on multicast +   scope rules or other criteria.  All participants MUST use the same +   value for the session bandwidth so that the same RTCP interval will +   be calculated. + +   Bandwidth calculations for control and data traffic include lower- +   layer transport and network protocols (e.g., UDP and IP) since that +   is what the resource reservation system would need to know.  The +   application can also be expected to know which of these protocols are +   in use.  Link level headers are not included in the calculation since +   the packet will be encapsulated with different link level headers as +   it travels. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 24] + +RFC 3550                          RTP                          July 2003 + + +   The control traffic should be limited to a small and known fraction +   of the session bandwidth: small so that the primary function of the +   transport protocol to carry data is not impaired; known so that the +   control traffic can be included in the bandwidth specification given +   to a resource reservation protocol, and so that each participant can +   independently calculate its share.  The control traffic bandwidth is +   in addition to the session bandwidth for the data traffic.  It is +   RECOMMENDED that the fraction of the session bandwidth added for RTCP +   be fixed at 5%.  It is also RECOMMENDED that 1/4 of the RTCP +   bandwidth be dedicated to participants that are sending data so that +   in sessions with a large number of receivers but a small number of +   senders, newly joining participants will more quickly receive the +   CNAME for the sending sites.  When the proportion of senders is +   greater than 1/4 of the participants, the senders get their +   proportion of the full RTCP bandwidth.  
While the values of these and +   other constants in the interval calculation are not critical, all +   participants in the session MUST use the same values so the same +   interval will be calculated.  Therefore, these constants SHOULD be +   fixed for a particular profile. + +   A profile MAY specify that the control traffic bandwidth may be a +   separate parameter of the session rather than a strict percentage of +   the session bandwidth.  Using a separate parameter allows rate- +   adaptive applications to set an RTCP bandwidth consistent with a +   "typical" data bandwidth that is lower than the maximum bandwidth +   specified by the session bandwidth parameter. + +   The profile MAY further specify that the control traffic bandwidth +   may be divided into two separate session parameters for those +   participants which are active data senders and those which are not; +   let us call the parameters S and R.  Following the recommendation +   that 1/4 of the RTCP bandwidth be dedicated to data senders, the +   RECOMMENDED default values for these two parameters would be 1.25% +   and 3.75%, respectively.  When the proportion of senders is greater +   than S/(S+R) of the participants, the senders get their proportion of +   the sum of these parameters.  Using two parameters allows RTCP +   reception reports to be turned off entirely for a particular session +   by setting the RTCP bandwidth for non-data-senders to zero while +   keeping the RTCP bandwidth for data senders non-zero so that sender +   reports can still be sent for inter-media synchronization.  Turning +   off RTCP reception reports is NOT RECOMMENDED because they are needed +   for the functions listed at the beginning of Section 6, particularly +   reception quality feedback and congestion control.  
However, doing so +   may be appropriate for systems operating on unidirectional links or +   for sessions that don't require feedback on the quality of reception +   or liveness of receivers and that have other means to avoid +   congestion. + + + + +Schulzrinne, et al.         Standards Track                    [Page 25] + +RFC 3550                          RTP                          July 2003 + + +   The calculated interval between transmissions of compound RTCP +   packets SHOULD also have a lower bound to avoid having bursts of +   packets exceed the allowed bandwidth when the number of participants +   is small and the traffic isn't smoothed according to the law of large +   numbers.  It also keeps the report interval from becoming too small +   during transient outages like a network partition such that +   adaptation is delayed when the partition heals.  At application +   startup, a delay SHOULD be imposed before the first compound RTCP +   packet is sent to allow time for RTCP packets to be received from +   other participants so the report interval will converge to the +   correct value more quickly.  This delay MAY be set to half the +   minimum interval to allow quicker notification that the new +   participant is present.  The RECOMMENDED value for a fixed minimum +   interval is 5 seconds. + +   An implementation MAY scale the minimum RTCP interval to a smaller +   value inversely proportional to the session bandwidth parameter with +   the following limitations: + +   o  For multicast sessions, only active data senders MAY use the +      reduced minimum value to calculate the interval for transmission +      of compound RTCP packets. + +   o  For unicast sessions, the reduced value MAY be used by +      participants that are not active data senders as well, and the +      delay before sending the initial compound RTCP packet MAY be zero. 
+ +   o  For all sessions, the fixed minimum SHOULD be used when +      calculating the participant timeout interval (see Section 6.3.5) +      so that implementations which do not use the reduced value for +      transmitting RTCP packets are not timed out by other participants +      prematurely. + +   o  The RECOMMENDED value for the reduced minimum in seconds is 360 +      divided by the session bandwidth in kilobits/second.  This minimum +      is smaller than 5 seconds for bandwidths greater than 72 kb/s. + +   The algorithm described in Section 6.3 and Appendix A.7 was designed +   to meet the goals outlined in this section.  It calculates the +   interval between sending compound RTCP packets to divide the allowed +   control traffic bandwidth among the participants.  This allows an +   application to provide fast response for small sessions where, for +   example, identification of all participants is important, yet +   automatically adapt to large sessions.  The algorithm incorporates +   the following characteristics: + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 26] + +RFC 3550                          RTP                          July 2003 + + +   o  The calculated interval between RTCP packets scales linearly with +      the number of members in the group.  It is this linear factor +      which allows for a constant amount of control traffic when summed +      across all members. + +   o  The interval between RTCP packets is varied randomly over the +      range [0.5,1.5] times the calculated interval to avoid unintended +      synchronization of all participants [20].  The first RTCP packet +      sent after joining a session is also delayed by a random variation +      of half the minimum RTCP interval. 
+ +   o  A dynamic estimate of the average compound RTCP packet size is +      calculated, including all those packets received and sent, to +      automatically adapt to changes in the amount of control +      information carried. + +   o  Since the calculated interval is dependent on the number of +      observed group members, there may be undesirable startup effects +      when a new user joins an existing session, or many users +      simultaneously join a new session.  These new users will initially +      have incorrect estimates of the group membership, and thus their +      RTCP transmission interval will be too short.  This problem can be +      significant if many users join the session simultaneously.  To +      deal with this, an algorithm called "timer reconsideration" is +      employed.  This algorithm implements a simple back-off mechanism +      which causes users to hold back RTCP packet transmission if the +      group sizes are increasing. + +   o  When users leave a session, either with a BYE or by timeout, the +      group membership decreases, and thus the calculated interval +      should decrease.  A "reverse reconsideration" algorithm is used to +      allow members to more quickly reduce their intervals in response +      to group membership decreases. + +   o  BYE packets are given different treatment than other RTCP packets. +      When a user leaves a group, and wishes to send a BYE packet, it +      may do so before its next scheduled RTCP packet.  However, +      transmission of BYEs follows a back-off algorithm which avoids +      floods of BYE packets should a large number of members +      simultaneously leave the session. + +   This algorithm may be used for sessions in which all participants are +   allowed to send.  In that case, the session bandwidth parameter is +   the product of the individual sender's bandwidth times the number of +   participants, and the RTCP bandwidth is 5% of that. 
+ +   Details of the algorithm's operation are given in the sections that +   follow.  Appendix A.7 gives an example implementation. + + + +Schulzrinne, et al.         Standards Track                    [Page 27] + +RFC 3550                          RTP                          July 2003 + + +6.2.1 Maintaining the Number of Session Members + +   Calculation of the RTCP packet interval depends upon an estimate of +   the number of sites participating in the session.  New sites are +   added to the count when they are heard, and an entry for each SHOULD +   be created in a table indexed by the SSRC or CSRC identifier (see +   Section 8.2) to keep track of them.  New entries MAY be considered +   not valid until multiple packets carrying the new SSRC have been +   received (see Appendix A.1), or until an SDES RTCP packet containing +   a CNAME for that SSRC has been received.  Entries MAY be deleted from +   the table when an RTCP BYE packet with the corresponding SSRC +   identifier is received, except that some straggler data packets might +   arrive after the BYE and cause the entry to be recreated.  Instead, +   the entry SHOULD be marked as having received a BYE and then deleted +   after an appropriate delay. + +   A participant MAY mark another site inactive, or delete it if not yet +   valid, if no RTP or RTCP packet has been received for a small number +   of RTCP report intervals (5 is RECOMMENDED).  This provides some +   robustness against packet loss.  All sites must have the same value +   for this multiplier and must calculate roughly the same value for the +   RTCP report interval in order for this timeout to work properly. +   Therefore, this multiplier SHOULD be fixed for a particular profile. + +   For sessions with a very large number of participants, it may be +   impractical to maintain a table to store the SSRC identifier and +   state information for all of them.  
An implementation MAY use SSRC +   sampling, as described in [21], to reduce the storage requirements. +   An implementation MAY use any other algorithm with similar +   performance.  A key requirement is that any algorithm considered +   SHOULD NOT substantially underestimate the group size, although it +   MAY overestimate. + +6.3 RTCP Packet Send and Receive Rules + +   The rules for how to send, and what to do when receiving an RTCP +   packet are outlined here.  An implementation that allows operation in +   a multicast environment or a multipoint unicast environment MUST meet +   the requirements in Section 6.2.  Such an implementation MAY use the +   algorithm defined in this section to meet those requirements, or MAY +   use some other algorithm so long as it provides equivalent or better +   performance.  An implementation which is constrained to two-party +   unicast operation SHOULD still use randomization of the RTCP +   transmission interval to avoid unintended synchronization of multiple +   instances operating in the same environment, but MAY omit the "timer +   reconsideration" and "reverse reconsideration" algorithms in Sections +   6.3.3, 6.3.6 and 6.3.7. + + + + +Schulzrinne, et al.         
Standards Track                    [Page 28] + +RFC 3550                          RTP                          July 2003 + + +   To execute these rules, a session participant must maintain several +   pieces of state: + +   tp: the last time an RTCP packet was transmitted; + +   tc: the current time; + +   tn: the next scheduled transmission time of an RTCP packet; + +   pmembers: the estimated number of session members at the time tn +      was last recomputed; + +   members: the most current estimate for the number of session +      members; + +   senders: the most current estimate for the number of senders in +      the session; + +   rtcp_bw: The target RTCP bandwidth, i.e., the total bandwidth +      that will be used for RTCP packets by all members of this session, +      in octets per second.  This will be a specified fraction of the +      "session bandwidth" parameter supplied to the application at +      startup. + +   we_sent: Flag that is true if the application has sent data +      since the 2nd previous RTCP report was transmitted. + +   avg_rtcp_size: The average compound RTCP packet size, in octets, +      over all RTCP packets sent and received by this participant.  The +      size includes lower-layer transport and network protocol headers +      (e.g., UDP and IP) as explained in Section 6.2. + +   initial: Flag that is true if the application has not yet sent +      an RTCP packet. + +   Many of these rules make use of the "calculated interval" between +   packet transmissions.  This interval is described in the following +   section. + +6.3.1 Computing the RTCP Transmission Interval + +   To maintain scalability, the average interval between packets from a +   session participant should scale with the group size.  This interval +   is called the calculated interval.  It is obtained by combining a +   number of the pieces of state described above.  The calculated +   interval T is then determined as follows: + + + + + +Schulzrinne, et al.        
 Standards Track                    [Page 29] + +RFC 3550                          RTP                          July 2003 + + +   1. If the number of senders is less than or equal to 25% of the +      membership (members), the interval depends on whether the +      participant is a sender or not (based on the value of we_sent). +      If the participant is a sender (we_sent true), the constant C is +      set to the average RTCP packet size (avg_rtcp_size) divided by 25% +      of the RTCP bandwidth (rtcp_bw), and the constant n is set to the +      number of senders.  If we_sent is not true, the constant C is set +      to the average RTCP packet size divided by 75% of the RTCP +      bandwidth.  The constant n is set to the number of receivers +      (members - senders).  If the number of senders is greater than +      25%, senders and receivers are treated together.  The constant C +      is set to the average RTCP packet size divided by the total RTCP +      bandwidth and n is set to the total number of members.  As stated +      in Section 6.2, an RTP profile MAY specify that the RTCP bandwidth +      may be explicitly defined by two separate parameters (call them S +      and R) for those participants which are senders and those which +      are not.  In that case, the 25% fraction becomes S/(S+R) and the +      75% fraction becomes R/(S+R).  Note that if R is zero, the +      percentage of senders is never greater than S/(S+R), and the +      implementation must avoid division by zero. + +   2. If the participant has not yet sent an RTCP packet (the variable +      initial is true), the constant Tmin is set to 2.5 seconds, else it +      is set to 5 seconds. + +   3. The deterministic calculated interval Td is set to max(Tmin, n*C). + +   4. The calculated interval T is set to a number uniformly distributed +      between 0.5 and 1.5 times the deterministic calculated interval. + +   5. 
The resulting value of T is divided by e - 3/2 = 1.21828 (where e is +      the base of the natural logarithm) to compensate +      for the fact that the timer reconsideration algorithm converges to +      a value of the RTCP bandwidth below the intended average. + +   This procedure results in an interval which is random, but which, on +   average, gives at least 25% of the RTCP bandwidth to senders and the +   rest to receivers.  If the senders constitute more than one quarter +   of the membership, this procedure splits the bandwidth equally among +   all participants, on average. + +6.3.2 Initialization + +   Upon joining the session, the participant initializes tp to 0, tc to +   0, senders to 0, pmembers to 1, members to 1, we_sent to false, +   rtcp_bw to the specified fraction of the session bandwidth, initial +   to true, and avg_rtcp_size to the probable size of the first RTCP +   packet that the application will later construct.  The calculated +   interval T is then computed, and the first packet is scheduled for + + + +Schulzrinne, et al.         Standards Track                    [Page 30] + +RFC 3550                          RTP                          July 2003 + + +   time tn = T.  This means that a transmission timer is set which +   expires at time T.  Note that an application MAY use any desired +   approach for implementing this timer. + +   The participant adds its own SSRC to the member table. + +6.3.3 Receiving an RTP or Non-BYE RTCP Packet + +   When an RTP or RTCP packet is received from a participant whose SSRC +   is not in the member table, the SSRC is added to the table, and the +   value for members is updated once the participant has been validated +   as described in Section 6.2.1.  The same processing occurs for each +   CSRC in a validated RTP packet. + +   When an RTP packet is received from a participant whose SSRC is not +   in the sender table, the SSRC is added to the table, and the value +   for senders is updated. 
+ +   For each compound RTCP packet received, the value of avg_rtcp_size is +   updated: + +      avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size + +   where packet_size is the size of the RTCP packet just received. + +6.3.4 Receiving an RTCP BYE Packet + +   Except as described in Section 6.3.7 for the case when an RTCP BYE is +   to be transmitted, if the received packet is an RTCP BYE packet, the +   SSRC is checked against the member table.  If present, the entry is +   removed from the table, and the value for members is updated.  The +   SSRC is then checked against the sender table.  If present, the entry +   is removed from the table, and the value for senders is updated. + +   Furthermore, to make the transmission rate of RTCP packets more +   adaptive to changes in group membership, the following "reverse +   reconsideration" algorithm SHOULD be executed when a BYE packet is +   received that reduces members to a value less than pmembers: + +   o  The value for tn is updated according to the following formula: + +         tn = tc + (members/pmembers) * (tn - tc) + +   o  The value for tp is updated according to the following formula: + +         tp = tc - (members/pmembers) * (tc - tp). + + + + + +Schulzrinne, et al.         Standards Track                    [Page 31] + +RFC 3550                          RTP                          July 2003 + + +   o  The next RTCP packet is rescheduled for transmission at time tn, +      which is now earlier. + +   o  The value of pmembers is set equal to members. + +   This algorithm does not prevent the group size estimate from +   incorrectly dropping to zero for a short time due to premature +   timeouts when most participants of a large session leave at once but +   some remain.  The algorithm does make the estimate return to the +   correct value more rapidly.  This situation is unusual enough and the +   consequences are sufficiently harmless that this problem is deemed +   only a secondary concern. 
+ +6.3.5 Timing Out an SSRC + +   At occasional intervals, the participant MUST check to see if any of +   the other participants time out.  To do this, the participant +   computes the deterministic (without the randomization factor) +   calculated interval Td for a receiver, that is, with we_sent false. +   Any other session member who has not sent an RTP or RTCP packet since +   time tc - MTd (M is the timeout multiplier, and defaults to 5) is +   timed out.  This means that its SSRC is removed from the member list, +   and members is updated.  A similar check is performed on the sender +   list.  Any member on the sender list who has not sent an RTP packet +   since time tc - 2T (within the last two RTCP report intervals) is +   removed from the sender list, and senders is updated. + +   If any members time out, the reverse reconsideration algorithm +   described in Section 6.3.4 SHOULD be performed. + +   The participant MUST perform this check at least once per RTCP +   transmission interval. + +6.3.6 Expiration of Transmission Timer + +   When the packet transmission timer expires, the participant performs +   the following operations: + +   o  The transmission interval T is computed as described in Section +      6.3.1, including the randomization factor. + +   o  If tp + T is less than or equal to tc, an RTCP packet is +      transmitted.  tp is set to tc, then another value for T is +      calculated as in the previous step and tn is set to tc + T.  The +      transmission timer is set to expire again at time tn.  If tp + T +      is greater than tc, tn is set to tp + T.  No RTCP packet is +      transmitted.  The transmission timer is set to expire at time tn. + + + + +Schulzrinne, et al.         Standards Track                    [Page 32] + +RFC 3550                          RTP                          July 2003 + + +   o  pmembers is set to members. + +   If an RTCP packet is transmitted, the value of initial is set to +   FALSE.  
Furthermore, the value of avg_rtcp_size is updated: + +      avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size + +   where packet_size is the size of the RTCP packet just transmitted. + +6.3.7 Transmitting a BYE Packet + +   When a participant wishes to leave a session, a BYE packet is +   transmitted to inform the other participants of the event.  In order +   to avoid a flood of BYE packets when many participants leave the +   system, a participant MUST execute the following algorithm if the +   number of members is more than 50 when the participant chooses to +   leave.  This algorithm usurps the normal role of the members variable +   to count BYE packets instead: + +   o  When the participant decides to leave the system, tp is reset to +      tc, the current time, members and pmembers are initialized to 1, +      initial is set to 1, we_sent is set to false, senders is set to 0, +      and avg_rtcp_size is set to the size of the compound BYE packet. +      The calculated interval T is computed.  The BYE packet is then +      scheduled for time tn = tc + T. + +   o  Every time a BYE packet from another participant is received, +      members is incremented by 1 regardless of whether that participant +      exists in the member table or not, and when SSRC sampling is in +      use, regardless of whether or not the BYE SSRC would be included +      in the sample.  members is NOT incremented when other RTCP packets +      or RTP packets are received, but only for BYE packets.  Similarly, +      avg_rtcp_size is updated only for received BYE packets.  senders +      is NOT updated when RTP packets arrive; it remains 0. + +   o  Transmission of the BYE packet then follows the rules for +      transmitting a regular RTCP packet, as above. + +   This allows BYE packets to be sent right away, yet controls their +   total bandwidth usage.  
In the worst case, this could cause RTCP +   control packets to use twice the bandwidth as normal (10%) -- 5% for +   non-BYE RTCP packets and 5% for BYE. + +   A participant that does not want to wait for the above mechanism to +   allow transmission of a BYE packet MAY leave the group without +   sending a BYE at all.  That participant will eventually be timed out +   by the other group members. + + + + +Schulzrinne, et al.         Standards Track                    [Page 33] + +RFC 3550                          RTP                          July 2003 + + +   If the group size estimate members is less than 50 when the +   participant decides to leave, the participant MAY send a BYE packet +   immediately.  Alternatively, the participant MAY choose to execute +   the above BYE backoff algorithm. + +   In either case, a participant which never sent an RTP or RTCP packet +   MUST NOT send a BYE packet when they leave the group. + +6.3.8 Updating we_sent + +   The variable we_sent contains true if the participant has sent an RTP +   packet recently, false otherwise.  This determination is made by +   using the same mechanisms as for managing the set of other +   participants listed in the senders table.  If the participant sends +   an RTP packet when we_sent is false, it adds itself to the sender +   table and sets we_sent to true.  The reverse reconsideration +   algorithm described in Section 6.3.4 SHOULD be performed to possibly +   reduce the delay before sending an SR packet.  Every time another RTP +   packet is sent, the time of transmission of that packet is maintained +   in the table.  The normal sender timeout algorithm is then applied to +   the participant -- if an RTP packet has not been transmitted since +   time tc - 2T, the participant removes itself from the sender table, +   decrements the sender count, and sets we_sent to false. 
+ +6.3.9 Allocation of Source Description Bandwidth + +   This specification defines several source description (SDES) items in +   addition to the mandatory CNAME item, such as NAME (personal name) +   and EMAIL (email address).  It also provides a means to define new +   application-specific RTCP packet types.  Applications should exercise +   caution in allocating control bandwidth to this additional +   information because it will slow down the rate at which reception +   reports and CNAME are sent, thus impairing the performance of the +   protocol.  It is RECOMMENDED that no more than 20% of the RTCP +   bandwidth allocated to a single participant be used to carry the +   additional information.  Furthermore, it is not intended that all +   SDES items will be included in every application.  Those that are +   included SHOULD be assigned a fraction of the bandwidth according to +   their utility.  Rather than estimate these fractions dynamically, it +   is recommended that the percentages be translated statically into +   report interval counts based on the typical length of an item. + +   For example, an application may be designed to send only CNAME, NAME +   and EMAIL and not any others.  NAME might be given much higher +   priority than EMAIL because the NAME would be displayed continuously +   in the application's user interface, whereas EMAIL would be displayed +   only when requested.  At every RTCP interval, an RR packet and an +   SDES packet with the CNAME item would be sent.  For a small session + + + +Schulzrinne, et al.         Standards Track                    [Page 34] + +RFC 3550                          RTP                          July 2003 + + +   operating at the minimum interval, that would be every 5 seconds on +   the average.  Every third interval (15 seconds), one extra item would +   be included in the SDES packet.  
Seven out of eight times this would +   be the NAME item, and every eighth time (2 minutes) it would be the +   EMAIL item. + +   When multiple applications operate in concert using cross-application +   binding through a common CNAME for each participant, for example in a +   multimedia conference composed of an RTP session for each medium, the +   additional SDES information MAY be sent in only one RTP session.  The +   other sessions would carry only the CNAME item.  In particular, this +   approach should be applied to the multiple sessions of a layered +   encoding scheme (see Section 2.4). + +6.4 Sender and Receiver Reports + +   RTP receivers provide reception quality feedback using RTCP report +   packets which may take one of two forms depending upon whether or not +   the receiver is also a sender.  The only difference between the +   sender report (SR) and receiver report (RR) forms, besides the packet +   type code, is that the sender report includes a 20-byte sender +   information section for use by active senders.  The SR is issued if a +   site has sent any data packets during the interval since issuing the +   last report or the previous one, otherwise the RR is issued. + +   Both the SR and RR forms include zero or more reception report +   blocks, one for each of the synchronization sources from which this +   receiver has received RTP data packets since the last report. +   Reports are not issued for contributing sources listed in the CSRC +   list.  Each reception report block provides statistics about the data +   received from the particular source indicated in that block.  Since a +   maximum of 31 reception report blocks will fit in an SR or RR packet, +   additional RR packets SHOULD be stacked after the initial SR or RR +   packet as needed to contain the reception reports for all sources +   heard during the interval since the last report.  
If there are too +   many sources to fit all the necessary RR packets into one compound +   RTCP packet without exceeding the MTU of the network path, then only +   the subset that will fit into one MTU SHOULD be included in each +   interval.  The subsets SHOULD be selected round-robin across multiple +   intervals so that all sources are reported. + +   The next sections define the formats of the two reports, how they may +   be extended in a profile-specific manner if an application requires +   additional feedback information, and how the reports may be used. +   Details of reception reporting by translators and mixers is given in +   Section 7. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 35] + +RFC 3550                          RTP                          July 2003 + + +6.4.1 SR: Sender Report RTCP Packet + +        0                   1                   2                   3 +        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P|    RC   |   PT=SR=200   |             length            | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                         SSRC of sender                        | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +sender |              NTP timestamp, most significant word             | +info   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |             NTP timestamp, least significant word             | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                         RTP timestamp                         | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                     sender's packet count                     | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                      
sender's octet count                     | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report |                 SSRC_1 (SSRC of first source)                 | +block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +  1    | fraction lost |       cumulative number of packets lost       | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |           extended highest sequence number received           | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                      interarrival jitter                      | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                         last SR (LSR)                         | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                   delay since last SR (DLSR)                  | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report |                 SSRC_2 (SSRC of second source)                | +block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +  2    :                               ...                             : +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +       |                  profile-specific extensions                  | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The sender report packet consists of three sections, possibly +   followed by a fourth profile-specific extension section if defined. +   The first section, the header, is 8 octets long.  The fields have the +   following meaning: + +   version (V): 2 bits +      Identifies the version of RTP, which is the same in RTCP packets +      as in RTP data packets.  The version defined by this specification +      is two (2). + + + + +Schulzrinne, et al.         
Standards Track                    [Page 36] + +RFC 3550                          RTP                          July 2003 + + +   padding (P): 1 bit +      If the padding bit is set, this individual RTCP packet contains +      some additional padding octets at the end which are not part of +      the control information but are included in the length field.  The +      last octet of the padding is a count of how many padding octets +      should be ignored, including itself (it will be a multiple of +      four).  Padding may be needed by some encryption algorithms with +      fixed block sizes.  In a compound RTCP packet, padding is only +      required on one individual packet because the compound packet is +      encrypted as a whole for the method in Section 9.1.  Thus, padding +      MUST only be added to the last individual packet, and if padding +      is added to that packet, the padding bit MUST be set only on that +      packet.  This convention aids the header validity checks described +      in Appendix A.2 and allows detection of packets from some early +      implementations that incorrectly set the padding bit on the first +      individual packet and add padding to the last individual packet. + +   reception report count (RC): 5 bits +      The number of reception report blocks contained in this packet.  A +      value of zero is valid. + +   packet type (PT): 8 bits +      Contains the constant 200 to identify this as an RTCP SR packet. + +   length: 16 bits +      The length of this RTCP packet in 32-bit words minus one, +      including the header and any padding.  (The offset of one makes +      zero a valid length and avoids a possible infinite loop in +      scanning a compound RTCP packet, while counting 32-bit words +      avoids a validity check for a multiple of 4.) + +   SSRC: 32 bits +      The synchronization source identifier for the originator of this +      SR packet. 
+ +   The second section, the sender information, is 20 octets long and is +   present in every sender report packet.  It summarizes the data +   transmissions from this sender.  The fields have the following +   meaning: + +   NTP timestamp: 64 bits +      Indicates the wallclock time (see Section 4) when this report was +      sent so that it may be used in combination with timestamps +      returned in reception reports from other receivers to measure +      round-trip propagation to those receivers.  Receivers should +      expect that the measurement accuracy of the timestamp may be +      limited to far less than the resolution of the NTP timestamp.  The +      measurement uncertainty of the timestamp is not indicated as it + + + +Schulzrinne, et al.         Standards Track                    [Page 37] + +RFC 3550                          RTP                          July 2003 + + +      may not be known.  On a system that has no notion of wallclock +      time but does have some system-specific clock such as "system +      uptime", a sender MAY use that clock as a reference to calculate +      relative NTP timestamps.  It is important to choose a commonly +      used clock so that if separate implementations are used to produce +      the individual streams of a multimedia session, all +      implementations will use the same clock.  Until the year 2036, +      relative and absolute timestamps will differ in the high bit so +      (invalid) comparisons will show a large difference; by then one +      hopes relative timestamps will no longer be needed.  A sender that +      has no notion of wallclock or elapsed time MAY set the NTP +      timestamp to zero. + +   RTP timestamp: 32 bits +      Corresponds to the same time as the NTP timestamp (above), but in +      the same units and with the same random offset as the RTP +      timestamps in data packets.  
This correspondence may be used for +      intra- and inter-media synchronization for sources whose NTP +      timestamps are synchronized, and may be used by media-independent +      receivers to estimate the nominal RTP clock frequency.  Note that +      in most cases this timestamp will not be equal to the RTP +      timestamp in any adjacent data packet.  Rather, it MUST be +      calculated from the corresponding NTP timestamp using the +      relationship between the RTP timestamp counter and real time as +      maintained by periodically checking the wallclock time at a +      sampling instant. + +   sender's packet count: 32 bits +      The total number of RTP data packets transmitted by the sender +      since starting transmission up until the time this SR packet was +      generated.  The count SHOULD be reset if the sender changes its +      SSRC identifier. + +   sender's octet count: 32 bits +      The total number of payload octets (i.e., not including header or +      padding) transmitted in RTP data packets by the sender since +      starting transmission up until the time this SR packet was +      generated.  The count SHOULD be reset if the sender changes its +      SSRC identifier.  This field can be used to estimate the average +      payload data rate. + +   The third section contains zero or more reception report blocks +   depending on the number of other sources heard by this sender since +   the last report.  Each reception report block conveys statistics on +   the reception of RTP packets from a single synchronization source. +   Receivers SHOULD NOT carry over statistics when a source changes its +   SSRC identifier due to a collision.  These statistics are: + + + + +Schulzrinne, et al.         
Standards Track                    [Page 38] + +RFC 3550                          RTP                          July 2003 + + +   SSRC_n (source identifier): 32 bits +      The SSRC identifier of the source to which the information in this +      reception report block pertains. + +   fraction lost: 8 bits +      The fraction of RTP data packets from source SSRC_n lost since the +      previous SR or RR packet was sent, expressed as a fixed point +      number with the binary point at the left edge of the field.  (That +      is equivalent to taking the integer part after multiplying the +      loss fraction by 256.)  This fraction is defined to be the number +      of packets lost divided by the number of packets expected, as +      defined in the next paragraph.  An implementation is shown in +      Appendix A.3.  If the loss is negative due to duplicates, the +      fraction lost is set to zero.  Note that a receiver cannot tell +      whether any packets were lost after the last one received, and +      that there will be no reception report block issued for a source +      if all packets from that source sent during the last reporting +      interval have been lost. + +   cumulative number of packets lost: 24 bits +      The total number of RTP data packets from source SSRC_n that have +      been lost since the beginning of reception.  This number is +      defined to be the number of packets expected less the number of +      packets actually received, where the number of packets received +      includes any which are late or duplicates.  Thus, packets that +      arrive late are not counted as lost, and the loss may be negative +      if there are duplicates.  The number of packets expected is +      defined to be the extended last sequence number received, as +      defined next, less the initial sequence number received.  This may +      be calculated as shown in Appendix A.3. 
+ +   extended highest sequence number received: 32 bits +      The low 16 bits contain the highest sequence number received in an +      RTP data packet from source SSRC_n, and the most significant 16 +      bits extend that sequence number with the corresponding count of +      sequence number cycles, which may be maintained according to the +      algorithm in Appendix A.1.  Note that different receivers within +      the same session will generate different extensions to the +      sequence number if their start times differ significantly. + +   interarrival jitter: 32 bits +      An estimate of the statistical variance of the RTP data packet +      interarrival time, measured in timestamp units and expressed as an +      unsigned integer.  The interarrival jitter J is defined to be the +      mean deviation (smoothed absolute value) of the difference D in +      packet spacing at the receiver compared to the sender for a pair +      of packets.  As shown in the equation below, this is equivalent to +      the difference in the "relative transit time" for the two packets; + + + +Schulzrinne, et al.         Standards Track                    [Page 39] + +RFC 3550                          RTP                          July 2003 + + +      the relative transit time is the difference between a packet's RTP +      timestamp and the receiver's clock at the time of arrival, +      measured in the same units. 
+ +      If Si is the RTP timestamp from packet i, and Ri is the time of +      arrival in RTP timestamp units for packet i, then for two packets +      i and j, D may be expressed as + +         D(i,j) = (Rj - Ri) - (Sj - Si) = (Rj - Sj) - (Ri - Si) + +      The interarrival jitter SHOULD be calculated continuously as each +      data packet i is received from source SSRC_n, using this +      difference D for that packet and the previous packet i-1 in order +      of arrival (not necessarily in sequence), according to the formula + +         J(i) = J(i-1) + (|D(i-1,i)| - J(i-1))/16 + +      Whenever a reception report is issued, the current value of J is +      sampled. + +      The jitter calculation MUST conform to the formula specified here +      in order to allow profile-independent monitors to make valid +      interpretations of reports coming from different implementations. +      This algorithm is the optimal first-order estimator and the gain +      parameter 1/16 gives a good noise reduction ratio while +      maintaining a reasonable rate of convergence [22].  A sample +      implementation is shown in Appendix A.8.  See Section 6.4.4 for a +      discussion of the effects of varying packet duration and delay +      before transmission. + +   last SR timestamp (LSR): 32 bits +      The middle 32 bits out of 64 in the NTP timestamp (as explained in +      Section 4) received as part of the most recent RTCP sender report +      (SR) packet from source SSRC_n.  If no SR has been received yet, +      the field is set to zero. + +   delay since last SR (DLSR): 32 bits +      The delay, expressed in units of 1/65536 seconds, between +      receiving the last SR packet from source SSRC_n and sending this +      reception report block.  If no SR packet has been received yet +      from SSRC_n, the DLSR field is set to zero. + +      Let SSRC_r denote the receiver issuing this receiver report. 
+      Source SSRC_n can compute the round-trip propagation delay to +      SSRC_r by recording the time A when this reception report block is +      received.  It calculates the total round-trip time A-LSR using the +      last SR timestamp (LSR) field, and then subtracting this field to +      leave the round-trip propagation delay as (A - LSR - DLSR).  This + + + +Schulzrinne, et al.         Standards Track                    [Page 40] + +RFC 3550                          RTP                          July 2003 + + +      is illustrated in Fig. 2.  Times are shown in both a hexadecimal +      representation of the 32-bit fields and the equivalent floating- +      point decimal representation.  Colons indicate a 32-bit field +      divided into a 16-bit integer part and 16-bit fraction part. + +      This may be used as an approximate measure of distance to cluster +      receivers, although some links have very asymmetric delays. + +   [10 Nov 1995 11:33:25.125 UTC]       [10 Nov 1995 11:33:36.5 UTC] +   n                 SR(n)              A=b710:8000 (46864.500 s) +   ----------------------------------------------------------------> +                      v                 ^ +   ntp_sec =0xb44db705 v               ^ dlsr=0x0005:4000 (    5.250s) +   ntp_frac=0x20000000  v             ^  lsr =0xb705:2000 (46853.125s) +     (3024992005.125 s)  v           ^ +   r                      v         ^ RR(n) +   ----------------------------------------------------------------> +                          |<-DLSR->| +                           (5.250 s) + +   A     0xb710:8000 (46864.500 s) +   DLSR -0x0005:4000 (    5.250 s) +   LSR  -0xb705:2000 (46853.125 s) +   ------------------------------- +   delay 0x0006:2000 (    6.125 s) + +           Figure 2: Example for round-trip time computation + + + + + + + + + + + + + + + + + + + + + + + + +Schulzrinne, et al.         
Standards Track                    [Page 41] + +RFC 3550                          RTP                          July 2003 + + +6.4.2 RR: Receiver Report RTCP Packet + +        0                   1                   2                   3 +        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P|    RC   |   PT=RR=201   |             length            | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                     SSRC of packet sender                     | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report |                 SSRC_1 (SSRC of first source)                 | +block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +  1    | fraction lost |       cumulative number of packets lost       | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |           extended highest sequence number received           | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                      interarrival jitter                      | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                         last SR (LSR)                         | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                   delay since last SR (DLSR)                  | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report |                 SSRC_2 (SSRC of second source)                | +block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +  2    :                               ...                             
: +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +       |                  profile-specific extensions                  | +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The format of the receiver report (RR) packet is the same as that of +   the SR packet except that the packet type field contains the constant +   201 and the five words of sender information are omitted (these are +   the NTP and RTP timestamps and sender's packet and octet counts). +   The remaining fields have the same meaning as for the SR packet. + +   An empty RR packet (RC = 0) MUST be put at the head of a compound +   RTCP packet when there is no data transmission or reception to +   report. + +6.4.3 Extending the Sender and Receiver Reports + +   A profile SHOULD define profile-specific extensions to the sender +   report and receiver report if there is additional information that +   needs to be reported regularly about the sender or receivers.  This +   method SHOULD be used in preference to defining another RTCP packet +   type because it requires less overhead: + +   o  fewer octets in the packet (no RTCP header or SSRC field); + + + + +Schulzrinne, et al.         Standards Track                    [Page 42] + +RFC 3550                          RTP                          July 2003 + + +   o  simpler and faster parsing because applications running under that +      profile would be programmed to always expect the extension fields +      in the directly accessible location after the reception reports. + +   The extension is a fourth section in the sender- or receiver-report +   packet which comes at the end after the reception report blocks, if +   any.  If additional sender information is required, then for sender +   reports it would be included first in the extension section, but for +   receiver reports it would not be present.  
If information about +   receivers is to be included, that data SHOULD be structured as an +   array of blocks parallel to the existing array of reception report +   blocks; that is, the number of blocks would be indicated by the RC +   field. + +6.4.4 Analyzing Sender and Receiver Reports + +   It is expected that reception quality feedback will be useful not +   only for the sender but also for other receivers and third-party +   monitors.  The sender may modify its transmissions based on the +   feedback; receivers can determine whether problems are local, +   regional or global; network managers may use profile-independent +   monitors that receive only the RTCP packets and not the corresponding +   RTP data packets to evaluate the performance of their networks for +   multicast distribution. + +   Cumulative counts are used in both the sender information and +   receiver report blocks so that differences may be calculated between +   any two reports to make measurements over both short and long time +   periods, and to provide resilience against the loss of a report.  The +   difference between the last two reports received can be used to +   estimate the recent quality of the distribution.  The NTP timestamp +   is included so that rates may be calculated from these differences +   over the interval between two reports.  Since that timestamp is +   independent of the clock rate for the data encoding, it is possible +   to implement encoding- and profile-independent quality monitors. + +   An example calculation is the packet loss rate over the interval +   between two reception reports.  The difference in the cumulative +   number of packets lost gives the number lost during that interval. +   The difference in the extended last sequence numbers received gives +   the number of packets expected during the interval.  The ratio of +   these two is the packet loss fraction over the interval.  
This ratio +   should equal the fraction lost field if the two reports are +   consecutive, but otherwise it may not.  The loss rate per second can +   be obtained by dividing the loss fraction by the difference in NTP +   timestamps, expressed in seconds.  The number of packets received is +   the number of packets expected minus the number lost.  The number of + + + + +Schulzrinne, et al.         Standards Track                    [Page 43] + +RFC 3550                          RTP                          July 2003 + + +   packets expected may also be used to judge the statistical validity +   of any loss estimates.  For example, 1 out of 5 packets lost has a +   lower significance than 200 out of 1000. + +   From the sender information, a third-party monitor can calculate the +   average payload data rate and the average packet rate over an +   interval without receiving the data.  Taking the ratio of the two +   gives the average payload size.  If it can be assumed that packet +   loss is independent of packet size, then the number of packets +   received by a particular receiver times the average payload size (or +   the corresponding packet size) gives the apparent throughput +   available to that receiver. + +   In addition to the cumulative counts which allow long-term packet +   loss measurements using differences between reports, the fraction +   lost field provides a short-term measurement from a single report. +   This becomes more important as the size of a session scales up enough +   that reception state information might not be kept for all receivers +   or the interval between reports becomes long enough that only one +   report might have been received from a particular receiver. + +   The interarrival jitter field provides a second short-term measure of +   network congestion.  Packet loss tracks persistent congestion while +   the jitter measure tracks transient congestion.  
The jitter measure +   may indicate congestion before it leads to packet loss.  The +   interarrival jitter field is only a snapshot of the jitter at the +   time of a report and is not intended to be taken quantitatively. +   Rather, it is intended for comparison across a number of reports from +   one receiver over time or from multiple receivers, e.g., within a +   single network, at the same time.  To allow comparison across +   receivers, it is important that the jitter be calculated according to +   the same formula by all receivers. + +   Because the jitter calculation is based on the RTP timestamp which +   represents the instant when the first data in the packet was sampled, +   any variation in the delay between that sampling instant and the time +   the packet is transmitted will affect the resulting jitter that is +   calculated.  Such a variation in delay would occur for audio packets +   of varying duration.  It will also occur for video encodings because +   the timestamp is the same for all the packets of one frame but those +   packets are not all transmitted at the same time.  The variation in +   delay until transmission does reduce the accuracy of the jitter +   calculation as a measure of the behavior of the network by itself, +   but it is appropriate to include considering that the receiver buffer +   must accommodate it.  When the jitter calculation is used as a +   comparative measure, the (constant) component due to variation in +   delay until transmission subtracts out so that a change in the + + + + +Schulzrinne, et al.         Standards Track                    [Page 44] + +RFC 3550                          RTP                          July 2003 + + +   network jitter component can then be observed unless it is relatively +   small.  If the change is small, then it is likely to be +   inconsequential. 
+ +6.5 SDES: Source Description RTCP Packet + +        0                   1                   2                   3 +        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P|    SC   |  PT=SDES=202  |             length            | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +chunk  |                          SSRC/CSRC_1                          | +  1    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                           SDES items                          | +       |                              ...                              | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +chunk  |                          SSRC/CSRC_2                          | +  2    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +       |                           SDES items                          | +       |                              ...                              | +       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + +   The SDES packet is a three-level structure composed of a header and +   zero or more chunks, each of which is composed of items describing +   the source identified in that chunk.  The items are described +   individually in subsequent sections. + +   version (V), padding (P), length: +      As described for the SR packet (see Section 6.4.1). + +   packet type (PT): 8 bits +      Contains the constant 202 to identify this as an RTCP SDES packet. + +   source count (SC): 5 bits +      The number of SSRC/CSRC chunks contained in this SDES packet.  A +      value of zero is valid but useless. + +   Each chunk consists of an SSRC/CSRC identifier followed by a list of +   zero or more items, which carry information about the SSRC/CSRC. +   Each chunk starts on a 32-bit boundary.  
Each item consists of an 8- +   bit type field, an 8-bit octet count describing the length of the +   text (thus, not including this two-octet header), and the text +   itself.  Note that the text can be no longer than 255 octets, but +   this is consistent with the need to limit RTCP bandwidth consumption. + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 45] + +RFC 3550                          RTP                          July 2003 + + +   The text is encoded according to the UTF-8 encoding specified in RFC +   2279 [5].  US-ASCII is a subset of this encoding and requires no +   additional encoding.  The presence of multi-octet encodings is +   indicated by setting the most significant bit of a character to a +   value of one. + +   Items are contiguous, i.e., items are not individually padded to a +   32-bit boundary.  Text is not null terminated because some multi- +   octet encodings include null octets.  The list of items in each chunk +   MUST be terminated by one or more null octets, the first of which is +   interpreted as an item type of zero to denote the end of the list. +   No length octet follows the null item type octet, but additional null +   octets MUST be included if needed to pad until the next 32-bit +   boundary.  Note that this padding is separate from that indicated by +   the P bit in the RTCP header.  A chunk with zero items (four null +   octets) is valid but useless. + +   End systems send one SDES packet containing their own source +   identifier (the same as the SSRC in the fixed RTP header).  A mixer +   sends one SDES packet containing a chunk for each contributing source +   from which it is receiving SDES information, or multiple complete +   SDES packets in the format above if there are more than 31 such +   sources (see Section 7). + +   The SDES items currently defined are described in the next sections. +   Only the CNAME item is mandatory.  
Some items shown here may be +   useful only for particular profiles, but the item types are all +   assigned from one common space to promote shared use and to simplify +   profile-independent applications.  Additional items may be defined in +   a profile by registering the type numbers with IANA as described in +   Section 15. + +6.5.1 CNAME: Canonical End-Point Identifier SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    CNAME=1    |     length    | user and domain name        ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The CNAME identifier has the following properties: + +   o  Because the randomly allocated SSRC identifier may change if a +      conflict is discovered or if a program is restarted, the CNAME +      item MUST be included to provide the binding from the SSRC +      identifier to an identifier for the source (sender or receiver) +      that remains constant. + + + + +Schulzrinne, et al.         Standards Track                    [Page 46] + +RFC 3550                          RTP                          July 2003 + + +   o  Like the SSRC identifier, the CNAME identifier SHOULD also be +      unique among all participants within one RTP session. + +   o  To provide a binding across multiple media tools used by one +      participant in a set of related RTP sessions, the CNAME SHOULD be +      fixed for that participant. + +   o  To facilitate third-party monitoring, the CNAME SHOULD be suitable +      for either a program or a person to locate the source. + +   Therefore, the CNAME SHOULD be derived algorithmically and not +   entered manually, when possible.  To meet these requirements, the +   following format SHOULD be used unless a profile specifies an +   alternate syntax or semantics.  
The CNAME item SHOULD have the format +   "user@host", or "host" if a user name is not available as on single- +   user systems.  For both formats, "host" is either the fully qualified +   domain name of the host from which the real-time data originates, +   formatted according to the rules specified in RFC 1034 [6], RFC 1035 +   [7] and Section 2.1 of RFC 1123 [8]; or the standard ASCII +   representation of the host's numeric address on the interface used +   for the RTP communication.  For example, the standard ASCII +   representation of an IP Version 4 address is "dotted decimal", also +   known as dotted quad, and for IP Version 6, addresses are textually +   represented as groups of hexadecimal digits separated by colons (with +   variations as detailed in RFC 3513 [23]).  Other address types are +   expected to have ASCII representations that are mutually unique.  The +   fully qualified domain name is more convenient for a human observer +   and may avoid the need to send a NAME item in addition, but it may be +   difficult or impossible to obtain reliably in some operating +   environments.  Applications that may be run in such environments +   SHOULD use the ASCII representation of the address instead. + +   Examples are "doe@sleepy.example.com", "doe@192.0.2.89" or +   "doe@2201:056D::112E:144A:1E24" for a multi-user system.  On a system +   with no user name, examples would be "sleepy.example.com", +   "192.0.2.89" or "2201:056D::112E:144A:1E24". + +   The user name SHOULD be in a form that a program such as "finger" or +   "talk" could use, i.e., it typically is the login name rather than +   the personal name.  The host name is not necessarily identical to the +   one in the participant's electronic mail address. + +   This syntax will not provide unique identifiers for each source if an +   application permits a user to generate multiple sources from one +   host.  
Such an application would have to rely on the SSRC to further +   identify the source, or the profile for that application would have +   to specify additional syntax for the CNAME identifier. + + + + +Schulzrinne, et al.         Standards Track                    [Page 47] + +RFC 3550                          RTP                          July 2003 + + +   If each application creates its CNAME independently, the resulting +   CNAMEs may not be identical as would be required to provide a binding +   across multiple media tools belonging to one participant in a set of +   related RTP sessions.  If cross-media binding is required, it may be +   necessary for the CNAME of each tool to be externally configured with +   the same value by a coordination tool. + +   Application writers should be aware that private network address +   assignments such as the Net-10 assignment proposed in RFC 1918 [24] +   may create network addresses that are not globally unique.  This +   would lead to non-unique CNAMEs if hosts with private addresses and +   no direct IP connectivity to the public Internet have their RTP +   packets forwarded to the public Internet through an RTP-level +   translator.  (See also RFC 1627 [25].)  To handle this case, +   applications MAY provide a means to configure a unique CNAME, but the +   burden is on the translator to translate CNAMEs from private +   addresses to public addresses if necessary to keep private addresses +   from being exposed. + +6.5.2 NAME: User Name SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     NAME=2    |     length    | common name of source       ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   This is the real name used to describe the source, e.g., "John Doe, +   Bit Recycler".  
It may be in any form desired by the user.  For +   applications such as conferencing, this form of name may be the most +   desirable for display in participant lists, and therefore might be +   sent most frequently of those items other than CNAME.  Profiles MAY +   establish such priorities.  The NAME value is expected to remain +   constant at least for the duration of a session.  It SHOULD NOT be +   relied upon to be unique among all participants in the session. + +6.5.3 EMAIL: Electronic Mail Address SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    EMAIL=3    |     length    | email address of source     ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The email address is formatted according to RFC 2822 [9], for +   example, "John.Doe@example.com".  The EMAIL value is expected to +   remain constant for the duration of a session. + + + + +Schulzrinne, et al.         Standards Track                    [Page 48] + +RFC 3550                          RTP                          July 2003 + + +6.5.4 PHONE: Phone Number SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    PHONE=4    |     length    | phone number of source      ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The phone number SHOULD be formatted with the plus sign replacing the +   international access code.  For example, "+1 908 555 1212" for a +   number in the United States. 
+ +6.5.5 LOC: Geographic User Location SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     LOC=5     |     length    | geographic location of site ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   Depending on the application, different degrees of detail are +   appropriate for this item.  For conference applications, a string +   like "Murray Hill, New Jersey" may be sufficient, while, for an +   active badge system, strings like "Room 2A244, AT&T BL MH" might be +   appropriate.  The degree of detail is left to the implementation +   and/or user, but format and content MAY be prescribed by a profile. +   The LOC value is expected to remain constant for the duration of a +   session, except for mobile hosts. + +6.5.6 TOOL: Application or Tool Name SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     TOOL=6    |     length    |name/version of source appl. ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   A string giving the name and possibly version of the application +   generating the stream, e.g., "videotool 1.2".  This information may +   be useful for debugging purposes and is similar to the Mailer or +   Mail-System-Version SMTP headers.  The TOOL value is expected to +   remain constant for the duration of the session. + + + + + + + + + +Schulzrinne, et al.         
Standards Track                    [Page 49] + +RFC 3550                          RTP                          July 2003 + + +6.5.7 NOTE: Notice/Status SDES Item + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     NOTE=7    |     length    | note about the source       ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The following semantics are suggested for this item, but these or +   other semantics MAY be explicitly defined by a profile.  The NOTE +   item is intended for transient messages describing the current state +   of the source, e.g., "on the phone, can't talk".  Or, during a +   seminar, this item might be used to convey the title of the talk.  It +   should be used only to carry exceptional information and SHOULD NOT +   be included routinely by all participants because this would slow +   down the rate at which reception reports and CNAME are sent, thus +   impairing the performance of the protocol.  In particular, it SHOULD +   NOT be included as an item in a user's configuration file nor +   automatically generated as in a quote-of-the-day. + +   Since the NOTE item may be important to display while it is active, +   the rate at which other non-CNAME items such as NAME are transmitted +   might be reduced so that the NOTE item can take that part of the RTCP +   bandwidth.  When the transient message becomes inactive, the NOTE +   item SHOULD continue to be transmitted a few times at the same +   repetition rate but with a string of length zero to signal the +   receivers.  However, receivers SHOULD also consider the NOTE item +   inactive if it is not received for a small multiple of the repetition +   rate, or perhaps 20-30 RTCP intervals. 
+ +6.5.8 PRIV: Private Extensions SDES Item + +     0                   1                   2                   3 +     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +    |     PRIV=8    |     length    | prefix length |prefix string... +    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +    ...             |                  value string               ... +    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   This item is used to define experimental or application-specific SDES +   extensions.  The item contains a prefix consisting of a length-string +   pair, followed by the value string filling the remainder of the item +   and carrying the desired information.  The prefix length field is 8 +   bits long.  The prefix string is a name chosen by the person defining +   the PRIV item to be unique with respect to other PRIV items this +   application might receive.  The application creator might choose to +   use the application name plus an additional subtype identification if + + + +Schulzrinne, et al.         Standards Track                    [Page 50] + +RFC 3550                          RTP                          July 2003 + + +   needed.  Alternatively, it is RECOMMENDED that others choose a name +   based on the entity they represent, then coordinate the use of the +   name within that entity. + +   Note that the prefix consumes some space within the item's total +   length of 255 octets, so the prefix should be kept as short as +   possible.  This facility and the constrained RTCP bandwidth SHOULD +   NOT be overloaded; it is not intended to satisfy all the control +   communication requirements of all applications. + +   SDES PRIV prefixes will not be registered by IANA.  
If some form of +   the PRIV item proves to be of general utility, it SHOULD instead be +   assigned a regular SDES item type registered with IANA so that no +   prefix is required.  This simplifies use and increases transmission +   efficiency. + +6.6 BYE: Goodbye RTCP Packet + +       0                   1                   2                   3 +       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |V=2|P|    SC   |   PT=BYE=203  |             length            | +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |                           SSRC/CSRC                           | +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      :                              ...                              : +      +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +(opt) |     length    |               reason for leaving            ... +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The BYE packet indicates that one or more sources are no longer +   active. + +   version (V), padding (P), length: +      As described for the SR packet (see Section 6.4.1). + +   packet type (PT): 8 bits +      Contains the constant 203 to identify this as an RTCP BYE packet. + +   source count (SC): 5 bits +      The number of SSRC/CSRC identifiers included in this BYE packet. +      A count value of zero is valid, but useless. + +   The rules for when a BYE packet should be sent are specified in +   Sections 6.3.7 and 8.2. + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 51] + +RFC 3550                          RTP                          July 2003 + + +   If a BYE packet is received by a mixer, the mixer SHOULD forward the +   BYE packet with the SSRC/CSRC identifier(s) unchanged.  
If a mixer +   shuts down, it SHOULD send a BYE packet listing all contributing +   sources it handles, as well as its own SSRC identifier.  Optionally, +   the BYE packet MAY include an 8-bit octet count followed by that many +   octets of text indicating the reason for leaving, e.g., "camera +   malfunction" or "RTP loop detected".  The string has the same +   encoding as that described for SDES.  If the string fills the packet +   to the next 32-bit boundary, the string is not null terminated.  If +   not, the BYE packet MUST be padded with null octets to the next 32- +   bit boundary.  This padding is separate from that indicated by the P +   bit in the RTCP header. + +6.7 APP: Application-Defined RTCP Packet + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |V=2|P| subtype |   PT=APP=204  |             length            | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                           SSRC/CSRC                           | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                          name (ASCII)                         | +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |                   application-dependent data                ... +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +   The APP packet is intended for experimental use as new applications +   and new features are developed, without requiring packet type value +   registration.  APP packets with unrecognized names SHOULD be ignored. +   After testing and if wider use is justified, it is RECOMMENDED that +   each APP packet be redefined without the subtype and name fields and +   registered with IANA using an RTCP packet type. 
+ +   version (V), padding (P), length: +      As described for the SR packet (see Section 6.4.1). + +   subtype: 5 bits +      May be used as a subtype to allow a set of APP packets to be +      defined under one unique name, or for any application-dependent +      data. + +   packet type (PT): 8 bits +      Contains the constant 204 to identify this as an RTCP APP packet. + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 52] + +RFC 3550                          RTP                          July 2003 + + +   name: 4 octets +      A name chosen by the person defining the set of APP packets to be +      unique with respect to other APP packets this application might +      receive.  The application creator might choose to use the +      application name, and then coordinate the allocation of subtype +      values to others who want to define new packet types for the +      application.  Alternatively, it is RECOMMENDED that others choose +      a name based on the entity they represent, then coordinate the use +      of the name within that entity.  The name is interpreted as a +      sequence of four ASCII characters, with uppercase and lowercase +      characters treated as distinct. + +   application-dependent data: variable length +      Application-dependent data may or may not appear in an APP packet. +      It is interpreted by the application and not RTP itself.  It MUST +      be a multiple of 32 bits long. + +7. RTP Translators and Mixers + +   In addition to end systems, RTP supports the notion of "translators" +   and "mixers", which could be considered as "intermediate systems" at +   the RTP level.  Although this support adds some complexity to the +   protocol, the need for these functions has been clearly established +   by experiments with multicast audio and video applications in the +   Internet.  
Example uses of translators and mixers given in Section +   2.3 stem from the presence of firewalls and low bandwidth +   connections, both of which are likely to remain. + +7.1 General Description + +   An RTP translator/mixer connects two or more transport-level +   "clouds".  Typically, each cloud is defined by a common network and +   transport protocol (e.g., IP/UDP) plus a multicast address and +   transport level destination port or a pair of unicast addresses and +   ports.  (Network-level protocol translators, such as IP version 4 to +   IP version 6, may be present within a cloud invisibly to RTP.)  One +   system may serve as a translator or mixer for a number of RTP +   sessions, but each is considered a logically separate entity. + +   In order to avoid creating a loop when a translator or mixer is +   installed, the following rules MUST be observed: + +   o  Each of the clouds connected by translators and mixers +      participating in one RTP session either MUST be distinct from all +      the others in at least one of these parameters (protocol, address, +      port), or MUST be isolated at the network level from the others. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 53] + +RFC 3550                          RTP                          July 2003 + + +   o  A derivative of the first rule is that there MUST NOT be multiple +      translators or mixers connected in parallel unless by some +      arrangement they partition the set of sources to be forwarded. + +   Similarly, all RTP end systems that can communicate through one or +   more RTP translators or mixers share the same SSRC space, that is, +   the SSRC identifiers MUST be unique among all these end systems. +   Section 8.2 describes the collision resolution algorithm by which +   SSRC identifiers are kept unique and loops are detected. + +   There may be many varieties of translators and mixers designed for +   different purposes and applications.  
Some examples are to add or +   remove encryption, change the encoding of the data or the underlying +   protocols, or replicate between a multicast address and one or more +   unicast addresses.  The distinction between translators and mixers is +   that a translator passes through the data streams from different +   sources separately, whereas a mixer combines them to form one new +   stream: + +   Translator: Forwards RTP packets with their SSRC identifier +      intact; this makes it possible for receivers to identify +      individual sources even though packets from all the sources pass +      through the same translator and carry the translator's network +      source address.  Some kinds of translators will pass through the +      data untouched, but others MAY change the encoding of the data and +      thus the RTP data payload type and timestamp.  If multiple data +      packets are re-encoded into one, or vice versa, a translator MUST +      assign new sequence numbers to the outgoing packets.  Losses in +      the incoming packet stream may induce corresponding gaps in the +      outgoing sequence numbers.  Receivers cannot detect the presence +      of a translator unless they know by some other means what payload +      type or transport address was used by the original source. + +   Mixer: Receives streams of RTP data packets from one or more +      sources, possibly changes the data format, combines the streams in +      some manner and then forwards the combined stream.  Since the +      timing among multiple input sources will not generally be +      synchronized, the mixer will make timing adjustments among the +      streams and generate its own timing for the combined stream, so it +      is the synchronization source.  Thus, all data packets forwarded +      by a mixer MUST be marked with the mixer's own SSRC identifier. 
+      In order to preserve the identity of the original sources +      contributing to the mixed packet, the mixer SHOULD insert their +      SSRC identifiers into the CSRC identifier list following the fixed +      RTP header of the packet.  A mixer that is also itself a +      contributing source for some packet SHOULD explicitly include its +      own SSRC identifier in the CSRC list for that packet. + + + + +Schulzrinne, et al.         Standards Track                    [Page 54] + +RFC 3550                          RTP                          July 2003 + + +      For some applications, it MAY be acceptable for a mixer not to +      identify sources in the CSRC list.  However, this introduces the +      danger that loops involving those sources could not be detected. + +   The advantage of a mixer over a translator for applications like +   audio is that the output bandwidth is limited to that of one source +   even when multiple sources are active on the input side.  This may be +   important for low-bandwidth links.  The disadvantage is that +   receivers on the output side don't have any control over which +   sources are passed through or muted, unless some mechanism is +   implemented for remote control of the mixer.  The regeneration of +   synchronization information by mixers also means that receivers can't +   do inter-media synchronization of the original streams.  A multi- +   media mixer could do it. 
+ +         [E1]                                    [E6] +          |                                       | +    E1:17 |                                 E6:15 | +          |                                       |   E6:15 +          V  M1:48 (1,17)         M1:48 (1,17)    V   M1:48 (1,17) +         (M1)-------------><T1>-----------------><T2>-------------->[E7] +          ^                 ^     E4:47           ^   E4:47 +     E2:1 |           E4:47 |                     |   M3:89 (64,45) +          |                 |                     | +         [E2]              [E4]     M3:89 (64,45) | +                                                  |        legend: +   [E3] --------->(M2)----------->(M3)------------|        [End system] +          E3:64        M2:12 (64)  ^                       (Mixer) +                                   | E5:45                 <Translator> +                                   | +                                  [E5]          source: SSRC (CSRCs) +                                                -------------------> + +   Figure 3: Sample RTP network with end systems, mixers and translators + +   A collection of mixers and translators is shown in Fig. 3 to +   illustrate their effect on SSRC and CSRC identifiers.  In the figure, +   end systems are shown as rectangles (named E), translators as +   triangles (named T) and mixers as ovals (named M).  The notation "M1: +   48(1,17)" designates a packet originating at a mixer M1, identified by +   M1's (random) SSRC value of 48 and two CSRC identifiers, 1 and 17, +   copied from the SSRC identifiers of packets from E1 and E2. + +7.2 RTCP Processing in Translators + +   In addition to forwarding data packets, perhaps modified, translators +   and mixers MUST also process RTCP packets.  In many cases, they will +   take apart the compound RTCP packets received from end systems to + + + +Schulzrinne, et al.         
Standards Track                    [Page 55] + +RFC 3550                          RTP                          July 2003 + + +   aggregate SDES information and to modify the SR or RR packets. +   Retransmission of this information may be triggered by the packet +   arrival or by the RTCP interval timer of the translator or mixer +   itself. + +   A translator that does not modify the data packets, for example one +   that just replicates between a multicast address and a unicast +   address, MAY simply forward RTCP packets unmodified as well.  A +   translator that transforms the payload in some way MUST make +   corresponding transformations in the SR and RR information so that it +   still reflects the characteristics of the data and the reception +   quality.  These translators MUST NOT simply forward RTCP packets.  In +   general, a translator SHOULD NOT aggregate SR and RR packets from +   different sources into one packet since that would reduce the +   accuracy of the propagation delay measurements based on the LSR and +   DLSR fields. + +   SR sender information:  A translator does not generate its own +      sender information, but forwards the SR packets received from one +      cloud to the others.  The SSRC is left intact but the sender +      information MUST be modified if required by the translation.  If a +      translator changes the data encoding, it MUST change the "sender's +      byte count" field.  If it also combines several data packets into +      one output packet, it MUST change the "sender's packet count" +      field.  If it changes the timestamp frequency, it MUST change the +      "RTP timestamp" field in the SR packet. + +   SR/RR reception report blocks:  A translator forwards reception +      reports received from one cloud to the others.  Note that these +      flow in the direction opposite to the data.  The SSRC is left +      intact.  
If a translator combines several data packets into one +      output packet, and therefore changes the sequence numbers, it MUST +      make the inverse manipulation for the packet loss fields and the +      "extended last sequence number" field.  This may be complex.  In +      the extreme case, there may be no meaningful way to translate the +      reception reports, so the translator MAY pass on no reception +      report at all or a synthetic report based on its own reception. +      The general rule is to do what makes sense for a particular +      translation. + +      A translator does not require an SSRC identifier of its own, but +      MAY choose to allocate one for the purpose of sending reports +      about what it has received.  These would be sent to all the +      connected clouds, each corresponding to the translation of the +      data stream as sent to that cloud, since reception reports are +      normally multicast to all participants. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 56] + +RFC 3550                          RTP                          July 2003 + + +   SDES:  Translators typically forward without change the SDES +      information they receive from one cloud to the others, but MAY, +      for example, decide to filter non-CNAME SDES information if +      bandwidth is limited.  The CNAMEs MUST be forwarded to allow SSRC +      identifier collision detection to work.  A translator that +      generates its own RR packets MUST send SDES CNAME information +      about itself to the same clouds that it sends those RR packets. + +   BYE:  Translators forward BYE packets unchanged.  A translator +      that is about to cease forwarding packets SHOULD send a BYE packet +      to each connected cloud containing all the SSRC identifiers that +      were previously being forwarded to that cloud, including the +      translator's own SSRC identifier if it sent reports of its own. 
+ +   APP:  Translators forward APP packets unchanged. + +7.3 RTCP Processing in Mixers + +   Since a mixer generates a new data stream of its own, it does not +   pass through SR or RR packets at all and instead generates new +   information for both sides. + +   SR sender information:  A mixer does not pass through sender +      information from the sources it mixes because the characteristics +      of the source streams are lost in the mix.  As a synchronization +      source, the mixer SHOULD generate its own SR packets with sender +      information about the mixed data stream and send them in the same +      direction as the mixed stream. + +   SR/RR reception report blocks:  A mixer generates its own +      reception reports for sources in each cloud and sends them out +      only to the same cloud.  It MUST NOT send these reception reports +      to the other clouds and MUST NOT forward reception reports from +      one cloud to the others because the sources would not be SSRCs +      there (only CSRCs). + +   SDES:  Mixers typically forward without change the SDES +      information they receive from one cloud to the others, but MAY, +      for example, decide to filter non-CNAME SDES information if +      bandwidth is limited.  The CNAMEs MUST be forwarded to allow SSRC +      identifier collision detection to work.  (An identifier in a CSRC +      list generated by a mixer might collide with an SSRC identifier +      generated by an end system.)  A mixer MUST send SDES CNAME +      information about itself to the same clouds that it sends SR or RR +      packets. + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 57] + +RFC 3550                          RTP                          July 2003 + + +      Since mixers do not forward SR or RR packets, they will typically +      be extracting SDES packets from a compound RTCP packet.  
To +      minimize overhead, chunks from the SDES packets MAY be aggregated +      into a single SDES packet which is then stacked on an SR or RR +      packet originating from the mixer.  A mixer which aggregates SDES +      packets will use more RTCP bandwidth than an individual source +      because the compound packets will be longer, but that is +      appropriate since the mixer represents multiple sources. +      Similarly, a mixer which passes through SDES packets as they are +      received will be transmitting RTCP packets at higher than the +      single source rate, but again that is correct since the packets +      come from multiple sources.  The RTCP packet rate may be different +      on each side of the mixer. + +      A mixer that does not insert CSRC identifiers MAY also refrain +      from forwarding SDES CNAMEs.  In this case, the SSRC identifier +      spaces in the two clouds are independent.  As mentioned earlier, +      this mode of operation creates a danger that loops can't be +      detected. + +   BYE:  Mixers MUST forward BYE packets.  A mixer that is about to +      cease forwarding packets SHOULD send a BYE packet to each +      connected cloud containing all the SSRC identifiers that were +      previously being forwarded to that cloud, including the mixer's +      own SSRC identifier if it sent reports of its own. + +   APP:  The treatment of APP packets by mixers is application-specific. + +7.4 Cascaded Mixers + +   An RTP session may involve a collection of mixers and translators as +   shown in Fig. 3.  If two mixers are cascaded, such as M2 and M3 in +   the figure, packets received by a mixer may already have been mixed +   and may include a CSRC list with multiple identifiers.  The second +   mixer SHOULD build the CSRC list for the outgoing packet using the +   CSRC identifiers from already-mixed input packets and the SSRC +   identifiers from unmixed input packets.  
This is shown in the output +   arc from mixer M3 labeled M3:89(64,45) in the figure.  As in the case +   of mixers that are not cascaded, if the resulting CSRC list has more +   than 15 identifiers, the remainder cannot be included. + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 58] + +RFC 3550                          RTP                          July 2003 + + +8.  SSRC Identifier Allocation and Use + +   The SSRC identifier carried in the RTP header and in various fields +   of RTCP packets is a random 32-bit number that is required to be +   globally unique within an RTP session.  It is crucial that the number +   be chosen with care in order that participants on the same network or +   starting at the same time are not likely to choose the same number. + +   It is not sufficient to use the local network address (such as an +   IPv4 address) for the identifier because the address may not be +   unique.  Since RTP translators and mixers enable interoperation among +   multiple networks with different address spaces, the allocation +   patterns for addresses within two spaces might result in a much +   higher rate of collision than would occur with random allocation. + +   Multiple sources running on one host would also conflict. + +   It is also not sufficient to obtain an SSRC identifier simply by +   calling random() without carefully initializing the state.  An +   example of how to generate a random identifier is presented in +   Appendix A.6. + +8.1 Probability of Collision + +   Since the identifiers are chosen randomly, it is possible that two or +   more sources will choose the same number.  Collision occurs with the +   highest probability when all sources are started simultaneously, for +   example when triggered automatically by some session management +   event.  
If N is the number of sources and L the length of the +   identifier (here, 32 bits), the probability that two sources +   independently pick the same value can be approximated for large N +   [26] as 1 - exp(-N**2 / 2**(L+1)).  For N=1000, the probability is +   roughly 10**-4. + +   The typical collision probability is much lower than the worst-case +   above.  When one new source joins an RTP session in which all the +   other sources already have unique identifiers, the probability of +   collision is just the fraction of numbers used out of the space. +   Again, if N is the number of sources and L the length of the +   identifier, the probability of collision is N / 2**L.  For N=1000, +   the probability is roughly 2*10**-7. + +   The probability of collision is further reduced by the opportunity +   for a new source to receive packets from other participants before +   sending its first packet (either data or control).  If the new source +   keeps track of the other participants (by SSRC identifier), then + + + + + +Schulzrinne, et al.         Standards Track                    [Page 59] + +RFC 3550                          RTP                          July 2003 + + +   before transmitting its first packet the new source can verify that +   its identifier does not conflict with any that have been received, or +   else choose again. + +8.2 Collision Resolution and Loop Detection + +   Although the probability of SSRC identifier collision is low, all RTP +   implementations MUST be prepared to detect collisions and take the +   appropriate actions to resolve them.  If a source discovers at any +   time that another source is using the same SSRC identifier as its +   own, it MUST send an RTCP BYE packet for the old identifier and +   choose another random one.  (As explained below, this step is taken +   only once in case of a loop.)  
If a receiver discovers that two other +   sources are colliding, it MAY keep the packets from one and discard +   the packets from the other when this can be detected by different +   source transport addresses or CNAMEs.  The two sources are expected +   to resolve the collision so that the situation doesn't last. + +   Because the random SSRC identifiers are kept globally unique for each +   RTP session, they can also be used to detect loops that may be +   introduced by mixers or translators.  A loop causes duplication of +   data and control information, either unmodified or possibly mixed, as +   in the following examples: + +   o  A translator may incorrectly forward a packet to the same +      multicast group from which it has received the packet, either +      directly or through a chain of translators.  In that case, the +      same packet appears several times, originating from different +      network sources. + +   o  Two translators incorrectly set up in parallel, i.e., with the +      same multicast groups on both sides, would both forward packets +      from one multicast group to the other.  Unidirectional translators +      would produce two copies; bidirectional translators would form a +      loop. + +   o  A mixer can close a loop by sending to the same transport +      destination upon which it receives packets, either directly or +      through another mixer or translator.  In this case a source might +      show up both as an SSRC on a data packet and a CSRC in a mixed +      data packet. + +   A source may discover that its own packets are being looped, or that +   packets from another source are being looped (a third-party loop). +   Both loops and collisions in the random selection of a source +   identifier result in packets arriving with the same SSRC identifier +   but a different source transport address, which may be that of the +   end system originating the packet or an intermediate system. + + + +Schulzrinne, et al.         
Standards Track                    [Page 60] + +RFC 3550                          RTP                          July 2003 + + +   Therefore, if a source changes its source transport address, it MAY +   also choose a new SSRC identifier to avoid being interpreted as a +   looped source.  (This is not a MUST because, in some applications of RTP, +   sources may be expected to change addresses during a session.)  Note +   that if a translator restarts and consequently changes the source +   transport address (e.g., changes the UDP source port number) on which +   it forwards packets, then all those packets will appear to receivers +   to be looped because the SSRC identifiers are applied by the original +   source and will not change.  This problem can be avoided by keeping +   the source transport address fixed across restarts, but in any case +   will be resolved after a timeout at the receivers. + +   Loops or collisions occurring on the far side of a translator or +   mixer cannot be detected using the source transport address if all +   copies of the packets go through the translator or mixer; however, +   collisions may still be detected when chunks from two RTCP SDES +   packets contain the same SSRC identifier but different CNAMEs. + +   To detect and resolve these conflicts, an RTP implementation MUST +   include an algorithm similar to the one described below, though the +   implementation MAY choose a different policy for which packets from +   colliding third-party sources are kept.  The algorithm described +   below ignores packets from a new source or loop that collide with an +   established source.  It resolves collisions with the participant's +   own SSRC identifier by sending an RTCP BYE for the old identifier and +   choosing a new one.  
However, when the collision was induced by a +   loop of the participant's own packets, the algorithm will choose a +   new identifier only once and thereafter ignore packets from the +   looping source transport address.  This is required to avoid a flood +   of BYE packets. + +   This algorithm requires keeping a table indexed by the source +   identifier and containing the source transport addresses from the +   first RTP packet and first RTCP packet received with that identifier, +   along with other state for that source.  Two source transport +   addresses are required since, for example, the UDP source port +   numbers may be different on RTP and RTCP packets.  However, it may be +   assumed that the network address is the same in both source transport +   addresses. + +   Each SSRC or CSRC identifier received in an RTP or RTCP packet is +   looked up in the source identifier table in order to process that +   data or control information.  The source transport address from the +   packet is compared to the corresponding source transport address in +   the table to detect a loop or collision if they don't match.  For +   control packets, each element with its own SSRC identifier, for +   example an SDES chunk, requires a separate lookup.  (The SSRC +   identifier in a reception report block is an exception because it + + + +Schulzrinne, et al.         Standards Track                    [Page 61] + +RFC 3550                          RTP                          July 2003 + + +   identifies a source heard by the reporter, and that SSRC identifier +   is unrelated to the source transport address of the RTCP packet sent +   by the reporter.)  If the SSRC or CSRC is not found, a new entry is +   created.  These table entries are removed when an RTCP BYE packet is +   received with the corresponding SSRC identifier and validated by a +   matching source transport address, or after no packets have arrived +   for a relatively long time (see Section 6.2.1). 
+ +   Note that if two sources on the same host are transmitting with the +   same source identifier at the time a receiver begins operation, it +   would be possible that the first RTP packet received came from one of +   the sources while the first RTCP packet received came from the other. +   This would cause the wrong RTCP information to be associated with the +   RTP data, but this situation should be sufficiently rare and harmless +   that it may be disregarded. + +   In order to track loops of the participant's own data packets, the +   implementation MUST also keep a separate list of source transport +   addresses (not identifiers) that have been found to be conflicting. +   As in the source identifier table, two source transport addresses +   MUST be kept to separately track conflicting RTP and RTCP packets. +   Note that the conflicting address list should be short, usually +   empty.  Each element in this list stores the source addresses plus +   the time when the most recent conflicting packet was received.  An +   element MAY be removed from the list when no conflicting packet has +   arrived from that source for a time on the order of 10 RTCP report +   intervals (see Section 6.2). + +   For the algorithm as shown, it is assumed that the participant's own +   source identifier and state are included in the source identifier +   table.  The algorithm could be restructured to first make a separate +   comparison against the participant's own source identifier. 
+ +      if (SSRC or CSRC identifier is not found in the source +          identifier table) { +          create a new entry storing the data or control source +              transport address, the SSRC or CSRC and other state; +      } + +      /* Identifier is found in the table */ + +      else if (table entry was created on receipt of a control packet +               and this is the first data packet or vice versa) { +          store the source transport address from this packet; +      } +      else if (source transport address from the packet does not match +               the one saved in the table entry for this identifier) { + + + + +Schulzrinne, et al.         Standards Track                    [Page 62] + +RFC 3550                          RTP                          July 2003 + + +          /* An identifier collision or a loop is indicated */ + +          if (source identifier is not the participant's own) { +              /* OPTIONAL error counter step */ +              if (source identifier is from an RTCP SDES chunk +                  containing a CNAME item that differs from the CNAME +                  in the table entry) { +                  count a third-party collision; +              } else { +                  count a third-party loop; +              } +              abort processing of data packet or control element; +              /* MAY choose a different policy to keep new source */ +          } + +          /* A collision or loop of the participant's own packets */ + +          else if (source transport address is found in the list of +                   conflicting data or control source transport +                   addresses) { +              /* OPTIONAL error counter step */ +              if (source identifier is not from an RTCP SDES chunk +                  containing a CNAME item or CNAME is the +                  participant's own) { +                  count occurrence of own traffic looped; +              } +              mark 
current time in conflicting address list entry; +              abort processing of data packet or control element; +          } + +          /* New collision, change SSRC identifier */ + +          else { +              log occurrence of a collision; +              create a new entry in the conflicting data or control +                  source transport address list and mark current time; +              send an RTCP BYE packet with the old SSRC identifier; +              choose a new SSRC identifier; +              create a new entry in the source identifier table with +                  the old SSRC plus the source transport address from +                  the data or control packet being processed; +          } +      } + +   In this algorithm, packets from a newly conflicting source address +   will be ignored and packets from the original source address will be +   kept.  If no packets arrive from the original source for an extended +   period, the table entry will be timed out and the new source will be + + + +Schulzrinne, et al.         Standards Track                    [Page 63] + +RFC 3550                          RTP                          July 2003 + + +   able to take over.  This might occur if the original source detects +   the collision and moves to a new source identifier, but in the usual +   case an RTCP BYE packet will be received from the original source to +   delete the state without having to wait for a timeout. + +   If the original source address was received through a mixer (i.e., +   learned as a CSRC) and later the same source is received directly, +   the receiver may be well advised to switch to the new source address +   unless other sources in the mix would be lost.  
Furthermore, for +   applications such as telephony in which some sources such as mobile +   entities may change addresses during the course of an RTP session, +   the RTP implementation SHOULD modify the collision detection +   algorithm to accept packets from the new source transport address. +   To guard against flip-flopping between addresses if a genuine +   collision does occur, the algorithm SHOULD include some means to +   detect this case and avoid switching. + +   When a new SSRC identifier is chosen due to a collision, the +   candidate identifier SHOULD first be looked up in the source +   identifier table to see if it was already in use by some other +   source.  If so, another candidate MUST be generated and the process +   repeated. + +   A loop of data packets to a multicast destination can cause severe +   network flooding.  All mixers and translators MUST implement a loop +   detection algorithm like the one here so that they can break loops. +   This should limit the excess traffic to no more than one duplicate +   copy of the original traffic, which may allow the session to continue +   so that the cause of the loop can be found and fixed.  However, in +   extreme cases where a mixer or translator does not properly break the +   loop and high traffic levels result, it may be necessary for end +   systems to cease transmitting data or control packets entirely.  This +   decision may depend upon the application.  An error condition SHOULD +   be indicated as appropriate.  Transmission MAY be attempted again +   periodically after a long, random time (on the order of minutes). + +8.3 Use with Layered Encodings + +   For layered encodings transmitted on separate RTP sessions (see +   Section 2.4), a single SSRC identifier space SHOULD be used across +   the sessions of all layers and the core (base) layer SHOULD be used +   for SSRC identifier allocation and collision resolution.  
When a +   source discovers that it has collided, it transmits an RTCP BYE +   packet on only the base layer but changes the SSRC identifier to the +   new value in all layers. + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 64] + +RFC 3550                          RTP                          July 2003 + + +9. Security + +   Lower layer protocols may eventually provide all the security +   services that may be desired for applications of RTP, including +   authentication, integrity, and confidentiality.  These services have +   been specified for IP in [27].  Since the initial audio and video +   applications using RTP needed a confidentiality service before such +   services were available for the IP layer, the confidentiality service +   described in the next section was defined for use with RTP and RTCP. +   That description is included here to codify existing practice.  New +   applications of RTP MAY implement this RTP-specific confidentiality +   service for backward compatibility, and/or they MAY implement +   alternative security services.  The overhead on the RTP protocol for +   this confidentiality service is low, so the penalty will be minimal +   if this service is obsoleted by other services in the future. + +   Alternatively, other services, other implementations of services and +   other algorithms may be defined for RTP in the future.  In +   particular, an RTP profile called Secure Real-time Transport Protocol +   (SRTP) [28] is being developed to provide confidentiality of the RTP +   payload while leaving the RTP header in the clear so that link-level +   header compression algorithms can still operate.  It is expected that +   SRTP will be the correct choice for many applications.  SRTP is based +   on the Advanced Encryption Standard (AES) and provides stronger +   security than the service described here.  
No claim is made that the +   methods presented here are appropriate for a particular security +   need.  A profile may specify which services and algorithms should be +   offered by applications, and may provide guidance as to their +   appropriate use. + +   Key distribution and certificates are outside the scope of this +   document. + +9.1 Confidentiality + +   Confidentiality means that only the intended receiver(s) can decode +   the received packets; for others, the packet contains no useful +   information.  Confidentiality of the content is achieved by +   encryption. + +   When it is desired to encrypt RTP or RTCP according to the method +   specified in this section, all the octets that will be encapsulated +   for transmission in a single lower-layer packet are encrypted as a +   unit.  For RTCP, a 32-bit random number redrawn for each unit MUST be +   prepended to the unit before encryption.  For RTP, no prefix is +   prepended; instead, the sequence number and timestamp fields are +   initialized with random offsets.  This is considered to be a weak + + + + +Schulzrinne, et al.         Standards Track                    [Page 65] + +RFC 3550                          RTP                          July 2003 + + +   initialization vector (IV) because of poor randomness properties.  In +   addition, if the subsequent field, the SSRC, can be manipulated by an +   enemy, there is further weakness of the encryption method. + +   For RTCP, an implementation MAY segregate the individual RTCP packets +   in a compound RTCP packet into two separate compound RTCP packets, +   one to be encrypted and one to be sent in the clear.  For example, +   SDES information might be encrypted while reception reports were sent +   in the clear to accommodate third-party monitors that are not privy +   to the encryption key.  In this example, depicted in Fig. 
4, the SDES +   information MUST be appended to an RR packet with no reports (and the +   random number) to satisfy the requirement that all compound RTCP +   packets begin with an SR or RR packet.  The SDES CNAME item is +   required in either the encrypted or unencrypted packet, but not both. +   The same SDES information SHOULD NOT be carried in both packets as +   this may compromise the encryption. + +             UDP packet                     UDP packet +   -----------------------------  ------------------------------ +   [random][RR][SDES #CNAME ...]  [SR #senderinfo #site1 #site2] +   -----------------------------  ------------------------------ +             encrypted                     not encrypted + +   #: SSRC identifier + +       Figure 4: Encrypted and non-encrypted RTCP packets + +   The presence of encryption and the use of the correct key are +   confirmed by the receiver through header or payload validity checks. +   Examples of such validity checks for RTP and RTCP headers are given +   in Appendices A.1 and A.2. + +   To be consistent with existing implementations of the initial +   specification of RTP in RFC 1889, the default encryption algorithm is +   the Data Encryption Standard (DES) algorithm in cipher block chaining +   (CBC) mode, as described in Section 1.1 of RFC 1423 [29], except that +   padding to a multiple of 8 octets is indicated as described for the P +   bit in Section 5.1.  The initialization vector is zero because random +   values are supplied in the RTP header or by the random prefix for +   compound RTCP packets.  For details on the use of CBC initialization +   vectors, see [30]. + +   Implementations that support the encryption method specified here +   SHOULD always support the DES algorithm in CBC mode as the default +   cipher for this method to maximize interoperability.  
This method was +   chosen because it has been demonstrated to be easy and practical to +   use in experimental audio and video tools in operation on the +   Internet.  However, DES has since been found to be too easily broken. + + + +Schulzrinne, et al.         Standards Track                    [Page 66] + +RFC 3550                          RTP                          July 2003 + + +   It is RECOMMENDED that stronger encryption algorithms such as +   Triple-DES be used in place of the default algorithm.  Furthermore, +   secure CBC mode requires that the first block of each packet be XORed +   with a random, independent IV of the same size as the cipher's block +   size.  For RTCP, this is (partially) achieved by prepending each +   packet with a 32-bit random number, independently chosen for each +   packet.  For RTP, the timestamp and sequence number start from random +   values, but consecutive packets will not be independently randomized. +   It should be noted that the randomness in both cases (RTP and RTCP) +   is limited.  High-security applications SHOULD consider other, more +   conventional, protection means.  Other encryption algorithms MAY be +   specified dynamically for a session by non-RTP means.  In particular, +   the SRTP profile [28] based on AES is being developed to take into +   account known plaintext and CBC plaintext manipulation concerns, and +   will be the correct choice in the future. + +   As an alternative to encryption at the IP level or at the RTP level +   as described above, profiles MAY define additional payload types for +   encrypted encodings.  Those encodings MUST specify how padding and +   other aspects of the encryption are to be handled.  This method +   allows encrypting only the data while leaving the headers in the +   clear for applications where that is desired.  It may be particularly +   useful for hardware devices that will handle both decryption and +   decoding.  
It is also valuable for applications where link-level +   compression of RTP and lower-layer headers is desired and +   confidentiality of the payload (but not addresses) is sufficient +   since encryption of the headers precludes compression. + +9.2 Authentication and Message Integrity + +   Authentication and message integrity services are not defined at the +   RTP level since these services would not be directly feasible without +   a key management infrastructure.  It is expected that authentication +   and integrity services will be provided by lower layer protocols. + +10. Congestion Control + +   All transport protocols used on the Internet need to address +   congestion control in some way [31].  RTP is not an exception, but +   because the data transported over RTP is often inelastic (generated +   at a fixed or controlled rate), the means to control congestion in +   RTP may be quite different from those for other transport protocols +   such as TCP.  In one sense, inelasticity reduces the risk of +   congestion because the RTP stream will not expand to consume all +   available bandwidth as a TCP stream can.  However, inelasticity also +   means that the RTP stream cannot arbitrarily reduce its load on the +   network to eliminate congestion when it occurs. + + + + +Schulzrinne, et al.         Standards Track                    [Page 67] + +RFC 3550                          RTP                          July 2003 + + +   Since RTP may be used for a wide variety of applications in many +   different contexts, there is no single congestion control mechanism +   that will work for all.  Therefore, congestion control SHOULD be +   defined in each RTP profile as appropriate.  For some profiles, it +   may be sufficient to include an applicability statement restricting +   the use of that profile to environments where congestion is avoided +   by engineering.  
For other profiles, specific methods such as data +   rate adaptation based on RTCP feedback may be required. + +11. RTP over Network and Transport Protocols + +   This section describes issues specific to carrying RTP packets within +   particular network and transport protocols.  The following rules +   apply unless superseded by protocol-specific definitions outside this +   specification. + +   RTP relies on the underlying protocol(s) to provide demultiplexing of +   RTP data and RTCP control streams.  For UDP and similar protocols, +   RTP SHOULD use an even destination port number and the corresponding +   RTCP stream SHOULD use the next higher (odd) destination port number. +   For applications that take a single port number as a parameter and +   derive the RTP and RTCP port pair from that number, if an odd number +   is supplied then the application SHOULD replace that number with the +   next lower (even) number to use as the base of the port pair.  For +   applications in which the RTP and RTCP destination port numbers are +   specified via explicit, separate parameters (using a signaling +   protocol or other means), the application MAY disregard the +   restrictions that the port numbers be even/odd and consecutive +   although the use of an even/odd port pair is still encouraged.  The +   RTP and RTCP port numbers MUST NOT be the same since RTP relies on +   the port numbers to demultiplex the RTP data and RTCP control +   streams. + +   In a unicast session, both participants need to identify a port pair +   for receiving RTP and RTCP packets.  Both participants MAY use the +   same port pair.  A participant MUST NOT assume that the source port +   of the incoming RTP or RTCP packet can be used as the destination +   port for outgoing RTP or RTCP packets.  When RTP data packets are +   being sent in both directions, each participant's RTCP SR packets +   MUST be sent to the port that the other participant has specified for +   reception of RTCP.  
The RTCP SR packets combine sender information +   for the outgoing data plus reception report information for the +   incoming data.  If a side is not actively sending data (see Section +   6.4), an RTCP RR packet is sent instead. + +   It is RECOMMENDED that layered encoding applications (see Section +   2.4) use a set of contiguous port numbers.  The port numbers MUST be +   distinct because of a widespread deficiency in existing operating + + + +Schulzrinne, et al.         Standards Track                    [Page 68] + +RFC 3550                          RTP                          July 2003 + + +   systems that prevents use of the same port with multiple multicast +   addresses, and for unicast, there is only one permissible address. +   Thus for layer n, the data port is P + 2n, and the control port is P +   + 2n + 1.  When IP multicast is used, the addresses MUST also be +   distinct because multicast routing and group membership are managed +   on an address granularity.  However, allocation of contiguous IP +   multicast addresses cannot be assumed because some groups may require +   different scopes and may therefore be allocated from different +   address ranges. + +   The previous paragraph conflicts with the SDP specification, RFC 2327 +   [15], which says that it is illegal for both multiple addresses and +   multiple ports to be specified in the same session description +   because the association of addresses with ports could be ambiguous. +   It is intended that this restriction will be relaxed in a revision of +   RFC 2327 to allow an equal number of addresses and ports to be +   specified with a one-to-one mapping implied. + +   RTP data packets contain no length field or other delineation, +   therefore RTP relies on the underlying protocol(s) to provide a +   length indication.  The maximum length of RTP packets is limited only +   by the underlying protocols. 
+ +   If RTP packets are to be carried in an underlying protocol that +   provides the abstraction of a continuous octet stream rather than +   messages (packets), an encapsulation of the RTP packets MUST be +   defined to provide a framing mechanism.  Framing is also needed if +   the underlying protocol may contain padding so that the extent of the +   RTP payload cannot be determined.  The framing mechanism is not +   defined here. + +   A profile MAY specify a framing method to be used even when RTP is +   carried in protocols that do provide framing in order to allow +   carrying several RTP packets in one lower-layer protocol data unit, +   such as a UDP packet.  Carrying several RTP packets in one network or +   transport packet reduces header overhead and may simplify +   synchronization between different streams. + +12. Summary of Protocol Constants + +   This section contains a summary listing of the constants defined in +   this specification. + +   The RTP payload type (PT) constants are defined in profiles rather +   than this document.  However, the octet of the RTP header which +   contains the marker bit(s) and payload type MUST avoid the reserved +   values 200 and 201 (decimal) to distinguish RTP packets from the RTCP +   SR and RR packet types for the header validation procedure described + + + +Schulzrinne, et al.         Standards Track                    [Page 69] + +RFC 3550                          RTP                          July 2003 + + +   in Appendix A.1.  For the standard definition of one marker bit and a +   7-bit payload type field as shown in this specification, this +   restriction means that payload types 72 and 73 are reserved. + +12.1 RTCP Packet Types + +   abbrev.  
name                 value +   SR       sender report          200 +   RR       receiver report        201 +   SDES     source description     202 +   BYE      goodbye                203 +   APP      application-defined    204 + +   These type values were chosen in the range 200-204 for improved +   header validity checking of RTCP packets compared to RTP packets or +   other unrelated packets.  When the RTCP packet type field is compared +   to the corresponding octet of the RTP header, this range corresponds +   to the marker bit being 1 (which it usually is not in data packets) +   and to the high bit of the standard payload type field being 1 (since +   the static payload types are typically defined in the low half). +   This range was also chosen to be some distance numerically from 0 and +   255 since all-zeros and all-ones are common data patterns. + +   Since all compound RTCP packets MUST begin with SR or RR, these codes +   were chosen as an even/odd pair to allow the RTCP validity check to +   test the maximum number of bits with mask and value. + +   Additional RTCP packet types may be registered through IANA (see +   Section 15). + +12.2 SDES Types + +   abbrev.  name                            value +   END      end of SDES list                    0 +   CNAME    canonical name                      1 +   NAME     user name                           2 +   EMAIL    user's electronic mail address      3 +   PHONE    user's phone number                 4 +   LOC      geographic user location            5 +   TOOL     name of application or tool         6 +   NOTE     notice about the source             7 +   PRIV     private extensions                  8 + +   Additional SDES types may be registered through IANA (see Section +   15). + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 70] + +RFC 3550                          RTP                          July 2003 + + +13.  
RTP Profiles and Payload Format Specifications + +   A complete specification of RTP for a particular application will +   require one or more companion documents of two types described here: +   profiles, and payload format specifications. + +   RTP may be used for a variety of applications with somewhat differing +   requirements.  The flexibility to adapt to those requirements is +   provided by allowing multiple choices in the main protocol +   specification, then selecting the appropriate choices or defining +   extensions for a particular environment and class of applications in +   a separate profile document.  Typically an application will operate +   under only one profile in a particular RTP session, so there is no +   explicit indication within the RTP protocol itself as to which +   profile is in use.  A profile for audio and video applications may be +   found in the companion RFC 3551.  Profiles are typically titled "RTP +   Profile for ...". + +   The second type of companion document is a payload format +   specification, which defines how a particular kind of payload data, +   such as H.261 encoded video, should be carried in RTP.  These +   documents are typically titled "RTP Payload Format for XYZ +   Audio/Video Encoding".  Payload formats may be useful under multiple +   profiles and may therefore be defined independently of any particular +   profile.  The profile documents are then responsible for assigning a +   default mapping of that format to a payload type value if needed. + +   Within this specification, the following items have been identified +   for possible definition within a profile, but this list is not meant +   to be exhaustive: + +   RTP data header: The octet in the RTP data header that contains +      the marker bit and payload type field MAY be redefined by a +      profile to suit different requirements, for example with more or +      fewer marker bits (Section 5.3, p. 18). 
+ +   Payload types: Assuming that a payload type field is included, +      the profile will usually define a set of payload formats (e.g., +      media encodings) and a default static mapping of those formats to +      payload type values.  Some of the payload formats may be defined +      by reference to separate payload format specifications.  For each +      payload type defined, the profile MUST specify the RTP timestamp +      clock rate to be used (Section 5.1, p. 14). + +   RTP data header additions: Additional fields MAY be appended to +      the fixed RTP data header if some additional functionality is +      required across the profile's class of applications independent of +      payload type (Section 5.3, p. 18). + + + +Schulzrinne, et al.         Standards Track                    [Page 71] + +RFC 3550                          RTP                          July 2003 + + +   RTP data header extensions: The contents of the first 16 bits of +      the RTP data header extension structure MUST be defined if use of +      that mechanism is to be allowed under the profile for +      implementation-specific extensions (Section 5.3.1, p. 18). + +   RTCP packet types: New application-class-specific RTCP packet +      types MAY be defined and registered with IANA. + +   RTCP report interval: A profile SHOULD specify that the values +      suggested in Section 6.2 for the constants employed in the +      calculation of the RTCP report interval will be used.  Those are +      the RTCP fraction of session bandwidth, the minimum report +      interval, and the bandwidth split between senders and receivers. +      A profile MAY specify alternate values if they have been +      demonstrated to work in a scalable manner. + +   SR/RR extension: An extension section MAY be defined for the +      RTCP SR and RR packets if there is additional information that +      should be reported regularly about the sender or receivers +      (Section 6.4.3, p. 42 and 43). 
+ +   SDES use: The profile MAY specify the relative priorities for +      RTCP SDES items to be transmitted or excluded entirely (Section +      6.3.9); an alternate syntax or semantics for the CNAME item +      (Section 6.5.1); the format of the LOC item (Section 6.5.5); the +      semantics and use of the NOTE item (Section 6.5.7); or new SDES +      item types to be registered with IANA. + +   Security: A profile MAY specify which security services and +      algorithms should be offered by applications, and MAY provide +      guidance as to their appropriate use (Section 9, p. 65). + +   String-to-key mapping: A profile MAY specify how a user-provided +      password or pass phrase is mapped into an encryption key. + +   Congestion: A profile SHOULD specify the congestion control +      behavior appropriate for that profile. + +   Underlying protocol: Use of a particular underlying network or +      transport layer protocol to carry RTP packets MAY be required. + +   Transport mapping: A mapping of RTP and RTCP to transport-level +      addresses, e.g., UDP ports, other than the standard mapping +      defined in Section 11, p. 68 may be specified. + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 72] + +RFC 3550                          RTP                          July 2003 + + +   Encapsulation: An encapsulation of RTP packets may be defined to +      allow multiple RTP data packets to be carried in one lower-layer +      packet or to provide framing over underlying protocols that do not +      already do so (Section 11, p. 69). + +   It is not expected that a new profile will be required for every +   application.  Within one application class, it would be better to +   extend an existing profile rather than make a new one in order to +   facilitate interoperation among the applications since each will +   typically run under only one profile.  
Simple extensions such as the +   definition of additional payload type values or RTCP packet types may +   be accomplished by registering them through IANA and publishing their +   descriptions in an addendum to the profile or in a payload format +   specification. + +14. Security Considerations + +   RTP suffers from the same security liabilities as the underlying +   protocols.  For example, an impostor can fake source or destination +   network addresses, or change the header or payload.  Within RTCP, the +   CNAME and NAME information may be used to impersonate another +   participant.  In addition, RTP may be sent via IP multicast, which +   provides no direct means for a sender to know all the receivers of +   the data sent and therefore no measure of privacy.  Rightly or not, +   users may be more sensitive to privacy concerns with audio and video +   communication than they have been with more traditional forms of +   network communication [33].  Therefore, the use of security +   mechanisms with RTP is important.  These mechanisms are discussed in +   Section 9. + +   RTP-level translators or mixers may be used to allow RTP traffic to +   reach hosts behind firewalls.  Appropriate firewall security +   principles and practices, which are beyond the scope of this +   document, should be followed in the design and installation of these +   devices and in the admission of RTP applications for use behind the +   firewall. + +15. IANA Considerations + +   Additional RTCP packet types and SDES item types may be registered +   through the Internet Assigned Numbers Authority (IANA).  Since these +   number spaces are small, allowing unconstrained registration of new +   values would not be prudent.  
To facilitate review of requests and to +   promote shared use of new types among multiple applications, requests +   for registration of new values must be documented in an RFC or other +   permanent and readily available reference such as the product of +   another cooperative standards body (e.g., ITU-T).  Other requests may +   also be accepted, under the advice of a "designated expert." + + + +Schulzrinne, et al.         Standards Track                    [Page 73] + +RFC 3550                          RTP                          July 2003 + + +   (Contact the IANA for the contact information of the current expert.) + +   RTP profile specifications SHOULD register with IANA a name for the +   profile in the form "RTP/xxx", where xxx is a short abbreviation of +   the profile title.  These names are for use by higher-level control +   protocols, such as the Session Description Protocol (SDP), RFC 2327 +   [15], to refer to transport methods. + +16. Intellectual Property Rights Statement + +   The IETF takes no position regarding the validity or scope of any +   intellectual property or other rights that might be claimed to +   pertain to the implementation or use of the technology described in +   this document or the extent to which any license under such rights +   might or might not be available; neither does it represent that it +   has made any effort to identify any such rights.  Information on the +   IETF's procedures with respect to rights in standards-track and +   standards-related documentation can be found in BCP-11.  Copies of +   claims of rights made available for publication and any assurances of +   licenses to be made available, or the result of an attempt made to +   obtain a general license or permission for the use of such +   proprietary rights by implementors or users of this specification can +   be obtained from the IETF Secretariat. 
+ +   The IETF invites any interested party to bring to its attention any +   copyrights, patents or patent applications, or other proprietary +   rights which may cover technology that may be required to practice +   this standard.  Please address the information to the IETF Executive +   Director. + +17.  Acknowledgments + +   This memorandum is based on discussions within the IETF Audio/Video +   Transport working group chaired by Stephen Casner and Colin Perkins. +   The current protocol has its origins in the Network Voice Protocol +   and the Packet Video Protocol (Danny Cohen and Randy Cole) and the +   protocol implemented by the vat application (Van Jacobson and Steve +   McCanne).  Christian Huitema provided ideas for the random identifier +   generator.  Extensive analysis and simulation of the timer +   reconsideration algorithm was done by Jonathan Rosenberg.  The +   additions for layered encodings were specified by Michael Speer and +   Steve McCanne. + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 74] + +RFC 3550                          RTP                          July 2003 + + +Appendix A - Algorithms + +   We provide examples of C code for aspects of RTP sender and receiver +   algorithms.  There may be other implementation methods that are +   faster in particular operating environments or have other advantages. +   These implementation notes are for informational purposes only and +   are meant to clarify the RTP specification. + +   The following definitions are used for all examples; for clarity and +   brevity, the structure definitions are only valid for 32-bit big- +   endian (most significant octet first) architectures.  Bit fields are +   assumed to be packed tightly in big-endian bit order, with no +   additional padding.  Modifications would be required to construct a +   portable implementation. 
+ +   /* +    * rtp.h  --  RTP header file +    */ +   #include <sys/types.h> + +   /* +    * The type definitions below are valid for 32-bit architectures and +    * may have to be adjusted for 16- or 64-bit architectures. +    */ +   typedef unsigned char  u_int8; +   typedef unsigned short u_int16; +   typedef unsigned int   u_int32; +   typedef          short int16; + +   /* +    * Current protocol version. +    */ +   #define RTP_VERSION    2 + +   #define RTP_SEQ_MOD (1<<16) +   #define RTP_MAX_SDES 255      /* maximum text length for SDES */ + +   typedef enum { +       RTCP_SR   = 200, +       RTCP_RR   = 201, +       RTCP_SDES = 202, +       RTCP_BYE  = 203, +       RTCP_APP  = 204 +   } rtcp_type_t; + +   typedef enum { +       RTCP_SDES_END   = 0, +       RTCP_SDES_CNAME = 1, + + + +Schulzrinne, et al.         Standards Track                    [Page 75] + +RFC 3550                          RTP                          July 2003 + + +       RTCP_SDES_NAME  = 2, +       RTCP_SDES_EMAIL = 3, +       RTCP_SDES_PHONE = 4, +       RTCP_SDES_LOC   = 5, +       RTCP_SDES_TOOL  = 6, +       RTCP_SDES_NOTE  = 7, +       RTCP_SDES_PRIV  = 8 +   } rtcp_sdes_type_t; + +   /* +    * RTP data header +    */ +   typedef struct { +       unsigned int version:2;   /* protocol version */ +       unsigned int p:1;         /* padding flag */ +       unsigned int x:1;         /* header extension flag */ +       unsigned int cc:4;        /* CSRC count */ +       unsigned int m:1;         /* marker bit */ +       unsigned int pt:7;        /* payload type */ +       unsigned int seq:16;      /* sequence number */ +       u_int32 ts;               /* timestamp */ +       u_int32 ssrc;             /* synchronization source */ +       u_int32 csrc[1];          /* optional CSRC list */ +   } rtp_hdr_t; + +   /* +    * RTCP common header word +    */ +   typedef struct { +       unsigned int version:2;   /* protocol version */ +       unsigned int p:1;         /* padding flag */ +   
    unsigned int count:5;     /* varies by packet type */ +       unsigned int pt:8;        /* RTCP packet type */ +       u_int16 length;           /* pkt len in words, w/o this word */ +   } rtcp_common_t; + +   /* +    * Big-endian mask for version, padding bit and packet type pair +    */ +   #define RTCP_VALID_MASK (0xc000 | 0x2000 | 0xfe) +   #define RTCP_VALID_VALUE ((RTP_VERSION << 14) | RTCP_SR) + +   /* +    * Reception report block +    */ +   typedef struct { +       u_int32 ssrc;             /* data source being reported */ +       unsigned int fraction:8;  /* fraction lost since last SR/RR */ + + + +Schulzrinne, et al.         Standards Track                    [Page 76] + +RFC 3550                          RTP                          July 2003 + + +       int lost:24;              /* cumul. no. pkts lost (signed!) */ +       u_int32 last_seq;         /* extended last seq. no. received */ +       u_int32 jitter;           /* interarrival jitter */ +       u_int32 lsr;              /* last SR packet from this source */ +       u_int32 dlsr;             /* delay since last SR packet */ +   } rtcp_rr_t; + +   /* +    * SDES item +    */ +   typedef struct { +       u_int8 type;              /* type of item (rtcp_sdes_type_t) */ +       u_int8 length;            /* length of item (in octets) */ +       char data[1];             /* text, not null-terminated */ +   } rtcp_sdes_item_t; + +   /* +    * One RTCP packet +    */ +   typedef struct { +       rtcp_common_t common;     /* common header */ +       union { +           /* sender report (SR) */ +           struct { +               u_int32 ssrc;     /* sender generating this report */ +               u_int32 ntp_sec;  /* NTP timestamp */ +               u_int32 ntp_frac; +               u_int32 rtp_ts;   /* RTP timestamp */ +               u_int32 psent;    /* packets sent */ +               u_int32 osent;    /* octets sent */ +               rtcp_rr_t rr[1];  /* variable-length list */ +           } 
sr; + +           /* reception report (RR) */ +           struct { +               u_int32 ssrc;     /* receiver generating this report */ +               rtcp_rr_t rr[1];  /* variable-length list */ +           } rr; + +           /* source description (SDES) */ +           struct rtcp_sdes { +               u_int32 src;      /* first SSRC/CSRC */ +               rtcp_sdes_item_t item[1]; /* list of SDES items */ +           } sdes; + +           /* BYE */ +           struct { +               u_int32 src[1];   /* list of sources */ + + + +Schulzrinne, et al.         Standards Track                    [Page 77] + +RFC 3550                          RTP                          July 2003 + + +               /* can't express trailing text for reason */ +           } bye; +       } r; +   } rtcp_t; + +   typedef struct rtcp_sdes rtcp_sdes_t; + +   /* +    * Per-source state information +    */ +   typedef struct { +       u_int16 max_seq;        /* highest seq. number seen */ +       u_int32 cycles;         /* shifted count of seq. number cycles */ +       u_int32 base_seq;       /* base seq number */ +       u_int32 bad_seq;        /* last 'bad' seq number + 1 */ +       u_int32 probation;      /* sequ. packets till source is valid */ +       u_int32 received;       /* packets received */ +       u_int32 expected_prior; /* packet expected at last interval */ +       u_int32 received_prior; /* packet received at last interval */ +       u_int32 transit;        /* relative trans time for prev pkt */ +       u_int32 jitter;         /* estimated jitter */ +       /* ... */ +   } source; + +A.1 RTP Data Header Validity Checks + +   An RTP receiver should check the validity of the RTP header on +   incoming packets since they might be encrypted or might be from a +   different application that happens to be misaddressed.  
Similarly, if +   encryption according to the method described in Section 9 is enabled, +   the header validity check is needed to verify that incoming packets +   have been correctly decrypted, although a failure of the header +   validity check (e.g., unknown payload type) may not necessarily +   indicate decryption failure. + +   Only weak validity checks are possible on an RTP data packet from a +   source that has not been heard before: + +   o  RTP version field must equal 2. + +   o  The payload type must be known, and in particular it must not be +      equal to SR or RR. + +   o  If the P bit is set, then the last octet of the packet must +      contain a valid octet count, in particular, less than the total +      packet length minus the header size. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 78] + +RFC 3550                          RTP                          July 2003 + + +   o  The X bit must be zero if the profile does not specify that the +      header extension mechanism may be used.  Otherwise, the extension +      length field must be less than the total packet size minus the +      fixed header length and padding. + +   o  The length of the packet must be consistent with CC and payload +      type (if payloads have a known length). + +   The last three checks are somewhat complex and not always possible, +   leaving only the first two which total just a few bits.  If the SSRC +   identifier in the packet is one that has been received before, then +   the packet is probably valid and checking if the sequence number is +   in the expected range provides further validation.  If the SSRC +   identifier has not been seen before, then data packets carrying that +   identifier may be considered invalid until a small number of them +   arrive with consecutive sequence numbers.  
Those invalid packets MAY +   be discarded or they MAY be stored and delivered once validation has +   been achieved if the resulting delay is acceptable. + +   The routine update_seq shown below ensures that a source is declared +   valid only after MIN_SEQUENTIAL packets have been received in +   sequence.  It also validates the sequence number seq of a newly +   received packet and updates the sequence state for the packet's +   source in the structure to which s points. + +   When a new source is heard for the first time, that is, its SSRC +   identifier is not in the table (see Section 8.2), and the per-source +   state is allocated for it, s->probation is set to the number of +   sequential packets required before declaring a source valid +   (parameter MIN_SEQUENTIAL) and other variables are initialized: + +      init_seq(s, seq); +      s->max_seq = seq - 1; +      s->probation = MIN_SEQUENTIAL; + +   A non-zero s->probation marks the source as not yet valid so the +   state may be discarded after a short timeout rather than a long one, +   as discussed in Section 6.2.1. + +   After a source is considered valid, the sequence number is considered +   valid if it is no more than MAX_DROPOUT ahead of s->max_seq nor more +   than MAX_MISORDER behind.  If the new sequence number is ahead of +   max_seq modulo the RTP sequence number range (16 bits), but is +   smaller than max_seq, it has wrapped around and the (shifted) count +   of sequence number cycles is incremented.  A value of one is returned +   to indicate a valid sequence number. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 79] + +RFC 3550                          RTP                          July 2003 + + +   Otherwise, the value zero is returned to indicate that the validation +   failed, and the bad sequence number plus 1 is stored.  
If the next +   packet received carries the next higher sequence number, it is +   considered the valid start of a new packet sequence presumably caused +   by an extended dropout or a source restart.  Since multiple complete +   sequence number cycles may have been missed, the packet loss +   statistics are reset. + +   Typical values for the parameters are shown, based on a maximum +   misordering time of 2 seconds at 50 packets/second and a maximum +   dropout of 1 minute.  The dropout parameter MAX_DROPOUT should be a +   small fraction of the 16-bit sequence number space to give a +   reasonable probability that new sequence numbers after a restart will +   not fall in the acceptable range for sequence numbers from before the +   restart. + +   void init_seq(source *s, u_int16 seq) +   { +       s->base_seq = seq; +       s->max_seq = seq; +       s->bad_seq = RTP_SEQ_MOD + 1;   /* so seq == bad_seq is false */ +       s->cycles = 0; +       s->received = 0; +       s->received_prior = 0; +       s->expected_prior = 0; +       /* other initialization */ +   } + +   int update_seq(source *s, u_int16 seq) +   { +       u_int16 udelta = seq - s->max_seq; +       const int MAX_DROPOUT = 3000; +       const int MAX_MISORDER = 100; +       const int MIN_SEQUENTIAL = 2; + +       /* +        * Source is not valid until MIN_SEQUENTIAL packets with +        * sequential sequence numbers have been received. +        */ +       if (s->probation) { +           /* packet is in sequence */ +           if (seq == s->max_seq + 1) { +               s->probation--; +               s->max_seq = seq; +               if (s->probation == 0) { +                   init_seq(s, seq); +                   s->received++; +                   return 1; + + + +Schulzrinne, et al.         
Standards Track                    [Page 80] + +RFC 3550                          RTP                          July 2003 + + +               } +           } else { +               s->probation = MIN_SEQUENTIAL - 1; +               s->max_seq = seq; +           } +           return 0; +       } else if (udelta < MAX_DROPOUT) { +           /* in order, with permissible gap */ +           if (seq < s->max_seq) { +               /* +                * Sequence number wrapped - count another 64K cycle. +                */ +               s->cycles += RTP_SEQ_MOD; +           } +           s->max_seq = seq; +       } else if (udelta <= RTP_SEQ_MOD - MAX_MISORDER) { +           /* the sequence number made a very large jump */ +           if (seq == s->bad_seq) { +               /* +                * Two sequential packets -- assume that the other side +                * restarted without telling us so just re-sync +                * (i.e., pretend this was the first packet). +                */ +               init_seq(s, seq); +           } +           else { +               s->bad_seq = (seq + 1) & (RTP_SEQ_MOD-1); +               return 0; +           } +       } else { +           /* duplicate or reordered packet */ +       } +       s->received++; +       return 1; +   } + +   The validity check can be made stronger requiring more than two +   packets in sequence.  The disadvantages are that a larger number of +   initial packets will be discarded (or delayed in a queue) and that +   high packet loss rates could prevent validation.  However, because +   the RTCP header validation is relatively strong, if an RTCP packet is +   received from a source before the data packets, the count could be +   adjusted so that only two packets are required in sequence.  If +   initial data loss for a few seconds can be tolerated, an application +   MAY choose to discard all data packets from a source until a valid +   RTCP packet has been received from that source. 
+ + + + + +Schulzrinne, et al.         Standards Track                    [Page 81] + +RFC 3550                          RTP                          July 2003 + + +   Depending on the application and encoding, algorithms may exploit +   additional knowledge about the payload format for further validation. +   For payload types where the timestamp increment is the same for all +   packets, the timestamp values can be predicted from the previous +   packet received from the same source using the sequence number +   difference (assuming no change in payload type). + +   A strong "fast-path" check is possible since with high probability +   the first four octets in the header of a newly received RTP data +   packet will be just the same as that of the previous packet from the +   same SSRC except that the sequence number will have increased by one. +   Similarly, a single-entry cache may be used for faster SSRC lookups +   in applications where data is typically received from one source at a +   time. + +A.2 RTCP Header Validity Checks + +   The following checks should be applied to RTCP packets. + +   o  RTP version field must equal 2. + +   o  The payload type field of the first RTCP packet in a compound +      packet must be equal to SR or RR. + +   o  The padding bit (P) should be zero for the first packet of a +      compound RTCP packet because padding should only be applied, if it +      is needed, to the last packet. + +   o  The length fields of the individual RTCP packets must add up to +      the overall length of the compound RTCP packet as received.  This +      is a fairly strong check. + +   The code fragment below performs all of these checks.  The packet +   type is not checked for subsequent packets since unknown packet types +   may be present and should be ignored. 
+ +      u_int32 len;        /* length of compound RTCP packet in words */ +      rtcp_t *r;          /* RTCP header */ +      rtcp_t *end;        /* end of compound RTCP packet */ + +      if ((*(u_int16 *)r & RTCP_VALID_MASK) != RTCP_VALID_VALUE) { +          /* something wrong with packet format */ +      } +      end = (rtcp_t *)((u_int32 *)r + len); + +      do r = (rtcp_t *)((u_int32 *)r + r->common.length + 1); +      while (r < end && r->common.version == 2); + + + + +Schulzrinne, et al.         Standards Track                    [Page 82] + +RFC 3550                          RTP                          July 2003 + + +      if (r != end) { +          /* something wrong with packet format */ +      } + +A.3 Determining Number of Packets Expected and Lost + +   In order to compute packet loss rates, the number of RTP packets +   expected and actually received from each source needs to be known, +   using per-source state information defined in struct source +   referenced via pointer s in the code below.  The number of packets +   received is simply the count of packets as they arrive, including any +   late or duplicate packets.  The number of packets expected can be +   computed by the receiver as the difference between the highest +   sequence number received (s->max_seq) and the first sequence number +   received (s->base_seq).  Since the sequence number is only 16 bits +   and will wrap around, it is necessary to extend the highest sequence +   number with the (shifted) count of sequence number wraparounds +   (s->cycles).  Both the received packet count and the count of cycles +   are maintained by the RTP header validity check routine in Appendix A.1. 
+ +      extended_max = s->cycles + s->max_seq; +      expected = extended_max - s->base_seq + 1; + +   The number of packets lost is defined to be the number of packets +   expected less the number of packets actually received: + +      lost = expected - s->received; + +   Since this signed number is carried in 24 bits, it should be clamped +   at 0x7fffff for positive loss or 0x800000 for negative loss rather +   than wrapping around. + +   The fraction of packets lost during the last reporting interval +   (since the previous SR or RR packet was sent) is calculated from +   differences in the expected and received packet counts across the +   interval, where expected_prior and received_prior are the values +   saved when the previous reception report was generated: + +      expected_interval = expected - s->expected_prior; +      s->expected_prior = expected; +      received_interval = s->received - s->received_prior; +      s->received_prior = s->received; +      lost_interval = expected_interval - received_interval; +      if (expected_interval == 0 || lost_interval <= 0) fraction = 0; +      else fraction = (lost_interval << 8) / expected_interval; + +   The resulting fraction is an 8-bit fixed point number with the binary +   point at the left edge. + + + +Schulzrinne, et al.         Standards Track                    [Page 83] + +RFC 3550                          RTP                          July 2003 + + +A.4 Generating RTCP SDES Packets + +   This function builds one SDES chunk into buffer b composed of argc +   items supplied in arrays type, value and length.  It returns a +   pointer to the next available location within b. 
+ +   char *rtp_write_sdes(char *b, u_int32 src, int argc, +                        rtcp_sdes_type_t type[], char *value[], +                        int length[]) +   { +       rtcp_sdes_t *s = (rtcp_sdes_t *)b; +       rtcp_sdes_item_t *rsp; +       int i; +       int len; +       int pad; + +       /* SSRC header */ +       s->src = src; +       rsp = &s->item[0]; + +       /* SDES items */ +       for (i = 0; i < argc; i++) { +           rsp->type = type[i]; +           len = length[i]; +           if (len > RTP_MAX_SDES) { +               /* invalid length, may want to take other action */ +               len = RTP_MAX_SDES; +           } +           rsp->length = len; +           memcpy(rsp->data, value[i], len); +           rsp = (rtcp_sdes_item_t *)&rsp->data[len]; +       } + +       /* terminate with end marker and pad to next 4-octet boundary */ +       len = ((char *) rsp) - b; +       pad = 4 - (len & 0x3); +       b = (char *) rsp; +       while (pad--) *b++ = RTCP_SDES_END; + +       return b; +   } + + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 84] + +RFC 3550                          RTP                          July 2003 + + +A.5 Parsing RTCP SDES Packets + +   This function parses an SDES packet, calling functions find_member() +   to find a pointer to the information for a session member given the +   SSRC identifier and member_sdes() to store the new SDES information +   for that member.  This function expects a pointer to the header of +   the RTCP packet. 
+ +   void rtp_read_sdes(rtcp_t *r) +   { +       int count = r->common.count; +       rtcp_sdes_t *sd = &r->r.sdes; +       rtcp_sdes_item_t *rsp, *rspn; +       rtcp_sdes_item_t *end = (rtcp_sdes_item_t *) +                               ((u_int32 *)r + r->common.length + 1); +       source *s; + +       while (--count >= 0) { +           rsp = &sd->item[0]; +           if (rsp >= end) break; +           s = find_member(sd->src); + +           for (; rsp->type; rsp = rspn ) { +               rspn = (rtcp_sdes_item_t *)((char*)rsp+rsp->length+2); +               if (rspn >= end) { +                   rsp = rspn; +                   break; +               } +               member_sdes(s, rsp->type, rsp->data, rsp->length); +           } +           sd = (rtcp_sdes_t *) +                ((u_int32 *)sd + (((char *)rsp - (char *)sd) >> 2)+1); +       } +       if (count >= 0) { +           /* invalid packet format */ +       } +   } + +A.6 Generating a Random 32-bit Identifier + +   The following subroutine generates a random 32-bit identifier using +   the MD5 routines published in RFC 1321 [32].  The system routines may +   not be present on all operating systems, but they should serve as +   hints as to what kinds of information may be used.  Other system +   calls that may be appropriate include + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 85] + +RFC 3550                          RTP                          July 2003 + + +   o  getdomainname(), + +   o  getwd(), or + +   o  getrusage(). + +   "Live" video or audio samples are also a good source of random +   numbers, but care must be taken to avoid using a turned-off +   microphone or blinded camera as a source [17]. 
+ +   Use of this or a similar routine is recommended to generate the +   initial seed for the random number generator producing the RTCP +   period (as shown in Appendix A.7), to generate the initial values for +   the sequence number and timestamp, and to generate SSRC values. +   Since this routine is likely to be CPU-intensive, its direct use to +   generate RTCP periods is inappropriate because predictability is not +   an issue.  Note that this routine produces the same result on +   repeated calls until the value of the system clock changes unless +   different values are supplied for the type argument. + +   /* +    * Generate a random 32-bit quantity. +    */ +   #include <sys/types.h>   /* u_long */ +   #include <sys/time.h>    /* gettimeofday() */ +   #include <unistd.h>      /* get..() */ +   #include <stdio.h>       /* printf() */ +   #include <time.h>        /* clock() */ +   #include <sys/utsname.h> /* uname() */ +   #include "global.h"      /* from RFC 1321 */ +   #include "md5.h"         /* from RFC 1321 */ + +   #define MD_CTX MD5_CTX +   #define MDInit MD5Init +   #define MDUpdate MD5Update +   #define MDFinal MD5Final + +   static u_long md_32(char *string, int length) +   { +       MD_CTX context; +       union { +           char   c[16]; +           u_long x[4]; +       } digest; +       u_long r; +       int i; + +       MDInit (&context); + + + +Schulzrinne, et al.         Standards Track                    [Page 86] + +RFC 3550                          RTP                          July 2003 + + +       MDUpdate (&context, string, length); +       MDFinal ((unsigned char *)&digest, &context); +       r = 0; +       for (i = 0; i < 3; i++) { +           r ^= digest.x[i]; +       } +       return r; +   }                               /* md_32 */ + +   /* +    * Return random unsigned 32-bit quantity.  Use 'type' argument if +    * you need to generate several different values in close succession. 
+    */ +   u_int32 random32(int type) +   { +       struct { +           int     type; +           struct  timeval tv; +           clock_t cpu; +           pid_t   pid; +           u_long  hid; +           uid_t   uid; +           gid_t   gid; +           struct  utsname name; +       } s; + +       gettimeofday(&s.tv, 0); +       uname(&s.name); +       s.type = type; +       s.cpu  = clock(); +       s.pid  = getpid(); +       s.hid  = gethostid(); +       s.uid  = getuid(); +       s.gid  = getgid(); +       /* also: system uptime */ + +       return md_32((char *)&s, sizeof(s)); +   }                               /* random32 */ + +A.7 Computing the RTCP Transmission Interval + +   The following functions implement the RTCP transmission and reception +   rules described in Section 6.2.  These rules are coded in several +   functions: + +   o  rtcp_interval() computes the deterministic calculated interval, +      measured in seconds.  The parameters are defined in Section 6.3. + + + + +Schulzrinne, et al.         Standards Track                    [Page 87] + +RFC 3550                          RTP                          July 2003 + + +   o  OnExpire() is called when the RTCP transmission timer expires. + +   o  OnReceive() is called whenever an RTCP packet is received. + +   Both OnExpire() and OnReceive() have event e as an argument.  This is +   the next scheduled event for that participant, either an RTCP report +   or a BYE packet.  It is assumed that the following functions are +   available: + +   o  Schedule(time t, event e) schedules an event e to occur at time t. +      When time t arrives, the function OnExpire is called with e as an +      argument. + +   o  Reschedule(time t, event e) reschedules a previously scheduled +      event e for time t. + +   o  SendRTCPReport(event e) sends an RTCP report. + +   o  SendBYEPacket(event e) sends a BYE packet. 
+ +   o  TypeOfEvent(event e) returns EVENT_BYE if the event being +      processed is for a BYE packet to be sent, else it returns +      EVENT_REPORT. + +   o  PacketType(p) returns PACKET_RTCP_REPORT if packet p is an RTCP +      report (not BYE), PACKET_BYE if it's a BYE RTCP packet, and +      PACKET_RTP if it's a regular RTP data packet. + +   o  ReceivedPacketSize() and SentPacketSize() return the size of the +      referenced packet in octets. + +   o  NewMember(p) returns a 1 if the participant who sent packet p is +      not currently in the member list, 0 otherwise.  Note this function +      is not sufficient for a complete implementation because each CSRC +      identifier in an RTP packet and each SSRC in a BYE packet should +      be processed. + +   o  NewSender(p) returns a 1 if the participant who sent packet p is +      not currently in the sender sublist of the member list, 0 +      otherwise. + +   o  AddMember() and RemoveMember() to add and remove participants from +      the member list. + +   o  AddSender() and RemoveSender() to add and remove participants from +      the sender sublist of the member list. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 88] + +RFC 3550                          RTP                          July 2003 + + +   These functions would have to be extended for an implementation that +   allows the RTCP bandwidth fractions for senders and non-senders to be +   specified as explicit parameters rather than fixed values of 25% and +   75%.  The extended implementation of rtcp_interval() would need to +   avoid division by zero if one of the parameters was zero. 
+ +   double rtcp_interval(int members, +                        int senders, +                        double rtcp_bw, +                        int we_sent, +                        double avg_rtcp_size, +                        int initial) +   { +       /* +        * Minimum average time between RTCP packets from this site (in +        * seconds).  This time prevents the reports from `clumping' when +        * sessions are small and the law of large numbers isn't helping +        * to smooth out the traffic.  It also keeps the report interval +        * from becoming ridiculously small during transient outages like +        * a network partition. +        */ +       double const RTCP_MIN_TIME = 5.; +       /* +        * Fraction of the RTCP bandwidth to be shared among active +        * senders.  (This fraction was chosen so that in a typical +        * session with one or two active senders, the computed report +        * time would be roughly equal to the minimum report time so that +        * we don't unnecessarily slow down receiver reports.)  The +        * receiver fraction must be 1 - the sender fraction. +        */ +       double const RTCP_SENDER_BW_FRACTION = 0.25; +       double const RTCP_RCVR_BW_FRACTION = (1-RTCP_SENDER_BW_FRACTION); +       /* +       /* To compensate for "timer reconsideration" converging to a +        * value below the intended average. +        */ +       double const COMPENSATION = 2.71828 - 1.5; + +       double t;                   /* interval */ +       double rtcp_min_time = RTCP_MIN_TIME; +       int n;                      /* no. of members for computation */ + +       /* +        * Very first call at application start-up uses half the min +        * delay for quicker notification while still allowing some time +        * before reporting for randomization and to learn about other +        * sources so the report interval will converge to the correct +        * interval more quickly. + + + +Schulzrinne, et al.         
Standards Track                    [Page 89] + +RFC 3550                          RTP                          July 2003 + + +        */ +       if (initial) { +           rtcp_min_time /= 2; +       } +       /* +        * Dedicate a fraction of the RTCP bandwidth to senders unless +        * the number of senders is large enough that their share is +        * more than that fraction. +        */ +       n = members; +       if (senders <= members * RTCP_SENDER_BW_FRACTION) { +           if (we_sent) { +               rtcp_bw *= RTCP_SENDER_BW_FRACTION; +               n = senders; +           } else { +               rtcp_bw *= RTCP_RCVR_BW_FRACTION; +               n -= senders; +           } +       } + +       /* +        * The effective number of sites times the average packet size is +        * the total number of octets sent when each site sends a report. +        * Dividing this by the effective bandwidth gives the time +        * interval over which those packets must be sent in order to +        * meet the bandwidth target, with a minimum enforced.  In that +        * time interval we send one report so this time is also our +        * average time between reports. +        */ +       t = avg_rtcp_size * n / rtcp_bw; +       if (t < rtcp_min_time) t = rtcp_min_time; + +       /* +        * To avoid traffic bursts from unintended synchronization with +        * other sites, we then pick our actual next report interval as a +        * random number uniformly distributed between 0.5*t and 1.5*t. +        */ +       t = t * (drand48() + 0.5); +       t = t / COMPENSATION; +       return t; +   } + +   void OnExpire(event e, +                 int    members, +                 int    senders, +                 double rtcp_bw, +                 int    we_sent, +                 double *avg_rtcp_size, + + + +Schulzrinne, et al.         
Standards Track                    [Page 90] + +RFC 3550                          RTP                          July 2003 + + +                 int    *initial, +                 time_tp   tc, +                 time_tp   *tp, +                 int    *pmembers) +   { +       /* This function is responsible for deciding whether to send an +        * RTCP report or BYE packet now, or to reschedule transmission. +        * It is also responsible for updating the pmembers, initial, tp, +        * and avg_rtcp_size state variables.  This function should be +        * called upon expiration of the event timer used by Schedule(). +        */ + +       double t;     /* Interval */ +       double tn;    /* Next transmit time */ + +       /* In the case of a BYE, we use "timer reconsideration" to +        * reschedule the transmission of the BYE if necessary */ + +       if (TypeOfEvent(e) == EVENT_BYE) { +           t = rtcp_interval(members, +                             senders, +                             rtcp_bw, +                             we_sent, +                             *avg_rtcp_size, +                             *initial); +           tn = *tp + t; +           if (tn <= tc) { +               SendBYEPacket(e); +               exit(1); +           } else { +               Schedule(tn, e); +           } + +       } else if (TypeOfEvent(e) == EVENT_REPORT) { +           t = rtcp_interval(members, +                             senders, +                             rtcp_bw, +                             we_sent, +                             *avg_rtcp_size, +                             *initial); +           tn = *tp + t; +           if (tn <= tc) { +               SendRTCPReport(e); +               *avg_rtcp_size = (1./16.)*SentPacketSize(e) + +                   (15./16.)*(*avg_rtcp_size); +               *tp = tc; + +               /* We must redraw the interval.  Don't reuse the + + + +Schulzrinne, et al.         
Standards Track                    [Page 91] + +RFC 3550                          RTP                          July 2003 + + +                  one computed above, since its not actually +                  distributed the same, as we are conditioned +                  on it being small enough to cause a packet to +                  be sent */ + +               t = rtcp_interval(members, +                                 senders, +                                 rtcp_bw, +                                 we_sent, +                                 *avg_rtcp_size, +                                 *initial); + +               Schedule(t+tc,e); +               *initial = 0; +           } else { +               Schedule(tn, e); +           } +           *pmembers = members; +       } +   } + +   void OnReceive(packet p, +                  event e, +                  int *members, +                  int *pmembers, +                  int *senders, +                  double *avg_rtcp_size, +                  double *tp, +                  double tc, +                  double tn) +   { +       /* What we do depends on whether we have left the group, and are +        * waiting to send a BYE (TypeOfEvent(e) == EVENT_BYE) or an RTCP +        * report.  p represents the packet that was just received.  */ + +       if (PacketType(p) == PACKET_RTCP_REPORT) { +           if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) { +               AddMember(p); +               *members += 1; +           } +           *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) + +               (15./16.)*(*avg_rtcp_size); +       } else if (PacketType(p) == PACKET_RTP) { +           if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) { +               AddMember(p); +               *members += 1; +           } +           if (NewSender(p) && (TypeOfEvent(e) == EVENT_REPORT)) { + + + +Schulzrinne, et al.         
Standards Track                    [Page 92] + +RFC 3550                          RTP                          July 2003 + + +               AddSender(p); +               *senders += 1; +           } +       } else if (PacketType(p) == PACKET_BYE) { +           *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) + +               (15./16.)*(*avg_rtcp_size); + +           if (TypeOfEvent(e) == EVENT_REPORT) { +               if (NewSender(p) == FALSE) { +                   RemoveSender(p); +                   *senders -= 1; +               } + +               if (NewMember(p) == FALSE) { +                   RemoveMember(p); +                   *members -= 1; +               } + +               if (*members < *pmembers) { +                   tn = tc + +                       (((double) *members)/(*pmembers))*(tn - tc); +                   *tp = tc - +                       (((double) *members)/(*pmembers))*(tc - *tp); + +                   /* Reschedule the next report for time tn */ + +                   Reschedule(tn, e); +                   *pmembers = *members; +               } + +           } else if (TypeOfEvent(e) == EVENT_BYE) { +               *members += 1; +           } +       } +   } + + + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 93] + +RFC 3550                          RTP                          July 2003 + + +A.8 Estimating the Interarrival Jitter + +   The code fragments below implement the algorithm given in Section +   6.4.1 for calculating an estimate of the statistical variance of the +   RTP data interarrival time to be inserted in the interarrival jitter +   field of reception reports.  The inputs are r->ts, the timestamp from +   the incoming packet, and arrival, the current time in the same units. +   Here s points to state for the source; s->transit holds the relative +   transit time for the previous packet, and s->jitter holds the +   estimated jitter.  
The jitter field of the reception report is +   measured in timestamp units and expressed as an unsigned integer, but +   the jitter estimate is kept in a floating point.  As each data packet +   arrives, the jitter estimate is updated: + +      int transit = arrival - r->ts; +      int d = transit - s->transit; +      s->transit = transit; +      if (d < 0) d = -d; +      s->jitter += (1./16.) * ((double)d - s->jitter); + +   When a reception report block (to which rr points) is generated for +   this member, the current jitter estimate is returned: + +      rr->jitter = (u_int32) s->jitter; + +   Alternatively, the jitter estimate can be kept as an integer, but +   scaled to reduce round-off error.  The calculation is the same except +   for the last line: + +      s->jitter += d - ((s->jitter + 8) >> 4); + +   In this case, the estimate is sampled for the reception report as: + +      rr->jitter = s->jitter >> 4; + + + + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 94] + +RFC 3550                          RTP                          July 2003 + + +Appendix B - Changes from RFC 1889 + +   Most of this RFC is identical to RFC 1889.  There are no changes in +   the packet formats on the wire, only changes to the rules and +   algorithms governing how the protocol is used.  The biggest change is +   an enhancement to the scalable timer algorithm for calculating when +   to send RTCP packets: + +   o  The algorithm for calculating the RTCP transmission interval +      specified in Sections 6.2 and 6.3 and illustrated in Appendix A.7 +      is augmented to include "reconsideration" to minimize transmission +      in excess of the intended rate when many participants join a +      session simultaneously, and "reverse reconsideration" to reduce +      the incidence and duration of false participant timeouts when the +      number of participants drops rapidly.  
Reverse reconsideration is +      also used to possibly shorten the delay before sending RTCP SR +      when transitioning from passive receiver to active sender mode. + +   o  Section 6.3.7 specifies new rules controlling when an RTCP BYE +      packet should be sent in order to avoid a flood of packets when +      many participants leave a session simultaneously. + +   o  The requirement to retain state for inactive participants for a +      period long enough to span typical network partitions was removed +      from Section 6.2.1.  In a session where many participants join for +      a brief time and fail to send BYE, this requirement would cause a +      significant overestimate of the number of participants.  The +      reconsideration algorithm added in this revision compensates for +      the large number of new participants joining simultaneously when a +      partition heals. + +   It should be noted that these enhancements only have a significant +   effect when the number of session participants is large (thousands) +   and most of the participants join or leave at the same time.  This +   makes testing in a live network difficult.  However, the algorithm +   was subjected to a thorough analysis and simulation to verify its +   performance.  Furthermore, the enhanced algorithm was designed to +   interoperate with the algorithm in RFC 1889 such that the degree of +   reduction in excess RTCP bandwidth during a step join is proportional +   to the fraction of participants that implement the enhanced +   algorithm.  Interoperation of the two algorithms has been verified +   experimentally on live networks. + +   Other functional changes were: + +   o  Section 6.2.1 specifies that implementations may store only a +      sampling of the participants' SSRC identifiers to allow scaling to +      very large sessions.  Algorithms are specified in RFC 2762 [21]. + + + +Schulzrinne, et al.         
Standards Track                    [Page 95] + +RFC 3550                          RTP                          July 2003 + + +   o  In Section 6.2 it is specified that RTCP sender and non-sender +      bandwidths may be set as separate parameters of the session rather +      than a strict percentage of the session bandwidth, and may be set +      to zero.  The requirement that RTCP was mandatory for RTP sessions +      using IP multicast was relaxed.  However, a clarification was also +      added that turning off RTCP is NOT RECOMMENDED. + +   o  In Sections 6.2, 6.3.1 and Appendix A.7, it is specified that the +      fraction of participants below which senders get dedicated RTCP +      bandwidth changes from the fixed 1/4 to a ratio based on the RTCP +      sender and non-sender bandwidth parameters when those are given. +      The condition that no bandwidth is dedicated to senders when there +      are no senders was removed since that is expected to be a +      transitory state.  It also keeps non-senders from using sender +      RTCP bandwidth when that is not intended. + +   o  Also in Section 6.2 it is specified that the minimum RTCP interval +      may be scaled to smaller values for high bandwidth sessions, and +      that the initial RTCP delay may be set to zero for unicast +      sessions. + +   o  Timing out a participant is to be based on inactivity for a number +      of RTCP report intervals calculated using the receiver RTCP +      bandwidth fraction even for active senders. + +   o  Sections 7.2 and 7.3 specify that translators and mixers should +      send BYE packets for the sources they are no longer forwarding. + +   o  Rule changes for layered encodings are defined in Sections 2.4, +      6.3.9, 8.3 and 11.  In the last of these, it is noted that the +      address and port assignment rule conflicts with the SDP +      specification, RFC 2327 [15], but it is intended that this +      restriction will be relaxed in a revision of RFC 2327. 
+ +   o  The convention for using even/odd port pairs for RTP and RTCP in +      Section 11 was clarified to refer to destination ports.  The +      requirement to use an even/odd port pair was removed if the two +      ports are specified explicitly.  For unicast RTP sessions, +      distinct port pairs may be used for the two ends (Sections 3, 7.1 +      and 11). + +   o  A new Section 10 was added to explain the requirement for +      congestion control in applications using RTP. + +   o  In Section 8.2, the requirement that a new SSRC identifier MUST be +      chosen whenever the source transport address is changed has been +      relaxed to say that a new SSRC identifier MAY be chosen. +      Correspondingly, it was clarified that an implementation MAY + + + +Schulzrinne, et al.         Standards Track                    [Page 96] + +RFC 3550                          RTP                          July 2003 + + +      choose to keep packets from the new source address rather than the +      existing source address when an SSRC collision occurs between two +      other participants, and SHOULD do so for applications such as +      telephony in which some sources such as mobile entities may change +      addresses during the course of an RTP session. + +   o  An indentation bug in the RFC 1889 printing of the pseudo-code for +      the collision detection and resolution algorithm in Section 8.2 +      has been corrected by translating the syntax to pseudo C language, +      and the algorithm has been modified to remove the restriction that +      both RTP and RTCP must be sent from the same source port number. + +   o  The description of the padding mechanism for RTCP packets was +      clarified and it is specified that padding MUST only be applied to +      the last packet of a compound RTCP packet. 
+ +   o  In Section A.1, initialization of base_seq was corrected to be seq +      rather than seq - 1, and the text was corrected to say the bad +      sequence number plus 1 is stored.  The initialization of max_seq +      and other variables for the algorithm was separated from the text +      to make clear that this initialization must be done in addition to +      calling the init_seq() function (and a few words lost in RFC 1889 +      when processing the document from source to output form were +      restored). + +   o  Clamping of number of packets lost in Section A.3 was corrected to +      use both positive and negative limits. + +   o  The specification of "relative" NTP timestamp in the RTCP SR +      section now defines these timestamps to be based on the most +      common system-specific clock, such as system uptime, rather than +      on session elapsed time which would not be the same for multiple +      applications started on the same machine at different times. + +   Non-functional changes: + +   o  It is specified that a receiver MUST ignore packets with payload +      types it does not understand. + +   o  In Fig. 2, the floating point NTP timestamp value was corrected, +      some missing leading zeros were added in a hex number, and the UTC +      timezone was specified. + +   o  The inconsequence of NTP timestamps wrapping around in the year +      2036 is explained. + + + + + + +Schulzrinne, et al.         Standards Track                    [Page 97] + +RFC 3550                          RTP                          July 2003 + + +   o  The policy for registration of RTCP packet types and SDES types +      was clarified in a new Section 15, IANA Considerations.  The +      suggestion that experimenters register the numbers they need and +      then unregister those which prove to be unneeded has been removed +      in favor of using APP and PRIV.  Registration of profile names was +      also specified. 
+ +   o  The reference for the UTF-8 character set was changed from an +      X/Open Preliminary Specification to be RFC 2279. + +   o  The reference for RFC 1597 was updated to RFC 1918 and the +      reference for RFC 2543 was updated to RFC 3261. + +   o  The last paragraph of the introduction in RFC 1889, which +      cautioned implementors to limit deployment in the Internet, was +      removed because it was deemed no longer relevant. + +   o  A non-normative note regarding the use of RTP with Source-Specific +      Multicast (SSM) was added in Section 6. + +   o  The definition of "RTP session" in Section 3 was expanded to +      acknowledge that a single session may use multiple destination +      transport addresses (as was always the case for a translator or +      mixer) and to explain that the distinguishing feature of an RTP +      session is that each corresponds to a separate SSRC identifier +      space.  A new definition of "multimedia session" was added to +      reduce confusion about the word "session". + +   o  The meaning of "sampling instant" was explained in more detail as +      part of the definition of the timestamp field of the RTP header in +      Section 5.1. + +   o  Small clarifications of the text have been made in several places, +      some in response to questions from readers.  In particular: + +      -  In RFC 1889, the first five words of the second sentence of +         Section 2.2 were lost in processing the document from source to +         output form, but are now restored. + +      -  A definition for "RTP media type" was added in Section 3 to +         allow the explanation of multiplexing RTP sessions in Section +         5.2 to be more clear regarding the multiplexing of multiple +         media.  That section also now explains that multiplexing +         multiple sources of the same medium based on SSRC identifiers +         may be appropriate and is the norm for multicast sessions. 
+ +      -  The definition for "non-RTP means" was expanded to include +         examples of other protocols constituting non-RTP means. + + + +Schulzrinne, et al.         Standards Track                    [Page 98] + +RFC 3550                          RTP                          July 2003 + + +      -  The description of the session bandwidth parameter is expanded +         in Section 6.2, including a clarification that the control +         traffic bandwidth is in addition to the session bandwidth for +         the data traffic. + +      -  The effect of varying packet duration on the jitter calculation +         was explained in Section 6.4.4. + +      -  The method for terminating and padding a sequence of SDES items +         was clarified in Section 6.5. + +      -  IPv6 address examples were added in the description of SDES +         CNAME in Section 6.5.1, and "example.com" was used in place of +         other example domain names. + +      -  The Security section added a formal reference to IPSEC now that +         it is available, and says that the confidentiality method +         defined in this specification is primarily to codify existing +         practice.  It is RECOMMENDED that stronger encryption +         algorithms such as Triple-DES be used in place of the default +         algorithm, and noted that the SRTP profile based on AES will be +         the correct choice in the future.  A caution about the weakness +         of the RTP header as an initialization vector was added.  It +         was also noted that payload-only encryption is necessary to +         allow for header compression. + +      -  The method for partial encryption of RTCP was clarified; in +         particular, SDES CNAME is carried in only one part when the +         compound RTCP packet is split. 
+ +      -  It is clarified that only one compound RTCP packet should be +         sent per reporting interval and that if there are too many +         active sources for the reports to fit in the MTU, then a subset +         of the sources should be selected round-robin over multiple +         intervals. + +      -  A note was added in Appendix A.1 that packets may be saved +         during RTP header validation and delivered upon success. + +      -  Section 7.3 now explains that a mixer aggregating SDES packets +         uses more RTCP bandwidth due to longer packets, and a mixer +         passing through RTCP naturally sends packets at higher than the +         single source rate, but both behaviors are valid. + +      -  Section 13 clarifies that an RTP application may use multiple +         profiles but typically only one in a given session. + + + + + +Schulzrinne, et al.         Standards Track                    [Page 99] + +RFC 3550                          RTP                          July 2003 + + +      -  The terms MUST, SHOULD, MAY, etc. are used as defined in RFC +         2119. + +      -  The bibliography was divided into normative and informative +         references. + +References + +Normative References + +   [1]  Schulzrinne, H. and S. Casner, "RTP Profile for Audio and Video +        Conferences with Minimal Control", RFC 3551, July 2003. + +   [2]  Bradner, S., "Key Words for Use in RFCs to Indicate Requirement +        Levels", BCP 14, RFC 2119, March 1997. + +   [3]  Postel, J., "Internet Protocol", STD 5, RFC 791, September 1981. + +   [4]  Mills, D., "Network Time Protocol (Version 3) Specification, +        Implementation and Analysis", RFC 1305, March 1992. + +   [5]  Yergeau, F., "UTF-8, a Transformation Format of ISO 10646", RFC +        2279, January 1998. + +   [6]  Mockapetris, P., "Domain Names - Concepts and Facilities", STD +        13, RFC 1034, November 1987. 
+ +   [7]  Mockapetris, P., "Domain Names - Implementation and +        Specification", STD 13, RFC 1035, November 1987. + +   [8]  Braden, R., "Requirements for Internet Hosts - Application and +        Support", STD 3, RFC 1123, October 1989. + +   [9]  Resnick, P., "Internet Message Format", RFC 2822, April 2001. + +Informative References + +   [10] Clark, D. and D. Tennenhouse, "Architectural Considerations for +        a New Generation of Protocols," in SIGCOMM Symposium on +        Communications Architectures and Protocols, (Philadelphia, +        Pennsylvania), pp. 200--208, IEEE Computer Communications +        Review, Vol. 20(4), September 1990. + +   [11] Schulzrinne, H., "Issues in designing a transport protocol for +        audio and video conferences and other multiparticipant real-time +        applications," expired Internet Draft, October 1993. + + + + + +Schulzrinne, et al.         Standards Track                   [Page 100] + +RFC 3550                          RTP                          July 2003 + + +   [12] Comer, D., Internetworking with TCP/IP, vol. 1.  Englewood +        Cliffs, New Jersey: Prentice Hall, 1991. + +   [13] Rosenberg, J., Schulzrinne, H., Camarillo, G., Johnston, A., +        Peterson, J., Sparks, R., Handley, M. and E. Schooler, "SIP: +        Session Initiation Protocol", RFC 3261, June 2002. + +   [14] International Telecommunication Union, "Visual telephone systems +        and equipment for local area networks which provide a non- +        guaranteed quality of service", Recommendation H.323, +        Telecommunication Standardization Sector of ITU, Geneva, +        Switzerland, July 2003. + +   [15] Handley, M. and V. Jacobson, "SDP: Session Description +        Protocol", RFC 2327, April 1998. + +   [16] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming +        Protocol (RTSP)", RFC 2326, April 1998. + +   [17] Eastlake 3rd, D., Crocker, S. and J. 
Schiller, "Randomness +        Recommendations for Security", RFC 1750, December 1994. + +   [18] Bolot, J.-C., Turletti, T. and I. Wakeman, "Scalable Feedback +        Control for Multicast Video Distribution in the Internet", in +        SIGCOMM Symposium on Communications Architectures and Protocols, +        (London, England), pp. 58--67, ACM, August 1994. + +   [19] Busse, I., Deffner, B. and H. Schulzrinne, "Dynamic QoS Control +        of Multimedia Applications Based on RTP", Computer +        Communications, vol. 19, pp. 49--58, January 1996. + +   [20] Floyd, S. and V. Jacobson, "The Synchronization of Periodic +        Routing Messages", in SIGCOMM Symposium on Communications +        Architectures and Protocols (D. P. Sidhu, ed.), (San Francisco, +        California), pp. 33--44, ACM, September 1993.  Also in [34]. + +   [21] Rosenberg, J. and H. Schulzrinne, "Sampling of the Group +        Membership in RTP", RFC 2762, February 2000. + +   [22] Cadzow, J., Foundations of Digital Signal Processing and Data +        Analysis.  New York, New York: Macmillan, 1987. + +   [23] Hinden, R. and S. Deering, "Internet Protocol Version 6 (IPv6) +        Addressing Architecture", RFC 3513, April 2003. + +   [24] Rekhter, Y., Moskowitz, B., Karrenberg, D., de Groot, G. and E. +        Lear, "Address Allocation for Private Internets", RFC 1918, +        February 1996. + + + +Schulzrinne, et al.         Standards Track                   [Page 101] + +RFC 3550                          RTP                          July 2003 + + +   [25] Lear, E., Fair, E., Crocker, D. and T. Kessler, "Network 10 +        Considered Harmful (Some Practices Shouldn't be Codified)", RFC +        1627, July 1994. + +   [26] Feller, W., An Introduction to Probability Theory and its +        Applications, vol. 1.  New York, New York: John Wiley and Sons, +        third ed., 1968. + +   [27] Kent, S. and R. 
Atkinson, "Security Architecture for the +        Internet Protocol", RFC 2401, November 1998. + +   [28] Baugher, M., Blom, R., Carrara, E., McGrew, D., Naslund, M., +        Norrman, K. and D. Oran, "Secure Real-time Transport Protocol", +        Work in Progress, April 2003. + +   [29] Balenson, D., "Privacy Enhancement for Internet Electronic Mail: +        Part III", RFC 1423, February 1993. + +   [30] Voydock, V. and S. Kent, "Security Mechanisms in High-Level +        Network Protocols", ACM Computing Surveys, vol. 15, pp. 135-171, +        June 1983. + +   [31] Floyd, S., "Congestion Control Principles", BCP 41, RFC 2914, +        September 2000. + +   [32] Rivest, R., "The MD5 Message-Digest Algorithm", RFC 1321, April +        1992. + +   [33] Stubblebine, S., "Security Services for Multimedia +        Conferencing", in 16th National Computer Security Conference, +        (Baltimore, Maryland), pp. 391--395, September 1993. + +   [34] Floyd, S. and V. Jacobson, "The Synchronization of Periodic +        Routing Messages", IEEE/ACM Transactions on Networking, vol. 2, +        pp. 122--136, April 1994. + + + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                   [Page 102] + +RFC 3550                          RTP                          July 2003 + + +Authors' Addresses + +   Henning Schulzrinne +   Department of Computer Science +   Columbia University +   1214 Amsterdam Avenue +   New York, NY 10027 +   United States + +   EMail: schulzrinne@cs.columbia.edu + + +   Stephen L. Casner +   Packet Design +   3400 Hillview Avenue, Building 3 +   Palo Alto, CA 94304 +   United States + +   EMail: casner@acm.org + + +   Ron Frederick +   Blue Coat Systems Inc. 
+   650 Almanor Avenue +   Sunnyvale, CA 94085 +   United States + +   EMail: ronf@bluecoat.com + + +   Van Jacobson +   Packet Design +   3400 Hillview Avenue, Building 3 +   Palo Alto, CA 94304 +   United States + +   EMail: van@packetdesign.com + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                   [Page 103] + +RFC 3550                          RTP                          July 2003 + + +Full Copyright Statement + +   Copyright (C) The Internet Society (2003).  All Rights Reserved. + +   This document and translations of it may be copied and furnished to +   others, and derivative works that comment on or otherwise explain it +   or assist in its implementation may be prepared, copied, published +   and distributed, in whole or in part, without restriction of any +   kind, provided that the above copyright notice and this paragraph are +   included on all such copies and derivative works.  However, this +   document itself may not be modified in any way, such as by removing +   the copyright notice or references to the Internet Society or other +   Internet organizations, except as needed for the purpose of +   developing Internet standards in which case the procedures for +   copyrights defined in the Internet Standards process must be +   followed, or as required to translate it into languages other than +   English. + +   The limited permissions granted above are perpetual and will not be +   revoked by the Internet Society or its successors or assigns. + +   This document and the information contained herein is provided on an +   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING +   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING +   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION +   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Acknowledgement + +   Funding for the RFC Editor function is currently provided by the +   Internet Society. + + + + + + + + + + + + + + + + + + + +Schulzrinne, et al.         Standards Track                   [Page 104] + diff --git a/src/modules/rtp/rfc3551.txt b/src/modules/rtp/rfc3551.txt new file mode 100644 index 00000000..c43ff34d --- /dev/null +++ b/src/modules/rtp/rfc3551.txt @@ -0,0 +1,2467 @@ + + + + + + +Network Working Group                                     H. Schulzrinne +Request for Comments: 3551                           Columbia University +Obsoletes: 1890                                                S. Casner +Category: Standards Track                                  Packet Design +                                                               July 2003 + + +              RTP Profile for Audio and Video Conferences +                          with Minimal Control + +Status of this Memo + +   This document specifies an Internet standards track protocol for the +   Internet community, and requests discussion and suggestions for +   improvements.  Please refer to the current edition of the "Internet +   Official Protocol Standards" (STD 1) for the standardization state +   and status of this protocol.  Distribution of this memo is unlimited. + +Copyright Notice + +   Copyright (C) The Internet Society (2003).  All Rights Reserved. + +Abstract + +   This document describes a profile called "RTP/AVP" for the use of the +   real-time transport protocol (RTP), version 2, and the associated +   control protocol, RTCP, within audio and video multiparticipant +   conferences with minimal control.  It provides interpretations of +   generic fields within the RTP specification suitable for audio and +   video conferences.  In particular, this document defines a set of +   default mappings from payload type numbers to encodings. + +   This document also describes how audio and video data may be carried +   within RTP.  
It defines a set of standard encodings and their names +   when used within RTP.  The descriptions provide pointers to reference +   implementations and the detailed standards.  This document is meant +   as an aid for implementors of audio, video and other real-time +   multimedia applications. + +   This memorandum obsoletes RFC 1890.  It is mostly backwards- +   compatible except for functions removed because two interoperable +   implementations were not found.  The additions to RFC 1890 codify +   existing practice in the use of payload formats under this profile +   and include new payload formats defined since RFC 1890 was published. + + + + + + + +Schulzrinne & Casner        Standards Track                     [Page 1] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +Table of Contents + +   1.  Introduction .................................................  3 +       1.1  Terminology .............................................  3 +   2.  RTP and RTCP Packet Forms and Protocol Behavior ..............  4 +   3.  Registering Additional Encodings .............................  6 +   4.  Audio ........................................................  8 +       4.1  Encoding-Independent Rules ..............................  8 +       4.2  Operating Recommendations ...............................  9 +       4.3  Guidelines for Sample-Based Audio Encodings ............. 10 +       4.4  Guidelines for Frame-Based Audio Encodings .............. 11 +       4.5  Audio Encodings ......................................... 12 +            4.5.1   DVI4 ............................................ 13 +            4.5.2   G722 ............................................ 14 +            4.5.3   G723 ............................................ 14 +            4.5.4   G726-40, G726-32, G726-24, and G726-16 .......... 18 +            4.5.5   G728 ............................................ 
19 +            4.5.6   G729 ............................................ 20 +            4.5.7   G729D and G729E ................................. 22 +            4.5.8   GSM ............................................. 24 +            4.5.9   GSM-EFR ......................................... 27 +            4.5.10  L8 .............................................. 27 +            4.5.11  L16 ............................................. 27 +            4.5.12  LPC ............................................. 27 +            4.5.13  MPA ............................................. 28 +            4.5.14  PCMA and PCMU ................................... 28 +            4.5.15  QCELP ........................................... 28 +            4.5.16  RED ............................................. 29 +            4.5.17  VDVI ............................................ 29 +   5.  Video ........................................................ 30 +       5.1  CelB .................................................... 30 +       5.2  JPEG .................................................... 30 +       5.3  H261 .................................................... 30 +       5.4  H263 .................................................... 31 +       5.5  H263-1998 ............................................... 31 +       5.6  MPV ..................................................... 31 +       5.7  MP2T .................................................... 31 +       5.8  nv ...................................................... 32 +   6.  Payload Type Definitions ..................................... 32 +   7.  RTP over TCP and Similar Byte Stream Protocols ............... 34 +   8.  Port Assignment .............................................. 34 +   9.  Changes from RFC 1890 ........................................ 35 +   10. Security Considerations ...................................... 38 +   11. IANA Considerations .......................................... 
39 +   12. References ................................................... 39 +       12.1 Normative References .................................... 39 +       12.2 Informative References .................................. 39 +   13. Current Locations of Related Resources ....................... 41 + + + +Schulzrinne & Casner        Standards Track                     [Page 2] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   14. Acknowledgments .............................................. 42 +   15. Intellectual Property Rights Statement ....................... 43 +   16. Authors' Addresses ........................................... 43 +   17. Full Copyright Statement ..................................... 44 + +1. Introduction + +   This profile defines aspects of RTP left unspecified in the RTP +   Version 2 protocol definition (RFC 3550) [1].  This profile is +   intended for the use within audio and video conferences with minimal +   session control.  In particular, no support for the negotiation of +   parameters or membership control is provided.  The profile is +   expected to be useful in sessions where no negotiation or membership +   control are used (e.g., using the static payload types and the +   membership indications provided by RTCP), but this profile may also +   be useful in conjunction with a higher-level control protocol. + +   Use of this profile may be implicit in the use of the appropriate +   applications; there may be no explicit indication by port number, +   protocol identifier or the like.  Applications such as session +   directories may use the name for this profile specified in Section +   11. + +   Other profiles may make different choices for the items specified +   here. + +   This document also defines a set of encodings and payload formats for +   audio and video.  
These payload format descriptions are included here +   only as a matter of convenience since they are too small to warrant +   separate documents.  Use of these payload formats is NOT REQUIRED to +   use this profile.  Only the binding of some of the payload formats to +   static payload type numbers in Tables 4 and 5 is normative. + +1.1 Terminology + +   The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", +   "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this +   document are to be interpreted as described in RFC 2119 [2] and +   indicate requirement levels for implementations compliant with this +   RTP profile. + +   This document defines the term media type as dividing encodings of +   audio and video content into three classes: audio, video and +   audio/video (interleaved). + + + + + + + +Schulzrinne & Casner        Standards Track                     [Page 3] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +2. RTP and RTCP Packet Forms and Protocol Behavior + +   The section "RTP Profiles and Payload Format Specifications" of RFC +   3550 enumerates a number of items that can be specified or modified +   in a profile.  This section addresses these items.  Generally, this +   profile follows the default and/or recommended aspects of the RTP +   specification. + +   RTP data header: The standard format of the fixed RTP data +      header is used (one marker bit). + +   Payload types: Static payload types are defined in Section 6. + +   RTP data header additions: No additional fixed fields are +      appended to the RTP data header. + +   RTP data header extensions: No RTP header extensions are +      defined, but applications operating under this profile MAY use +      such extensions.  Thus, applications SHOULD NOT assume that the +      RTP header X bit is always zero and SHOULD be prepared to ignore +      the header extension.  
If a header extension is defined in the +      future, that definition MUST specify the contents of the first 16 +      bits in such a way that multiple different extensions can be +      identified. + +   RTCP packet types: No additional RTCP packet types are defined +      by this profile specification. + +   RTCP report interval: The suggested constants are to be used for +      the RTCP report interval calculation.  Sessions operating under +      this profile MAY specify a separate parameter for the RTCP traffic +      bandwidth rather than using the default fraction of the session +      bandwidth.  The RTCP traffic bandwidth MAY be divided into two +      separate session parameters for those participants which are +      active data senders and those which are not.  Following the +      recommendation in the RTP specification [1] that 1/4 of the RTCP +      bandwidth be dedicated to data senders, the RECOMMENDED default +      values for these two parameters would be 1.25% and 3.75%, +      respectively.  For a particular session, the RTCP bandwidth for +      non-data-senders MAY be set to zero when operating on +      unidirectional links or for sessions that don't require feedback +      on the quality of reception.  The RTCP bandwidth for data senders +      SHOULD be kept non-zero so that sender reports can still be sent +      for inter-media synchronization and to identify the source by +      CNAME.  The means by which the one or two session parameters for +      RTCP bandwidth are specified is beyond the scope of this memo. + + + + + +Schulzrinne & Casner        Standards Track                     [Page 4] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   SR/RR extension: No extension section is defined for the RTCP SR +      or RR packet. + +   SDES use: Applications MAY use any of the SDES items described +      in the RTP specification.  
While CNAME information MUST be sent +      every reporting interval, other items SHOULD only be sent every +      third reporting interval, with NAME sent seven out of eight times +      within that slot and the remaining SDES items cyclically taking up +      the eighth slot, as defined in Section 6.2.2 of the RTP +      specification.  In other words, NAME is sent in RTCP packets 1, 4, +      7, 10, 13, 16, 19, while, say, EMAIL is used in RTCP packet 22. + +   Security: The RTP default security services are also the default +      under this profile. + +   String-to-key mapping: No mapping is specified by this profile. + +   Congestion: RTP and this profile may be used in the context of +      enhanced network service, for example, through Integrated Services +      (RFC 1633) [4] or Differentiated Services (RFC 2475) [5], or they +      may be used with best effort service. + +      If enhanced service is being used, RTP receivers SHOULD monitor +      packet loss to ensure that the service that was requested is +      actually being delivered.  If it is not, then they SHOULD assume +      that they are receiving best-effort service and behave +      accordingly. + +      If best-effort service is being used, RTP receivers SHOULD monitor +      packet loss to ensure that the packet loss rate is within +      acceptable parameters.  Packet loss is considered acceptable if a +      TCP flow across the same network path and experiencing the same +      network conditions would achieve an average throughput, measured +      on a reasonable timescale, that is not less than the RTP flow is +      achieving.  This condition can be satisfied by implementing +      congestion control mechanisms to adapt the transmission rate (or +      the number of layers subscribed for a layered multicast session), +      or by arranging for a receiver to leave the session if the loss +      rate is unacceptably high. 
+ +      The comparison to TCP cannot be specified exactly, but is intended +      as an "order-of-magnitude" comparison in timescale and throughput. +      The timescale on which TCP throughput is measured is the round- +      trip time of the connection.  In essence, this requirement states +      that it is not acceptable to deploy an application (using RTP or +      any other transport protocol) on the best-effort Internet which +      consumes bandwidth arbitrarily and does not compete fairly with +      TCP within an order of magnitude. + + + +Schulzrinne & Casner        Standards Track                     [Page 5] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   Underlying protocol: The profile specifies the use of RTP over +      unicast and multicast UDP as well as TCP.  (This does not preclude +      the use of these definitions when RTP is carried by other lower- +      layer protocols.) + +   Transport mapping: The standard mapping of RTP and RTCP to +      transport-level addresses is used. + +   Encapsulation: This profile leaves to applications the +      specification of RTP encapsulation in protocols other than UDP. + +3.  Registering Additional Encodings + +   This profile lists a set of encodings, each of which is comprised of +   a particular media data compression or representation plus a payload +   format for encapsulation within RTP.  Some of those payload formats +   are specified here, while others are specified in separate RFCs.  It +   is expected that additional encodings beyond the set listed here will +   be created in the future and specified in additional payload format +   RFCs. + +   This profile also assigns to each encoding a short name which MAY be +   used by higher-level control protocols, such as the Session +   Description Protocol (SDP), RFC 2327 [6], to identify encodings +   selected for a particular RTP session. 
+ +   In some contexts it may be useful to refer to these encodings in the +   form of a MIME content-type.  To facilitate this, RFC 3555 [7] +   provides registrations for all of the encoding names listed here as +   MIME subtype names under the "audio" and "video" MIME types through +   the MIME registration procedure as specified in RFC 2048 [8]. + +   Any additional encodings specified for use under this profile (or +   others) may also be assigned names registered as MIME subtypes with +   the Internet Assigned Numbers Authority (IANA).  This registry +   provides a means to ensure that the names assigned to the additional +   encodings are kept unique.  RFC 3555 specifies the information that +   is required for the registration of RTP encodings. + +   In addition to assigning names to encodings, this profile also +   assigns static RTP payload type numbers to some of them.  However, +   the payload type number space is relatively small and cannot +   accommodate assignments for all existing and future encodings. +   During the early stages of RTP development, it was necessary to use +   statically assigned payload types because no other mechanism had been +   specified to bind encodings to payload types.  It was anticipated +   that non-RTP means beyond the scope of this memo (such as directory +   services or invitation protocols) would be specified to establish a + + + +Schulzrinne & Casner        Standards Track                     [Page 6] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   dynamic mapping between a payload type and an encoding.  Now, +   mechanisms for defining dynamic payload type bindings have been +   specified in the Session Description Protocol (SDP) and in other +   protocols such as ITU-T Recommendation H.323/H.245.  
These mechanisms +   associate the registered name of the encoding/payload format, along +   with any additional required parameters, such as the RTP timestamp +   clock rate and number of channels, with a payload type number.  This +   association is effective only for the duration of the RTP session in +   which the dynamic payload type binding is made.  This association +   applies only to the RTP session for which it is made, thus the +   numbers can be re-used for different encodings in different sessions +   so the number space limitation is avoided. + +   This profile reserves payload type numbers in the range 96-127 +   exclusively for dynamic assignment.  Applications SHOULD first use +   values in this range for dynamic payload types.  Those applications +   which need to define more than 32 dynamic payload types MAY bind +   codes below 96, in which case it is RECOMMENDED that unassigned +   payload type numbers be used first.  However, the statically assigned +   payload types are default bindings and MAY be dynamically bound to +   new encodings if needed.  Redefining payload types below 96 may cause +   incorrect operation if an attempt is made to join a session without +   obtaining session description information that defines the dynamic +   payload types. + +   Dynamic payload types SHOULD NOT be used without a well-defined +   mechanism to indicate the mapping.  Systems that expect to +   interoperate with others operating under this profile SHOULD NOT make +   their own assignments of proprietary encodings to particular, fixed +   payload types. + +   This specification establishes the policy that no additional static +   payload types will be assigned beyond the ones defined in this +   document.  Establishing this policy avoids the problem of trying to +   create a set of criteria for accepting static assignments and +   encourages the implementation and deployment of the dynamic payload +   type mechanisms. 
+ +   The final set of static payload type assignments is provided in +   Tables 4 and 5. + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                     [Page 7] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.  Audio + +4.1  Encoding-Independent Rules + +   Since the ability to suppress silence is one of the primary +   motivations for using packets to transmit voice, the RTP header +   carries both a sequence number and a timestamp to allow a receiver to +   distinguish between lost packets and periods of time when no data was +   transmitted.  Discontiguous transmission (silence suppression) MAY be +   used with any audio payload format.  Receivers MUST assume that +   senders may suppress silence unless this is restricted by signaling +   specified elsewhere.  (Even if the transmitter does not suppress +   silence, the receiver should be prepared to handle periods when no +   data is present since packets may be lost.) + +   Some payload formats (see Sections 4.5.3 and 4.5.6) define a "silence +   insertion descriptor" or "comfort noise" frame to specify parameters +   for artificial noise that may be generated during a period of silence +   to approximate the background noise at the source.  For other payload +   formats, a generic Comfort Noise (CN) payload format is specified in +   RFC 3389 [9].  When the CN payload format is used with another +   payload format, different values in the RTP payload type field +   distinguish comfort-noise packets from those of the selected payload +   format. + +   For applications which send either no packets or occasional comfort- +   noise packets during silence, the first packet of a talkspurt, that +   is, the first packet after a silence period during which packets have +   not been transmitted contiguously, SHOULD be distinguished by setting +   the marker bit in the RTP data header to one.  The marker bit in all +   other packets is zero.  
The beginning of a talkspurt MAY be used to +   adjust the playout delay to reflect changing network delays. +   Applications without silence suppression MUST set the marker bit to +   zero. + +   The RTP clock rate used for generating the RTP timestamp is +   independent of the number of channels and the encoding; it usually +   equals the number of sampling periods per second.  For N-channel +   encodings, each sampling period (say, 1/8,000 of a second) generates +   N samples.  (This terminology is standard, but somewhat confusing, as +   the total number of samples generated per second is then the sampling +   rate times the channel count.) + +   If multiple audio channels are used, channels are numbered left-to- +   right, starting at one.  In RTP audio packets, information from +   lower-numbered channels precedes that from higher-numbered channels. + + + + + +Schulzrinne & Casner        Standards Track                     [Page 8] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   For more than two channels, the convention followed by the AIFF-C +   audio interchange format SHOULD be followed [3], using the following +   notation, unless some other convention is specified for a particular +   encoding or payload format: + +      l  left +      r  right +      c  center +      S  surround +      F  front +      R  rear + +      channels  description  channel +                                1     2   3   4   5   6 +      _________________________________________________ +      2         stereo          l     r +      3                         l     r   c +      4                         l     c   r   S +      5                        Fl     Fr  Fc  Sl  Sr +      6                         l     lc  c   r   rc  S + +         Note: RFC 1890 defined two conventions for the ordering of four +         audio channels.  Since the ordering is indicated implicitly by +         the number of channels, this was ambiguous.  
In this revision, +         the order described as "quadrophonic" has been eliminated to +         remove the ambiguity.  This choice was based on the observation +         that quadrophonic consumer audio format did not become popular +         whereas surround-sound subsequently has. + +   Samples for all channels belonging to a single sampling instant MUST +   be within the same packet.  The interleaving of samples from +   different channels depends on the encoding.  General guidelines are +   given in Section 4.3 and 4.4. + +   The sampling frequency SHOULD be drawn from the set:  8,000, 11,025, +   16,000, 22,050, 24,000, 32,000, 44,100 and 48,000 Hz.  (Older Apple +   Macintosh computers had a native sample rate of 22,254.54 Hz, which +   can be converted to 22,050 with acceptable quality by dropping 4 +   samples in a 20 ms frame.)  However, most audio encodings are defined +   for a more restricted set of sampling frequencies.  Receivers SHOULD +   be prepared to accept multi-channel audio, but MAY choose to only +   play a single channel. + +4.2  Operating Recommendations + +   The following recommendations are default operating parameters. +   Applications SHOULD be prepared to handle other values.  The ranges +   given are meant to give guidance to application writers, allowing a + + + +Schulzrinne & Casner        Standards Track                     [Page 9] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   set of applications conforming to these guidelines to interoperate +   without additional negotiation.  These guidelines are not intended to +   restrict operating parameters for applications that can negotiate a +   set of interoperable parameters, e.g., through a conference control +   protocol. + +   For packetized audio, the default packetization interval SHOULD have +   a duration of 20 ms or one frame, whichever is longer, unless +   otherwise noted in Table 1 (column "ms/packet").  
The packetization +   interval determines the minimum end-to-end delay; longer packets +   introduce less header overhead but higher delay and make packet loss +   more noticeable.  For non-interactive applications such as lectures +   or for links with severe bandwidth constraints, a higher +   packetization delay MAY be used.  A receiver SHOULD accept packets +   representing between 0 and 200 ms of audio data.  (For framed audio +   encodings, a receiver SHOULD accept packets with a number of frames +   equal to 200 ms divided by the frame duration, rounded up.)  This +   restriction allows reasonable buffer sizing for the receiver. + +4.3  Guidelines for Sample-Based Audio Encodings + +   In sample-based encodings, each audio sample is represented by a +   fixed number of bits.  Within the compressed audio data, codes for +   individual samples may span octet boundaries.  An RTP audio packet +   may contain any number of audio samples, subject to the constraint +   that the number of bits per sample times the number of samples per +   packet yields an integral octet count.  Fractional encodings produce +   less than one octet per sample. + +   The duration of an audio packet is determined by the number of +   samples in the packet. + +   For sample-based encodings producing one or more octets per sample, +   samples from different channels sampled at the same sampling instant +   SHOULD be packed in consecutive octets.  For example, for a two- +   channel encoding, the octet sequence is (left channel, first sample), +   (right channel, first sample), (left channel, second sample), (right +   channel, second sample), ....  For multi-octet encodings, octets +   SHOULD be transmitted in network byte order (i.e., most significant +   octet first). + +   The packing of sample-based encodings producing less than one octet +   per sample is encoding-specific. 
+ +   The RTP timestamp reflects the instant at which the first sample in +   the packet was sampled, that is, the oldest information in the +   packet. + + + + +Schulzrinne & Casner        Standards Track                    [Page 10] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.4  Guidelines for Frame-Based Audio Encodings + +   Frame-based encodings encode a fixed-length block of audio into +   another block of compressed data, typically also of fixed length. +   For frame-based encodings, the sender MAY choose to combine several +   such frames into a single RTP packet.  The receiver can tell the +   number of frames contained in an RTP packet, if all the frames have +   the same length, by dividing the RTP payload length by the audio +   frame size which is defined as part of the encoding.  This does not +   work when carrying frames of different sizes unless the frame sizes +   are relatively prime.  If not, the frames MUST indicate their size. + +   For frame-based codecs, the channel order is defined for the whole +   block.  That is, for two-channel audio, right and left samples SHOULD +   be coded independently, with the encoded frame for the left channel +   preceding that for the right channel. + +   All frame-oriented audio codecs SHOULD be able to encode and decode +   several consecutive frames within a single packet.  Since the frame +   size for the frame-oriented codecs is given, there is no need to use +   a separate designation for the same encoding, but with different +   number of frames per packet. + +   RTP packets SHALL contain a whole number of frames, with frames +   inserted according to age within a packet, so that the oldest frame +   (to be played first) occurs immediately after the RTP packet header. +   The RTP timestamp reflects the instant at which the first sample in +   the first frame was sampled, that is, the oldest information in the +   packet. 
+ + + + + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 11] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5 Audio Encodings + +   name of                              sampling              default +   encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet +   __________________________________________________________________ +   DVI4      sample        4                var.                   20 +   G722      sample        8              16,000                   20 +   G723      frame         N/A             8,000        30         30 +   G726-40   sample        5               8,000                   20 +   G726-32   sample        4               8,000                   20 +   G726-24   sample        3               8,000                   20 +   G726-16   sample        2               8,000                   20 +   G728      frame         N/A             8,000       2.5         20 +   G729      frame         N/A             8,000        10         20 +   G729D     frame         N/A             8,000        10         20 +   G729E     frame         N/A             8,000        10         20 +   GSM       frame         N/A             8,000        20         20 +   GSM-EFR   frame         N/A             8,000        20         20 +   L8        sample        8                var.                   20 +   L16       sample        16               var.                   20 +   LPC       frame         N/A             8,000        20         20 +   MPA       frame         N/A              var.      var. +   PCMA      sample        8                var.                   20 +   PCMU      sample        8                var.                   20 +   QCELP     frame         N/A             8,000        20         20 +   VDVI      sample        var.             var.                   
20 + +   Table 1: Properties of Audio Encodings (N/A: not applicable; var.: +            variable) + +   The characteristics of the audio encodings described in this document +   are shown in Table 1; they are listed in order of their payload type +   in Table 4.  While most audio codecs are only specified for a fixed +   sampling rate, some sample-based algorithms (indicated by an entry of +   "var." in the sampling rate column of Table 1) may be used with +   different sampling rates, resulting in different coded bit rates. +   When used with a sampling rate other than that for which a static +   payload type is defined, non-RTP means beyond the scope of this memo +   MUST be used to define a dynamic payload type and MUST indicate the +   selected RTP timestamp clock rate, which is usually the same as the +   sampling rate for audio. + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 12] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.1 DVI4 + +   DVI4 uses an adaptive delta pulse code modulation (ADPCM) encoding +   scheme that was specified by the Interactive Multimedia Association +   (IMA) as the "IMA ADPCM wave type".  However, the encoding defined +   here as DVI4 differs in three respects from the IMA specification: + +   o  The RTP DVI4 header contains the predicted value rather than the +      first sample value contained in the IMA ADPCM block header. + +   o  IMA ADPCM blocks contain an odd number of samples, since the first +      sample of a block is contained just in the header (uncompressed), +      followed by an even number of compressed samples.  DVI4 has an +      even number of compressed samples only, using the `predict' word +      from the header to decode the first sample. + +   o  For DVI4, the 4-bit samples are packed with the first sample in +      the four most significant bits and the second sample in the four +      least significant bits.  
In the IMA ADPCM codec, the samples are +      packed in the opposite order. + +   Each packet contains a single DVI block.  This profile only defines +   the 4-bit-per-sample version, while IMA also specified a 3-bit-per- +   sample encoding. + +   The "header" word for each channel has the following structure: + +      int16  predict;  /* predicted value of first sample +                          from the previous block (L16 format) */ +      u_int8 index;    /* current index into stepsize table */ +      u_int8 reserved; /* set to zero by sender, ignored by receiver */ + +   Each octet following the header contains two 4-bit samples, thus the +   number of samples per packet MUST be even because there is no means +   to indicate a partially filled last octet. + +   Packing of samples for multiple channels is for further study. + +   The IMA ADPCM algorithm was described in the document IMA Recommended +   Practices for Enhancing Digital Audio Compatibility in Multimedia +   Systems (version 3.0).  However, the Interactive Multimedia +   Association ceased operations in 1997.  Resources for an archived +   copy of that document and a software implementation of the RTP DVI4 +   encoding are listed in Section 13. + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 13] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.2 G722 + +   G722 is specified in ITU-T Recommendation G.722, "7 kHz audio-coding +   within 64 kbit/s".  The G.722 encoder produces a stream of octets, +   each of which SHALL be octet-aligned in an RTP packet.  The first bit +   transmitted in the G.722 octet, which is the most significant bit of +   the higher sub-band sample, SHALL correspond to the most significant +   bit of the octet in the RTP packet. 
+ +   Even though the actual sampling rate for G.722 audio is 16,000 Hz, +   the RTP clock rate for the G722 payload format is 8,000 Hz because +   that value was erroneously assigned in RFC 1890 and must remain +   unchanged for backward compatibility.  The octet rate or sample-pair +   rate is 8,000 Hz. + +4.5.3 G723 + +   G723 is specified in ITU Recommendation G.723.1, "Dual-rate speech +   coder for multimedia communications transmitting at 5.3 and 6.3 +   kbit/s".  The G.723.1 5.3/6.3 kbit/s codec was defined by the ITU-T +   as a mandatory codec for ITU-T H.324 GSTN videophone terminal +   applications.  The algorithm has a floating point specification in +   Annex B to G.723.1, a silence compression algorithm in Annex A to +   G.723.1 and a scalable channel coding scheme for wireless +   applications in G.723.1 Annex C. + +   This Recommendation specifies a coded representation that can be used +   for compressing the speech signal component of multi-media services +   at a very low bit rate.  Audio is encoded in 30 ms frames, with an +   additional delay of 7.5 ms due to look-ahead.  A G.723.1 frame can be +   one of three sizes:  24 octets (6.3 kb/s frame), 20 octets (5.3 kb/s +   frame), or 4 octets.  These 4-octet frames are called SID frames +   (Silence Insertion Descriptor) and are used to specify comfort noise +   parameters.  There is no restriction on how 4, 20, and 24 octet +   frames are intermixed.  
The least significant two bits of the first +   octet in the frame determine the frame size and codec type: + +         bits  content                      octets/frame +         00    high-rate speech (6.3 kb/s)            24 +         01    low-rate speech  (5.3 kb/s)            20 +         10    SID frame                               4 +         11    reserved + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 14] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   It is possible to switch between the two rates at any 30 ms frame +   boundary.  Both (5.3 kb/s and 6.3 kb/s) rates are a mandatory part of +   the encoder and decoder.  Receivers MUST accept both data rates and +   MUST accept SID frames unless restriction of these capabilities has +   been signaled.  The MIME registration for G723 in RFC 3555 [7] +   specifies parameters that MAY be used with MIME or SDP to restrict to +   a single data rate or to restrict the use of SID frames.  This coder +   was optimized to represent speech with near-toll quality at the above +   rates using a limited amount of complexity. + +   The packing of the encoded bit stream into octets and the +   transmission order of the octets is specified in Rec. G.723.1 and is +   the same as that produced by the G.723 C code reference +   implementation.  For the 6.3 kb/s data rate, this packing is +   illustrated as follows, where the header (HDR) bits are always "0 0" +   as shown in Fig. 1 to indicate operation at 6.3 kb/s, and the Z bit +   is always set to zero.  The diagrams show the bit packing in "network +   byte order", also known as big-endian order.  The bits of each 32-bit +   word are numbered 0 to 31, with the most significant bit on the left +   and numbered 0.  The octets (bytes) of each word are transmitted most +   significant octet first.  
The bits of each data field are numbered in +   the order of the bit stream representation of the encoding (least +   significant bit first).  The vertical bars indicate the boundaries +   between field fragments. + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 15] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    LPC    |HDR|      LPC      |      LPC      |    ACL0   |LPC| +   |           |   |               |               |           |   | +   |0 0 0 0 0 0|0 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| +   |5 4 3 2 1 0|   |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |  ACL2   |ACL|A| GAIN0 |ACL|ACL|    GAIN0      |    GAIN1      | +   |         | 1 |C|       | 3 | 2 |               |               | +   |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| +   |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   | GAIN2 | GAIN1 |     GAIN2     |     GAIN3     | GRID  | GAIN3 | +   |       |       |               |               |       |       | +   |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0| +   |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |   MSBPOS    |Z|POS|  MSBPOS   |     POS0      |POS|   POS0    | +   |             | | 0 |           |               | 1 |           | +   |0 0 0 0 0 0 0|0|0 0|1 1 1 0 0 0|0 0 0 0 0 0 0 0|0 0|1 1 1 1 1 1| +   |6 5 4 3 2 1 0| |1 0|2 1 0 9 8 7|9 8 7 6 5 4 3 2|1 0|5 4 3 2 1 0| +   
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     POS1      | POS2  | POS1  |     POS2      | POS3  | POS2  | +   |               |       |       |               |       |       | +   |0 0 0 0 0 0 0 0|0 0 0 0|1 1 1 1|1 1 0 0 0 0 0 0|0 0 0 0|1 1 1 1| +   |9 8 7 6 5 4 3 2|3 2 1 0|3 2 1 0|1 0 9 8 7 6 5 4|3 2 1 0|5 4 3 2| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     POS3      |   PSIG0   |POS|PSIG2|  PSIG1  |  PSIG3  |PSIG2| +   |               |           | 3 |     |         |         |     | +   |1 1 0 0 0 0 0 0|0 0 0 0 0 0|1 1|0 0 0|0 0 0 0 0|0 0 0 0 0|0 0 0| +   |1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|2 1 0|4 3 2 1 0|4 3 2 1 0|5 4 3| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                  Figure 1: G.723 (6.3 kb/s) bit packing + +   For the 5.3 kb/s data rate, the header (HDR) bits are always "0 1", +   as shown in Fig. 2, to indicate operation at 5.3 kb/s. + + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 16] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    LPC    |HDR|      LPC      |      LPC      |   ACL0    |LPC| +   |           |   |               |               |           |   | +   |0 0 0 0 0 0|0 1|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| +   |5 4 3 2 1 0|   |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |  ACL2   |ACL|A| GAIN0 |ACL|ACL|     GAIN0     |     GAIN1     | +   |         | 1 |C|       | 3 | 2 |               |               | +   |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| +   |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| +   
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   | GAIN2 | GAIN1 |     GAIN2     |    GAIN3      | GRID  | GAIN3 | +   |       |       |               |               |       |       | +   |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0| +   |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|4 3 2 1|1 0 9 8| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |     POS0      | POS1  | POS0  |     POS1      |     POS2      | +   |               |       |       |               |               | +   |0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| +   |7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   | POS3  | POS2  |     POS3      | PSIG1 | PSIG0 | PSIG3 | PSIG2 | +   |       |       |               |       |       |       |       | +   |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0| +   |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|3 2 1 0|3 2 1 0|3 2 1 0|3 2 1 0| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                  Figure 2: G.723 (5.3 kb/s) bit packing + +   The packing of G.723.1 SID (silence) frames, which are indicated by +   the header (HDR) bits having the pattern "1 0", is depicted in Fig. +   3. 
+ +    0                   1                   2                   3 +    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +   |    LPC    |HDR|      LPC      |      LPC      |   GAIN    |LPC| +   |           |   |               |               |           |   | +   |0 0 0 0 0 0|1 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| +   |5 4 3 2 1 0|   |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| +   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                   Figure 3: G.723 SID mode bit packing + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 17] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.4  G726-40, G726-32, G726-24, and G726-16 + +   ITU-T Recommendation G.726 describes, among others, the algorithm +   recommended for conversion of a single 64 kbit/s A-law or mu-law PCM +   channel encoded at 8,000 samples/sec to and from a 40, 32, 24, or 16 +   kbit/s channel.  The conversion is applied to the PCM stream using an +   Adaptive Differential Pulse Code Modulation (ADPCM) transcoding +   technique.  The ADPCM representation consists of a series of +   codewords with a one-to-one correspondence to the samples in the PCM +   stream.  The G726 data rates of 40, 32, 24, and 16 kbit/s have +   codewords of 5, 4, 3, and 2 bits, respectively. + +   The 16 and 24 kbit/s encodings do not provide toll quality speech. +   They are designed for use in overloaded Digital Circuit +   Multiplication Equipment (DCME).  ITU-T G.726 recommends that the 16 +   and 24 kbit/s encodings should be alternated with higher data rate +   encodings to provide an average sample size of between 3.5 and 3.7 +   bits per sample. + +   The encodings of G.726 are here denoted as G726-40, G726-32, G726-24, +   and G726-16.  
Prior to 1990, G721 described the 32 kbit/s ADPCM +   encoding, and G723 described the 40, 32, and 16 kbit/s encodings. +   Thus, G726-32 designates the same algorithm as G721 in RFC 1890. + +   A stream of G726 codewords contains no information on the encoding +   being used, therefore transitions between G726 encoding types are not +   permitted within a sequence of packed codewords.  Applications MUST +   determine the encoding type of packed codewords from the RTP payload +   identifier. + +   No payload-specific header information SHALL be included as part of +   the audio data.  A stream of G726 codewords MUST be packed into +   octets as follows:  the first codeword is placed into the first octet +   such that the least significant bit of the codeword aligns with the +   least significant bit in the octet, the second codeword is then +   packed so that its least significant bit coincides with the least +   significant unoccupied bit in the octet.  When a complete codeword +   cannot be placed into an octet, the bits overlapping the octet +   boundary are placed into the least significant bits of the next +   octet.  Packing MUST end with a completely packed final octet.  The +   number of codewords packed will therefore be a multiple of 8, 2, 8, +   and 4 for G726-40, G726-32, G726-24, and G726-16, respectively.  An +   example of the packing scheme for G726-32 codewords is as shown, +   where bit 7 is the least significant bit of the first octet, and bit +   A3 is the least significant bit of the first codeword: + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 18] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +          0                   1 +          0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- +         |B B B B|A A A A|D D D D|C C C C| ... 
+         |0 1 2 3|0 1 2 3|0 1 2 3|0 1 2 3| +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + +   An example of the packing scheme for G726-24 codewords follows, where +   again bit 7 is the least significant bit of the first octet, and bit +   A2 is the least significant bit of the first codeword: + +          0                   1                   2 +          0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- +         |C C|B B B|A A A|F|E E E|D D D|C|H H H|G G G|F F| ... +         |1 2|0 1 2|0 1 2|2|0 1 2|0 1 2|0|0 1 2|0 1 2|0 1| +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + +   Note that the "little-endian" direction in which samples are packed +   into octets in the G726-16, -24, -32 and -40 payload formats +   specified here is consistent with ITU-T Recommendation X.420, but is +   the opposite of what is specified in ITU-T Recommendation I.366.2 +   Annex E for ATM AAL2 transport.  A second set of RTP payload formats +   matching the packetization of I.366.2 Annex E and identified by MIME +   subtypes AAL2-G726-16, -24, -32 and -40 will be specified in a +   separate document. + +4.5.5 G728 + +   G728 is specified in ITU-T Recommendation G.728, "Coding of speech at +   16 kbit/s using low-delay code excited linear prediction". + +   A G.728 encoder translates 5 consecutive audio samples into a 10-bit +   codebook index, resulting in a bit rate of 16 kb/s for audio sampled +   at 8,000 samples per second.  The group of five consecutive samples +   is called a vector.  Four consecutive vectors, labeled V1 to V4 +   (where V1 is to be played first by the receiver), build one G.728 +   frame.  The four vectors of 40 bits are packed into 5 octets, labeled +   B1 through B5.  B1 SHALL be placed first in the RTP packet. + +   Referring to the figure below, the principle for bit order is +   "maintenance of bit significance".  
Bits from an older vector are +   more significant than bits from newer vectors.  The MSB of the frame +   goes to the MSB of B1 and the LSB of the frame goes to LSB of B5. + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 19] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +                   1         2         3        3 +         0         0         0         0        9 +         ++++++++++++++++++++++++++++++++++++++++ +         <---V1---><---V2---><---V3---><---V4---> vectors +         <--B1--><--B2--><--B3--><--B4--><--B5--> octets +         <------------- frame 1 ----------------> + +   In particular, B1 contains the eight most significant bits of V1, +   with the MSB of V1 being the MSB of B1.  B2 contains the two least +   significant bits of V1, the more significant of the two in its MSB, +   and the six most significant bits of V2.  B1 SHALL be placed first in +   the RTP packet and B5 last. + +4.5.6 G729 + +   G729 is specified in ITU-T Recommendation G.729, "Coding of speech at +   8 kbit/s using conjugate structure-algebraic code excited linear +   prediction (CS-ACELP)".  A reduced-complexity version of the G.729 +   algorithm is specified in Annex A to Rec. G.729.  The speech coding +   algorithms in the main body of G.729 and in G.729 Annex A are fully +   interoperable with each other, so there is no need to further +   distinguish between them.  An implementation that signals or accepts +   use of G729 payload format may implement either G.729 or G.729A +   unless restricted by additional signaling specified elsewhere related +   specifically to the encoding rather than the payload format.  The +   G.729 and G.729 Annex A codecs were optimized to represent speech +   with high quality, where G.729 Annex A trades some speech quality for +   an approximate 50% complexity reduction [10].  See the next Section +   (4.5.7) for other data rates added in later G.729 Annexes.  
For all +   data rates, the sampling frequency (and RTP timestamp clock rate) is +   8,000 Hz. + +   A voice activity detector (VAD) and comfort noise generator (CNG) +   algorithm in Annex B of G.729 is RECOMMENDED for digital simultaneous +   voice and data applications and can be used in conjunction with G.729 +   or G.729 Annex A.  A G.729 or G.729 Annex A frame contains 10 octets, +   while the G.729 Annex B comfort noise frame occupies 2 octets. +   Receivers MUST accept comfort noise frames if restriction of their +   use has not been signaled.  The MIME registration for G729 in RFC +   3555 [7] specifies a parameter that MAY be used with MIME or SDP to +   restrict the use of comfort noise frames. + +   A G729 RTP packet may consist of zero or more G.729 or G.729 Annex A +   frames, followed by zero or one G.729 Annex B frames.  The presence +   of a comfort noise frame can be deduced from the length of the RTP +   payload.  The default packetization interval is 20 ms (two frames), +   but in some situations it may be desirable to send 10 ms packets.  An + + + + +Schulzrinne & Casner        Standards Track                    [Page 20] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   example would be a transition from speech to comfort noise in the +   first 10 ms of the packet.  For some applications, a longer +   packetization interval may be required to reduce the packet rate. 
+ +       0                   1                   2                   3 +       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |L|      L1     |    L2   |    L3   |       P1      |P|    C1   | +      |0|             |         |         |               |0|         | +      | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2 3 4| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |       C1      |  S1   | GA1 |  GB1  |    P2   |      C2       | +      |          1 1 1|       |     |       |         |               | +      |5 6 7 8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6 7| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |   C2    |  S2   | GA2 |  GB2  | +      |    1 1 1|       |     |       | +      |8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                    Figure 4: G.729 and G.729A bit packing + +   The transmitted parameters of a G.729/G.729A 10-ms frame, consisting +   of 80 bits, are defined in Recommendation G.729, Table 8/G.729.  The +   mapping of these parameters is given in Fig. 4.  The +   diagrams show the bit packing in "network byte order", also known as +   big-endian order.  The bits of each 32-bit word are numbered 0 to 31, +   with the most significant bit on the left and numbered 0.  The octets +   (bytes) of each word are transmitted most significant octet first. +   The bits of each data field are numbered in the order as produced by +   the G.729 C code reference implementation. + +   The packing of the G.729 Annex B comfort noise frame is shown in Fig. +   5. 
+ +          0                   1 +          0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +         |L|  LSF1   |  LSF2 |   GAIN  |R| +         |S|         |       |         |E| +         |F|         |       |         |S| +         |0|0 1 2 3 4|0 1 2 3|0 1 2 3 4|V|    RESV = Reserved (zero) +         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                       Figure 5: G.729 Annex B bit packing + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 21] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.7 G729D and G729E + +   Annexes D and E to ITU-T Recommendation G.729 provide additional data +   rates.  Because the data rate is not signaled in the bitstream, the +   different data rates are given distinct RTP encoding names which are +   mapped to distinct payload type numbers.  G729D indicates a 6.4 +   kbit/s coding mode (G.729 Annex D, for momentary reduction in channel +   capacity), while G729E indicates an 11.8 kbit/s mode (G.729 Annex E, +   for improved performance with a wide range of narrow-band input +   signals, e.g., music and background noise).  Annex E has two +   operating modes, backward adaptive and forward adaptive, which are +   signaled by the first two bits in each frame (the most significant +   two bits of the first octet). + +   The voice activity detector (VAD) and comfort noise generator (CNG) +   algorithm specified in Annex B of G.729 may be used with Annex D and +   Annex E frames in addition to G.729 and G.729 Annex A frames.  The +   algorithm details for the operation of Annexes D and E with the Annex +   B CNG are specified in G.729 Annexes F and G.  Note that Annexes F +   and G do not introduce any new encodings.  Receivers MUST accept +   comfort noise frames if restriction of their use has not been +   signaled.  
The MIME registrations for G729D and G729E in RFC 3555 [7] +   specify a parameter that MAY be used with MIME or SDP to restrict the +   use of comfort noise frames. + +   For G729D, an RTP packet may consist of zero or more G.729 Annex D +   frames, followed by zero or one G.729 Annex B frame.  Similarly, for +   G729E, an RTP packet may consist of zero or more G.729 Annex E +   frames, followed by zero or one G.729 Annex B frame.  The presence of +   a comfort noise frame can be deduced from the length of the RTP +   payload. + +   A single RTP packet must contain frames of only one data rate, +   optionally followed by one comfort noise frame.  The data rate may be +   changed from packet to packet by changing the payload type number. +   G.729 Annexes D, E and H describe what the encoding and decoding +   algorithms must do to accommodate a change in data rate. + +   For G729D, the bits of a G.729 Annex D frame are formatted as shown +   below in Fig. 6 (cf.  Table D.1/G.729).  The frame length is 64 bits. 
+ + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 22] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +       0                   1                   2                   3 +       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |L|      L1     |    L2   |    L3   |        P1     |     C1    | +      |0|             |         |         |               |           | +      | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7|0 1 2 3 4 5| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      | C1  |S1 | GA1 | GB1 |  P2   |        C2       |S2 | GA2 | GB2 | +      |     |   |     |     |       |                 |   |     |     | +      |6 7 8|0 1|0 1 2|0 1 2|0 1 2 3|0 1 2 3 4 5 6 7 8|0 1|0 1 2|0 1 2| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +                     Figure 6: G.729 Annex D bit packing + +   The net bit rate for the G.729 Annex E algorithm is 11.8 kbit/s and a +   total of 118 bits are used.  Two bits are appended as "don't care" +   bits to complete an integer number of octets for the frame.  For +   G729E, the bits of a data frame are formatted as shown in the next +   two diagrams (cf. Table E.1/G.729).  The fields for the G729E forward +   adaptive mode are packed as shown in Fig. 7. 
+ +       0                   1                   2                   3 +       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |0 0|L|      L1     |    L2   |    L3   |        P1     |P| C0_1| +      |   |0|             |         |         |               |0|     | +      |   | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |       |   C1_1      |     C2_1    |   C3_1      |    C4_1     | +      |       |             |             |             |             | +      |3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      | GA1 |  GB1  |    P2   |   C0_2      |     C1_2    |   C2_2    | +      |     |       |         |             |             |           | +      |0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      | |    C3_2     |     C4_2    | GA2 | GB2   |DC | +      | |             |             |     |       |   | +      |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +         Figure 7: G.729 Annex E (forward adaptive mode) bit packing + +   The fields for the G729E backward adaptive mode are packed as shown +   in Fig. 8. 
+ + + + + + +Schulzrinne & Casner        Standards Track                    [Page 23] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +       0                   1                   2                   3 +       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |1 1|       P1      |P|       C0_1              |     C1_1      | +      |   |               |0|                    1 1 1|               | +      |   |0 1 2 3 4 5 6 7|0|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |   |  C2_1       | C3_1        | C4_1        |GA1  | GB1   |P2 | +      |   |             |             |             |     |       |   | +      |8 9|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      |     |          C0_2           |       C1_2        |    C2_2   | +      |     |                    1 1 1|                   |           | +      |2 3 4|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7 8 9|0 1 2 3 4 5| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +      | |    C3_2     |     C4_2    | GA2 | GB2   |DC | +      | |             |             |     |       |   | +      |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| +      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +         Figure 8: G.729 Annex E (backward adaptive mode) bit packing + +4.5.8 GSM + +   GSM (Groupe Special Mobile) denotes the European GSM 06.10 standard +   for full-rate speech transcoding, ETS 300 961, which is based on +   RPE/LTP (residual pulse excitation/long term prediction) coding at a +   rate of 13 kb/s [11,12,13].  
The text of the standard can be obtained +   from: + +   ETSI (European Telecommunications Standards Institute) +   ETSI Secretariat: B.P.152 +   F-06561 Valbonne Cedex +   France +   Phone: +33 92 94 42 00 +   Fax:   +33 93 65 47 16 + +   Blocks of 160 audio samples are compressed into 33 octets, for an +   effective data rate of 13,200 b/s. + +4.5.8.1  General Packaging Issues + +   The GSM standard (ETS 300 961) specifies the bit stream produced by +   the codec, but does not specify how these bits should be packed for +   transmission.  The packetization specified here has subsequently been +   adopted in ETSI Technical Specification TS 101 318.  Some software +   implementations of the GSM codec use a different packing than that +   specified here. + + + +Schulzrinne & Casner        Standards Track                    [Page 24] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +               field  field name  bits  field  field name  bits +               ________________________________________________ +               1      LARc[0]     6     39     xmc[22]     3 +               2      LARc[1]     6     40     xmc[23]     3 +               3      LARc[2]     5     41     xmc[24]     3 +               4      LARc[3]     5     42     xmc[25]     3 +               5      LARc[4]     4     43     Nc[2]       7 +               6      LARc[5]     4     44     bc[2]       2 +               7      LARc[6]     3     45     Mc[2]       2 +               8      LARc[7]     3     46     xmaxc[2]    6 +               9      Nc[0]       7     47     xmc[26]     3 +               10     bc[0]       2     48     xmc[27]     3 +               11     Mc[0]       2     49     xmc[28]     3 +               12     xmaxc[0]    6     50     xmc[29]     3 +               13     xmc[0]      3     51     xmc[30]     3 +               14     xmc[1]      3     52     xmc[31]     3 +               15     xmc[2]      3     53     xmc[32]     3 +               
16     xmc[3]      3     54     xmc[33]     3 +               17     xmc[4]      3     55     xmc[34]     3 +               18     xmc[5]      3     56     xmc[35]     3 +               19     xmc[6]      3     57     xmc[36]     3 +               20     xmc[7]      3     58     xmc[37]     3 +               21     xmc[8]      3     59     xmc[38]     3 +               22     xmc[9]      3     60     Nc[3]       7 +               23     xmc[10]     3     61     bc[3]       2 +               24     xmc[11]     3     62     Mc[3]       2 +               25     xmc[12]     3     63     xmaxc[3]    6 +               26     Nc[1]       7     64     xmc[39]     3 +               27     bc[1]       2     65     xmc[40]     3 +               28     Mc[1]       2     66     xmc[41]     3 +               29     xmaxc[1]    6     67     xmc[42]     3 +               30     xmc[13]     3     68     xmc[43]     3 +               31     xmc[14]     3     69     xmc[44]     3 +               32     xmc[15]     3     70     xmc[45]     3 +               33     xmc[16]     3     71     xmc[46]     3 +               34     xmc[17]     3     72     xmc[47]     3 +               35     xmc[18]     3     73     xmc[48]     3 +               36     xmc[19]     3     74     xmc[49]     3 +               37     xmc[20]     3     75     xmc[50]     3 +               38     xmc[21]     3     76     xmc[51]     3 + +                      Table 2: Ordering of GSM variables + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 25] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   Octet  Bit 0   Bit 1   Bit 2   Bit 3   Bit 4   Bit 5   Bit 6   Bit 7 +   _____________________________________________________________________ +       0    1       1       0       1    LARc0.0 LARc0.1 LARc0.2 LARc0.3 +       1 LARc0.4 LARc0.5 LARc1.0 LARc1.1 LARc1.2 LARc1.3 LARc1.4 LARc1.5 +       2 LARc2.0 LARc2.1 LARc2.2 LARc2.3 LARc2.4 LARc3.0 
LARc3.1 LARc3.2 +       3 LARc3.3 LARc3.4 LARc4.0 LARc4.1 LARc4.2 LARc4.3 LARc5.0 LARc5.1 +       4 LARc5.2 LARc5.3 LARc6.0 LARc6.1 LARc6.2 LARc7.0 LARc7.1 LARc7.2 +       5  Nc0.0   Nc0.1   Nc0.2   Nc0.3   Nc0.4   Nc0.5   Nc0.6  bc0.0 +       6  bc0.1   Mc0.0   Mc0.1  xmaxc00 xmaxc01 xmaxc02 xmaxc03 xmaxc04 +       7 xmaxc05 xmc0.0  xmc0.1  xmc0.2  xmc1.0  xmc1.1  xmc1.2  xmc2.0 +       8 xmc2.1  xmc2.2  xmc3.0  xmc3.1  xmc3.2  xmc4.0  xmc4.1  xmc4.2 +       9 xmc5.0  xmc5.1  xmc5.2  xmc6.0  xmc6.1  xmc6.2  xmc7.0  xmc7.1 +      10 xmc7.2  xmc8.0  xmc8.1  xmc8.2  xmc9.0  xmc9.1  xmc9.2  xmc10.0 +      11 xmc10.1 xmc10.2 xmc11.0 xmc11.1 xmc11.2 xmc12.0 xmc12.1 xmc12.2 +      12  Nc1.0   Nc1.1   Nc1.2   Nc1.3   Nc1.4   Nc1.5   Nc1.6   bc1.0 +      13  bc1.1   Mc1.0   Mc1.1  xmaxc10 xmaxc11 xmaxc12 xmaxc13 xmaxc14 +      14 xmaxc15 xmc13.0 xmc13.1 xmc13.2 xmc14.0 xmc14.1 xmc14.2 xmc15.0 +      15 xmc15.1 xmc15.2 xmc16.0 xmc16.1 xmc16.2 xmc17.0 xmc17.1 xmc17.2 +      16 xmc18.0 xmc18.1 xmc18.2 xmc19.0 xmc19.1 xmc19.2 xmc20.0 xmc20.1 +      17 xmc20.2 xmc21.0 xmc21.1 xmc21.2 xmc22.0 xmc22.1 xmc22.2 xmc23.0 +      18 xmc23.1 xmc23.2 xmc24.0 xmc24.1 xmc24.2 xmc25.0 xmc25.1 xmc25.2 +      19  Nc2.0   Nc2.1   Nc2.2   Nc2.3   Nc2.4   Nc2.5   Nc2.6   bc2.0 +      20  bc2.1   Mc2.0   Mc2.1  xmaxc20 xmaxc21 xmaxc22 xmaxc23 xmaxc24 +      21 xmaxc25 xmc26.0 xmc26.1 xmc26.2 xmc27.0 xmc27.1 xmc27.2 xmc28.0 +      22 xmc28.1 xmc28.2 xmc29.0 xmc29.1 xmc29.2 xmc30.0 xmc30.1 xmc30.2 +      23 xmc31.0 xmc31.1 xmc31.2 xmc32.0 xmc32.1 xmc32.2 xmc33.0 xmc33.1 +      24 xmc33.2 xmc34.0 xmc34.1 xmc34.2 xmc35.0 xmc35.1 xmc35.2 xmc36.0 +      25 xmc36.1 xmc36.2 xmc37.0 xmc37.1 xmc37.2 xmc38.0 xmc38.1 xmc38.2 +      26  Nc3.0   Nc3.1   Nc3.2   Nc3.3   Nc3.4   Nc3.5   Nc3.6   bc3.0 +      27  bc3.1   Mc3.0   Mc3.1  xmaxc30 xmaxc31 xmaxc32 xmaxc33 xmaxc34 +      28 xmaxc35 xmc39.0 xmc39.1 xmc39.2 xmc40.0 xmc40.1 xmc40.2 xmc41.0 +      29 xmc41.1 xmc41.2 xmc42.0 xmc42.1 xmc42.2 xmc43.0 xmc43.1 
xmc43.2 +      30 xmc44.0 xmc44.1 xmc44.2 xmc45.0 xmc45.1 xmc45.2 xmc46.0 xmc46.1 +      31 xmc46.2 xmc47.0 xmc47.1 xmc47.2 xmc48.0 xmc48.1 xmc48.2 xmc49.0 +      32 xmc49.1 xmc49.2 xmc50.0 xmc50.1 xmc50.2 xmc51.0 xmc51.1 xmc51.2 + +                        Table 3: GSM payload format + +   In the GSM packing used by RTP, the bits SHALL be packed beginning +   from the most significant bit.  Every 160 sample GSM frame is coded +   into one 33 octet (264 bit) buffer.  Every such buffer begins with a +   4 bit signature (0xD), followed by the MSB encoding of the fields of +   the frame.  The first octet thus contains 1101 in the 4 most +   significant bits (0-3) and the 4 most significant bits of F1 (0-3) in +   the 4 least significant bits (4-7).  The second octet contains the 2 +   least significant bits of F1 in bits 0-1, and F2 in bits 2-7, and so +   on.  The order of the fields in the frame is described in Table 2. + + + + +Schulzrinne & Casner        Standards Track                    [Page 26] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.8.2   GSM Variable Names and Numbers + +   In the RTP encoding we have the bit pattern described in Table 3, +   where F.i signifies the ith bit of the field F, bit 0 is the most +   significant bit, and the bits of every octet are numbered from 0 to 7 +   from most to least significant. + +4.5.9 GSM-EFR + +   GSM-EFR denotes GSM 06.60 enhanced full rate speech transcoding, +   specified in ETS 300 726 which is available from ETSI at the address +   given in Section 4.5.8.  This codec has a frame length of 244 bits. +   For transmission in RTP, each codec frame is packed into a 31 octet +   (248 bit) buffer beginning with a 4-bit signature 0xC in a manner +   similar to that specified here for the original GSM 06.10 codec.  The +   packing is specified in ETSI Technical Specification TS 101 318. 
+ +4.5.10 L8 + +   L8 denotes linear audio data samples, using 8 bits of precision with +   an offset of 128, that is, the most negative signal is encoded as +   zero. + +4.5.11 L16 + +   L16 denotes uncompressed audio data samples, using 16-bit signed +   representation with 65,535 equally divided steps between minimum and +   maximum signal level, ranging from -32,768 to 32,767.  The value is +   represented in two's complement notation and transmitted in network +   byte order (most significant byte first). + +   The MIME registration for L16 in RFC 3555 [7] specifies parameters +   that MAY be used with MIME or SDP to indicate that analog pre- +   emphasis was applied to the signal before quantization or to indicate +   that a multiple-channel audio stream follows a different channel +   ordering convention than is specified in Section 4.1. + +4.5.12 LPC + +   LPC designates an experimental linear predictive encoding contributed +   by Ron Frederick, which is based on an implementation written by Ron +   Zuckerman posted to the Usenet group comp.dsp on June 26, 1992.  The +   codec generates 14 octets for every frame.  The frame size is set to +   20 ms, resulting in a bit rate of 5,600 b/s. + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 27] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.13 MPA + +   MPA denotes MPEG-1 or MPEG-2 audio encapsulated as elementary +   streams.  The encoding is defined in ISO standards ISO/IEC 11172-3 +   and 13818-3.  The encapsulation is specified in RFC 2250 [14]. + +   The encoding may be at any of three levels of complexity, called +   Layer I, II and III.  The selected layer as well as the sampling rate +   and channel count are indicated in the payload.  The RTP timestamp +   clock rate is always 90,000, independent of the sampling rate. +   MPEG-1 audio supports sampling rates of 32, 44.1, and 48 kHz (ISO/IEC +   11172-3, section 1.1; "Scope").  
MPEG-2 supports sampling rates of +   16, 22.05 and 24 kHz.  The number of samples per frame is fixed, but +   the frame size will vary with the sampling rate and bit rate. + +   The MIME registration for MPA in RFC 3555 [7] specifies parameters +   that MAY be used with MIME or SDP to restrict the selection of layer, +   channel count, sampling rate, and bit rate. + +4.5.14 PCMA and PCMU + +   PCMA and PCMU are specified in ITU-T Recommendation G.711.  Audio +   data is encoded as eight bits per sample, after logarithmic scaling. +   PCMU denotes mu-law scaling, PCMA A-law scaling.  A detailed +   description is given by Jayant and Noll [15].  Each G.711 octet SHALL +   be octet-aligned in an RTP packet.  The sign bit of each G.711 octet +   SHALL correspond to the most significant bit of the octet in the RTP +   packet (i.e., assuming the G.711 samples are handled as octets on the +   host machine, the sign bit SHALL be the most significant bit of the +   octet as defined by the host machine format).  The 56 kb/s and 48 +   kb/s modes of G.711 are not applicable to RTP, since PCMA and PCMU +   MUST always be transmitted as 8-bit samples. + +   See Section 4.1 regarding silence suppression. + +4.5.15 QCELP + +   The Electronic Industries Association (EIA) & Telecommunications +   Industry Association (TIA) standard IS-733, "TR45: High Rate Speech +   Service Option for Wideband Spread Spectrum Communications Systems", +   defines the QCELP audio compression algorithm for use in wireless +   CDMA applications.  The QCELP CODEC compresses each 20 milliseconds +   of 8,000 Hz, 16-bit sampled input speech into one of four different +   size output frames:  Rate 1 (266 bits), Rate 1/2 (124 bits), Rate 1/4 +   (54 bits) or Rate 1/8 (20 bits).  For typical speech patterns, this +   results in an average output of 6.8 kb/s for normal mode and 4.7 kb/s +   for reduced rate mode.  The packetization of the QCELP audio codec is +   described in [16]. 
+ + + +Schulzrinne & Casner        Standards Track                    [Page 28] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +4.5.16 RED + +   The redundant audio payload format "RED" is specified by RFC 2198 +   [17].  It defines a means by which multiple redundant copies of an +   audio packet may be transmitted in a single RTP stream.  Each packet +   in such a stream contains, in addition to the audio data for that +   packetization interval, a (more heavily compressed) copy of the data +   from a previous packetization interval.  This allows an approximation +   of the data from lost packets to be recovered upon decoding of a +   subsequent packet, giving much improved sound quality when compared +   with silence substitution for lost packets. + +4.5.17 VDVI + +   VDVI is a variable-rate version of DVI4, yielding speech bit rates of +   between 10 and 25 kb/s.  It is specified for single-channel operation +   only.  Samples are packed into octets starting at the most- +   significant bit.  The last octet is padded with 1 bits if the last +   sample does not fill the last octet.  This padding is distinct from +   the valid codewords.  The receiver needs to detect the padding +   because there is no explicit count of samples in the packet. 
+ +   It uses the following encoding: + +            DVI4 codeword  VDVI bit pattern +            _______________________________ +                        0  00 +                        1  010 +                        2  1100 +                        3  11100 +                        4  111100 +                        5  1111100 +                        6  11111100 +                        7  11111110 +                        8  10 +                        9  011 +                       10  1101 +                       11  11101 +                       12  111101 +                       13  1111101 +                       14  11111101 +                       15  11111111 + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 29] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +5.  Video + +   The following sections describe the video encodings that are defined +   in this memo and give their abbreviated names used for +   identification.  These video encodings and their payload types are +   listed in Table 5. + +   All of these video encodings use an RTP timestamp frequency of 90,000 +   Hz, the same as the MPEG presentation time stamp frequency.  This +   frequency yields exact integer timestamp increments for the typical +   24 (HDTV), 25 (PAL), 29.97 (NTSC) and 30 Hz (HDTV) frame rates +   and 50, 59.94 and 60 Hz field rates.  While 90 kHz is the RECOMMENDED +   rate for future video encodings used within this profile, other rates +   MAY be used.  However, it is not sufficient to use the video frame +   rate (typically between 15 and 30 Hz) because that does not provide +   adequate resolution for typical synchronization requirements when +   calculating the RTP timestamp corresponding to the NTP timestamp in +   an RTCP SR packet.  The timestamp resolution MUST also be sufficient +   for the jitter estimate contained in the receiver reports. 
+ +   For most of these video encodings, the RTP timestamp encodes the +   sampling instant of the video image contained in the RTP data packet. +   If a video image occupies more than one packet, the timestamp is the +   same on all of those packets.  Packets from different video images +   are distinguished by their different timestamps. + +   Most of these video encodings also specify that the marker bit of the +   RTP header SHOULD be set to one in the last packet of a video frame +   and otherwise set to zero.  Thus, it is not necessary to wait for a +   following packet with a different timestamp to detect that a new +   frame should be displayed. + +5.1  CelB + +   The CELL-B encoding is a proprietary encoding proposed by Sun +   Microsystems.  The byte stream format is described in RFC 2029 [18]. + +5.2 JPEG + +   The encoding is specified in ISO Standards 10918-1 and 10918-2.  The +   RTP payload format is as specified in RFC 2435 [19]. + +5.3 H261 + +   The encoding is specified in ITU-T Recommendation H.261, "Video codec +   for audiovisual services at p x 64 kbit/s".  The packetization and +   RTP-specific properties are described in RFC 2032 [20]. + + + + +Schulzrinne & Casner        Standards Track                    [Page 30] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +5.4 H263 + +   The encoding is specified in the 1996 version of ITU-T Recommendation +   H.263, "Video coding for low bit rate communication".  The +   packetization and RTP-specific properties are described in RFC 2190 +   [21].  The H263-1998 payload format is RECOMMENDED over this one for +   use by new implementations. + +5.5 H263-1998 + +   The encoding is specified in the 1998 version of ITU-T Recommendation +   H.263, "Video coding for low bit rate communication".  The +   packetization and RTP-specific properties are described in RFC 2429 +   [22].  
Because the 1998 version of H.263 is a superset of the 1996 +   syntax, this payload format can also be used with the 1996 version of +   H.263, and is RECOMMENDED for this use by new implementations.  This +   payload format does not replace RFC 2190, which continues to be used +   by existing implementations, and may be required for backward +   compatibility in new implementations.  Implementations using the new +   features of the 1998 version of H.263 MUST use the payload format +   described in RFC 2429. + +5.6 MPV + +   MPV designates the use of MPEG-1 and MPEG-2 video encoding elementary +   streams as specified in ISO Standards ISO/IEC 11172 and 13818-2, +   respectively.  The RTP payload format is as specified in RFC 2250 +   [14], Section 3. + +   The MIME registration for MPV in RFC 3555 [7] specifies a parameter +   that MAY be used with MIME or SDP to restrict the selection of the +   type of MPEG video. + +5.7 MP2T + +   MP2T designates the use of MPEG-2 transport streams, for either audio +   or video.  The RTP payload format is described in RFC 2250 [14], +   Section 2. + + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 31] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +5.8 nv + +   The encoding is implemented in the program `nv', version 4, developed +   at Xerox PARC by Ron Frederick.  Further information is available +   from the author: + +   Ron Frederick +   Blue Coat Systems Inc. +   650 Almanor Avenue +   Sunnyvale, CA 94085 +   United States +   EMail: ronf@bluecoat.com + +6.  Payload Type Definitions + +   Tables 4 and 5 define this profile's static payload type values for +   the PT field of the RTP data header.  In addition, payload type +   values in the range 96-127 MAY be defined dynamically through a +   conference control protocol, which is beyond the scope of this +   document.  
For example, a session directory could specify that for a +   given session, payload type 96 indicates PCMU encoding, 8,000 Hz +   sampling rate, 2 channels.  Entries in Tables 4 and 5 with payload +   type "dyn" have no static payload type assigned and are only used +   with a dynamic payload type.  Payload type 2 was assigned to G721 in +   RFC 1890 and to its equivalent successor G726-32 in draft versions of +   this specification, but its use is now deprecated and that static +   payload type is marked reserved due to conflicting use for the +   payload formats G726-32 and AAL2-G726-32 (see Section 4.5.4). +   Payload type 13 indicates the Comfort Noise (CN) payload format +   specified in RFC 3389 [9].  Payload type 19 is marked "reserved" +   because some draft versions of this specification assigned that +   number to an earlier version of the comfort noise payload format. +   The payload type range 72-76 is marked "reserved" so that RTCP and +   RTP packets can be reliably distinguished (see Section "Summary of +   Protocol Constants" of the RTP protocol specification). + +   The payload types currently defined in this profile are assigned to +   exactly one of three categories or media types:  audio only, video +   only and those combining audio and video.  The media types are marked +   in Tables 4 and 5 as "A", "V" and "AV", respectively.  Payload types +   of different media types SHALL NOT be interleaved or multiplexed +   within a single RTP session, but multiple RTP sessions MAY be used in +   parallel to send multiple media types.  An RTP source MAY change +   payload types within the same media type during a session.  See the +   section "Multiplexing RTP Sessions" of RFC 3550 for additional +   explanation. 
+ + + + + +Schulzrinne & Casner        Standards Track                    [Page 32] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +               PT   encoding    media type  clock rate   channels +                    name                    (Hz) +               ___________________________________________________ +               0    PCMU        A            8,000       1 +               1    reserved    A +               2    reserved    A +               3    GSM         A            8,000       1 +               4    G723        A            8,000       1 +               5    DVI4        A            8,000       1 +               6    DVI4        A           16,000       1 +               7    LPC         A            8,000       1 +               8    PCMA        A            8,000       1 +               9    G722        A            8,000       1 +               10   L16         A           44,100       2 +               11   L16         A           44,100       1 +               12   QCELP       A            8,000       1 +               13   CN          A            8,000       1 +               14   MPA         A           90,000       (see text) +               15   G728        A            8,000       1 +               16   DVI4        A           11,025       1 +               17   DVI4        A           22,050       1 +               18   G729        A            8,000       1 +               19   reserved    A +               20   unassigned  A +               21   unassigned  A +               22   unassigned  A +               23   unassigned  A +               dyn  G726-40     A            8,000       1 +               dyn  G726-32     A            8,000       1 +               dyn  G726-24     A            8,000       1 +               dyn  G726-16     A            8,000       1 +               dyn  G729D       A            8,000       1 +               dyn  G729E       A            8,000       1 +               
dyn  GSM-EFR     A            8,000       1 +               dyn  L8          A            var.        var. +               dyn  RED         A                        (see text) +               dyn  VDVI        A            var.        1 + +               Table 4: Payload types (PT) for audio encodings + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 33] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +               PT      encoding    media type  clock rate +                       name                    (Hz) +               _____________________________________________ +               24      unassigned  V +               25      CelB        V           90,000 +               26      JPEG        V           90,000 +               27      unassigned  V +               28      nv          V           90,000 +               29      unassigned  V +               30      unassigned  V +               31      H261        V           90,000 +               32      MPV         V           90,000 +               33      MP2T        AV          90,000 +               34      H263        V           90,000 +               35-71   unassigned  ? +               72-76   reserved    N/A         N/A +               77-95   unassigned  ? +               96-127  dynamic     ? +               dyn     H263-1998   V           90,000 + +               Table 5: Payload types (PT) for video and combined +                        encodings + +   Session participants agree through mechanisms beyond the scope of +   this specification on the set of payload types allowed in a given +   session.  This set MAY, for example, be defined by the capabilities +   of the applications used, negotiated by a conference control protocol +   or established by agreement between the human participants. 
+ +   Audio applications operating under this profile SHOULD, at a minimum, +   be able to send and/or receive payload types 0 (PCMU) and 5 (DVI4). +   This allows interoperability without format negotiation and ensures +   successful negotiation with a conference control protocol. + +7.  RTP over TCP and Similar Byte Stream Protocols + +   Under special circumstances, it may be necessary to carry RTP in +   protocols offering a byte stream abstraction, such as TCP, possibly +   multiplexed with other data.  The application MUST define its own +   method of delineating RTP and RTCP packets (RTSP [23] provides an +   example of such an encapsulation specification). + +8.  Port Assignment + +   As specified in the RTP protocol definition, RTP data SHOULD be +   carried on an even UDP port number and the corresponding RTCP packets +   SHOULD be carried on the next higher (odd) port number. + + + + +Schulzrinne & Casner        Standards Track                    [Page 34] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   Applications operating under this profile MAY use any such UDP port +   pair.  For example, the port pair MAY be allocated randomly by a +   session management program.  A single fixed port number pair cannot +   be required because multiple applications using this profile are +   likely to run on the same host, and there are some operating systems +   that do not allow multiple processes to use the same UDP port with +   different multicast addresses. + +   However, port numbers 5004 and 5005 have been registered for use with +   this profile for those applications that choose to use them as the +   default pair.  Applications that operate under multiple profiles MAY +   use this port pair as an indication to select this profile if they +   are not subject to the constraint of the previous paragraph. +   Applications need not have a default and MAY require that the port +   pair be explicitly specified.  
The particular port numbers were +   chosen to lie in the range above 5000 to accommodate port number +   allocation practice within some versions of the Unix operating +   system, where port numbers below 1024 can only be used by privileged +   processes and port numbers between 1024 and 5000 are automatically +   assigned by the operating system. + +9.  Changes from RFC 1890 + +   This RFC revises RFC 1890.  It is mostly backwards-compatible with +   RFC 1890 except for functions removed because two interoperable +   implementations were not found.  The additions to RFC 1890 codify +   existing practice in the use of payload formats under this profile. +   Since this profile may be used without using any of the payload +   formats listed here, the addition of new payload formats in this +   revision does not affect backwards compatibility.  The changes are +   listed below, categorized into functional and non-functional changes. + +   Functional changes: + +   o  Section 11, "IANA Considerations" was added to specify the +      registration of the name for this profile.  That appendix also +      references a new Section 3 "Registering Additional Encodings" +      which establishes a policy that no additional registration of +      static payload types for this profile will be made beyond those +      added in this revision and included in Tables 4 and 5.  Instead, +      additional encoding names may be registered as MIME subtypes for +      binding to dynamic payload types.  Non-normative references were +      added to RFC 3555 [7] where MIME subtypes for all the listed +      payload formats are registered, some with optional parameters for +      use of the payload formats. 
+ + + + + + +Schulzrinne & Casner        Standards Track                    [Page 35] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   o  Static payload types 4, 16, 17 and 34 were added to incorporate +      IANA registrations made since the publication of RFC 1890, along +      with the corresponding payload format descriptions for G723 and +      H263. + +   o  Following working group discussion, static payload types 12 and 18 +      were added along with the corresponding payload format +      descriptions for QCELP and G729.  Static payload type 13 was +      assigned to the Comfort Noise (CN) payload format defined in RFC +      3389.  Payload type 19 was marked reserved because it had been +      temporarily allocated to an earlier version of Comfort Noise +      present in some draft revisions of this document. + +   o  The payload format for G721 was renamed to G726-32 following the +      ITU-T renumbering, and the payload format description for G726 was +      expanded to include the -16, -24 and -40 data rates.  Because of +      confusion regarding draft revisions of this document, some +      implementations of these G726 payload formats packed samples into +      octets starting with the most significant bit rather than the +      least significant bit as specified here.  To partially resolve +      this incompatibility, new payload formats named AAL2-G726-16, -24, +      -32 and -40 will be specified in a separate document (see note in +      Section 4.5.4), and use of static payload type 2 is deprecated as +      explained in Section 6. + +   o  Payload formats G729D and G729E were added following the ITU-T +      addition of Annexes D and E to Recommendation G.729.  Listings +      were added for payload formats GSM-EFR, RED, and H263-1998 +      published in other documents subsequent to RFC 1890.  These +      additional payload formats are referenced only by dynamic payload +      type numbers. 
+ +   o  The descriptions of the payload formats for G722, G728, GSM, VDVI +      were expanded. + +   o  The payload format for 1016 audio was removed and its static +      payload type assignment 1 was marked "reserved" because two +      interoperable implementations were not found. + +   o  Requirements for congestion control were added in Section 2. + +   o  This profile follows the suggestion in the revised RTP spec that +      RTCP bandwidth may be specified separately from the session +      bandwidth and separately for active senders and passive receivers. + +   o  The mapping of a user pass-phrase string into an encryption key +      was deleted from Section 2 because two interoperable +      implementations were not found. + + + +Schulzrinne & Casner        Standards Track                    [Page 36] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   o  The "quadrophonic" sample ordering convention for four-channel +      audio was removed to eliminate an ambiguity as noted in Section +      4.1. + +   Non-functional changes: + +   o  In Section 4.1, it is now explicitly stated that silence +      suppression is allowed for all audio payload formats.  (This has +      always been the case and derives from a fundamental aspect of +      RTP's design and the motivations for packet audio, but was not +      explicit stated before.)  The use of comfort noise is also +      explained. + +   o  In Section 4.1, the requirement level for setting of the marker +      bit on the first packet after silence for audio was changed from +      "is" to "SHOULD be", and clarified that the marker bit is set only +      when packets are intentionally not sent. + +   o  Similarly, text was added to specify that the marker bit SHOULD be +      set to one on the last packet of a video frame, and that video +      frames are distinguished by their timestamps. 
+ +   o  RFC references are added for payload formats published after RFC +      1890. + +   o  The security considerations and full copyright sections were +      added. + +   o  According to Peter Hoddie of Apple, only pre-1994 Macintosh used +      the 22254.54 rate and none the 11127.27 rate, so the latter was +      dropped from the discussion of suggested sampling frequencies. + +   o  Table 1 was corrected to move some values from the "ms/packet" +      column to the "default ms/packet" column where they belonged. + +   o  Since the Interactive Multimedia Association ceased operations, an +      alternate resource was provided for a referenced IMA document. + +   o  A note has been added for G722 to clarify a discrepancy between +      the actual sampling rate and the RTP timestamp clock rate. + +   o  Small clarifications of the text have been made in several places, +      some in response to questions from readers.  In particular: + +      -  A definition for "media type" is given in Section 1.1 to allow +         the explanation of multiplexing RTP sessions in Section 6 to be +         more clear regarding the multiplexing of multiple media. + + + + +Schulzrinne & Casner        Standards Track                    [Page 37] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +      -  The explanation of how to determine the number of audio frames +         in a packet from the length was expanded. + +      -  More description of the allocation of bandwidth to SDES items +         is given. + +      -  A note was added that the convention for the order of channels +         specified in Section 4.1 may be overridden by a particular +         encoding or payload format specification. + +      -  The terms MUST, SHOULD, MAY, etc. are used as defined in RFC +         2119. + +   o  A second author for this document was added. + +10. 
Security Considerations + +   Implementations using the profile defined in this specification are +   subject to the security considerations discussed in the RTP +   specification [1].  This profile does not specify any different +   security services.  The primary function of this profile is to list a +   set of data compression encodings for audio and video media. + +   Confidentiality of the media streams is achieved by encryption. +   Because the data compression used with the payload formats described +   in this profile is applied end-to-end, encryption may be performed +   after compression so there is no conflict between the two operations. + +   A potential denial-of-service threat exists for data encodings using +   compression techniques that have non-uniform receiver-end +   computational load.  The attacker can inject pathological datagrams +   into the stream which are complex to decode and cause the receiver to +   be overloaded. + +   As with any IP-based protocol, in some circumstances a receiver may +   be overloaded simply by the receipt of too many packets, either +   desired or undesired.  Network-layer authentication MAY be used to +   discard packets from undesired sources, but the processing cost of +   the authentication itself may be too high.  In a multicast +   environment, source pruning is implemented in IGMPv3 (RFC 3376) [24] +   and in multicast routing protocols to allow a receiver to select +   which sources are allowed to reach it. + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 38] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +11. IANA Considerations + +   The RTP specification establishes a registry of profile names for use +   by higher-level control protocols, such as the Session Description +   Protocol (SDP), RFC 2327 [6], to refer to transport methods.  This +   profile registers the name "RTP/AVP". 
+ +   Section 3 establishes the policy that no additional registration of +   static RTP payload types for this profile will be made beyond those +   added in this document revision and included in Tables 4 and 5.  IANA +   may reference that section in declining to accept any additional +   registration requests.  In Tables 4 and 5, note that types 1 and 2 +   have been marked reserved and the set of "dyn" payload types included +   has been updated.  These changes are explained in Sections 6 and 9. + +12.  References + +12.1 Normative References + +   [1]  Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson, +        "RTP:  A Transport Protocol for Real-Time Applications", RFC +        3550, July 2003. + +   [2]  Bradner, S., "Key Words for Use in RFCs to Indicate Requirement +        Levels", BCP 14, RFC 2119, March 1997. + +   [3]  Apple Computer, "Audio Interchange File Format AIFF-C", August +        1991.  (also ftp://ftp.sgi.com/sgi/aiff-c.9.26.91.ps.Z). + +12.2 Informative References + +   [4]  Braden, R., Clark, D. and S. Shenker, "Integrated Services in +        the Internet Architecture: an Overview", RFC 1633, June 1994. + +   [5]  Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. and W. +        Weiss, "An Architecture for Differentiated Service", RFC 2475, +        December 1998. + +   [6]  Handley, M. and V. Jacobson, "SDP: Session Description +        Protocol", RFC 2327, April 1998. + +   [7]  Casner, S. and P. Hoschka, "MIME Type Registration of RTP +        Payload Types", RFC 3555, July 2003. + +   [8]  Freed, N., Klensin, J. and J. Postel, "Multipurpose Internet +        Mail Extensions (MIME) Part Four: Registration Procedures", BCP +        13, RFC 2048, November 1996. 
+ + + + +Schulzrinne & Casner        Standards Track                    [Page 39] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   [9]  Zopf, R., "Real-time Transport Protocol (RTP) Payload for +        Comfort Noise (CN)", RFC 3389, September 2002. + +   [10] Deleam, D. and J.-P. Petit, "Real-time implementations of the +        recent ITU-T low bit rate speech coders on the TI TMS320C54X +        DSP: results, methodology, and applications", in Proc. of +        International Conference on Signal Processing, Technology, and +        Applications (ICSPAT) , (Boston, Massachusetts), pp. 1656--1660, +        October 1996. + +   [11] Mouly, M. and M.-B. Pautet, The GSM system for mobile +        communications Lassay-les-Chateaux, France: Europe Media +        Duplication, 1993. + +   [12] Degener, J., "Digital Speech Compression", Dr. Dobb's Journal, +        December 1994. + +   [13] Redl, S., Weber, M. and M. Oliphant, An Introduction to GSM +        Boston: Artech House, 1995. + +   [14] Hoffman, D., Fernando, G., Goyal, V. and M. Civanlar, "RTP +        Payload Format for MPEG1/MPEG2 Video", RFC 2250, January 1998. + +   [15] Jayant, N. and P. Noll, Digital Coding of Waveforms--Principles +        and Applications to Speech and Video Englewood Cliffs, New +        Jersey: Prentice-Hall, 1984. + +   [16] McKay, K., "RTP Payload Format for PureVoice(tm) Audio", RFC +        2658, August 1999. + +   [17] Perkins, C., Kouvelas, I., Hodson, O., Hardman, V., Handley, M., +        Bolot, J.-C., Vega-Garcia, A. and S. Fosse-Parisis, "RTP Payload +        for Redundant Audio Data", RFC 2198, September 1997. + +   [18] Speer, M. and D. Hoffman, "RTP Payload Format of Sun's CellB +        Video Encoding", RFC 2029, October 1996. + +   [19] Berc, L., Fenner, W., Frederick, R., McCanne, S. and P. Stewart, +        "RTP Payload Format for JPEG-Compressed Video", RFC 2435, +        October 1998. + +   [20] Turletti, T. and C. 
Huitema, "RTP Payload Format for H.261 Video +        Streams", RFC 2032, October 1996. + +   [21] Zhu, C., "RTP Payload Format for H.263 Video Streams", RFC 2190, +        September 1997. + + + + + +Schulzrinne & Casner        Standards Track                    [Page 40] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   [22] Bormann, C., Cline, L., Deisher, G., Gardos, T., Maciocco, C., +        Newell, D., Ott, J., Sullivan, G., Wenger, S. and C. Zhu, "RTP +        Payload Format for the 1998 Version of ITU-T Rec. H.263 Video +        (H.263+)", RFC 2429, October 1998. + +   [23] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming +        Protocol (RTSP)", RFC 2326, April 1998. + +   [24] Cain, B., Deering, S., Kouvelas, I., Fenner, B. and A. +        Thyagarajan, "Internet Group Management Protocol, Version 3", +        RFC 3376, October 2002. + +13. Current Locations of Related Resources + +   Note:  Several sections below refer to the ITU-T Software Tool +   Library (STL).  It is available from the ITU Sales Service, Place des +   Nations, CH-1211 Geneve 20, Switzerland (also check +   http://www.itu.int).  The ITU-T STL is covered by a license defined +   in ITU-T Recommendation G.191, "Software tools for speech and audio +   coding standardization". + +   DVI4 + +   An archived copy of the document IMA Recommended Practices for +   Enhancing Digital Audio Compatibility in Multimedia Systems (version +   3.0), which describes the IMA ADPCM algorithm, is available at: + +      http://www.cs.columbia.edu/~hgs/audio/dvi/ + +   An implementation is available from Jack Jansen at + +      ftp://ftp.cwi.nl/local/pub/audio/adpcm.shar + +   G722 + +   An implementation of the G.722 algorithm is available as part of the +   ITU-T STL, described above. 
+ +   G723 + +   The reference C code implementation defining the G.723.1 algorithm +   and its Annexes A, B, and C are available as an integral part of +   Recommendation G.723.1 from the ITU Sales Service, address listed +   above.  Both the algorithm and C code are covered by a specific +   license.  The ITU-T Secretariat should be contacted to obtain such +   licensing information. + + + + + +Schulzrinne & Casner        Standards Track                    [Page 41] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +   G726 + +   G726 is specified in the ITU-T Recommendation G.726, "40, 32, 24, and +   16 kb/s Adaptive Differential Pulse Code Modulation (ADPCM)".  An +   implementation of the G.726 algorithm is available as part of the +   ITU-T STL, described above. + +   G729 + +   The reference C code implementation defining the G.729 algorithm and +   its Annexes A through I are available as an integral part of +   Recommendation G.729 from the ITU Sales Service, listed above.  Annex +   I contains the integrated C source code for all G.729 operating +   modes.  The G.729 algorithm and associated C code are covered by a +   specific license.  The contact information for obtaining the license +   is available from the ITU-T Secretariat. + +   GSM + +   A reference implementation was written by Carsten Bormann and Jutta +   Degener (then at TU Berlin, Germany).  It is available at + +      http://www.dmn.tzi.org/software/gsm/ + +   Although the RPE-LTP algorithm is not an ITU-T standard, there is a C +   code implementation of the RPE-LTP algorithm available as part of the +   ITU-T STL.  The STL implementation is an adaptation of the TU Berlin +   version. + +   LPC + +   An implementation is available at + +      ftp://parcftp.xerox.com/pub/net-research/lpc.tar.Z + +   PCMU, PCMA + +   An implementation of these algorithms is available as part of the +   ITU-T STL, described above. + +14. 
Acknowledgments + +   The comments and careful review of Simao Campos, Richard Cox and AVT +   Working Group participants are gratefully acknowledged.  The GSM +   description was adopted from the IMTC Voice over IP Forum Service +   Interoperability Implementation Agreement (January 1997).  Fred Burg +   and Terry Lyons helped with the G.729 description. + + + + +Schulzrinne & Casner        Standards Track                    [Page 42] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +15. Intellectual Property Rights Statement + +   The IETF takes no position regarding the validity or scope of any +   intellectual property or other rights that might be claimed to +   pertain to the implementation or use of the technology described in +   this document or the extent to which any license under such rights +   might or might not be available; neither does it represent that it +   has made any effort to identify any such rights.  Information on the +   IETF's procedures with respect to rights in standards-track and +   standards-related documentation can be found in BCP-11.  Copies of +   claims of rights made available for publication and any assurances of +   licenses to be made available, or the result of an attempt made to +   obtain a general license or permission for the use of such +   proprietary rights by implementors or users of this specification can +   be obtained from the IETF Secretariat. + +   The IETF invites any interested party to bring to its attention any +   copyrights, patents or patent applications, or other proprietary +   rights which may cover technology that may be required to practice +   this standard.  Please address the information to the IETF Executive +   Director. + +16. Authors' Addresses + +   Henning Schulzrinne +   Department of Computer Science +   Columbia University +   1214 Amsterdam Avenue +   New York, NY 10027 +   United States + +   EMail: schulzrinne@cs.columbia.edu + + +   Stephen L. 
Casner +   Packet Design +   3400 Hillview Avenue, Building 3 +   Palo Alto, CA 94304 +   United States + +   EMail: casner@acm.org + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 43] + +RFC 3551                    RTP A/V Profile                    July 2003 + + +17. Full Copyright Statement + +   Copyright (C) The Internet Society (2003).  All Rights Reserved. + +   This document and translations of it may be copied and furnished to +   others, and derivative works that comment on or otherwise explain it +   or assist in its implementation may be prepared, copied, published +   and distributed, in whole or in part, without restriction of any +   kind, provided that the above copyright notice and this paragraph are +   included on all such copies and derivative works.  However, this +   document itself may not be modified in any way, such as by removing +   the copyright notice or references to the Internet Society or other +   Internet organizations, except as needed for the purpose of +   developing Internet standards in which case the procedures for +   copyrights defined in the Internet Standards process must be +   followed, or as required to translate it into languages other than +   English. + +   The limited permissions granted above are perpetual and will not be +   revoked by the Internet Society or its successors or assigns. + +   This document and the information contained herein is provided on an +   "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING +   TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING +   BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION +   HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF +   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + +   Funding for the RFC Editor function is currently provided by the +   Internet Society. 
+ + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner        Standards Track                    [Page 44] + diff --git a/src/modules/rtp/rtp.c b/src/modules/rtp/rtp.c new file mode 100644 index 00000000..a3e78d84 --- /dev/null +++ b/src/modules/rtp/rtp.c @@ -0,0 +1,193 @@
+/* $Id$ */
+
+/***
+  This file is part of polypaudio.
+  
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+  
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include <polypcore/log.h>
+
+#include "rtp.h"
+
+/* Prepare *c for sending on the socket fd and return c. The initial
+ * sequence number is random. If ssrc is 0 a random SSRC is chosen,
+ * otherwise ssrc is used as is. Only the low seven bits of payload are
+ * kept. */
+pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload) {
+    assert(c);
+    assert(fd >= 0);
+
+    c->fd = fd;
+    /* Multiply as unsigned: rand()*rand() evaluated in int may overflow,
+     * which is undefined behaviour for signed arithmetic */
+    c->sequence = (uint16_t) ((unsigned) rand() * (unsigned) rand());
+    c->timestamp = 0;
+    c->ssrc = ssrc ? ssrc : (uint32_t) ((unsigned) rand() * (unsigned) rand());
+    c->payload = payload & 127;
+
+    return c;
+}
+
+/* Size of the scatter/gather array handed to sendmsg(); entry 0 is
+ * reserved for the RTP header */
+#define MAX_IOVECS 16
+
+/* Send the data queued in q as RTP packets with up to size bytes of
+ * payload each, for as long as q holds at least size bytes. Everything
+ * that was looked at is dropped from q.
+ *
+ * Returns 0 on success (also when q holds less than size bytes and
+ * nothing is sent) and -1 when sendmsg() failed. */
+int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q) {
+    struct iovec iov[MAX_IOVECS];
+    pa_memblock* mb[MAX_IOVECS];
+    int iov_idx = 1;
+    size_t n = 0, skip = 0;
+    
+    assert(c);
+    assert(size > 0);
+    assert(q);
+
+    if (pa_memblockq_get_length(q) < size)
+        return 0;
+    
+    for (;;) {
+        int r;
+        pa_memchunk chunk;
+
+        if ((r = pa_memblockq_peek(q, &chunk)) >= 0) {
+
+            /* Never collect more than size bytes for a single packet */
+            size_t k = n + chunk.length > size ? size - n : chunk.length;
+
+            /* A chunk without a memblock has no data to put on the wire
+             * (presumably a hole/silence in the queue -- confirm); it is
+             * only accounted for in skip */
+            if (chunk.memblock) {
+                iov[iov_idx].iov_base = (uint8_t*) chunk.memblock->data + chunk.index;
+                iov[iov_idx].iov_len = k;
+                mb[iov_idx] = chunk.memblock;
+                iov_idx ++;
+
+                n += k;
+            }
+
+            skip += k;
+            pa_memblockq_drop(q, &chunk, k);
+        }
+
+        /* Flush when the queue gave us nothing more, when we hit a chunk
+         * without data, when the packet is full or when we ran out of
+         * iovec slots. The short-circuit on r < 0 keeps us from reading
+         * the then uninitialized chunk. */
+        if (r < 0 || !chunk.memblock || n >= size || iov_idx >= MAX_IOVECS) {
+            uint32_t header[3];
+            struct msghdr m;
+            int k, i;
+            int saved_errno = 0;
+
+            if (n > 0) {
+                /* First header word: version 2 in the top two bits, the
+                 * payload type shifted to bit 16, the sequence number in
+                 * the low 16 bits */
+                header[0] = htonl(((uint32_t) 2 << 30) | ((uint32_t) c->payload << 16) | ((uint32_t) c->sequence));
+                header[1] = htonl(c->timestamp);
+                header[2] = htonl(c->ssrc);
+
+                iov[0].iov_base = header;
+                iov[0].iov_len = sizeof(header);
+                
+                m.msg_name = NULL;
+                m.msg_namelen = 0;
+                m.msg_iov = iov;
+                m.msg_iovlen = iov_idx;
+                m.msg_control = NULL;
+                m.msg_controllen = 0;
+                m.msg_flags = 0;
+                
+                k = sendmsg(c->fd, &m, MSG_DONTWAIT);
+                /* Remember errno right away, the cleanup below may
+                 * overwrite it */
+                saved_errno = errno;
+
+                /* Release the block references we collected in mb[]
+                 * (presumably handed to us by pa_memblockq_peek()) */
+                for (i = 1; i < iov_idx; i++)
+                    pa_memblock_unref(mb[i]);
+
+                c->sequence++;
+            } else
+                k = 0;
+
+            /* NOTE(review): skip counts bytes, while RFC 3550 defines the
+             * timestamp in units of the sampling clock. The two only
+             * coincide for one-byte-per-frame formats; fixing this needs
+             * the frame size, which the context does not carry. */
+            c->timestamp += skip;
+            
+            if (k < 0) {
+                if (saved_errno != EAGAIN) /* If the queue is full, just ignore it */
+                    pa_log(__FILE__": sendmsg() failed: %s", strerror(saved_errno));
+                return -1;
+            }
+            
+            if (r < 0 || pa_memblockq_get_length(q) < size)
+                break;
+
+            /* Start collecting the next packet */
+            n = 0;
+            skip = 0;
+            iov_idx = 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Prepare *c for receiving on the socket fd and return c. Only the fd is
+ * stored, the other fields are left untouched. */
+pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd) {
+    assert(c);
+
+    c->fd = fd;
+    return c;
+}
+
+/* Receiving is not implemented yet: returns 0 without touching chunk */
+int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk) {
+    assert(c);
+    assert(chunk);
+
+    return 0;
+}
+
+/* Return the static payload type RFC 3551 (Table 4) assigns to the sample
+ * spec ss, or 127 if none of the types known here matches */
+uint8_t pa_rtp_payload_type(const pa_sample_spec *ss) {
+    assert(ss);
+
+    if (ss->format == PA_SAMPLE_ULAW && ss->rate == 8000 && ss->channels == 1)
+        return 0;  /* PCMU */
+    if (ss->format == PA_SAMPLE_ALAW && ss->rate == 8000 && ss->channels == 1)
+        return 8;  /* PCMA; 0 is PCMU */
+    if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 2)
+        return 10; /* L16, stereo */
+    if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 1)
+        return 11; /* L16, mono */
+    
+    return 127;
+}
+
+/* Make ss usable for RTP: if it does not pass pa_rtp_sample_spec_valid()
+ * the format is switched to S16BE. Rate and channels are not touched, so
+ * the assert below fires if the spec is invalid for any other reason.
+ * Returns ss. */
+pa_sample_spec *pa_rtp_sample_spec_fixup(pa_sample_spec * ss) {
+    assert(ss);
+
+    if (!pa_rtp_sample_spec_valid(ss))
+        ss->format = PA_SAMPLE_S16BE;
+
+    assert(pa_rtp_sample_spec_valid(ss));
+    return ss;
+}
+
+/* Return non-zero if ss is a valid sample spec whose format is one of
+ * U8, ALAW, ULAW or S16BE, zero otherwise */
+int pa_rtp_sample_spec_valid(const pa_sample_spec *ss) {
+    assert(ss);
+
+    if (!pa_sample_spec_valid(ss))
+        return 0;
+
+    return
+        ss->format == PA_SAMPLE_U8 ||
+        ss->format == PA_SAMPLE_ALAW ||
+        ss->format == PA_SAMPLE_ULAW ||
+        ss->format == PA_SAMPLE_S16BE;
+}
+
+/* Close the socket of c. The context structure itself is not freed. */
+void pa_rtp_context_destroy(pa_rtp_context *c) {
+    assert(c);
+
+    close(c->fd);
+}
diff --git a/src/modules/rtp/rtp.h b/src/modules/rtp/rtp.h new file mode 100644 index 
00000000..e925cc0e --- /dev/null +++ b/src/modules/rtp/rtp.h @@ -0,0 +1,51 @@
+#ifndef foortphfoo
+#define foortphfoo
+
+/* $Id$ */
+
+/***
+  This file is part of polypaudio.
+  
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+  
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <polypcore/memblockq.h>
+#include <polypcore/memchunk.h>
+
+typedef struct pa_rtp_context pa_rtp_context;
+
+/* State of a single RTP stream. The caller provides the storage; the
+ * init functions below only fill it in. */
+struct pa_rtp_context {
+    int fd;              /* socket; closed by pa_rtp_context_destroy() */
+    uint16_t sequence;   /* sequence number put into the next packet sent */
+    uint32_t timestamp;  /* timestamp put into the next packet sent */
+    uint32_t ssrc;       /* SSRC put into every packet sent */
+    uint8_t payload;     /* payload type, low seven bits only */
+};
+
+/* Sending: initialize c for the socket fd (ssrc == 0 picks a random
+ * SSRC), then push the contents of q out in packets of up to size
+ * payload bytes. pa_rtp_send() returns 0 on success, -1 on send failure. */
+pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload);
+int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q);
+
+/* Receiving: initialize c for the socket fd. pa_rtp_recv() is a stub for
+ * now and always returns 0. */
+pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd);
+int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk);
+
+/* Sample spec helpers: look up the static payload type for ss (127 if
+ * there is none), force ss into a format usable for RTP, and check
+ * whether ss is usable for RTP. */
+uint8_t pa_rtp_payload_type(const pa_sample_spec *ss);
+pa_sample_spec* pa_rtp_sample_spec_fixup(pa_sample_spec *ss);
+int pa_rtp_sample_spec_valid(const pa_sample_spec *ss);
+
+/* Close the socket of c; c itself is not freed */
+void pa_rtp_context_destroy(pa_rtp_context *c);
+
+#endif
diff --git a/src/modules/rtp/sap.c b/src/modules/rtp/sap.c new file mode 100644 index 00000000..ebf20bc4 --- /dev/null +++ b/src/modules/rtp/sap.c @@ -0,0 +1,107 @@ +/* $Id$ */ + +/*** +  This file is part of polypaudio. 
+
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <polypcore/util.h>
+#include <polypcore/log.h>
+#include <polypcore/xmalloc.h>
+
+#include "sap.h"
+
+/* Initialise c for sending SAP announcements over fd.
+ *
+ * sdp_data must be a NUL-terminated string; the context takes ownership of
+ * it (pa_sap_context_destroy() releases it with pa_xfree()). A 16 bit
+ * message id hash is drawn from rand(). Returns c. */
+pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data) {
+    assert(c);
+    assert(fd >= 0);
+    assert(sdp_data);
+
+    c->fd = fd;
+    c->sdp_data = sdp_data;
+    /* Multiply as unsigned: the product of two rand() results can exceed
+     * INT_MAX, and signed overflow is undefined behaviour. Only the low
+     * 16 bits are kept. */
+    c->msg_id_hash = (uint16_t) ((unsigned) rand() * (unsigned) rand());
+
+    return c;
+}
+
+/* Close the socket and free the SDP string owned by c. The context
+ * structure itself is not freed. */
+void pa_sap_context_destroy(pa_sap_context *c) {
+    assert(c);
+
+    close(c->fd);
+    pa_xfree(c->sdp_data);
+}
+
+/* Send one SAP packet (RFC 2974) describing the session.
+ *
+ * The packet is assembled from four pieces:
+ *   - a 32 bit header word in network byte order: version 1 (bit 29),
+ *     the address-type bit (bit 28) when the socket is IPv6, the
+ *     message-type bit (bit 26) when goodbye is non-zero, and the message
+ *     id hash in the low 16 bits;
+ *   - the local address of the socket as reported by getsockname()
+ *     (4 or 16 bytes);
+ *   - the payload type string "application/sdp" including its
+ *     terminating NUL;
+ *   - the SDP text without its terminating NUL.
+ *
+ * Returns the result of sendmsg() (negative on failure), or -1 if
+ * getsockname() fails. Failures are logged. */
+int pa_sap_send(pa_sap_context *c, int goodbye) {
+    uint32_t header;
+    const char mime[] = "application/sdp";
+    struct sockaddr_storage sa_buf;
+    struct sockaddr *sa = (struct sockaddr*) &sa_buf;
+    socklen_t salen = sizeof(sa_buf);
+    struct iovec iov[4];
+    struct msghdr m;
+    int k;
+
+    assert(c);
+
+    if (getsockname(c->fd, sa, &salen) < 0) {
+        pa_log("getsockname() failed: %s\n", strerror(errno));
+        return -1;
+    }
+
+    assert(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+    header = htonl(((uint32_t) 1 << 29) |
+                   (sa->sa_family == AF_INET6 ? (uint32_t) 1 << 28 : 0) |
+                   (goodbye ? (uint32_t) 1 << 26 : 0) |
+                   (c->msg_id_hash));
+
+    iov[0].iov_base = &header;
+    iov[0].iov_len = sizeof(header);
+
+    iov[1].iov_base = sa->sa_family == AF_INET ? (void*) &((struct sockaddr_in*) sa)->sin_addr : (void*) &((struct sockaddr_in6*) sa)->sin6_addr;
+    iov[1].iov_len = sa->sa_family == AF_INET ? 4 : 16;
+
+    iov[2].iov_base = (char*) mime;
+    iov[2].iov_len = sizeof(mime); /* sizeof, not strlen: the NUL is sent too */
+
+    iov[3].iov_base = c->sdp_data;
+    iov[3].iov_len = strlen(c->sdp_data);
+
+    m.msg_name = NULL;
+    m.msg_namelen = 0;
+    m.msg_iov = iov;
+    m.msg_iovlen = 4;
+    m.msg_control = NULL;
+    m.msg_controllen = 0;
+    m.msg_flags = 0;
+
+    if ((k = sendmsg(c->fd, &m, MSG_DONTWAIT)) < 0)
+        pa_log("sendmsg() failed: %s\n", strerror(errno));
+
+    return k;
+}
diff --git a/src/modules/rtp/sap.h b/src/modules/rtp/sap.h
new file mode 100644
index 00000000..787b39f7
--- /dev/null
+++ b/src/modules/rtp/sap.h
@@ -0,0 +1,43 @@
+#ifndef foosaphfoo
+#define foosaphfoo
+
+/* $Id$ */
+
+/***
+  This file is part of polypaudio.
+
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <polypcore/memblockq.h>
+#include <polypcore/memchunk.h>
+
+/* State of one SAP announcer. The structure is supplied by the caller and
+ * filled in by pa_sap_context_init_send(). */
+typedef struct pa_sap_context {
+    int fd;          /* socket; closed by pa_sap_context_destroy() */
+    char *sdp_data;  /* NUL-terminated SDP text; released with pa_xfree() on destroy */
+
+    uint16_t msg_id_hash; /* goes into the low 16 bits of the SAP header word */
+} pa_sap_context;
+
+/* init_send stores fd and sdp_data in c (taking ownership of sdp_data) and
+ * picks a message id hash; destroy closes the socket and frees sdp_data but
+ * not c itself. */
+pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data);
+void pa_sap_context_destroy(pa_sap_context *c);
+
+/* Sends one SAP packet; a non-zero goodbye sets the message-type bit of the
+ * header. Returns the sendmsg() result, or -1 if getsockname() fails. */
+int pa_sap_send(pa_sap_context *c, int goodbye);
+
+#endif
diff --git a/src/modules/rtp/sdp.c b/src/modules/rtp/sdp.c
new file mode 100644
index 00000000..99e8c12b
--- /dev/null
+++ b/src/modules/rtp/sdp.c
@@ -0,0 +1,87 @@
+/* $Id$ */
+
+/***
+  This file is part of polypaudio.
+
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <polypcore/util.h>
+
+#include "sdp.h"
+
+/* Return the encoding name used in the a=rtpmap line for sample format f,
+ * or NULL if the format has no mapping here. */
+static const char* map_format(pa_sample_format_t f) {
+    switch (f) {
+        case PA_SAMPLE_S16BE: return "L16";
+        case PA_SAMPLE_U8: return "L8";
+        case PA_SAMPLE_ALAW: return "PCMA";
+        case PA_SAMPLE_ULAW: return "PCMU";
+        default:
+            return NULL;
+    }
+}
+
+/* Build the SDP description of an RTP audio session.
+ *
+ *   af       AF_INET or AF_INET6; applies to both src and dst
+ *   src, dst binary addresses in the form inet_ntop() expects for af
+ *   name     session name, emitted in the s= line
+ *   port     port emitted in the m= line
+ *   payload  RTP payload type emitted in the m= and a=rtpmap lines
+ *   ss       sample spec; its format must be one map_format() knows
+ *
+ * The o= user name is taken from $USER, then $USERNAME, falling back to
+ * "-". Returns the string produced by pa_sprintf_malloc(); presumably the
+ * caller releases it with pa_xfree() -- confirm against polypcore. Invalid
+ * arguments trip an assert. */
+char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss) {
+    uint32_t ntp;
+    char buf_src[64], buf_dst[64];
+    const char *u, *f, *a;
+
+    assert(src);
+    assert(dst);
+    assert(name); /* passed to %s below */
+    assert(ss);   /* dereferenced below */
+    assert(af == AF_INET || af == AF_INET6);
+
+    f = map_format(ss->format);
+    assert(f);
+
+    if (!(u = getenv("USER")))
+        if (!(u = getenv("USERNAME")))
+            u = "-";
+
+    /* 2208988800 is the number of seconds between the NTP epoch (1900) and
+     * the Unix epoch (1970). */
+    ntp = time(NULL) + 2208988800;
+
+    a = inet_ntop(af, src, buf_src, sizeof(buf_src));
+    assert(a);
+    a = inet_ntop(af, dst, buf_dst, sizeof(buf_dst));
+    assert(a);
+
+    /* The integer arguments are cast so that each one has exactly the type
+     * its conversion specifier expects, whatever the declared field types. */
+    return pa_sprintf_malloc(
+            "v=0\n"
+            "o=%s %lu 0 IN %s %s\n"
+            "s=%s\n"
+            "c=IN %s %s\n"
+            "t=%lu 0\n"
+            "a=recvonly\n"
+            "m=audio %u RTP/AVP %i\n"
+            "a=rtpmap:%i %s/%u/%u\n"
+            "a=type:broadcast\n",
+            u, (unsigned long) ntp, af == AF_INET ? "IP4" : "IP6", buf_src,
+            name,
+            af == AF_INET ? "IP4" : "IP6", buf_dst,
+            (unsigned long) ntp,
+            (unsigned) port, (int) payload,
+            (int) payload, f, (unsigned) ss->rate, (unsigned) ss->channels);
+}
diff --git a/src/modules/rtp/sdp.h b/src/modules/rtp/sdp.h
new file mode 100644
index 00000000..10820067
--- /dev/null
+++ b/src/modules/rtp/sdp.h
@@ -0,0 +1,33 @@
+#ifndef foosdphfoo
+#define foosdphfoo
+
+/* $Id$ */
+
+/***
+  This file is part of polypaudio.
+
+  polypaudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2 of the License,
+  or (at your option) any later version.
+
+  polypaudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with polypaudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <polyp/sample.h>
+
+/* Builds the SDP text for an RTP audio session; af is AF_INET or AF_INET6
+ * and src/dst are binary addresses of that family. Returns a newly
+ * allocated string. */
+char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss);
+
+#endif
|
