summaryrefslogtreecommitdiffstats
path: root/src/modules/rtp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/rtp')
-rw-r--r--src/modules/rtp/Makefile13
-rw-r--r--src/modules/rtp/module-rtp-monitor.c340
-rw-r--r--src/modules/rtp/rfc2327.txt2355
-rw-r--r--src/modules/rtp/rfc2974.txt1011
-rw-r--r--src/modules/rtp/rfc3550.txt5827
-rw-r--r--src/modules/rtp/rfc3551.txt2467
-rw-r--r--src/modules/rtp/rtp.c193
-rw-r--r--src/modules/rtp/rtp.h51
-rw-r--r--src/modules/rtp/sap.c107
-rw-r--r--src/modules/rtp/sap.h43
-rw-r--r--src/modules/rtp/sdp.c87
-rw-r--r--src/modules/rtp/sdp.h33
12 files changed, 12527 insertions, 0 deletions
diff --git a/src/modules/rtp/Makefile b/src/modules/rtp/Makefile
new file mode 100644
index 00000000..316beb72
--- /dev/null
+++ b/src/modules/rtp/Makefile
@@ -0,0 +1,13 @@
+# This is a dirty trick just to ease compilation with emacs
+#
+# This file is not intended to be distributed or anything
+#
+# So: don't touch it, even better ignore it!
+
+# Default target: run make in the directory two levels up.
+all:
+ $(MAKE) -C ../..
+
+# Run the clean target in that same directory.
+clean:
+ $(MAKE) -C ../.. clean
+
+# Neither target names a file that gets created here.
+.PHONY: all clean
diff --git a/src/modules/rtp/module-rtp-monitor.c b/src/modules/rtp/module-rtp-monitor.c
new file mode 100644
index 00000000..66332093
--- /dev/null
+++ b/src/modules/rtp/module-rtp-monitor.c
@@ -0,0 +1,340 @@
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <polypcore/module.h>
+#include <polypcore/llist.h>
+#include <polypcore/source.h>
+#include <polypcore/source-output.h>
+#include <polypcore/memblockq.h>
+#include <polypcore/log.h>
+#include <polypcore/util.h>
+#include <polypcore/xmalloc.h>
+#include <polypcore/modargs.h>
+#include <polypcore/namereg.h>
+
+#include "module-rtp-monitor-symdef.h"
+
+#include "rtp.h"
+#include "sdp.h"
+#include "sap.h"
+
+PA_MODULE_AUTHOR("Lennart Poettering")
+PA_MODULE_DESCRIPTION("Read data from source and send it to the network via RTP")
+PA_MODULE_VERSION(PACKAGE_VERSION)
+/* Usage string; the keys shown here must match the ones looked up in
+ * pa__init() ("destination" was previously misspelled). */
+PA_MODULE_USAGE(
+    "source=<name for the source> "
+    "format=<sample format> "
+    "channels=<number of channels> "
+    "rate=<sample rate> "
+    "destination=<destination IP address> "
+    "port=<port number> "
+    "mtu=<maximum transfer unit> "
+)
+
+/* Initial value of the RTP port in pa__init(); overridden by port=. */
+#define DEFAULT_PORT 5666
+/* Port the SAP announcement socket is connected to. */
+#define SAP_PORT 9875
+/* Destination address used when destination= is not given. */
+#define DEFAULT_DESTINATION "224.0.0.252"
+/* Passed twice as a size limit to pa_memblockq_new() in pa__init(). */
+#define MEMBLOCKQ_MAXLENGTH (1024*170)
+/* Default mtu, rounded down in pa__init() to a whole number of frames. */
+#define DEFAULT_MTU 1024
+/* Added to the current time when (re)arming the SAP timer; presumably
+ * microseconds -- TODO confirm against pa_timeval_add(). */
+#define SAP_INTERVAL 5000000
+
+/* Argument keys handed to pa_modargs_new(). Every key read in pa__init()
+ * must be listed here; "mtu" was missing although pa__init() reads it. */
+static const char* const valid_modargs[] = {
+    "source",
+    "format",
+    "channels",
+    "rate",
+    "destination",
+    "port",
+    "mtu",
+    NULL
+};
+
+/* Per-module state: allocated in pa__init(), released in pa__done(). */
+struct userdata {
+ pa_module *module; /* used to request unload when the source output is killed */
+ pa_core *core;
+
+ pa_source_output *source_output; /* reset to NULL by source_output_kill() */
+ pa_memblockq *memblockq; /* chunks pushed by the source, drained by pa_rtp_send() */
+
+ pa_rtp_context rtp_context;
+ pa_sap_context sap_context;
+ size_t mtu; /* passed to pa_rtp_send(); checked in pa__init() to be a multiple of the frame size */
+
+ pa_time_event *sap_event; /* timer whose callback calls pa_sap_send() and re-arms itself */
+};
+
+/* Push callback of the source output: queue the incoming chunk and hand the
+ * queue to pa_rtp_send(). If queuing fails, log it and send nothing. */
+static void source_output_push(pa_source_output *o, const pa_memchunk *chunk) {
+    struct userdata *state;
+
+    assert(o);
+    state = o->userdata;
+
+    if (pa_memblockq_push(state->memblockq, chunk) >= 0)
+        pa_rtp_send(&state->rtp_context, state->mtu, state->memblockq);
+    else
+        pa_log(__FILE__": Failed to push chunk into memblockq.");
+}
+
+/* Kill callback of the source output: ask for the module to be unloaded,
+ * then drop our stream reference so pa__done() does not release it again. */
+static void source_output_kill(pa_source_output* o) {
+    struct userdata *state;
+
+    assert(o);
+    state = o->userdata;
+
+    pa_module_unload_request(state->module);
+
+    /* Detach and release the stream, then forget the pointer. */
+    pa_source_output_disconnect(state->source_output);
+    pa_source_output_unref(state->source_output);
+    state->source_output = NULL;
+}
+
+/* Latency callback: the number of bytes currently held in the memblockq,
+ * converted to microseconds using the stream's sample spec. */
+static pa_usec_t source_output_get_latency (pa_source_output *o) {
+    struct userdata *state;
+
+    assert(o);
+    state = o->userdata;
+
+    return pa_bytes_to_usec(
+        pa_memblockq_get_length(state->memblockq),
+        &o->sample_spec);
+}
+
+/* Timer callback: send a SAP packet (second argument 0), log it, and
+ * re-arm the timer SAP_INTERVAL after the current time. */
+static void sap_event(pa_mainloop_api *m, pa_time_event *t, const struct timeval *tv, void *userdata) {
+    struct userdata *state = userdata;
+    struct timeval deadline;
+
+    assert(m);
+    assert(t);
+    assert(tv);
+    assert(state);
+
+    pa_sap_send(&state->sap_context, 0);
+    pa_log("SAP update");
+
+    /* Next expiry = now + SAP_INTERVAL. */
+    pa_gettimeofday(&deadline);
+    pa_timeval_add(&deadline, SAP_INTERVAL);
+    m->time_restart(t, &deadline);
+}
+
+/*
+ * Module entry point.
+ *
+ * Parses the module arguments, looks up the source, validates sample spec,
+ * mtu, port and destination, connects one datagram socket to
+ * destination:port (RTP) and one to destination:SAP_PORT (SAP), creates the
+ * source output, allocates the userdata, builds the SDP description, sends
+ * the first SAP packet and arms the periodic SAP timer.
+ *
+ * Returns 0 on success. On failure returns -1 after releasing the modargs,
+ * any opened sockets and the source output.
+ */
+int pa__init(pa_core *c, pa_module*m) {
+    struct userdata *u;
+    pa_modargs *ma = NULL;
+    const char *dest;
+    uint32_t port = DEFAULT_PORT, mtu;
+    /* AF_UNSPEC makes socket() fail cleanly if no family gets selected,
+     * instead of reading an uninitialized value. */
+    int af = AF_UNSPEC, fd = -1, sap_fd = -1;
+    pa_source *s;
+    pa_sample_spec ss;
+    pa_channel_map cm;
+    struct sockaddr_in sa4, sap_sa4;
+    struct sockaddr_in6 sa6, sap_sa6;
+    struct sockaddr_storage sa_dst;
+    pa_source_output *o = NULL;
+    uint8_t payload;
+    char *p;
+    int r;
+    socklen_t k;
+    struct timeval tv;
+
+    assert(c);
+    assert(m);
+
+    if (!(ma = pa_modargs_new(m->argument, valid_modargs))) {
+        pa_log(__FILE__": failed to parse module arguments");
+        goto fail;
+    }
+
+    if (!(s = pa_namereg_get(m->core, pa_modargs_get_value(ma, "source", NULL), PA_NAMEREG_SOURCE, 1))) {
+        pa_log(__FILE__": source does not exist.");
+        goto fail;
+    }
+
+    /* Start from the source's own format, adjusted by the RTP fixup, and
+     * let the module arguments override it. */
+    ss = s->sample_spec;
+    pa_rtp_sample_spec_fixup(&ss);
+    cm = s->channel_map;
+    if (pa_modargs_get_sample_spec(ma, &ss) < 0) {
+        pa_log(__FILE__": failed to parse sample specification");
+        goto fail;
+    }
+
+    if (!pa_rtp_sample_spec_valid(&ss)) {
+        pa_log(__FILE__": specified sample type not compatible with RTP");
+        goto fail;
+    }
+
+    if (ss.channels != cm.channels)
+        pa_channel_map_init_auto(&cm, ss.channels);
+
+    payload = pa_rtp_payload_type(&ss);
+
+    /* Default mtu: DEFAULT_MTU rounded down to a whole number of frames. */
+    mtu = (DEFAULT_MTU/pa_frame_size(&ss))*pa_frame_size(&ss);
+
+    /* A user supplied mtu must be non-zero and a multiple of the frame size. */
+    if (pa_modargs_get_value_u32(ma, "mtu", &mtu) < 0 || mtu < 1 || mtu % pa_frame_size(&ss) != 0) {
+        pa_log(__FILE__": invalid mtu.");
+        goto fail;
+    }
+
+    if (pa_modargs_get_value_u32(ma, "port", &port) < 0 || port < 1 || port > 0xFFFF) {
+        pa_log(__FILE__": port= expects a numerical argument between 1 and 65535.");
+        goto fail;
+    }
+
+    /* Clear the address structures before filling them in, so members that
+     * are never assigned below (sin_zero, sin6_flowinfo, sin6_scope_id) do
+     * not reach connect() holding stack garbage. */
+    memset(&sa4, 0, sizeof(sa4));
+    memset(&sa6, 0, sizeof(sa6));
+
+    /* Try the destination as IPv6 first, then as IPv4. The SAP address is
+     * the same host with the port replaced by SAP_PORT. */
+    if ((dest = pa_modargs_get_value(ma, "destination", DEFAULT_DESTINATION))) {
+        if (inet_pton(AF_INET6, dest, &sa6.sin6_addr) > 0) {
+            sa6.sin6_family = af = AF_INET6;
+            sa6.sin6_port = htons(port);
+            sap_sa6 = sa6;
+            sap_sa6.sin6_port = htons(SAP_PORT);
+        } else if (inet_pton(AF_INET, dest, &sa4.sin_addr) > 0) {
+            sa4.sin_family = af = AF_INET;
+            sa4.sin_port = htons(port);
+            sap_sa4 = sa4;
+            sap_sa4.sin_port = htons(SAP_PORT);
+        } else {
+            pa_log(__FILE__": invalid destination '%s'", dest);
+            goto fail;
+        }
+    }
+
+    /* RTP socket. */
+    if ((fd = socket(af, SOCK_DGRAM, 0)) < 0) {
+        pa_log(__FILE__": socket() failed: %s", strerror(errno));
+        goto fail;
+    }
+
+    if (connect(fd, af == AF_INET ? (struct sockaddr*) &sa4 : (struct sockaddr*) &sa6, af == AF_INET ? sizeof(sa4) : sizeof(sa6)) < 0) {
+        pa_log(__FILE__": connect() failed: %s", strerror(errno));
+        goto fail;
+    }
+
+    /* SAP socket. */
+    if ((sap_fd = socket(af, SOCK_DGRAM, 0)) < 0) {
+        pa_log(__FILE__": socket() failed: %s", strerror(errno));
+        goto fail;
+    }
+
+    if (connect(sap_fd, af == AF_INET ? (struct sockaddr*) &sap_sa4 : (struct sockaddr*) &sap_sa6, af == AF_INET ? sizeof(sap_sa4) : sizeof(sap_sa6)) < 0) {
+        pa_log(__FILE__": connect() failed: %s", strerror(errno));
+        goto fail;
+    }
+
+    if (!(o = pa_source_output_new(s, __FILE__, "RTP Monitor Stream", &ss, &cm, PA_RESAMPLER_INVALID))) {
+        pa_log(__FILE__": failed to create source output.");
+        goto fail;
+    }
+
+    o->push = source_output_push;
+    o->kill = source_output_kill;
+    o->get_latency = source_output_get_latency;
+    o->owner = m;
+
+    /* No failure path exists past this point, so the fail: label never has
+     * to release u. */
+    u = pa_xnew(struct userdata, 1);
+    m->userdata = u;
+    o->userdata = u;
+
+    u->module = m;
+    u->core = c;
+    u->source_output = o;
+
+    u->memblockq = pa_memblockq_new(
+        0,
+        MEMBLOCKQ_MAXLENGTH,
+        MEMBLOCKQ_MAXLENGTH,
+        pa_frame_size(&ss),
+        1,
+        0,
+        NULL,
+        c->memblock_stat);
+
+    u->mtu = mtu;
+
+    /* The local address of the connected RTP socket goes into the SDP
+     * description alongside the destination address. */
+    k = sizeof(sa_dst);
+    r = getsockname(fd, (struct sockaddr*) &sa_dst, &k);
+    assert(r >= 0);
+
+    p = pa_sdp_build(af,
+                     af == AF_INET ? (void*) &((struct sockaddr_in*) &sa_dst)->sin_addr : (void*) &((struct sockaddr_in6*) &sa_dst)->sin6_addr,
+                     af == AF_INET ? (void*) &sa4.sin_addr : (void*) &sa6.sin6_addr,
+                     "Polypaudio RTP Stream", port, payload, &ss);
+
+    /* NOTE(review): fd, sap_fd and p are presumably owned by the two
+     * contexts from here on (pa__done() only destroys the contexts) --
+     * confirm against rtp.c / sap.c. */
+    pa_rtp_context_init_send(&u->rtp_context, fd, 0, payload);
+    pa_sap_context_init_send(&u->sap_context, sap_fd, p);
+
+    pa_log_info("RTP stream initialized with mtu %u on %s:%u, SSRC=0x%08x, payload=%u, initial sequence #%u", mtu, dest, port, u->rtp_context.ssrc, payload, u->rtp_context.sequence);
+    pa_log_info("SDP-Data:\n%s\nEOF", p);
+
+    /* Announce immediately, then every SAP_INTERVAL via the timer. */
+    pa_sap_send(&u->sap_context, 0);
+
+    pa_gettimeofday(&tv);
+    pa_timeval_add(&tv, SAP_INTERVAL);
+    u->sap_event = c->mainloop->time_new(c->mainloop, &tv, sap_event, u);
+
+    pa_modargs_free(ma);
+
+    return 0;
+
+fail:
+    if (ma)
+        pa_modargs_free(ma);
+
+    if (fd >= 0)
+        close(fd);
+
+    if (sap_fd >= 0)
+        close(sap_fd);
+
+    if (o) {
+        pa_source_output_disconnect(o);
+        pa_source_output_unref(o);
+    }
+
+    return -1;
+}
+
+/* Module teardown: does nothing if pa__init() never stored a userdata.
+ * Otherwise frees the SAP timer, releases the source output if it is still
+ * attached, destroys the RTP context, sends a final SAP packet with the
+ * second argument set to 1, and frees the remaining state. */
+void pa__done(pa_core *c, pa_module*m) {
+    struct userdata *state;
+
+    assert(c);
+    assert(m);
+
+    state = m->userdata;
+    if (!state)
+        return;
+
+    c->mainloop->time_free(state->sap_event);
+
+    /* source_output is NULL when source_output_kill() already released it. */
+    if (state->source_output) {
+        pa_source_output_disconnect(state->source_output);
+        pa_source_output_unref(state->source_output);
+    }
+
+    pa_rtp_context_destroy(&state->rtp_context);
+
+    pa_sap_send(&state->sap_context, 1);
+    pa_sap_context_destroy(&state->sap_context);
+
+    pa_memblockq_free(state->memblockq);
+
+    pa_xfree(state);
+}
diff --git a/src/modules/rtp/rfc2327.txt b/src/modules/rtp/rfc2327.txt
new file mode 100644
index 00000000..ce77de61
--- /dev/null
+++ b/src/modules/rtp/rfc2327.txt
@@ -0,0 +1,2355 @@
+
+
+
+
+
+
+Network Working Group M. Handley
+Request for Comments: 2327 V. Jacobson
+Category: Standards Track ISI/LBNL
+ April 1998
+
+
+ SDP: Session Description Protocol
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (1998). All Rights Reserved.
+
+Abstract
+
+ This document defines the Session Description Protocol, SDP. SDP is
+ intended for describing multimedia sessions for the purposes of
+ session announcement, session invitation, and other forms of
+ multimedia session initiation.
+
+ This document is a product of the Multiparty Multimedia Session
+ Control (MMUSIC) working group of the Internet Engineering Task
+ Force. Comments are solicited and should be addressed to the working
+ group's mailing list at confctrl@isi.edu and/or the authors.
+
+1. Introduction
+
+ On the Internet multicast backbone (Mbone), a session directory tool
+ is used to advertise multimedia conferences and communicate the
+ conference addresses and conference tool-specific information
+ necessary for participation. This document defines a session
+ description protocol for this purpose, and for general real-time
+ multimedia session description purposes. This memo does not describe
+ multicast address allocation or the distribution of SDP messages in
+ detail. These are described in accompanying memos. SDP is not
+ intended for negotiation of media encodings.
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 1]
+
+RFC 2327 SDP April 1998
+
+
+2. Background
+
+ The Mbone is the part of the internet that supports IP multicast, and
+ thus permits efficient many-to-many communication. It is used
+ extensively for multimedia conferencing. Such conferences usually
+ have the property that tight coordination of conference membership is
+ not necessary; to receive a conference, a user at an Mbone site only
+ has to know the conference's multicast group address and the UDP
+ ports for the conference data streams.
+
+ Session directories assist the advertisement of conference sessions
+ and communicate the relevant conference setup information to
+ prospective participants. SDP is designed to convey such information
+ to recipients. SDP is purely a format for session description - it
+ does not incorporate a transport protocol, and is intended to use
+ different transport protocols as appropriate including the Session
+ Announcement Protocol [4], Session Initiation Protocol [11], Real-
+ Time Streaming Protocol [12], electronic mail using the MIME
+ extensions, and the Hypertext Transport Protocol.
+
+ SDP is intended to be general purpose so that it can be used for a
+ wider range of network environments and applications than just
+ multicast session directories. However, it is not intended to
+ support negotiation of session content or media encodings - this is
+ viewed as outside the scope of session description.
+
+3. Glossary of Terms
+
+ The following terms are used in this document, and have specific
+ meaning within the context of this document.
+
+ Conference
+ A multimedia conference is a set of two or more communicating users
+ along with the software they are using to communicate.
+
+ Session
+ A multimedia session is a set of multimedia senders and receivers
+ and the data streams flowing from senders to receivers. A
+ multimedia conference is an example of a multimedia session.
+
+ Session Advertisement
+ See session announcement.
+
+ Session Announcement
+ A session announcement is a mechanism by which a session
+ description is conveyed to users in a proactive fashion, i.e., the
+ session description was not explicitly requested by the user.
+
+
+
+
+Handley & Jacobson Standards Track [Page 2]
+
+RFC 2327 SDP April 1998
+
+
+ Session Description
+ A well defined format for conveying sufficient information to
+ discover and participate in a multimedia session.
+
+3.1. Terminology
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119.
+
+4. SDP Usage
+
+4.1. Multicast Announcements
+
+ SDP is a session description protocol for multimedia sessions. A
+ common mode of usage is for a client to announce a conference session
+ by periodically multicasting an announcement packet to a well known
+ multicast address and port using the Session Announcement Protocol
+ (SAP).
+
+ SAP packets are UDP packets with the following format:
+
+ |--------------------|
+ | SAP header |
+ |--------------------|
+ | text payload |
+ |//////////
+
+
+ The header is the Session Announcement Protocol header. SAP is
+ described in more detail in a companion memo [4].
+
+ The text payload is an SDP session description, as described in this
+ memo. The text payload should be no greater than 1 Kbyte in length.
+ If announced by SAP, only one session announcement is permitted in a
+ single packet.
+
+4.2. Email and WWW Announcements
+
+ Alternative means of conveying session descriptions include
+ electronic mail and the World Wide Web. For both email and WWW
+ distribution, the use of the MIME content type "application/sdp"
+ should be used. This enables the automatic launching of applications
+ for participation in the session from the WWW client or mail reader
+ in a standard manner.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 3]
+
+RFC 2327 SDP April 1998
+
+
+ Note that announcements of multicast sessions made only via email or
+ the World Wide Web (WWW) do not have the property that the receiver
+ of a session announcement can necessarily receive the session because
+ the multicast sessions may be restricted in scope, and access to the
+ WWW server or reception of email is possible outside this scope. SAP
+ announcements do not suffer from this mismatch.
+
+5. Requirements and Recommendations
+
+ The purpose of SDP is to convey information about media streams in
+ multimedia sessions to allow the recipients of a session description
+ to participate in the session. SDP is primarily intended for use in
+ an internetwork, although it is sufficiently general that it can
+ describe conferences in other network environments.
+
+ A multimedia session, for these purposes, is defined as a set of
+ media streams that exist for some duration of time. Media streams
+ can be many-to-many. The times during which the session is active
+ need not be continuous.
+
+ Thus far, multicast based sessions on the Internet have differed from
+ many other forms of conferencing in that anyone receiving the traffic
+ can join the session (unless the session traffic is encrypted). In
+ such an environment, SDP serves two primary purposes. It is a means
+ to communicate the existence of a session, and is a means to convey
+ sufficient information to enable joining and participating in the
+ session. In a unicast environment, only the latter purpose is likely
+ to be relevant.
+
+ Thus SDP includes:
+
+ o Session name and purpose
+
+ o Time(s) the session is active
+
+ o The media comprising the session
+
+ o Information to receive those media (addresses, ports, formats and
+ so on)
+
+ As resources necessary to participate in a session may be limited,
+ some additional information may also be desirable:
+
+ o Information about the bandwidth to be used by the conference
+
+ o Contact information for the person responsible for the session
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 4]
+
+RFC 2327 SDP April 1998
+
+
+ In general, SDP must convey sufficient information to be able to join
+ a session (with the possible exception of encryption keys) and to
+ announce the resources to be used to non-participants that may need
+ to know.
+
+5.1. Media Information
+
+ SDP includes:
+
+ o The type of media (video, audio, etc)
+
+ o The transport protocol (RTP/UDP/IP, H.320, etc)
+
+ o The format of the media (H.261 video, MPEG video, etc)
+
+ For an IP multicast session, the following are also conveyed:
+
+ o Multicast address for media
+
+ o Transport Port for media
+
+ This address and port are the destination address and destination
+ port of the multicast stream, whether being sent, received, or both.
+
+ For an IP unicast session, the following are conveyed:
+
+ o Remote address for media
+
+ o Transport port for contact address
+
+ The semantics of this address and port depend on the media and
+ transport protocol defined. By default, this is the remote address
+ and remote port to which data is sent, and the remote address and
+ local port on which to receive data. However, some media may define
+ to use these to establish a control channel for the actual media
+ flow.
+
+5.2. Timing Information
+
+ Sessions may either be bounded or unbounded in time. Whether or not
+ they are bounded, they may be only active at specific times.
+
+ SDP can convey:
+
+ o An arbitrary list of start and stop times bounding the session
+
+ o For each bound, repeat times such as "every Wednesday at 10am for
+ one hour"
+
+
+
+Handley & Jacobson Standards Track [Page 5]
+
+RFC 2327 SDP April 1998
+
+
+ This timing information is globally consistent, irrespective of local
+ time zone or daylight saving time.
+
+5.3. Private Sessions
+
+ It is possible to create both public sessions and private sessions.
+ Private sessions will typically be conveyed by encrypting the session
+ description to distribute it. The details of how encryption is
+ performed are dependent on the mechanism used to convey SDP - see [4]
+ for how this is done for session announcements.
+
+ If a session announcement is private it is possible to use that
+ private announcement to convey encryption keys necessary to decode
+ each of the media in a conference, including enough information to
+ know which encryption scheme is used for each media.
+
+5.4. Obtaining Further Information about a Session
+
+ A session description should convey enough information to decide
+ whether or not to participate in a session. SDP may include
+ additional pointers in the form of Universal Resources Identifiers
+ (URIs) for more information about the session.
+
+5.5. Categorisation
+
+ When many session descriptions are being distributed by SAP or any
+ other advertisement mechanism, it may be desirable to filter
+ announcements that are of interest from those that are not. SDP
+ supports a categorisation mechanism for sessions that is capable of
+ being automated.
+
+5.6. Internationalization
+
+ The SDP specification recommends the use of the ISO 10646 character
+ sets in the UTF-8 encoding (RFC 2044) to allow many different
+ languages to be represented. However, to assist in compact
+ representations, SDP also allows other character sets such as ISO
+ 8859-1 to be used when desired. Internationalization only applies to
+ free-text fields (session name and background information), and not
+ to SDP as a whole.
+
+6. SDP Specification
+
+ SDP session descriptions are entirely textual using the ISO 10646
+ character set in UTF-8 encoding. SDP field names and attributes names
+ use only the US-ASCII subset of UTF-8, but textual fields and
+ attribute values may use the full ISO 10646 character set. The
+ textual form, as opposed to a binary encoding such as ASN/1 or XDR,
+
+
+
+Handley & Jacobson Standards Track [Page 6]
+
+RFC 2327 SDP April 1998
+
+
+ was chosen to enhance portability, to enable a variety of transports
+ to be used (e.g., session description in a MIME email message) and to
+ allow flexible, text-based toolkits (e.g., Tcl/Tk ) to be used to
+ generate and to process session descriptions. However, since the
+ total bandwidth allocated to all SAP announcements is strictly
+ limited, the encoding is deliberately compact. Also, since
+ announcements may be transported via very unreliable means (e.g.,
+ email) or damaged by an intermediate caching server, the encoding was
+ designed with strict order and formatting rules so that most errors
+ would result in malformed announcements which could be detected
+ easily and discarded. This also allows rapid discarding of encrypted
+ announcements for which a receiver does not have the correct key.
+
+ An SDP session description consists of a number of lines of text of
+ the form <type>=<value>. <type> is always exactly one character and is
+ case-significant. <value> is a structured text string whose format
+ depends on <type>. It also will be case-significant unless a
+ specific field defines otherwise. Whitespace is not permitted either
+ side of the `=' sign. In general <value> is either a number of fields
+ delimited by a single space character or a free format string.
+
+ A session description consists of a session-level description
+ (details that apply to the whole session and all media streams) and
+ optionally several media-level descriptions (details that apply only
+ to a single media stream).
+
+ An announcement consists of a session-level section followed by zero
+ or more media-level sections. The session-level part starts with a
+ `v=' line and continues to the first media-level section. The media
+ description starts with an `m=' line and continues to the next media
+ description or end of the whole session description. In general,
+ session-level values are the default for all media unless overridden
+ by an equivalent media-level value.
+
+ When SDP is conveyed by SAP, only one session description is allowed
+ per packet. When SDP is conveyed by other means, many SDP session
+ descriptions may be concatenated together (the `v=' line indicating
+ the start of a session description terminates the previous
+ description). Some lines in each description are required and some
+ are optional but all must appear in exactly the order given here (the
+ fixed order greatly enhances error detection and allows for a simple
+ parser). Optional items are marked with a `*'.
+
+Session description
+ v= (protocol version)
+ o= (owner/creator and session identifier).
+ s= (session name)
+ i=* (session information)
+
+
+
+Handley & Jacobson Standards Track [Page 7]
+
+RFC 2327 SDP April 1998
+
+
+ u=* (URI of description)
+ e=* (email address)
+ p=* (phone number)
+ c=* (connection information - not required if included in all media)
+ b=* (bandwidth information)
+ One or more time descriptions (see below)
+ z=* (time zone adjustments)
+ k=* (encryption key)
+ a=* (zero or more session attribute lines)
+ Zero or more media descriptions (see below)
+
+Time description
+ t= (time the session is active)
+ r=* (zero or more repeat times)
+
+Media description
+ m= (media name and transport address)
+ i=* (media title)
+ c=* (connection information - optional if included at session-level)
+ b=* (bandwidth information)
+ k=* (encryption key)
+ a=* (zero or more media attribute lines)
+
+ The set of `type' letters is deliberately small and not intended to
+ be extensible -- SDP parsers must completely ignore any announcement
+ that contains a `type' letter that it does not understand. The
+ `attribute' mechanism ("a=" described below) is the primary means for
+ extending SDP and tailoring it to particular applications or media.
+ Some attributes (the ones listed in this document) have a defined
+ meaning but others may be added on an application-, media- or
+ session-specific basis. A session directory must ignore any
+ attribute it doesn't understand.
+
+ The connection (`c=') and attribute (`a=') information in the
+ session-level section applies to all the media of that session unless
+ overridden by connection information or an attribute of the same name
+ in the media description. For instance, in the example below, each
+ media behaves as if it were given a `recvonly' attribute.
+
+ An example SDP description is:
+
+ v=0
+ o=mhandley 2890844526 2890842807 IN IP4 126.16.64.4
+ s=SDP Seminar
+ i=A Seminar on the session description protocol
+ u=http://www.cs.ucl.ac.uk/staff/M.Handley/sdp.03.ps
+ e=mjh@isi.edu (Mark Handley)
+ c=IN IP4 224.2.17.12/127
+
+
+
+Handley & Jacobson Standards Track [Page 8]
+
+RFC 2327 SDP April 1998
+
+
+ t=2873397496 2873404696
+ a=recvonly
+ m=audio 49170 RTP/AVP 0
+ m=video 51372 RTP/AVP 31
+ m=application 32416 udp wb
+ a=orient:portrait
+
+ Text records such as the session name and information are bytes
+ strings which may contain any byte with the exceptions of 0x00 (Nul),
+ 0x0a (ASCII newline) and 0x0d (ASCII carriage return). The sequence
+ CRLF (0x0d0a) is used to end a record, although parsers should be
+ tolerant and also accept records terminated with a single newline
+ character. By default these byte strings contain ISO-10646
+ characters in UTF-8 encoding, but this default may be changed using
+ the `charset' attribute.
+
+ Protocol Version
+
+ v=0
+
+ The "v=" field gives the version of the Session Description Protocol.
+ There is no minor version number.
+
+ Origin
+
+ o=<username> <session id> <version> <network type> <address type>
+ <address>
+
+ The "o=" field gives the originator of the session (their username
+ and the address of the user's host) plus a session id and session
+ version number.
+
+ <username> is the user's login on the originating host, or it is "-"
+ if the originating host does not support the concept of user ids.
+ <username> must not contain spaces. <session id> is a numeric string
+ such that the tuple of <username>, <session id>, <network type>,
+ <address type> and <address> form a globally unique identifier for
+ the session.
+
+ The method of <session id> allocation is up to the creating tool, but
+ it has been suggested that a Network Time Protocol (NTP) timestamp be
+ used to ensure uniqueness [1].
+
+ <version> is a version number for this announcement. It is needed
+ for proxy announcements to detect which of several announcements for
+ the same session is the most recent. Again its usage is up to the
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 9]
+
+RFC 2327 SDP April 1998
+
+
+ creating tool, so long as <version> is increased when a modification
+ is made to the session data. Again, it is recommended (but not
+ mandatory) that an NTP timestamp is used.
+
+ <network type> is a text string giving the type of network.
+ Initially "IN" is defined to have the meaning "Internet". <address
+ type> is a text string giving the type of the address that follows.
+ Initially "IP4" and "IP6" are defined. <address> is the globally
+ unique address of the machine from which the session was created.
+ For an address type of IP4, this is either the fully-qualified domain
+ name of the machine, or the dotted-decimal representation of the IP
+ version 4 address of the machine. For an address type of IP6, this
+ is either the fully-qualified domain name of the machine, or the
+ compressed textual representation of the IP version 6 address of the
+ machine. For both IP4 and IP6, the fully-qualified domain name is
+ the form that SHOULD be given unless this is unavailable, in which
+ case the globally unique address may be substituted. A local IP
+ address MUST NOT be used in any context where the SDP description
+ might leave the scope in which the address is meaningful.
+
+ In general, the "o=" field serves as a globally unique identifier for
+ this version of this session description, and the subfields excepting
+ the version taken together identify the session irrespective of any
+ modifications.
+
+ Session Name
+
+ s=<session name>
+
+ The "s=" field is the session name. There must be one and only one
+ "s=" field per session description, and it must contain ISO 10646
+ characters (but see also the `charset' attribute below).
+
+ Session and Media Information
+
+ i=<session description>
+
+ The "i=" field is information about the session. There may be at
+ most one session-level "i=" field per session description, and at
+ most one "i=" field per media. Although it may be omitted, this is
+ discouraged for session announcements, and user interfaces for
+ composing sessions should require text to be entered. If it is
+ present it must contain ISO 10646 characters (but see also the
+ `charset' attribute below).
+
+ A single "i=" field can also be used for each media definition. In
+ media definitions, "i=" fields are primarily intended for labeling
+ media streams. As such, they are most likely to be useful when a
+
+
+
+Handley & Jacobson Standards Track [Page 10]
+
+RFC 2327 SDP April 1998
+
+
+ single session has more than one distinct media stream of the same
+ media type. An example would be two different whiteboards, one for
+ slides and one for feedback and questions.
+
+ URI
+
+ u=<URI>
+
+ o A URI is a Universal Resource Identifier as used by WWW clients
+
+ o The URI should be a pointer to additional information about the
+ conference
+
+ o This field is optional, but if it is present it should be specified
+ before the first media field
+
+ o No more than one URI field is allowed per session description
+
+
+ Email Address and Phone Number
+
+ e=<email address>
+ p=<phone number>
+
+ o These specify contact information for the person responsible for
+ the conference. This is not necessarily the same person that
+ created the conference announcement.
+
+ o Either an email field or a phone field must be specified.
+ Additional email and phone fields are allowed.
+
+ o If these are present, they should be specified before the first
+ media field.
+
+ o More than one email or phone field can be given for a session
+ description.
+
+ o Phone numbers should be given in the conventional international
+
+ format - preceded by a "+" and the international country code.
+ There must be a space or a hyphen ("-") between the country code
+ and the rest of the phone number. Spaces and hyphens may be used
+ to split up a phone field to aid readability if desired. For
+ example:
+
+ p=+44-171-380-7777 or p=+1 617 253 6011
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 11]
+
+RFC 2327 SDP April 1998
+
+
+ o Both email addresses and phone numbers can have an optional free
+ text string associated with them, normally giving the name of the
+ person who may be contacted. This should be enclosed in
+ parentheses if it is present. For example:
+
+ e=mjh@isi.edu (Mark Handley)
+
+ The alternative RFC822 name quoting convention is also allowed for
+ both email addresses and phone numbers. For example,
+
+ e=Mark Handley <mjh@isi.edu>
+
+ The free text string should be in the ISO-10646 character set with
+ UTF-8 encoding, or alternatively in ISO-8859-1 or other encodings
+ if the appropriate charset session-level attribute is set.
+
+ Connection Data
+
+ c=<network type> <address type> <connection address>
+
+ The "c=" field contains connection data.
+
+ A session announcement must contain one "c=" field in each media
+ description (see below) or a "c=" field at the session-level. It may
+ contain a session-level "c=" field and one additional "c=" field per
+ media description, in which case the per-media values override the
+ session-level settings for the relevant media.
+
+ The first sub-field is the network type, which is a text string
+ giving the type of network. Initially "IN" is defined to have the
+ meaning "Internet".
+
+ The second sub-field is the address type. This allows SDP to be used
+ for sessions that are not IP based. Currently only IP4 is defined.
+
+ The third sub-field is the connection address. Optional extra
+ subfields may be added after the connection address depending on the
+ value of the <address type> field.
+
+ For IP4 addresses, the connection address is defined as follows:
+
+ o Typically the connection address will be a class-D IP multicast
+
+ group address. If the session is not multicast, then the
+ connection address contains the fully-qualified domain name or the
+ unicast IP address of the expected data source or data relay or
+ data sink as determined by additional attribute fields. It is not
+ expected that fully-qualified domain names or unicast addresses
+
+
+
+Handley & Jacobson Standards Track [Page 12]
+
+RFC 2327 SDP April 1998
+
+
+ will be given in a session description that is communicated by a
+ multicast announcement, though this is not prohibited. If a
+ unicast data stream is to pass through a network address
+ translator, the use of a fully-qualified domain name rather than a
+ unicast IP address is RECOMMENDED. In other cases, the use of an
+ IP address to specify a particular interface on a multi-homed host
+ might be required. Thus this specification leaves the decision as
+ to which to use up to the individual application, but all
+ applications MUST be able to cope with receiving both formats.
+
+ o Conferences using an IP multicast connection address must also have
+ a time to live (TTL) value present in addition to the multicast
+ address. The TTL and the address together define the scope with
+ which multicast packets sent in this conference will be sent. TTL
+ values must be in the range 0-255.
+
+ The TTL for the session is appended to the address using a slash as
+ a separator. An example is:
+
+ c=IN IP4 224.2.1.1/127
+
+ Hierarchical or layered encoding schemes are data streams where the
+ encoding from a single media source is split into a number of
+ layers. The receiver can choose the desired quality (and hence
+ bandwidth) by only subscribing to a subset of these layers. Such
+ layered encodings are normally transmitted in multiple multicast
+ groups to allow multicast pruning. This technique keeps unwanted
+ traffic from sites only requiring certain levels of the hierarchy.
+ For applications requiring multiple multicast groups, we allow the
+ following notation to be used for the connection address:
+
+ <base multicast address>/<ttl>/<number of addresses>
+
+ If the number of addresses is not given it is assumed to be one.
+ Multicast addresses so assigned are contiguously allocated above
+ the base address, so that, for example:
+
+ c=IN IP4 224.2.1.1/127/3
+
+ would state that addresses 224.2.1.1, 224.2.1.2 and 224.2.1.3 are
+ to be used at a ttl of 127. This is semantically identical to
+ including multiple "c=" lines in a media description:
+
+ c=IN IP4 224.2.1.1/127
+ c=IN IP4 224.2.1.2/127
+ c=IN IP4 224.2.1.3/127
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 13]
+
+RFC 2327 SDP April 1998
+
+
+ Multiple addresses or "c=" lines can only be specified on a per-
+ media basis, and not for a session-level "c=" field.
+
+ It is illegal for the slash notation described above to be used for
+ IP unicast addresses.
+
+ Bandwidth
+
+ b=<modifier>:<bandwidth-value>
+
+ o This specifies the proposed bandwidth to be used by the session or
+ media, and is optional.
+
+ o <bandwidth-value> is in kilobits per second
+
+ o <modifier> is a single alphanumeric word giving the meaning of the
+ bandwidth figure.
+
+ o Two modifiers are initially defined:
+
+ CT Conference Total: An implicit maximum bandwidth is associated with
+ each TTL on the Mbone or within a particular multicast
+ administrative scope region (the Mbone bandwidth vs. TTL limits are
+ given in the MBone FAQ). If the bandwidth of a session or media in
+ a session is different from the bandwidth implicit from the scope,
+ a `b=CT:...' line should be supplied for the session giving the
+ proposed upper limit to the bandwidth used. The primary purpose of
+ this is to give an approximate idea as to whether two or more
+ conferences can co-exist simultaneously.
+
+ AS Application-Specific Maximum: The bandwidth is interpreted to be
+ application-specific, i.e., will be the application's concept of
+ maximum bandwidth. Normally this will coincide with what is set on
+ the application's "maximum bandwidth" control if applicable.
+
+ Note that CT gives a total bandwidth figure for all the media at
+ all sites. AS gives a bandwidth figure for a single media at a
+ single site, although there may be many sites sending
+ simultaneously.
+
+ o Extension Mechanism: Tool writers can define experimental bandwidth
+ modifiers by prefixing their modifier with "X-". For example:
+
+ b=X-YZ:128
+
+ SDP parsers should ignore bandwidth fields with unknown modifiers.
+ Modifiers should be alpha-numeric and, although no length limit is
+ given, they are recommended to be short.
+
+
+
+Handley & Jacobson Standards Track [Page 14]
+
+RFC 2327 SDP April 1998
+
+
+ Times, Repeat Times and Time Zones
+
+ t=<start time> <stop time>
+
+ o "t=" fields specify the start and stop times for a conference
+ session. Multiple "t=" fields may be used if a session is active
+ at multiple irregularly spaced times; each additional "t=" field
+ specifies an additional period of time for which the session will
+ be active. If the session is active at regular times, an "r="
+ field (see below) should be used in addition to and following a
+ "t=" field - in which case the "t=" field specifies the start and
+ stop times of the repeat sequence.
+
+ o The first and second sub-fields give the start and stop times for
+ the conference respectively. These values are the decimal
+ representation of Network Time Protocol (NTP) time values in
+ seconds [1]. To convert these values to UNIX time, subtract
+ decimal 2208988800.
+
+ o If the stop-time is set to zero, then the session is not bounded,
+ though it will not become active until after the start-time. If
+ the start-time is also zero, the session is regarded as permanent.
+
+ User interfaces should strongly discourage the creation of
+ unbounded and permanent sessions as they give no information about
+ when the session is actually going to terminate, and so make
+ scheduling difficult.
+
+ The general assumption may be made, when displaying unbounded
+ sessions that have not timed out to the user, that an unbounded
+ session will only be active until half an hour from the current
+ time or the session start time, whichever is the later. If
+ behaviour other than this is required, an end-time should be given
+ and modified as appropriate when new information becomes available
+ about when the session should really end.
+
+ Permanent sessions may be shown to the user as never being active
+ unless there are associated repeat times which state precisely when
+ the session will be active. In general, permanent sessions should
+ not be created for any session expected to have a duration of less
+ than 2 months, and should be discouraged for sessions expected to
+ have a duration of less than 6 months.
+
+ r=<repeat interval> <active duration> <list of offsets from start-
+ time>
+
+ o "r=" fields specify repeat times for a session. For example, if
+ a session is active at 10am on Monday and 11am on Tuesday for one
+
+
+
+Handley & Jacobson Standards Track [Page 15]
+
+RFC 2327 SDP April 1998
+
+
+ hour each week for three months, then the <start time> in the
+ corresponding "t=" field would be the NTP representation of 10am on
+ the first Monday, the <repeat interval> would be 1 week, the
+ <active duration> would be 1 hour, and the offsets would be zero
+ and 25 hours. The corresponding "t=" field stop time would be the
+ NTP representation of the end of the last session three months
+ later. By default all fields are in seconds, so the "r=" and "t="
+ fields might be:
+
+ t=3034423619 3042462419
+ r=604800 3600 0 90000
+
+ To make announcements more compact, times may also be given in units
+ of days, hours or minutes. The syntax for these is a number
+ immediately followed by a single case-sensitive character.
+ Fractional units are not allowed - a smaller unit should be used
+ instead. The following unit specification characters are allowed:
+
+ d - days (86400 seconds)
+ h - hours (3600 seconds)
+ m - minutes (60 seconds)
+ s - seconds (allowed for completeness but not recommended)
+
+ Thus, the above announcement could also have been written:
+
+ r=7d 1h 0 25h
+
+ Monthly and yearly repeats cannot currently be directly specified
+ with a single SDP repeat time - instead separate "t" fields should
+ be used to explicitly list the session times.
+
+ z=<adjustment time> <offset> <adjustment time> <offset> ....
+
+ o To schedule a repeated session which spans a change from daylight-
+ saving time to standard time or vice-versa, it is necessary to
+ specify offsets from the base repeat times. This is required
+ because different time zones change time at different times of day,
+ different countries change to or from daylight time on different
+ dates, and some countries do not have daylight saving time at all.
+
+ Thus in order to schedule a session that is at the same time winter
+ and summer, it must be possible to specify unambiguously by whose
+ time zone a session is scheduled. To simplify this task for
+ receivers, we allow the sender to specify the NTP time that a time
+ zone adjustment happens and the offset from the time when the
+ session was first scheduled. The "z" field allows the sender to
+ specify a list of these adjustment times and offsets from the base
+ time.
+
+
+
+Handley & Jacobson Standards Track [Page 16]
+
+RFC 2327 SDP April 1998
+
+
+ An example might be:
+
+ z=2882844526 -1h 2898848070 0
+
+ This specifies that at time 2882844526 the time base by which the
+ session's repeat times are calculated is shifted back by 1 hour,
+ and that at time 2898848070 the session's original time base is
+ restored. Adjustments are always relative to the specified start
+ time - they are not cumulative.
+
+ o If a session is likely to last several years, it is expected
+ that the session announcement will be modified periodically
+ rather than transmit several years' worth of adjustments in one
+ announcement.
+
+ Encryption Keys
+
+ k=<method>
+ k=<method>:<encryption key>
+
+ o The session description protocol may be used to convey encryption
+ keys. A key field is permitted before the first media entry (in
+ which case it applies to all media in the session), or for each
+ media entry as required.
+
+ o The format of keys and their usage is outside the scope of this
+ document, but see [3].
+
+ o The method indicates the mechanism to be used to obtain a usable
+ key by external means, or from the encoded encryption key given.
+
+ The following methods are defined:
+
+ k=clear:<encryption key>
+ The encryption key (as described in [3] for RTP media streams
+ under the AV profile) is included untransformed in this key
+ field.
+
+ k=base64:<encoded encryption key>
+ The encryption key (as described in [3] for RTP media streams
+ under the AV profile) is included in this key field but has been
+ base64 encoded because it includes characters that are
+ prohibited in SDP.
+
+ k=uri:<URI to obtain key>
+ A Universal Resource Identifier as used by WWW clients is
+ included in this key field. The URI refers to the data
+ containing the key, and may require additional authentication
+
+
+
+Handley & Jacobson Standards Track [Page 17]
+
+RFC 2327 SDP April 1998
+
+
+ before the key can be returned. When a request is made to the
+ given URI, the MIME content-type of the reply specifies the
+ encoding for the key in the reply. The key should not be
+ obtained until the user wishes to join the session to reduce
+ synchronisation of requests to the WWW server(s).
+
+ k=prompt
+ No key is included in this SDP description, but the session or
+ media stream referred to by this key field is encrypted. The
+ user should be prompted for the key when attempting to join the
+ session, and this user-supplied key should then be used to
+ decrypt the media streams.
+
+ Attributes
+
+ a=<attribute>
+ a=<attribute>:<value>
+
+ Attributes are the primary means for extending SDP. Attributes may
+ be defined to be used as "session-level" attributes, "media-level"
+ attributes, or both.
+
+ A media description may have any number of attributes ("a=" fields)
+ which are media specific. These are referred to as "media-level"
+ attributes and add information about the media stream. Attribute
+ fields can also be added before the first media field; these
+ "session-level" attributes convey additional information that applies
+ to the conference as a whole rather than to individual media; an
+ example might be the conference's floor control policy.
+
+ Attribute fields may be of two forms:
+
+ o property attributes. A property attribute is simply of the form
+ "a=<flag>". These are binary attributes, and the presence of the
+ attribute conveys that the attribute is a property of the session.
+ An example might be "a=recvonly".
+
+ o value attributes. A value attribute is of the form
+ "a=<attribute>:<value>". An example might be that a whiteboard
+ could have the value attribute "a=orient:landscape"
+
+ Attribute interpretation depends on the media tool being invoked.
+ Thus receivers of session descriptions should be configurable in
+ their interpretation of announcements in general and of attributes in
+ particular.
+
+ Attribute names must be in the US-ASCII subset of ISO-10646/UTF-8.
+
+
+
+
+Handley & Jacobson Standards Track [Page 18]
+
+RFC 2327 SDP April 1998
+
+
+ Attribute values are byte strings, and MAY use any byte value except
+ 0x00 (Nul), 0x0A (LF), and 0x0D (CR). By default, attribute values
+ are to be interpreted as in ISO-10646 character set with UTF-8
+ encoding. Unlike other text fields, attribute values are NOT
+ normally affected by the `charset' attribute as this would make
+ comparisons against known values problematic. However, when an
+ attribute is defined, it can be defined to be charset-dependent, in
+ which case its value should be interpreted in the session charset
+ rather than in ISO-10646.
+
+ Attributes that will be commonly used can be registered with IANA
+ (see Appendix B). Unregistered attributes should begin with "X-" to
+ prevent inadvertent collision with registered attributes. In either
+ case, if an attribute is received that is not understood, it should
+ simply be ignored by the receiver.
+
+ Media Announcements
+
+ m=<media> <port> <transport> <fmt list>
+
+ A session description may contain a number of media descriptions.
+ Each media description starts with an "m=" field, and is terminated
+ by either the next "m=" field or by the end of the session
+ description. A media field also has several sub-fields:
+
+ o The first sub-field is the media type. Currently defined media are
+ "audio", "video", "application", "data" and "control", though this
+ list may be extended as new communication modalities emerge (e.g.,
+ telepresence). The difference between "application" and "data" is
+ that the former is a media flow such as whiteboard information, and
+ the latter is bulk-data transfer such as multicasting of program
+ executables which will not typically be displayed to the user.
+ "control" is used to specify an additional conference control
+ channel for the session.
+
+ o The second sub-field is the transport port to which the media
+ stream will be sent. The meaning of the transport port depends on
+ the network being used as specified in the relevant "c" field and
+ on the transport protocol defined in the third sub-field. Other
+ ports used by the media application (such as the RTCP port, see
+ [2]) should be derived algorithmically from the base media port.
+
+ Note: For transports based on UDP, the value should be in the range
+ 1024 to 65535 inclusive. For RTP compliance it should be an even
+ number.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 19]
+
+RFC 2327 SDP April 1998
+
+
+ For applications where hierarchically encoded streams are being
+ sent to a unicast address, it may be necessary to specify multiple
+ transport ports. This is done using a similar notation to that
+ used for IP multicast addresses in the "c=" field:
+
+ m=<media> <port>/<number of ports> <transport> <fmt list>
+
+ In such a case, the ports used depend on the transport protocol.
+ For RTP, only the even ports are used for data and the
+ corresponding one-higher odd port is used for RTCP. For example:
+
+ m=video 49170/2 RTP/AVP 31
+
+ would specify that ports 49170 and 49171 form one RTP/RTCP pair and
+ 49172 and 49173 form the second RTP/RTCP pair. RTP/AVP is the
+ transport protocol and 31 is the format (see below).
+
+ It is illegal for both multiple addresses to be specified in the
+ "c=" field and for multiple ports to be specified in the "m=" field
+ in the same session description.
+
+ o The third sub-field is the transport protocol. The transport
+ protocol values are dependent on the address-type field in the "c="
+ fields. Thus a "c=" field of IP4 defines that the transport
+ protocol runs over IP4. For IP4, it is normally expected that most
+ media traffic will be carried as RTP over UDP. The following
+ transport protocols are preliminarily defined, but may be extended
+ through registration of new protocols with IANA:
+
+ - RTP/AVP - the IETF's Realtime Transport Protocol using the
+ Audio/Video profile carried over UDP.
+
+ - udp - User Datagram Protocol
+
+ If an application uses a single combined proprietary media format
+ and transport protocol over UDP, then simply specifying the
+ transport protocol as udp and using the format field to distinguish
+ the combined protocol is recommended. If a transport protocol is
+ used over UDP to carry several distinct media types that need to be
+ distinguished by a session directory, then specifying the transport
+ protocol and media format separately is necessary. RTP is an
+ example of a transport-protocol that carries multiple payload
+ formats that must be distinguished by the session directory for it
+ to know how to start appropriate tools, relays, mixers or
+ recorders.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 20]
+
+RFC 2327 SDP April 1998
+
+
+ The main reason to specify the transport-protocol in addition to
+ the media format is that the same standard media formats may be
+ carried over different transport protocols even when the network
+ protocol is the same - a historical example is vat PCM audio and
+ RTP PCM audio. In addition, relays and monitoring tools that are
+ transport-protocol-specific but format-independent are possible.
+
+ For RTP media streams operating under the RTP Audio/Video Profile
+ [3], the protocol field is "RTP/AVP". Should other RTP profiles be
+ defined in the future, their profiles will be specified in the same
+ way. For example, the protocol field "RTP/XYZ" would specify RTP
+ operating under a profile whose short name is "XYZ".
+
+ o The fourth and subsequent sub-fields are media formats. For audio
+ and video, these will normally be a media payload type as defined
+ in the RTP Audio/Video Profile.
+
+ When a list of payload formats is given, this implies that all of
+ these formats may be used in the session, but the first of these
+ formats is the default format for the session.
+
+ For media whose transport protocol is not RTP or UDP the format
+ field is protocol specific. Such formats should be defined in an
+ additional specification document.
+
+ For media whose transport protocol is RTP, SDP can be used to
+ provide a dynamic binding of media encoding to RTP payload type.
+ The encoding names in the RTP AV Profile do not specify unique
+ audio encodings (in terms of clock rate and number of audio
+ channels), and so they are not used directly in SDP format fields.
+ Instead, the payload type number should be used to specify the
+ format for static payload types and the payload type number along
+ with additional encoding information should be used for dynamically
+ allocated payload types.
+
+ An example of a static payload type is u-law PCM coded single
+ channel audio sampled at 8KHz. This is completely defined in the
+ RTP Audio/Video profile as payload type 0, so the media field for
+ such a stream sent to UDP port 49232 is:
+
+ m=audio 49232 RTP/AVP 0
+
+ An example of a dynamic payload type is 16 bit linear encoded
+ stereo audio sampled at 16KHz. If we wish to use dynamic RTP/AVP
+ payload type 98 for such a stream, additional information is
+ required to decode it:
+
+ m=audio 49232 RTP/AVP 98
+
+
+
+Handley & Jacobson Standards Track [Page 21]
+
+RFC 2327 SDP April 1998
+
+
+ a=rtpmap:98 L16/16000/2
+
+ The general form of an rtpmap attribute is:
+
+ a=rtpmap:<payload type> <encoding name>/<clock rate>[/<encoding
+ parameters>]
+
+ For audio streams, <encoding parameters> may specify the number of
+ audio channels. This parameter may be omitted if the number of
+ channels is one provided no additional parameters are needed. For
+ video streams, no encoding parameters are currently specified.
+
+ Additional parameters may be defined in the future, but
+ codec-specific parameters should not be added. Parameters added to
+ an rtpmap attribute should only be those required for a session
+ directory to make the choice of appropriate media tool to
+ participate in a session. Codec-specific parameters should be
+ added in other attributes.
+
+ Up to one rtpmap attribute can be defined for each media format
+ specified. Thus we might have:
+
+ m=audio 49230 RTP/AVP 96 97 98
+ a=rtpmap:96 L8/8000
+ a=rtpmap:97 L16/8000
+ a=rtpmap:98 L16/11025/2
+
+ RTP profiles that specify the use of dynamic payload types must
+ define the set of valid encoding names and/or a means to register
+ encoding names if that profile is to be used with SDP.
+
+ Experimental encoding formats can also be specified using rtpmap.
+ RTP formats that are not registered as standard format names must
+ be preceded by "X-". Thus a new experimental redundant audio
+ stream called GSMLPC using dynamic payload type 99 could be
+ specified as:
+
+ m=audio 49232 RTP/AVP 99
+ a=rtpmap:99 X-GSMLPC/8000
+
+ Such an experimental encoding requires that any site wishing to
+ receive the media stream has relevant configured state in its
+ session directory to know which tools are appropriate.
+
+ Note that RTP audio formats typically do not include information
+ about the number of samples per packet. If a non-default (as
+ defined in the RTP Audio/Video Profile) packetisation is required,
+ the "ptime" attribute is used as given below.
+
+
+
+Handley & Jacobson Standards Track [Page 22]
+
+RFC 2327 SDP April 1998
+
+
+ For more details on RTP audio and video formats, see [3].
+
+ o Formats for non-RTP media should be registered as MIME content
+ types as described in Appendix B. For example, the LBL whiteboard
+ application might be registered as MIME content-type application/wb
+ with encoding considerations specifying that it operates over UDP,
+ with no appropriate file format. In SDP this would then be
+ expressed using a combination of the "media" field and the "fmt"
+ field, as follows:
+
+ m=application 32416 udp wb
+
+ Suggested Attributes
+
+ The following attributes are suggested. Since application writers
+ may add new attributes as they are required, this list is not
+ exhaustive.
+
+ a=cat:<category>
+ This attribute gives the dot-separated hierarchical category of
+ the session. This is to enable a receiver to filter unwanted
+ sessions by category. It would probably have been a compulsory
+ separate field, except for its experimental nature at this time.
+ It is a session-level attribute, and is not dependent on charset.
+
+ a=keywds:<keywords>
+ Like the cat attribute, this is to assist identifying wanted
+ sessions at the receiver. This allows a receiver to select
+ interesting sessions based on keywords describing the purpose of
+ the session. It is a session-level attribute. It is a charset
+ dependent attribute, meaning that its value should be interpreted
+ in the charset specified for the session description if one is
+ specified, or by default in ISO 10646/UTF-8.
+
+ a=tool:<name and version of tool>
+ This gives the name and version number of the tool used to create
+ the session description. It is a session-level attribute, and is
+ not dependent on charset.
+
+ a=ptime:<packet time>
+ This gives the length of time in milliseconds represented by the
+ media in a packet. This is probably only meaningful for audio
+ data. It should not be necessary to know ptime to decode RTP or
+ vat audio, and it is intended as a recommendation for the
+ encoding/packetisation of audio. It is a media attribute, and is
+ not dependent on charset.
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 23]
+
+RFC 2327 SDP April 1998
+
+
+ a=recvonly
+ This specifies that the tools should be started in receive-only
+ mode where applicable. It can be either a session or media
+ attribute, and is not dependent on charset.
+
+ a=sendrecv
+ This specifies that the tools should be started in send and
+ receive mode. This is necessary for interactive conferences with
+ tools such as wb which defaults to receive only mode. It can be
+ either a session or media attribute, and is not dependent on
+ charset.
+
+ a=sendonly
+ This specifies that the tools should be started in send-only
+ mode. An example may be where a different unicast address is to
+ be used for a traffic destination than for a traffic source. In
+ such a case, two media descriptions may be used, one sendonly and
+ one recvonly. It can be either a session or media attribute, but
+ would normally only be used as a media attribute, and is not
+ dependent on charset.
+
+ a=orient:<whiteboard orientation>
+ Normally this is only used in a whiteboard media specification.
+ It specifies the orientation of the whiteboard on the screen.
+ It is a media attribute. Permitted values are `portrait',
+ `landscape' and `seascape' (upside down landscape). It is not
+ dependent on charset.
+
+ a=type:<conference type>
+ This specifies the type of the conference. Suggested values are
+ `broadcast', `meeting', `moderated', `test' and `H332'.
+ `recvonly' should be the default for `type:broadcast' sessions,
+ `type:meeting' should imply `sendrecv' and `type:moderated'
+ should indicate the use of a floor control tool and that the
+ media tools are started so as to "mute" new sites joining the
+ conference.
+
+ Specifying the attribute type:H332 indicates that this loosely
+ coupled session is part of a H.332 session as defined in the ITU
+ H.332 specification [10]. Media tools should be started
+ `recvonly'.
+
+ Specifying the attribute type:test is suggested as a hint that,
+ unless explicitly requested otherwise, receivers can safely avoid
+ displaying this session description to users.
+
+ The type attribute is a session-level attribute, and is not
+ dependent on charset.
+
+
+
+Handley & Jacobson Standards Track [Page 24]
+
+RFC 2327 SDP April 1998
+
+
+ a=charset:<character set>
+ This specifies the character set to be used to display the
+ session name and information data. By default, the ISO-10646
+ character set in UTF-8 encoding is used. If a more compact
+ representation is required, other character sets may be used such
+ as ISO-8859-1 for Northern European languages. In particular,
+ the ISO 8859-1 is specified with the following SDP attribute:
+
+ a=charset:ISO-8859-1
+
+ This is a session-level attribute; if this attribute is present,
+ it must be before the first media field. The charset specified
+ MUST be one of those registered with IANA, such as ISO-8859-1.
+ The character set identifier is a US-ASCII string and MUST be
+ compared against the IANA identifiers using a case-insensitive
+ comparison. If the identifier is not recognised or not
+ supported, all strings that are affected by it SHOULD be regarded
+ as byte strings.
+
+ Note that a character set specified MUST still prohibit the use
+ of bytes 0x00 (Nul), 0x0A (LF) and 0x0d (CR). Character sets
+ requiring the use of these characters MUST define a quoting
+ mechanism that prevents these bytes appearing within text fields.
+
+ a=sdplang:<language tag>
+ This can be a session level attribute or a media level attribute.
+ As a session level attribute, it specifies the language for the
+ session description. As a media level attribute, it specifies
+ the language for any media-level SDP information field associated
+ with that media. Multiple sdplang attributes can be provided
+ either at session or media level if the session description or
+ media use multiple languages, in which
+ case the order of the attributes indicates the order of
+ importance of the various languages in the session or media from
+ most important to least important.
+
+ In general, sending session descriptions consisting of multiple
+ languages should be discouraged. Instead, multiple descriptions
+ should be sent describing the session, one in each language.
+ However this is not possible with all transport mechanisms, and
+ so multiple sdplang attributes are allowed although not
+ recommended.
+
+ The sdplang attribute value must be a single RFC 1766 language
+ tag in US-ASCII. It is not dependent on the charset attribute.
+ An sdplang attribute SHOULD be specified when a session is of
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 25]
+
+RFC 2327 SDP April 1998
+
+
+ sufficient scope to cross geographic boundaries where the
+ language of recipients cannot be assumed, or where the session is
+ in a different language from the locally assumed norm.
+
+ a=lang:<language tag>
+ This can be a session level attribute or a media level attribute.
+ As a session level attribute, it specifies the default language
+ for the session being described. As a media level attribute, it
+ specifies the language for that media, overriding any session-
+ level language specified. Multiple lang attributes can be
+ provided either at session or media level if the session
+ description or media use multiple languages, in
+ which case the order of the attributes indicates the order of
+ importance of the various languages in the session or media from
+ most important to least important.
+
+ The lang attribute value must be a single RFC 1766 language tag
+ in US-ASCII. It is not dependent on the charset attribute. A
+ lang attribute SHOULD be specified when a session is of
+ sufficient scope to cross geographic boundaries where the
+ language of recipients cannot be assumed, or where the session is
+ in a different language from the locally assumed norm.
+
+ a=framerate:<frame rate>
+ This gives the maximum video frame rate in frames/sec. It is
+ intended as a recommendation for the encoding of video data.
+ Decimal representations of fractional values using the notation
+ "<integer>.<fraction>" are allowed. It is a media attribute, is
+ only defined for video media, and is not dependent on charset.
+
+ a=quality:<quality>
+ This gives a suggestion for the quality of the encoding as an
+ integer value.
+
+ The intention of the quality attribute for video is to specify a
+ non-default trade-off between frame-rate and still-image quality.
+ For video, the value is in the range 0 to 10, with the following
+ suggested meaning:
+
+ 10 - the best still-image quality the compression scheme can
+ give.
+
+ 5 - the default behaviour given no quality suggestion.
+
+ 0 - the worst still-image quality the codec designer thinks is
+ still usable.
+
+ It is a media attribute, and is not dependent on charset.
+
+
+
+Handley & Jacobson Standards Track [Page 26]
+
+RFC 2327 SDP April 1998
+
+
+ a=fmtp:<format> <format specific parameters>
+ This attribute allows parameters that are specific to a
+ particular format to be conveyed in a way that SDP doesn't have
+ to understand them. The format must be one of the formats
+ specified for the media. Format-specific parameters may be any
+ set of parameters required to be conveyed by SDP and given
+ unchanged to the media tool that will use this format.
+
+ It is a media attribute, and is not dependent on charset.
+
+6.1. Communicating Conference Control Policy
+
+ There is some debate over the way conference control policy should be
+ communicated. In general, the authors believe that an implicit
+ declarative style of specifying conference control is desirable where
+ possible.
+
+ A simple declarative style uses a single conference attribute field
+ before the first media field, possibly supplemented by properties
+ such as `recvonly' for some of the media tools. This conference
+ attribute conveys the conference control policy. An example might be:
+
+ a=type:moderated
+
+ In some cases, however, it is possible that this may be insufficient
+ to communicate the details of an unusual conference control policy.
+ If this is the case, then a conference attribute specifying external
+ control might be set, and then one or more "media" fields might be
+ used to specify the conference control tools and configuration data
+ for those tools. An example is an ITU H.332 session:
+
+ c=IN IP4 224.5.6.7
+ a=type:H332
+ m=audio 49230 RTP/AVP 0
+ m=video 49232 RTP/AVP 31
+ m=application 12349 udp wb
+ m=control 49234 H323 mc
+ c=IN IP4 134.134.157.81
+
+ In this example, a general conference attribute (type:H332) is
+ specified stating that conference control will be provided by an
+ external H.332 tool, and a contact address for the H.323 session
+ multipoint controller is given.
+
+ In this document, only the declarative style of conference control
+ declaration is specified. Other forms of conference control should
+ specify an appropriate type attribute, and should define the
+ implications this has for control media.
+
+
+
+Handley & Jacobson Standards Track [Page 27]
+
+RFC 2327 SDP April 1998
+
+
+7. Security Considerations
+
+ SDP is a session description format that describes multimedia
+ sessions. A session description should not be trusted unless it has
+ been obtained by an authenticated transport protocol from a trusted
+ source. Many different transport protocols may be used to distribute
+ session description, and the nature of the authentication will differ
+ from transport to transport.
+
+ One transport that will frequently be used to distribute session
+ descriptions is the Session Announcement Protocol (SAP). SAP
+ provides both encryption and authentication mechanisms but due to the
+ nature of session announcements it is likely that there are many
+ occasions where the originator of a session announcement cannot be
+ authenticated because they are previously unknown to the receiver of
+ the announcement and because no common public key infrastructure is
+ available.
+
+ On receiving a session description over an unauthenticated transport
+ mechanism or from an untrusted party, software parsing the session
+ should take a few precautions. Session descriptions contain
+ information required to start software on the receiver's system.
+ Software that parses a session description MUST not be able to start
+ other software except that which is specifically configured as
+ appropriate software to participate in multimedia sessions. It is
+ normally considered INAPPROPRIATE for software parsing a session
+ description to start, on a user's system, software that is
+ appropriate to participate in multimedia sessions, without the user
+ first being informed that such software will be started and giving
+ their consent. Thus a session description arriving by session
+ announcement, email, session invitation, or WWW page SHOULD not
+ deliver the user into an interactive multimedia session without
+ the user being aware that this will happen. As it is not always
+ simple to tell whether a session is interactive or not, applications
+ that are unsure should assume sessions are interactive.
+
+ In this specification, there are no attributes which would allow the
+ recipient of a session description to be informed to start multimedia
+ tools in a mode where they default to transmitting. Under some
+ circumstances it might be appropriate to define such attributes. If
+ this is done an application parsing a session description containing
+ such attributes SHOULD either ignore them, or inform the user that
+ joining this session will result in the automatic transmission of
+ multimedia data. The default behaviour for an unknown attribute is
+ to ignore it.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 28]
+
+RFC 2327 SDP April 1998
+
+
+ Session descriptions may be parsed at intermediate systems such as
+ firewalls for the purposes of opening a hole in the firewall to allow
+ the participation in multimedia sessions. It is considered
+ INAPPROPRIATE for a firewall to open such holes for unicast data
+ streams unless the session description comes in a request from inside
+ the firewall.
+
+ For multicast sessions, it is likely that local administrators will
+ apply their own policies, but the exclusive use of "local" or "site-
+ local" administrative scope within the firewall and the refusal of
+ the firewall to open a hole for such scopes will provide separation
+ of global multicast sessions from local ones.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 29]
+
+RFC 2327 SDP April 1998
+
+
+Appendix A: SDP Grammar
+
+ This appendix provides an Augmented BNF grammar for SDP. ABNF is
+ defined in RFC 2234.
+
+
+ announcement = proto-version
+ origin-field
+ session-name-field
+ information-field
+ uri-field
+ email-fields
+ phone-fields
+ connection-field
+ bandwidth-fields
+ time-fields
+ key-field
+ attribute-fields
+ media-descriptions
+
+ proto-version = "v=" 1*DIGIT CRLF
+ ;this memo describes version 0
+
+ origin-field = "o=" username space
+ sess-id space sess-version space
+ nettype space addrtype space
+ addr CRLF
+
+ session-name-field = "s=" text CRLF
+
+ information-field = ["i=" text CRLF]
+
+ uri-field = ["u=" uri CRLF]
+
+ email-fields = *("e=" email-address CRLF)
+
+ phone-fields = *("p=" phone-number CRLF)
+
+
+ connection-field = ["c=" nettype space addrtype space
+ connection-address CRLF]
+ ;a connection field must be present
+ ;in every media description or at the
+ ;session-level
+
+
+ bandwidth-fields = *("b=" bwtype ":" bandwidth CRLF)
+
+
+
+
+Handley & Jacobson Standards Track [Page 30]
+
+RFC 2327 SDP April 1998
+
+
+ time-fields = 1*( "t=" start-time space stop-time
+ *(CRLF repeat-fields) CRLF)
+ [zone-adjustments CRLF]
+
+
+ repeat-fields = "r=" repeat-interval space typed-time
+ 1*(space typed-time)
+
+
+ zone-adjustments = time space ["-"] typed-time
+ *(space time space ["-"] typed-time)
+
+
+ key-field = ["k=" key-type CRLF]
+
+
+ key-type = "prompt" |
+ "clear:" key-data |
+ "base64:" key-data |
+ "uri:" uri
+
+
+ key-data = email-safe | "~" | "\"
+
+
+ attribute-fields = *("a=" attribute CRLF)
+
+
+ media-descriptions = *( media-field
+ information-field
+ *(connection-field)
+ bandwidth-fields
+ key-field
+ attribute-fields )
+
+
+ media-field = "m=" media space port ["/" integer]
+ space proto 1*(space fmt) CRLF
+
+
+ media = 1*(alpha-numeric)
+ ;typically "audio", "video", "application"
+ ;or "data"
+
+ fmt = 1*(alpha-numeric)
+ ;typically an RTP payload type for audio
+ ;and video media
+
+
+
+
+Handley & Jacobson Standards Track [Page 31]
+
+RFC 2327 SDP April 1998
+
+
+ proto = 1*(alpha-numeric)
+ ;typically "RTP/AVP" or "udp" for IP4
+
+
+ port = 1*(DIGIT)
+ ;should be in the range "1024" to "65535" inclusive
+ ;for UDP based media
+
+
+ attribute = (att-field ":" att-value) | att-field
+
+
+ att-field = 1*(alpha-numeric)
+
+
+ att-value = byte-string
+
+
+ sess-id = 1*(DIGIT)
+ ;should be unique for this originating username/host
+
+
+ sess-version = 1*(DIGIT)
+ ;0 is a new session
+
+
+ connection-address = multicast-address
+ | addr
+
+
+ multicast-address = 3*(decimal-uchar ".") decimal-uchar "/" ttl
+ [ "/" integer ]
+ ;multicast addresses may be in the range
+ ;224.0.0.0 to 239.255.255.255
+
+ ttl = decimal-uchar
+
+ start-time = time | "0"
+
+ stop-time = time | "0"
+
+ time = POS-DIGIT 9*(DIGIT)
+ ;sufficient for 2 more centuries
+
+
+ repeat-interval = typed-time
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 32]
+
+RFC 2327 SDP April 1998
+
+
+ typed-time = 1*(DIGIT) [fixed-len-time-unit]
+
+
+ fixed-len-time-unit = "d" | "h" | "m" | "s"
+
+
+ bwtype = 1*(alpha-numeric)
+
+ bandwidth = 1*(DIGIT)
+
+
+ username = safe
+ ;pretty wide definition, but doesn't include space
+
+
+ email-address = email | email "(" email-safe ")" |
+ email-safe "<" email ">"
+
+
+ email = ;defined in RFC822
+
+
+ uri= ;defined in RFC1630
+
+
+ phone-number = phone | phone "(" email-safe ")" |
+ email-safe "<" phone ">"
+
+
+ phone = "+" POS-DIGIT 1*(space | "-" | DIGIT)
+ ;there must be a space or hyphen between the
+ ;international code and the rest of the number.
+
+
+ nettype = "IN"
+ ;list to be extended
+
+
+ addrtype = "IP4" | "IP6"
+ ;list to be extended
+
+
+ addr = FQDN | unicast-address
+
+
+ FQDN = 4*(alpha-numeric|"-"|".")
+ ;fully qualified domain name as specified in RFC1035
+
+
+
+
+Handley & Jacobson Standards Track [Page 33]
+
+RFC 2327 SDP April 1998
+
+
+ unicast-address = IP4-address | IP6-address
+
+
+ IP4-address = b1 "." decimal-uchar "." decimal-uchar "." b4
+ b1 = decimal-uchar
+ ;less than "224"; not "0" or "127"
+ b4 = decimal-uchar
+ ;not "0"
+
+ IP6-address = ;to be defined
+
+
+ text = byte-string
+ ;default is to interpret this as ISO-10646 UTF8
+ ;ISO 8859-1 requires an "a=charset:ISO-8859-1"
+ ;session-level attribute to be used
+
+
+ byte-string = 1*(0x01..0x09|0x0b|0x0c|0x0e..0xff)
+ ;any byte except NUL, CR or LF
+
+
+ decimal-uchar = DIGIT
+ | POS-DIGIT DIGIT
+ | ("1" 2*(DIGIT))
+ | ("2" ("0"|"1"|"2"|"3"|"4") DIGIT)
+ | ("2" "5" ("0"|"1"|"2"|"3"|"4"|"5"))
+
+
+ integer = POS-DIGIT *(DIGIT)
+
+
+ alpha-numeric = ALPHA | DIGIT
+
+
+ DIGIT = "0" | POS-DIGIT
+
+
+ POS-DIGIT = "1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
+
+
+ ALPHA = "a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"|
+ "l"|"m"|"n"|"o"|"p"|"q"|"r"|"s"|"t"|"u"|"v"|
+ "w"|"x"|"y"|"z"|"A"|"B"|"C"|"D"|"E"|"F"|"G"|
+ "H"|"I"|"J"|"K"|"L"|"M"|"N"|"O"|"P"|"Q"|"R"|
+ "S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z"
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 34]
+
+RFC 2327 SDP April 1998
+
+
+ email-safe = safe | space | tab
+
+
+ safe = alpha-numeric |
+ "'" | "'" | "-" | "." | "/" | ":" | "?" | """ |
+ "#" | "$" | "&" | "*" | ";" | "=" | "@" | "[" |
+ "]" | "^" | "_" | "`" | "{" | "|" | "}" | "+" |
+ "~" | "\"
+
+
+ space = %d32
+ tab = %d9
+ CRLF = %d13.10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 35]
+
+RFC 2327 SDP April 1998
+
+
+Appendix B: Guidelines for registering SDP names with IANA
+
+ There are seven field names that may be registered with IANA. Using
+ the terminology in the SDP specification BNF, they are "media",
+ "proto", "fmt", "att-field", "bwtype", "nettype" and "addrtype".
+
+ "media" (eg, audio, video, application, data).
+
+ Packetized media types, such as those used by RTP, share the
+ namespace used by the media types registry [RFC 2048] (i.e. "MIME
+ types"). The list of valid media names is the set of top-level
+ MIME content types. The set of media is intended to be small and
+ not to be extended except under rare circumstances. (The MIME
+ subtype corresponds to the "fmt" parameter below).
+
+ "proto"
+
+ In general this should be an IETF standards-track transport
+ protocol identifier such as RTP/AVP (rfc 1889 under the rfc 1890
+ profile).
+
+ However, people will want to invent their own proprietary
+ transport protocols. Some of these should be registered as a
+ "fmt" using "udp" as the protocol and some probably
+ can't be.
+
+ Where the protocol and the application are intimately linked,
+ such as with the LBL whiteboard wb which used a proprietary and
+ special purpose protocol over UDP, the protocol name should be
+ "udp" and the format name that should be registered is "wb". The
+ rules for formats (see below) apply to such registrations.
+
+ Where the proprietary transport protocol really carries many
+ different data formats, it is possible to register a new protocol
+ name with IANA. In such a case, an RFC MUST be produced
+ describing the protocol and referenced in the registration. Such
+ an RFC MAY be informational, although it is preferable if it is
+ standards-track.
+
+ "fmt"
+
+ The format namespace is dependent on the context of the "proto"
+ field, so a format cannot be registered without specifying one or
+ more transport protocols that it applies to.
+
+ Formats cover all the possible encodings that might want to be
+ transported in a multimedia session.
+
+
+
+
+Handley & Jacobson Standards Track [Page 36]
+
+RFC 2327 SDP April 1998
+
+
+ For RTP formats that have been assigned static payload types, the
+ payload type number is used. For RTP formats using a dynamic
+ payload type number, the dynamic payload type number is given as
+ the format and an additional "rtpmap" attribute specifies the
+ format and parameters.
+
+ For non-RTP formats, any unregistered format name may be
+ registered through the MIME-type registration process [RFC 2048].
+ The type given here is the MIME subtype only (the top-level MIME
+ content type is specified by the media parameter). The MIME type
+ registration SHOULD reference a standards-track RFC which
+ describes the transport protocol for this media type. If there
+ is an existing MIME type for this format, the MIME registration
+ should be augmented to reference the transport specification for
+ this media type. If there is not an existing MIME type for this
+ format, and there exists no appropriate file format, this should
+ be noted in the encoding considerations as "no appropriate file
+ format".
+
+ "att-field" (Attribute names)
+
+ Attribute field names MAY be registered with IANA, although this
+ is not compulsory, and unknown attributes are simply ignored.
+
+ When an attribute is registered, it must be accompanied by a
+ brief specification stating the following:
+
+ o contact name, email address and telephone number
+
+ o attribute-name (as it will appear in SDP)
+
+ o long-form attribute name in English
+
+ o type of attribute (session level, media level, or both)
+
+ o whether the attribute value is subject to the charset
+ attribute.
+
+ o a one paragraph explanation of the purpose of the attribute.
+
+ o a specification of appropriate attribute values for this
+ attribute.
+
+ IANA will not sanity check such attribute registrations except to
+ ensure that they do not clash with existing registrations.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 37]
+
+RFC 2327 SDP April 1998
+
+
+ Although the above is the minimum that IANA will accept, if the
+ attribute is expected to see widespread use and interoperability
+ is an issue, authors are encouraged to produce a standards-track
+ RFC that specifies the attribute more precisely.
+
+ Submitters of registrations should ensure that the specification
+ is in the spirit of SDP attributes, most notably that the
+ attribute is platform independent in the sense that it makes no
+ implicit assumptions about operating systems and does not name
+ specific pieces of software in a manner that might inhibit
+ interoperability.
+
+ "bwtype" (bandwidth specifiers)
+
+ A proliferation of bandwidth specifiers is strongly discouraged.
+
+ New bandwidth specifiers may be registered with IANA. The
+ submission MUST reference a standards-track RFC specifying the
+ semantics of the bandwidth specifier precisely, and indicating
+ when it should be used, and why the existing registered bandwidth
+ specifiers do not suffice.
+
+ "nettype" (Network Type)
+
+ New network types may be registered with IANA if SDP needs to be
+ used in the context of non-internet environments. Whilst these
+ are not normally the preserve of IANA, there may be circumstances
+ when an Internet application needs to interoperate with a non-
+ internet application, such as when gatewaying an internet
+ telephony call into the PSTN. The number of network types should
+ be small and should be rarely extended. A new network type
+ cannot be registered without registering at least one address
+ type to be used with that network type. A new network type
+ registration MUST reference an RFC which gives details of the
+ network type and address type and specifies how and when they
+ would be used. Such an RFC MAY be Informational.
+
+ "addrtype" (Address Type)
+
+ New address types may be registered with IANA. An address type
+ is only meaningful in the context of a network type, and any
+ registration of an address type MUST specify a registered network
+ type, or be submitted along with a network type registration. A
+ new address type registration MUST reference an RFC giving
+ details of the syntax of the address type. Such an RFC MAY be
+ Informational. Address types are not expected to be registered
+ frequently.
+
+
+
+
+Handley & Jacobson Standards Track [Page 38]
+
+RFC 2327 SDP April 1998
+
+
+ Registration Procedure
+
+ To register a name the above guidelines should be followed regarding
+ the level of documentation that is required. The
+ registration itself should be sent to IANA. Attribute registrations
+ should include the information given above. Other registrations
+ should include the following additional information:
+
+ o contact name, email address and telephone number
+
+ o name being registered (as it will appear in SDP)
+
+ o long-form name in English
+
+ o type of name ("media", "proto", "fmt", "bwtype", "nettype", or
+ "addrtype")
+
+ o a one paragraph explanation of the purpose of the registered name.
+
+ o a reference to the specification (eg RFC number) of the registered
+ name.
+
+ IANA may refer any registration to the IESG or to any appropriate
+ IETF working group for review, and may request revisions to be made
+ before a registration will be made.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 39]
+
+RFC 2327 SDP April 1998
+
+
+Appendix C: Authors' Addresses
+
+ Mark Handley
+ Information Sciences Institute
+ c/o MIT Laboratory for Computer Science
+ 545 Technology Square
+ Cambridge, MA 02139
+ United States
+ electronic mail: mjh@isi.edu
+
+ Van Jacobson
+ MS 46a-1121
+ Lawrence Berkeley Laboratory
+ Berkeley, CA 94720
+ United States
+ electronic mail: van@ee.lbl.gov
+
+Acknowledgments
+
+ Many people in the IETF MMUSIC working group have made comments and
+ suggestions contributing to this document. In particular, we would
+ like to thank Eve Schooler, Steve Casner, Bill Fenner, Allison
+ Mankin, Ross Finlayson, Peter Parnes, Joerg Ott, Carsten Bormann, Rob
+ Lanphier and Steve Hanna.
+
+References
+
+ [1] Mills, D., "Network Time Protocol (version 3) specification and
+ implementation", RFC 1305, March 1992.
+
+ [2] Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson, "RTP:
+ A Transport Protocol for Real-Time Applications", RFC 1889, January
+ 1996.
+
+ [3] Schulzrinne, H., "RTP Profile for Audio and Video Conferences
+ with Minimal Control", RFC 1890, January 1996.
+
+ [4] Handley, M., "SAP - Session Announcement Protocol", Work in
+ Progress.
+
+ [5] V. Jacobson, S. McCanne, "vat - X11-based audio teleconferencing
+ tool" vat manual page, Lawrence Berkeley Laboratory, 1994.
+
+ [6] The Unicode Consortium, "The Unicode Standard -- Version 2.0",
+ Addison-Wesley, 1996.
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 40]
+
+RFC 2327 SDP April 1998
+
+
+ [7] ISO/IEC 10646-1:1993. International Standard -- Information
+ technology -- Universal Multiple-Octet Coded Character Set (UCS) --
+ Part 1: Architecture and Basic Multilingual Plane. Five amendments
+ and a technical corrigendum have been published up to now. UTF-8
+ is described in Annex R, published as Amendment 2.
+
+ [8] Goldsmith, D., and M. Davis, "Using Unicode with MIME", RFC 1641,
+ July 1994.
+
+ [9] Yergeau, F., "UTF-8, a transformation format of Unicode and ISO
+ 10646", RFC 2044, October 1996.
+
+ [10] ITU-T Recommendation H.332 (1998): "Multimedia Terminal for
+ Receiving Internet-based H.323 Conferences", ITU, Geneva.
+
+ [11] Handley, M., Schooler, E., and H. Schulzrinne, "Session
+ Initiation Protocol (SIP)", Work in Progress.
+
+ [12] Schulzrinne, H., Rao, A., and R. Lanphier, "Real Time Streaming
+ Protocol (RTSP)", RFC 2326, April 1998.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 41]
+
+RFC 2327 SDP April 1998
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (1998). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley & Jacobson Standards Track [Page 42]
+
diff --git a/src/modules/rtp/rfc2974.txt b/src/modules/rtp/rfc2974.txt
new file mode 100644
index 00000000..4a5aa626
--- /dev/null
+++ b/src/modules/rtp/rfc2974.txt
@@ -0,0 +1,1011 @@
+
+
+
+
+
+
+Network Working Group M. Handley
+Request for Comments: 2974 ACIRI
+Category: Experimental C. Perkins
+ USC/ISI
+ E. Whelan
+ UCL
+ October 2000
+
+
+ Session Announcement Protocol
+
+Status of this Memo
+
+ This memo defines an Experimental Protocol for the Internet
+ community. It does not specify an Internet standard of any kind.
+ Discussion and suggestions for improvement are requested.
+ Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2000). All Rights Reserved.
+
+Abstract
+
+ This document describes version 2 of the multicast session directory
+ announcement protocol, Session Announcement Protocol (SAP), and the
+ related issues affecting security and scalability that should be
+ taken into account by implementors.
+
+1 Introduction
+
+ In order to assist the advertisement of multicast multimedia
+ conferences and other multicast sessions, and to communicate the
+ relevant session setup information to prospective participants, a
+ distributed session directory may be used. An instance of such a
+ session directory periodically multicasts packets containing a
+ description of the session, and these advertisements are received by
+ other session directories such that potential remote participants can
+ use the session description to start the tools required to
+ participate in the session.
+
+ This memo describes the issues involved in the multicast announcement
+ of session description information and defines an announcement
+ protocol to be used. Sessions are described using the session
+ description protocol which is described in a companion memo [4].
+
+
+
+
+
+
+Handley, et al. Experimental [Page 1]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+2 Terminology
+
+ A SAP announcer periodically multicasts an announcement packet to a
+ well known multicast address and port. The announcement is multicast
+ with the same scope as the session it is announcing, ensuring that
+ the recipients of the announcement are within the scope of the
+ session the announcement describes (bandwidth and other such
+ constraints permitting). This is also important for the scalability
+ of the protocol, as it keeps local session announcements local.
+
+ A SAP listener learns of the multicast scopes it is within (for
+ example, using the Multicast-Scope Zone Announcement Protocol [5])
+ and listens on the well known SAP address and port for those scopes.
+ In this manner, it will eventually learn of all the sessions being
+ announced, allowing those sessions to be joined.
+
+ The key words `MUST', `MUST NOT', `REQUIRED', `SHALL', `SHALL NOT',
+ `SHOULD', `SHOULD NOT', `RECOMMENDED', `MAY', and `OPTIONAL' in this
+ document are to be interpreted as described in [1].
+
+3 Session Announcement
+
+ As noted previously, a SAP announcer periodically sends an
+ announcement packet to a well known multicast address and port.
+ There is no rendezvous mechanism - the SAP announcer is not aware of
+ the presence or absence of any SAP listeners - and no additional
+ reliability is provided over the standard best-effort UDP/IP
+ semantics.
+
+ That announcement contains a session description and SHOULD contain
+ an authentication header. The session description MAY be encrypted
+ although this is NOT RECOMMENDED (see section 7).
+
+ A SAP announcement is multicast with the same scope as the session it
+ is announcing, ensuring that the recipients of the announcement are
+ within the scope of the session the announcement describes. There are
+ a number of possibilities:
+
+ IPv4 global scope sessions use multicast addresses in the range
+ 224.2.128.0 - 224.2.255.255 with SAP announcements being sent to
+ 224.2.127.254 (note that 224.2.127.255 is used by the obsolete
+ SAPv0 and MUST NOT be used).
+
+
+
+
+
+
+
+
+
+Handley, et al. Experimental [Page 2]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ IPv4 administrative scope sessions use administratively scoped IP
+ multicast as defined in [7]. The multicast address to be used for
+ announcements is the highest multicast address in the relevant
+ administrative scope zone. For example, if the scope range is
+ 239.16.32.0 - 239.16.33.255, then 239.16.33.255 is used for SAP
+ announcements.
+
+ IPv6 sessions are announced on the address FF0X:0:0:0:0:0:2:7FFE
+ where X is the 4-bit scope value. For example, an announcement
+ for a link-local session assigned the address
+ FF02:0:0:0:0:0:1234:5678, should be advertised on SAP address
+ FF02:0:0:0:0:0:2:7FFE.
+
+ Ensuring that a description is not used by a potential participant
+ outside the session scope is not addressed in this memo.
+
+ SAP announcements MUST be sent on port 9875 and SHOULD be sent with
+ an IP time-to-live of 255 (the use of TTL scoping for multicast is
+ discouraged [7]).
+
+ If a session uses addresses in multiple administrative scope ranges,
+ it is necessary for the announcer to send identical copies of the
+ announcement to each administrative scope range. It is up to the
+ listeners to parse such multiple announcements as the same session
+ (as identified by the SDP origin field, for example). The
+ announcement rate for each administrative scope range MUST be
+ calculated separately, as if the multiple announcements were
+ separate.
+
+ Multiple announcers may announce a single session, as an aid to
+ robustness in the face of packet loss and failure of one or more
+ announcers. The rate at which each announcer repeats its
+ announcement MUST be scaled back such that the total announcement
+ rate is equal to that which a single server would choose.
+ Announcements made in this manner MUST be identical.
+
+ If multiple announcements are being made for a session, then each
+ announcement MUST carry an authentication header signed by the same
+ key, or be treated as a completely separate announcement by
+ listeners.
+
+ An IPv4 SAP listener SHOULD listen on the IPv4 global scope SAP
+ address and on the SAP addresses for each IPv4 administrative scope
+ zone it is within. The discovery of administrative scope zones is
+ outside the scope of this memo, but it is assumed that each SAP
+ listener within a particular scope zone is aware of that scope zone.
+ A SAP listener which supports IPv6 SHOULD also listen to the IPv6 SAP
+ addresses.
+
+
+
+Handley, et al. Experimental [Page 3]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+3.1 Announcement Interval
+
+ The time period between repetitions of an announcement is chosen such
+ that the total bandwidth used by all announcements on a single SAP
+ group remains below a preconfigured limit. If not otherwise
+ specified, the bandwidth limit SHOULD be assumed to be 4000 bits per
+ second.
+
+ Each announcer is expected to listen to other announcements in order
+ to determine the total number of sessions being announced on a
+ particular group. Sessions are uniquely identified by the
+ combination of the message identifier hash and originating source
+ fields of the SAP header (note that SAP v0 announcers always set the
+ message identifier hash to zero, and if such an announcement is
+ received the entire message MUST be compared to determine
+ uniqueness).
+
+ Announcements are made by periodic multicast to the group. The base
+ interval between announcements is derived from the number of
+ announcements being made in that group, the size of the announcement
+ and the configured bandwidth limit. The actual transmission time is
+ derived from this base interval as follows:
+
+ 1. The announcer initializes the variable tp to be the last time a
+ particular announcement was transmitted (or the current time if
+ this is the first time this announcement is to be made).
+
+ 2. Given a configured bandwidth limit in bits/second and an
+ announcement of ad_size bytes, the base announcement interval
+ in seconds is
+
+ interval =max(300; (8*no_of_ads*ad_size)/limit)
+
+ 3. An offset is calculated based on the base announcement interval
+
+ offset= rand(interval* 2/3)-(interval/3)
+
+ 4. The next transmission time for an announcement is derived as
+
+ tn =tp+ interval+ offset
+
+ The announcer then sets a timer to expire at tn and waits. At time
+ tn the announcer SHOULD recalculate the next transmission time. If
+ the new value of tn is before the current time, the announcement is
+ sent immediately. Otherwise the transmission is rescheduled for the
+ new tn. This reconsideration prevents transient packet bursts on
+ startup and when a network partition heals.
+
+
+
+
+Handley, et al. Experimental [Page 4]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+4 Session Deletion
+
+ Sessions may be deleted in one of several ways:
+
+ Explicit Timeout The session description payload may contain
+ timestamp information specifying the start- and end-times of the
+ session. If the current time is later than the end-time of the
+ session, then the session SHOULD be deleted from the receiver's
+ session cache.
+
+ Implicit Timeout A session announcement message should be received
+ periodically for each session description in a receiver's session
+ cache. The announcement period can be predicted by the receiver
+ from the set of sessions currently being announced. If a session
+ announcement message has not been received for ten times the
+ announcement period, or one hour, whichever is the greater, then
+ the session is deleted from the receiver's session cache. The one
+ hour minimum is to allow for transient network partitionings.
+
+ Explicit Deletion A session deletion packet is received specifying
+ the session to be deleted. Session deletion packets SHOULD have a
+ valid authentication header, matching that used to authenticate
+ previous announcement packets. If this authentication is missing,
+ the deletion message SHOULD be ignored.
+
+5 Session Modification
+
+ A pre-announced session can be modified by simply announcing the
+ modified session description. In this case, the version hash in the
+ SAP header MUST be changed to indicate to receivers that the packet
+ contents should be parsed (or decrypted and parsed if it is
+ encrypted). The session itself, as distinct from the session
+ announcement, is uniquely identified by the payload and not by the
+ message identifier hash in the header.
+
+ The same rules apply for session modification as for session
+ deletion:
+
+ o Either the modified announcement must contain an authentication
+ header signed by the same key as the cached session announcement
+ it is modifying, or:
+
+ o The cached session announcement must not contain an authentication
+ header, and the session modification announcement must originate
+ from the same host as the session it is modifying.
+
+
+
+
+
+
+Handley, et al. Experimental [Page 5]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ If an announcement is received containing an authentication header
+ and the cached announcement did not contain an authentication header,
+ or it contained a different authentication header, then the modified
+ announcement MUST be treated as a new and different announcement, and
+ displayed in addition to the un-authenticated announcement. The same
+ should happen if a modified packet without an authentication header
+ is received from a different source than the original announcement.
+
+ These rules prevent an announcement having an authentication header
+ added by a malicious user and then being deleted using that header,
+ and it also prevents a denial-of-service attack by someone putting
+ out a spoof announcement which, due to packet loss, reaches some
+ participants before the original announcement. Note that under such
+ circumstances, being able to authenticate the message originator is
+ the only way to discover which session is the correct session.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | V=1 |A|R|T|E|C| auth len | msg id hash |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | |
+ : originating source (32 or 128 bits) :
+ : :
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | optional authentication data |
+ : .... :
+ *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
+ | optional payload type |
+ + +-+- - - - - - - - - -+
+ | |0| |
+ + - - - - - - - - - - - - - - - - - - - - +-+ |
+ | |
+ : payload :
+ | |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 1: Packet format
+
+6 Packet Format
+
+ SAP data packets have the format described in figure 1.
+
+ V: Version Number. The version number field MUST be set to 1 (SAPv2
+ announcements which use only SAPv1 features are backwards
+ compatible, those which use new features can be detected by other
+ means, so the SAP version number doesn't need to change).
+
+
+
+
+Handley, et al. Experimental [Page 6]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ A: Address type. If the A bit is 0, the originating source field
+ contains a 32-bit IPv4 address. If the A bit is 1, the
+ originating source contains a 128-bit IPv6 address.
+
+ R: Reserved. SAP announcers MUST set this to 0, SAP listeners MUST
+ ignore the contents of this field.
+
+ T: Message Type. If the T field is set to 0 this is a session
+ announcement packet, if 1 this is a session deletion packet.
+
+ E: Encryption Bit. If the encryption bit is set to 1, the payload of
+ the SAP packet is encrypted. If this bit is 0 the packet is not
+ encrypted. See section 7 for details of the encryption process.
+
+ C: Compressed bit. If the compressed bit is set to 1, the payload is
+ compressed using the zlib compression algorithm [3]. If the
+ payload is to be compressed and encrypted, the compression MUST be
+ performed first.
+
+ Authentication Length. An 8 bit unsigned quantity giving the number
+ of 32 bit words following the main SAP header that contain
+ authentication data. If it is zero, no authentication header is
+ present.
+
+ Authentication data containing a digital signature of the packet,
+ with length as specified by the authentication length header
+ field. See section 8 for details of the authentication process.
+
+ Message Identifier Hash. A 16 bit quantity that, used in combination
+ with the originating source, provides a globally unique identifier
+ indicating the precise version of this announcement. The choice
+ of value for this field is not specified here, except that it MUST
+ be unique for each session announced by a particular SAP announcer
+ and it MUST be changed if the session description is modified (and
+ a session deletion message SHOULD be sent for the old version of
+ the session).
+
+ Earlier versions of SAP used a value of zero to mean that the hash
+ should be ignored and the payload should always be parsed. This
+ had the unfortunate side-effect that SAP announcers had to study
+ the payload data to determine how many unique sessions were being
+ advertised, making the calculation of the announcement interval
+ more complex than necessary. In order to decouple the session
+ announcement process from the contents of those announcements, SAP
+ announcers SHOULD NOT set the message identifier hash to zero.
+
+ SAP listeners MAY silently discard messages if the message
+ identifier hash is set to zero.
+
+
+
+Handley, et al. Experimental [Page 7]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ Originating Source. This gives the IP address of the original source
+ of the message. This is an IPv4 address if the A field is set to
+ zero, else it is an IPv6 address. The address is stored in
+ network byte order.
+
+ SAPv0 permitted the originating source to be zero if the message
+ identifier hash was also zero. This practise is no longer legal,
+ and SAP announcers SHOULD NOT set the originating source to zero.
+ SAP listeners MAY silently discard packets with the originating
+ source set to zero.
+
+ The header is followed by an optional payload type field and the
+ payload data itself. If the E or C bits are set in the header both
+ the payload type and payload are encrypted and/or compressed.
+
+ The payload type field is a MIME content type specifier, describing
+ the format of the payload. This is a variable length ASCII text
+ string, followed by a single zero byte (ASCII NUL). The payload type
+ SHOULD be included in all packets. If the payload type is
+ `application/sdp' both the payload type and its terminating zero byte
+ MAY be omitted, although this is intended for backwards compatibility
+ with SAP v1 listeners only.
+
+ The absence of a payload type field may be noted since the payload
+ section of such a packet will start with an SDP `v=0' field, which is
+ not a legal MIME content type specifier.
+
+ All implementations MUST support payloads of type `application/sdp'
+ [4]. Other formats MAY be supported although since there is no
+ negotiation in SAP an announcer which chooses to use a session
+ description format other than SDP cannot know that the listeners are
+ able to understand the announcement. A proliferation of payload
+ types in announcements has the potential to lead to severe
+ interoperability problems, and for this reason, the use of non-SDP
+ payloads is NOT RECOMMENDED.
+
+ If the packet is an announcement packet, the payload contains a
+ session description.
+
+ If the packet is a session deletion packet, the payload contains a
+ session deletion message. If the payload format is `application/sdp'
+ the deletion message is a single SDP line consisting of the origin
+ field of the announcement to be deleted.
+
+ It is desirable for the payload to be sufficiently small that SAP
+ packets do not get fragmented by the underlying network.
+ Fragmentation has a loss multiplier effect, which is known to
+ significantly affect the reliability of announcements. It is
+
+
+
+Handley, et al. Experimental [Page 8]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ RECOMMENDED that SAP packets are smaller than 1kByte in length,
+ although if it is known that announcements will use a network with a
+ smaller MTU than this, then that SHOULD be used as the maximum
+ recommended packet size.
+
+7 Encrypted Announcements
+
+ An announcement is received by all listeners in the scope to which it
+ is sent. If an announcement is encrypted, and many of the receivers
+ do not have the encryption key, there is a considerable waste of
+ bandwidth since those receivers cannot use the announcement they have
+ received. For this reason, the use of encrypted SAP announcements is
+ NOT RECOMMENDED on the global scope SAP group or on administrative
+ scope groups which may have many receivers which cannot decrypt those
+ announcements.
+
+ The opinion of the authors is that encrypted SAP is useful in special
+ cases only, and that the vast majority of scenarios where encrypted
+ SAP has been proposed may be better served by distributing session
+ details using another mechanism. There are, however, certain
+ scenarios where encrypted announcements may be useful. For this
+ reason, the encryption bit is included in the SAP header to allow
+ experimentation with encrypted announcements.
+
+ This memo does not specify details of the encryption algorithm to be
+ used or the means by which keys are generated and distributed. An
+ additional specification should define these, if it is desired to use
+ encrypted SAP.
+
+ Note that if an encrypted announcement is being announced via a
+ proxy, then there may be no way for the proxy to discover that the
+ announcement has been superseded, and so it may continue to relay the
+ old announcement in addition to the new announcement. SAP provides
+ no mechanism to chain modified encrypted announcements, so it is
+ advisable to announce the unmodified session as deleted for a short
+ time after the modification has occurred. This does not guarantee
+ that all proxies have deleted the session, and so receivers of
+ encrypted sessions should be prepared to discard old versions of
+ session announcements that they may receive. In most cases however,
+ the only stateful proxy will be local to (and known to) the sender,
+ and an additional (local-area) protocol involving a handshake for
+ such session modifications can be used to avoid this problem.
+
+ Session announcements that are encrypted with a symmetric algorithm
+ may allow a degree of privacy in the announcement of a session, but
+ it should be recognized that a user in possession of such a key can
+ pass it on to other users who should not be in possession of such a
+ key. Thus announcements to such a group of key holders cannot be
+
+
+
+Handley, et al. Experimental [Page 9]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ assumed to have come from an authorized key holder unless there is an
+ appropriate authentication header signed by an authorized key holder.
+ In addition the recipients of such encrypted announcements cannot be
+ assumed to only be authorized key holders. Such encrypted
+ announcements do not provide any real security unless all of the
+ authorized key holders are trusted to maintain security of such
+ session directory keys. This property is shared by the multicast
+ session tools themselves, where it is possible for an un-trustworthy
+ member of the session to pass on encryption keys to un-authorized
+ users. However it is likely that keys used for the session tools
+ will be more short lived than those used for session directories.
+
+ Similar considerations should apply when session announcements are
+ encrypted with an asymmetric algorithm, but then it is possible to
+ restrict the possessor(s) of the private key, so that announcements
+ to a key-holder group can not be made, even if one of the untrusted
+ members of the group proves to be un-trustworthy.
+
+ 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | V=1 |P| Auth | |
+ +-+-+-+-+-+-+-+-+ |
+ | Format specific authentication subheader |
+ : .................. :
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 2: Format of the authentication data in the SAP header
+
+8 Authenticated Announcements
+
+ The authentication header can be used for two purposes:
+
+ o Verification that changes to a session description or deletion of
+ a session are permitted.
+
+ o Authentication of the identity of the session creator.
+
+ In some circumstances only verification is possible because a
+ certificate signed by a mutually trusted person or authority is not
+ available. However, under such circumstances, the session originator
+ may still be authenticated to be the same as the session originator
+ of previous sessions claiming to be from the same person. This may
+ or may not be sufficient depending on the purpose of the session and
+ the people involved.
+
+
+
+
+
+
+Handley, et al. Experimental [Page 10]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ Clearly the key used for the authentication should not be trusted to
+ belong to the session originator unless it has been separately
+ authenticated by some other means, such as being certified by a
+ trusted third party. Such certificates are not normally included in
+ an SAP header because they take more space than can normally be
+ afforded in an SAP packet, and such verification must therefore take
+ place by some other mechanism. However, as certified public keys are
+ normally locally cached, authentication of a particular key only has
+ to take place once, rather than every time the session directory
+ retransmits the announcement.
+
+ SAP is not tied to any single authentication mechanism.
+ Authentication data in the header is self-describing, but the precise
+ format depends on the authentication mechanism in use. The generic
+ format of the authentication data is given in figure 2. The
+ structure of the format specific authentication subheader, using both
+ the PGP and the CMS formats, is discussed in sections 8.1 and 8.2
+ respectively. Additional formats may be added in future.
+
+ Version Number, V: The version number of the authentication format
+ specified by this memo is 1.
+
+ Padding Bit, P: If necessary the authentication data is padded to be
+ a multiple of 32 bits and the padding bit is set. In this case
+ the last byte of the authentication data contains the number of
+ padding bytes (including the last byte) that must be discarded.
+
+ Authentication Type, Auth: The authentication type is a 4 bit
+ encoded field that denotes the authentication infrastructure the
+ sender expects the recipients to use to check the authenticity and
+ integrity of the information. This defines the format of the
+ authentication subheader and can take the values: 0 = PGP format,
+ 1 = CMS format. All other values are undefined and SHOULD be
+ ignored.
+
+ If a SAP packet is to be compressed or encrypted, this MUST be done
+ before the authentication is added.
+
+ The digital signature in the authentication data MUST be calculated
+ over the entire packet, including the header. The authentication
+ length MUST be set to zero and the authentication data excluded when
+ calculating the digital signature.
+
+ It is to be expected that sessions may be announced by a number of
+ different mechanisms, not only SAP. For example, a session
+ description may be placed on a web page, sent by email or conveyed in a
+
+
+
+
+
+Handley, et al. Experimental [Page 11]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ session initiation protocol. To ease interoperability with these
+ other mechanisms, application level security is employed, rather than
+ using IPsec authentication headers.
+
+8.1 PGP Authentication
+
+ A full description of the PGP protocol can be found in [2]. When
+ using PGP for SAP authentication the basic format specific
+ authentication subheader comprises a digital signature packet as
+ described in [2]. The signature type MUST be 0x01 which means the
+ signature is that of a canonical text document.
+
+8.2 CMS Authentication
+
+ A full description of the Cryptographic Message Syntax can be found
+ in [6]. The format specific authentication subheader will, in the
+ CMS case, have an ASN.1 ContentInfo type with the ContentType being
+ signedData.
+
+ Use is made of the option available in PKCS#7 to leave the content
+ itself blank as the content which is signed is already present in the
+ packet. Inclusion of it within the SignedData type would duplicate
+ this data and increase the packet length unnecessarily. In addition
+ this allows recipients with either no interest in the authentication,
+ or with no mechanism for checking it, to more easily skip the
+ authentication information.
+
+ There SHOULD be only one signerInfo and related fields corresponding
+ to the originator of the SAP announcement. The signingTime SHOULD be
+ present as a signedAttribute. However, due to the strict size
+ limitations on the size of SAP packets, certificates and CRLs SHOULD
+ NOT be included in the signedData structure. It is expected that
+ users of the protocol will have other methods for certificate and CRL
+ distribution.
+
+9 Scalability and caching
+
+ SAP is intended to announce the existence of long-lived wide-area
+ multicast sessions. It is not an especially timely protocol:
+ sessions are announced by periodic multicast with a repeat rate on
+ the order of tens of minutes, and no enhanced reliability over UDP.
+ This leads to a long startup delay before a complete set of
+ announcements is heard by a listener. This delay is clearly
+ undesirable for interactive browsing of announced sessions.
+
+ In order to reduce the delays inherent in SAP, it is recommended that
+ proxy caches are deployed. A SAP proxy cache is expected to listen
+ to all SAP groups in its scope, and to maintain an up-to-date list of
+
+
+
+Handley, et al. Experimental [Page 12]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ all announced sessions along with the time each announcement was last
+ received. When a new SAP listener starts, it should contact its
+ local proxy to download this information, which is then sufficient
+ for it to process future announcements directly, as if it has been
+ continually listening.
+
+ The protocol by which a SAP listener contacts its local proxy cache
+ is not specified here.
+
+10 Security Considerations
+
+ SAP contains mechanisms for ensuring integrity of session
+ announcements, for authenticating the origin of an announcement and
+ for encrypting such announcements (sections 7 and 8).
+
+ As stated in section 5, if a session modification announcement is
+ received that contains a valid authentication header, but which is
+ not signed by the original creator of the session, then the session
+ must be treated as a new session in addition to the original session
+ with the same SDP origin information unless the originator of one of
+ the session descriptions can be authenticated using a certificate
+ signed by a trusted third party. If this were not done, there would
+ be a possible denial of service attack whereby a party listens for
+ new announcements, strips off the original authentication header,
+ modifies the session description, adds a new authentication header
+ and re-announces the session. If a rule was imposed that such spoof
+ announcements were ignored, then if packet loss or late starting of a
+ session directory instance caused the original announcement to fail
+ to arrive at a site, but the spoof announcement did so, this would
+ then prevent the original announcement from being accepted at that
+ site.
+
+ A similar denial-of-service attack is possible if a session
+ announcement receiver relies completely on the originating source and
+ hash fields to indicate change, and fails to parse the remainder of
+ announcements for which it has seen the origin/hash combination
+ before.
+
+ A denial of service attack is possible from a malicious site close to
+ a legitimate site which is making a session announcement. This can
+ happen if the malicious site floods the legitimate site with huge
+ numbers of (illegal) low TTL announcements describing high TTL
+ sessions. This may reduce the session announcement rate of the
+ legitimate announcement to below a tenth of the rate expected at
+ remote sites and therefore cause the session to time out. Such an
+ attack is likely to be easily detectable, and we do not provide any
+ mechanism here to prevent it.
+
+
+
+
+Handley, et al. Experimental [Page 13]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+A. Summary of differences between SAPv0 and SAPv1
+
+ For this purpose SAPv0 is defined as the protocol in use by version
+ 2.2 of the session directory tool, sdr. SAPv1 is the protocol
+ described in the 19 November 1996 version of this memo. The packet
+ headers of SAP messages are the same in V0 and V1 in that a V1 tool
+ can parse a V0 announcement header but not vice-versa. In SAPv0, the
+ fields have the following values:
+
+ o Version Number: 0
+
+ o Message Type: 0 (Announcement)
+
+ o Authentication Type: 0 (No Authentication)
+
+ o Encryption Bit: 0 (No Encryption)
+
+ o Compression Bit: 0 (No compression)
+
+ o Message Id Hash: 0 (No Hash Specified)
+
+ o Originating Source: 0 (No source specified, announcement has
+ not been relayed)
+
+B. Summary of differences between SAPv1 and SAPv2
+
+ The packet headers of SAP messages are the same in V1 and V2 in that
+ a V2 tool can parse a V1 announcement header but not necessarily
+ vice-versa.
+
+ o The A bit has been added to the SAP header, replacing one of the
+ bits of the SAPv1 message type field. If set to zero the
+ announcement is of an IPv4 session, and the packet is backwards
+ compatible with SAPv1. If set to one the announcement is of an
+ IPv6 session, and SAPv1 listeners (which do not support IPv6) will
+ see this as an illegal message type (MT) field.
+
+ o The second bit of the message type field in SAPv1 has been
+ replaced by a reserved, must-be-zero, bit. This bit was unused in
+ SAPv1, so this change just codifies existing usage.
+
+ o SAPv1 specified encryption of the payload. SAPv2 includes the E
+ bit in the SAP header to indicate that the payload is encrypted,
+ but does not specify any details of the encryption.
+
+ o SAPv1 allowed the message identifier hash and originating source
+ fields to be set to zero, for backwards compatibility. This is no
+ longer legal.
+
+
+
+Handley, et al. Experimental [Page 14]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+ o SAPv1 specified gzip compression. SAPv2 uses zlib (the only known
+ implementation of SAP compression used zlib, and gzip compression
+ was a mistake).
+
+ o SAPv2 provides a more complete specification for authentication.
+
+ o SAPv2 allows for non-SDP payloads to be transported. SAPv1
+ required that the payload was SDP.
+
+ o SAPv1 included a timeout field for encrypted announcement, SAPv2
+ does not (and relies on explicit deletion messages or implicit
+ timeouts).
+
+C. Acknowledgements
+
+ SAP and SDP were originally based on the protocol used by the sd
+ session directory from Van Jacobson at LBNL. Version 1 of SAP was
+ designed by Mark Handley as part of the European Commission MICE
+ (Esprit 7602) and MERCI (Telematics 1007) projects. Version 2
+ includes authentication features developed by Edmund Whelan, Goli
+ Montasser-Kohsari and Peter Kirstein as part of the European
+ Commission ICE-TEL project (Telematics 1005), and support for IPv6
+ developed by Maryann P. Maher and Colin Perkins.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley, et al. Experimental [Page 15]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+D. Authors' Addresses
+
+ Mark Handley
+ AT&T Center for Internet Research at ICSI,
+ International Computer Science Institute,
+ 1947 Center Street, Suite 600,
+ Berkeley, CA 94704, USA
+
+ EMail: mjh@aciri.org
+
+
+ Colin Perkins
+ USC Information Sciences Institute
+ 4350 N. Fairfax Drive, Suite 620
+ Arlington, VA 22203, USA
+
+ EMail: csp@isi.edu
+
+
+ Edmund Whelan
+ Department of Computer Science,
+ University College London,
+ Gower Street,
+ London, WC1E 6BT, UK
+
+ EMail: e.whelan@cs.ucl.ac.uk
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley, et al. Experimental [Page 16]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+References
+
+ [1] Bradner, S., "Key words for use in RFCs to indicate requirement
+ levels", BCP 14, RFC 2119, March 1997.
+
+ [2] Callas, J., Donnerhacke, L., Finney, H. and R. Thayer, "OpenPGP
+ message format", RFC 2440, November 1998.
+
+ [3] Deutsch, P. and J.-L. Gailly, "Zlib compressed data format
+ specification version 3.3", RFC 1950, May 1996.
+
+ [4] Handley, M. and V. Jacobson, "SDP: Session Description Protocol",
+ RFC 2327, April 1998.
+
+ [5] Handley, M., Thaler, D. and R. Kermode, "Multicast-scope zone
+ announcement protocol (MZAP)", RFC 2776, February 2000.
+
+ [6] Housley, R., "Cryptographic message syntax", RFC 2630, June 1999.
+
+ [7] Mayer, D., "Administratively scoped IP multicast", RFC 2365, July
+ 1998.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley, et al. Experimental [Page 17]
+
+RFC 2974 Session Announcement Protocol October 2000
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (2000). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Handley, et al. Experimental [Page 18]
+
diff --git a/src/modules/rtp/rfc3550.txt b/src/modules/rtp/rfc3550.txt
new file mode 100644
index 00000000..165736cf
--- /dev/null
+++ b/src/modules/rtp/rfc3550.txt
@@ -0,0 +1,5827 @@
+
+
+
+
+
+
+Network Working Group H. Schulzrinne
+Request for Comments: 3550 Columbia University
+Obsoletes: 1889 S. Casner
+Category: Standards Track Packet Design
+ R. Frederick
+ Blue Coat Systems Inc.
+ V. Jacobson
+ Packet Design
+ July 2003
+
+
+ RTP: A Transport Protocol for Real-Time Applications
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+Abstract
+
+ This memorandum describes RTP, the real-time transport protocol. RTP
+ provides end-to-end network transport functions suitable for
+ applications transmitting real-time data, such as audio, video or
+ simulation data, over multicast or unicast network services. RTP
+ does not address resource reservation and does not guarantee
+ quality-of-service for real-time services. The data transport is
+ augmented by a control protocol (RTCP) to allow monitoring of the
+ data delivery in a manner scalable to large multicast networks, and
+ to provide minimal control and identification functionality. RTP and
+ RTCP are designed to be independent of the underlying transport and
+ network layers. The protocol supports the use of RTP-level
+ translators and mixers.
+
+ Most of the text in this memorandum is identical to RFC 1889 which it
+ obsoletes. There are no changes in the packet formats on the wire,
+ only changes to the rules and algorithms governing how the protocol
+ is used. The biggest change is an enhancement to the scalable timer
+ algorithm for calculating when to send RTCP packets in order to
+ minimize transmission in excess of the intended rate when many
+ participants join a session simultaneously.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 1]
+
+RFC 3550 RTP July 2003
+
+
+Table of Contents
+
+ 1. Introduction ................................................ 4
+ 1.1 Terminology ............................................ 5
+ 2. RTP Use Scenarios ........................................... 5
+ 2.1 Simple Multicast Audio Conference ...................... 6
+ 2.2 Audio and Video Conference ............................. 7
+ 2.3 Mixers and Translators ................................. 7
+ 2.4 Layered Encodings ...................................... 8
+ 3. Definitions ................................................. 8
+ 4. Byte Order, Alignment, and Time Format ...................... 12
+ 5. RTP Data Transfer Protocol .................................. 13
+ 5.1 RTP Fixed Header Fields ................................ 13
+ 5.2 Multiplexing RTP Sessions .............................. 16
+ 5.3 Profile-Specific Modifications to the RTP Header ....... 18
+ 5.3.1 RTP Header Extension ............................ 18
+ 6. RTP Control Protocol -- RTCP ................................ 19
+ 6.1 RTCP Packet Format ..................................... 21
+ 6.2 RTCP Transmission Interval ............................. 24
+ 6.2.1 Maintaining the Number of Session Members ....... 28
+ 6.3 RTCP Packet Send and Receive Rules ..................... 28
+ 6.3.1 Computing the RTCP Transmission Interval ........ 29
+ 6.3.2 Initialization .................................. 30
+ 6.3.3 Receiving an RTP or Non-BYE RTCP Packet ......... 31
+ 6.3.4 Receiving an RTCP BYE Packet .................... 31
+ 6.3.5 Timing Out an SSRC .............................. 32
+ 6.3.6 Expiration of Transmission Timer ................ 32
+ 6.3.7 Transmitting a BYE Packet ....................... 33
+ 6.3.8 Updating we_sent ................................ 34
+ 6.3.9 Allocation of Source Description Bandwidth ...... 34
+ 6.4 Sender and Receiver Reports ............................ 35
+ 6.4.1 SR: Sender Report RTCP Packet ................... 36
+ 6.4.2 RR: Receiver Report RTCP Packet ................. 42
+ 6.4.3 Extending the Sender and Receiver Reports ....... 42
+ 6.4.4 Analyzing Sender and Receiver Reports ........... 43
+ 6.5 SDES: Source Description RTCP Packet ................... 45
+ 6.5.1 CNAME: Canonical End-Point Identifier SDES Item . 46
+ 6.5.2 NAME: User Name SDES Item ....................... 48
+ 6.5.3 EMAIL: Electronic Mail Address SDES Item ........ 48
+ 6.5.4 PHONE: Phone Number SDES Item ................... 49
+ 6.5.5 LOC: Geographic User Location SDES Item ......... 49
+ 6.5.6 TOOL: Application or Tool Name SDES Item ........ 49
+ 6.5.7 NOTE: Notice/Status SDES Item ................... 50
+ 6.5.8 PRIV: Private Extensions SDES Item .............. 50
+ 6.6 BYE: Goodbye RTCP Packet ............................... 51
+ 6.7 APP: Application-Defined RTCP Packet ................... 52
+ 7. RTP Translators and Mixers .................................. 53
+ 7.1 General Description .................................... 53
+
+
+
+Schulzrinne, et al. Standards Track [Page 2]
+
+RFC 3550 RTP July 2003
+
+
+ 7.2 RTCP Processing in Translators ......................... 55
+ 7.3 RTCP Processing in Mixers .............................. 57
+ 7.4 Cascaded Mixers ........................................ 58
+ 8. SSRC Identifier Allocation and Use .......................... 59
+ 8.1 Probability of Collision ............................... 59
+ 8.2 Collision Resolution and Loop Detection ................ 60
+ 8.3 Use with Layered Encodings ............................. 64
+ 9. Security .................................................... 65
+ 9.1 Confidentiality ........................................ 65
+ 9.2 Authentication and Message Integrity ................... 67
+ 10. Congestion Control .......................................... 67
+ 11. RTP over Network and Transport Protocols .................... 68
+ 12. Summary of Protocol Constants ............................... 69
+ 12.1 RTCP Packet Types ...................................... 70
+ 12.2 SDES Types ............................................. 70
+ 13. RTP Profiles and Payload Format Specifications .............. 71
+ 14. Security Considerations ..................................... 73
+ 15. IANA Considerations ......................................... 73
+ 16. Intellectual Property Rights Statement ...................... 74
+ 17. Acknowledgments ............................................. 74
+ Appendix A. Algorithms ........................................ 75
+ Appendix A.1 RTP Data Header Validity Checks ................... 78
+ Appendix A.2 RTCP Header Validity Checks ....................... 82
+ Appendix A.3 Determining Number of Packets Expected and Lost ... 83
+ Appendix A.4 Generating RTCP SDES Packets ...................... 84
+ Appendix A.5 Parsing RTCP SDES Packets ......................... 85
+ Appendix A.6 Generating a Random 32-bit Identifier ............. 85
+ Appendix A.7 Computing the RTCP Transmission Interval .......... 87
+ Appendix A.8 Estimating the Interarrival Jitter ................ 94
+ Appendix B. Changes from RFC 1889 ............................. 95
+ References ...................................................... 100
+ Normative References ............................................ 100
+ Informative References .......................................... 100
+ Authors' Addresses .............................................. 103
+ Full Copyright Statement ........................................ 104
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 3]
+
+RFC 3550 RTP July 2003
+
+
+1. Introduction
+
+ This memorandum specifies the real-time transport protocol (RTP),
+ which provides end-to-end delivery services for data with real-time
+ characteristics, such as interactive audio and video. Those services
+ include payload type identification, sequence numbering, timestamping
+ and delivery monitoring. Applications typically run RTP on top of
+ UDP to make use of its multiplexing and checksum services; both
+ protocols contribute parts of the transport protocol functionality.
+ However, RTP may be used with other suitable underlying network or
+ transport protocols (see Section 11). RTP supports data transfer to
+ multiple destinations using multicast distribution if provided by the
+ underlying network.
+
+ Note that RTP itself does not provide any mechanism to ensure timely
+ delivery or provide other quality-of-service guarantees, but relies
+ on lower-layer services to do so. It does not guarantee delivery or
+ prevent out-of-order delivery, nor does it assume that the underlying
+ network is reliable and delivers packets in sequence. The sequence
+ numbers included in RTP allow the receiver to reconstruct the
+ sender's packet sequence, but sequence numbers might also be used to
+ determine the proper location of a packet, for example in video
+ decoding, without necessarily decoding packets in sequence.
+
+ While RTP is primarily designed to satisfy the needs of multi-
+ participant multimedia conferences, it is not limited to that
+ particular application. Storage of continuous data, interactive
+ distributed simulation, active badge, and control and measurement
+ applications may also find RTP applicable.
+
+ This document defines RTP, consisting of two closely-linked parts:
+
+ o the real-time transport protocol (RTP), to carry data that has
+ real-time properties.
+
+ o the RTP control protocol (RTCP), to monitor the quality of service
+ and to convey information about the participants in an on-going
+ session. The latter aspect of RTCP may be sufficient for "loosely
+ controlled" sessions, i.e., where there is no explicit membership
+ control and set-up, but it is not necessarily intended to support
+ all of an application's control communication requirements. This
+ functionality may be fully or partially subsumed by a separate
+ session control protocol, which is beyond the scope of this
+ document.
+
+ RTP represents a new style of protocol following the principles of
+ application level framing and integrated layer processing proposed by
+ Clark and Tennenhouse [10]. That is, RTP is intended to be malleable
+
+
+
+Schulzrinne, et al. Standards Track [Page 4]
+
+RFC 3550 RTP July 2003
+
+
+ to provide the information required by a particular application and
+ will often be integrated into the application processing rather than
+ being implemented as a separate layer. RTP is a protocol framework
+ that is deliberately not complete. This document specifies those
+ functions expected to be common across all the applications for which
+ RTP would be appropriate. Unlike conventional protocols in which
+ additional functions might be accommodated by making the protocol
+ more general or by adding an option mechanism that would require
+ parsing, RTP is intended to be tailored through modifications and/or
+ additions to the headers as needed. Examples are given in Sections
+ 5.3 and 6.4.3.
+
+ Therefore, in addition to this document, a complete specification of
+ RTP for a particular application will require one or more companion
+ documents (see Section 13):
+
+ o a profile specification document, which defines a set of payload
+ type codes and their mapping to payload formats (e.g., media
+ encodings). A profile may also define extensions or modifications
+ to RTP that are specific to a particular class of applications.
+ Typically an application will operate under only one profile. A
+ profile for audio and video data may be found in the companion RFC
+ 3551 [1].
+
+ o payload format specification documents, which define how a
+ particular payload, such as an audio or video encoding, is to be
+ carried in RTP.
+
+ A discussion of real-time services and algorithms for their
+ implementation as well as background discussion on some of the RTP
+ design decisions can be found in [11].
+
+1.1 Terminology
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in BCP 14, RFC 2119 [2]
+ and indicate requirement levels for compliant RTP implementations.
+
+2. RTP Use Scenarios
+
+ The following sections describe some aspects of the use of RTP. The
+ examples were chosen to illustrate the basic operation of
+ applications using RTP, not to limit what RTP may be used for. In
+ these examples, RTP is carried on top of IP and UDP, and follows the
+ conventions established by the profile for audio and video specified
+ in the companion RFC 3551.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 5]
+
+RFC 3550 RTP July 2003
+
+
+2.1 Simple Multicast Audio Conference
+
+ A working group of the IETF meets to discuss the latest protocol
+ document, using the IP multicast services of the Internet for voice
+ communications. Through some allocation mechanism the working group
+ chair obtains a multicast group address and pair of ports. One port
+ is used for audio data, and the other is used for control (RTCP)
+ packets. This address and port information is distributed to the
+ intended participants. If privacy is desired, the data and control
+ packets may be encrypted as specified in Section 9.1, in which case
+ an encryption key must also be generated and distributed. The exact
+ details of these allocation and distribution mechanisms are beyond
+ the scope of RTP.
+
+ The audio conferencing application used by each conference
+ participant sends audio data in small chunks of, say, 20 ms duration.
+ Each chunk of audio data is preceded by an RTP header; RTP header and
+ data are in turn contained in a UDP packet. The RTP header indicates
+ what type of audio encoding (such as PCM, ADPCM or LPC) is contained
+ in each packet so that senders can change the encoding during a
+ conference, for example, to accommodate a new participant that is
+ connected through a low-bandwidth link or react to indications of
+ network congestion.
+
+ The Internet, like other packet networks, occasionally loses and
+ reorders packets and delays them by variable amounts of time. To
+ cope with these impairments, the RTP header contains timing
+ information and a sequence number that allow the receivers to
+ reconstruct the timing produced by the source, so that in this
+ example, chunks of audio are contiguously played out of the speaker
+ every 20 ms. This timing reconstruction is performed separately for
+ each source of RTP packets in the conference. The sequence number
+ can also be used by the receiver to estimate how many packets are
+ being lost.
+
+ Since members of the working group join and leave during the
+ conference, it is useful to know who is participating at any moment
+ and how well they are receiving the audio data. For that purpose,
+ each instance of the audio application in the conference periodically
+ multicasts a reception report plus the name of its user on the RTCP
+ (control) port. The reception report indicates how well the current
+ speaker is being received and may be used to control adaptive
+ encodings. In addition to the user name, other identifying
+ information may also be included subject to control bandwidth limits.
+ A site sends the RTCP BYE packet (Section 6.6) when it leaves the
+ conference.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 6]
+
+RFC 3550 RTP July 2003
+
+
+2.2 Audio and Video Conference
+
+ If both audio and video media are used in a conference, they are
+ transmitted as separate RTP sessions. That is, separate RTP and RTCP
+ packets are transmitted for each medium using two different UDP port
+ pairs and/or multicast addresses. There is no direct coupling at the
+ RTP level between the audio and video sessions, except that a user
+ participating in both sessions should use the same distinguished
+ (canonical) name in the RTCP packets for both so that the sessions
+ can be associated.
+
+ One motivation for this separation is to allow some participants in
+ the conference to receive only one medium if they choose. Further
+ explanation is given in Section 5.2. Despite the separation,
+ synchronized playback of a source's audio and video can be achieved
+ using timing information carried in the RTCP packets for both
+ sessions.
+
+2.3 Mixers and Translators
+
+ So far, we have assumed that all sites want to receive media data in
+ the same format. However, this may not always be appropriate.
+ Consider the case where participants in one area are connected
+ through a low-speed link to the majority of the conference
+ participants who enjoy high-speed network access. Instead of forcing
+ everyone to use a lower-bandwidth, reduced-quality audio encoding, an
+ RTP-level relay called a mixer may be placed near the low-bandwidth
+ area. This mixer resynchronizes incoming audio packets to
+ reconstruct the constant 20 ms spacing generated by the sender, mixes
+ these reconstructed audio streams into a single stream, translates
+ the audio encoding to a lower-bandwidth one and forwards the lower-
+ bandwidth packet stream across the low-speed link. These packets
+ might be unicast to a single recipient or multicast on a different
+ address to multiple recipients. The RTP header includes a means for
+ mixers to identify the sources that contributed to a mixed packet so
+ that correct talker indication can be provided at the receivers.
+
+ Some of the intended participants in the audio conference may be
+ connected with high bandwidth links but might not be directly
+ reachable via IP multicast. For example, they might be behind an
+ application-level firewall that will not let any IP packets pass.
+ For these sites, mixing may not be necessary, in which case another
+ type of RTP-level relay called a translator may be used. Two
+ translators are installed, one on either side of the firewall, with
+ the outside one funneling all multicast packets received through a
+ secure connection to the translator inside the firewall. The
+ translator inside the firewall sends them again as multicast packets
+ to a multicast group restricted to the site's internal network.
+
+
+
+Schulzrinne, et al. Standards Track [Page 7]
+
+RFC 3550 RTP July 2003
+
+
+ Mixers and translators may be designed for a variety of purposes. An
+ example is a video mixer that scales the images of individual people
+ in separate video streams and composites them into one video stream
+ to simulate a group scene. Other examples of translation include the
+ connection of a group of hosts speaking only IP/UDP to a group of
+ hosts that understand only ST-II, or the packet-by-packet encoding
+ translation of video streams from individual sources without
+ resynchronization or mixing. Details of the operation of mixers and
+ translators are given in Section 7.
+
+2.4 Layered Encodings
+
+ Multimedia applications should be able to adjust the transmission
+ rate to match the capacity of the receiver or to adapt to network
+ congestion. Many implementations place the responsibility of rate-
+ adaptivity at the source. This does not work well with multicast
+ transmission because of the conflicting bandwidth requirements of
+ heterogeneous receivers. The result is often a least-common
+ denominator scenario, where the smallest pipe in the network mesh
+ dictates the quality and fidelity of the overall live multimedia
+ "broadcast".
+
+ Instead, responsibility for rate-adaptation can be placed at the
+ receivers by combining a layered encoding with a layered transmission
+ system. In the context of RTP over IP multicast, the source can
+ stripe the progressive layers of a hierarchically represented signal
+ across multiple RTP sessions each carried on its own multicast group.
+ Receivers can then adapt to network heterogeneity and control their
+ reception bandwidth by joining only the appropriate subset of the
+ multicast groups.
+
+ Details of the use of RTP with layered encodings are given in
+ Sections 6.3.9, 8.3 and 11.
+
+3. Definitions
+
+ RTP payload: The data transported by RTP in a packet, for
+ example audio samples or compressed video data. The payload
+ format and interpretation are beyond the scope of this document.
+
+ RTP packet: A data packet consisting of the fixed RTP header, a
+ possibly empty list of contributing sources (see below), and the
+ payload data. Some underlying protocols may require an
+ encapsulation of the RTP packet to be defined. Typically one
+ packet of the underlying protocol contains a single RTP packet,
+ but several RTP packets MAY be contained if permitted by the
+ encapsulation method (see Section 11).
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 8]
+
+RFC 3550 RTP July 2003
+
+
+ RTCP packet: A control packet consisting of a fixed header part
+ similar to that of RTP data packets, followed by structured
+ elements that vary depending upon the RTCP packet type. The
+ formats are defined in Section 6. Typically, multiple RTCP
+ packets are sent together as a compound RTCP packet in a single
+ packet of the underlying protocol; this is enabled by the length
+ field in the fixed header of each RTCP packet.
+
+ Port: The "abstraction that transport protocols use to
+ distinguish among multiple destinations within a given host
+ computer. TCP/IP protocols identify ports using small positive
+ integers." [12] The transport selectors (TSEL) used by the OSI
+ transport layer are equivalent to ports. RTP depends upon the
+ lower-layer protocol to provide some mechanism such as ports to
+ multiplex the RTP and RTCP packets of a session.
+
+ Transport address: The combination of a network address and port
+ that identifies a transport-level endpoint, for example an IP
+ address and a UDP port. Packets are transmitted from a source
+ transport address to a destination transport address.
+
+ RTP media type: An RTP media type is the collection of payload
+ types which can be carried within a single RTP session. The RTP
+ Profile assigns RTP media types to RTP payload types.
+
+ Multimedia session: A set of concurrent RTP sessions among a
+ common group of participants. For example, a videoconference
+ (which is a multimedia session) may contain an audio RTP session
+ and a video RTP session.
+
+ RTP session: An association among a set of participants
+ communicating with RTP. A participant may be involved in multiple
+ RTP sessions at the same time. In a multimedia session, each
+ medium is typically carried in a separate RTP session with its own
+ RTCP packets unless the encoding itself multiplexes multiple
+ media into a single data stream. A participant distinguishes
+ multiple RTP sessions by reception of different sessions using
+ different pairs of destination transport addresses, where a pair
+ of transport addresses comprises one network address plus a pair
+ of ports for RTP and RTCP. All participants in an RTP session may
+ share a common destination transport address pair, as in the case
+ of IP multicast, or the pairs may be different for each
+ participant, as in the case of individual unicast network
+ addresses and port pairs. In the unicast case, a participant may
+ receive from all other participants in the session using the same
+ pair of ports, or may use a distinct pair of ports for each.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 9]
+
+RFC 3550 RTP July 2003
+
+
+ The distinguishing feature of an RTP session is that each
+ maintains a full, separate space of SSRC identifiers (defined
+ next). The set of participants included in one RTP session
+ consists of those that can receive an SSRC identifier transmitted
+ by any one of the participants either in RTP as the SSRC or a CSRC
+ (also defined below) or in RTCP. For example, consider a three-
+ party conference implemented using unicast UDP with each
+ participant receiving from the other two on separate port pairs.
+ If each participant sends RTCP feedback about data received from
+ one other participant only back to that participant, then the
+ conference is composed of three separate point-to-point RTP
+ sessions. If each participant provides RTCP feedback about its
+ reception of one other participant to both of the other
+ participants, then the conference is composed of one multi-party
+ RTP session. The latter case simulates the behavior that would
+ occur with IP multicast communication among the three
+ participants.
+
+ The RTP framework allows the variations defined here, but a
+ particular control protocol or application design will usually
+ impose constraints on these variations.
+
+ Synchronization source (SSRC): The source of a stream of RTP
+ packets, identified by a 32-bit numeric SSRC identifier carried in
+ the RTP header so as not to be dependent upon the network address.
+ All packets from a synchronization source form part of the same
+ timing and sequence number space, so a receiver groups packets by
+ synchronization source for playback. Examples of synchronization
+ sources include the sender of a stream of packets derived from a
+ signal source such as a microphone or a camera, or an RTP mixer
+ (see below). A synchronization source may change its data format,
+ e.g., audio encoding, over time. The SSRC identifier is a
+ randomly chosen value meant to be globally unique within a
+ particular RTP session (see Section 8). A participant need not
+ use the same SSRC identifier for all the RTP sessions in a
+ multimedia session; the binding of the SSRC identifiers is
+ provided through RTCP (see Section 6.5.1). If a participant
+ generates multiple streams in one RTP session, for example from
+ separate video cameras, each MUST be identified as a different
+ SSRC.
+
+ Contributing source (CSRC): A source of a stream of RTP packets
+ that has contributed to the combined stream produced by an RTP
+ mixer (see below). The mixer inserts a list of the SSRC
+ identifiers of the sources that contributed to the generation of a
+ particular packet into the RTP header of that packet. This list
+ is called the CSRC list. An example application is audio
+ conferencing where a mixer indicates all the talkers whose speech
+
+
+
+Schulzrinne, et al. Standards Track [Page 10]
+
+RFC 3550 RTP July 2003
+
+
+ was combined to produce the outgoing packet, allowing the receiver
+ to indicate the current talker, even though all the audio packets
+ contain the same SSRC identifier (that of the mixer).
+
+ End system: An application that generates the content to be sent
+ in RTP packets and/or consumes the content of received RTP
+ packets. An end system can act as one or more synchronization
+ sources in a particular RTP session, but typically only one.
+
+ Mixer: An intermediate system that receives RTP packets from one
+ or more sources, possibly changes the data format, combines the
+ packets in some manner and then forwards a new RTP packet. Since
+ the timing among multiple input sources will not generally be
+ synchronized, the mixer will make timing adjustments among the
+ streams and generate its own timing for the combined stream.
+ Thus, all data packets originating from a mixer will be identified
+ as having the mixer as their synchronization source.
+
+ Translator: An intermediate system that forwards RTP packets
+ with their synchronization source identifier intact. Examples of
+ translators include devices that convert encodings without mixing,
+ replicators from multicast to unicast, and application-level
+ filters in firewalls.
+
+ Monitor: An application that receives RTCP packets sent by
+ participants in an RTP session, in particular the reception
+ reports, and estimates the current quality of service for
+ distribution monitoring, fault diagnosis and long-term statistics.
+ The monitor function is likely to be built into the application(s)
+ participating in the session, but may also be a separate
+ application that does not otherwise participate and does not send
+ or receive the RTP data packets (since they are on a separate
+ port). These are called third-party monitors. It is also
+ acceptable for a third-party monitor to receive the RTP data
+ packets but not send RTCP packets or otherwise be counted in the
+ session.
+
+ Non-RTP means: Protocols and mechanisms that may be needed in
+ addition to RTP to provide a usable service. In particular, for
+ multimedia conferences, a control protocol may distribute
+ multicast addresses and keys for encryption, negotiate the
+ encryption algorithm to be used, and define dynamic mappings
+ between RTP payload type values and the payload formats they
+ represent for formats that do not have a predefined payload type
+ value. Examples of such protocols include the Session Initiation
+ Protocol (SIP) (RFC 3261 [13]), ITU Recommendation H.323 [14] and
+ applications using SDP (RFC 2327 [15]), such as RTSP (RFC 2326
+ [16]). For simple
+
+
+
+Schulzrinne, et al. Standards Track [Page 11]
+
+RFC 3550 RTP July 2003
+
+
+ applications, electronic mail or a conference database may also be
+ used. The specification of such protocols and mechanisms is
+ outside the scope of this document.
+
+4. Byte Order, Alignment, and Time Format
+
+ All integer fields are carried in network byte order, that is, most
+ significant byte (octet) first. This byte order is commonly known as
+ big-endian. The transmission order is described in detail in [3].
+ Unless otherwise noted, numeric constants are in decimal (base 10).
+
+ All header data is aligned to its natural length, i.e., 16-bit fields
+ are aligned on even offsets, 32-bit fields are aligned at offsets
+ divisible by four, etc. Octets designated as padding have the value
+ zero.
+
+ Wallclock time (absolute date and time) is represented using the
+ timestamp format of the Network Time Protocol (NTP), which is in
+ seconds relative to 0h UTC on 1 January 1900 [4]. The full
+ resolution NTP timestamp is a 64-bit unsigned fixed-point number with
+ the integer part in the first 32 bits and the fractional part in the
+ last 32 bits. In some fields where a more compact representation is
+ appropriate, only the middle 32 bits are used; that is, the low 16
+ bits of the integer part and the high 16 bits of the fractional part.
+ The high 16 bits of the integer part must be determined
+ independently.
+
+ An implementation is not required to run the Network Time Protocol in
+ order to use RTP. Other time sources, or none at all, may be used
+ (see the description of the NTP timestamp field in Section 6.4.1).
+ However, running NTP may be useful for synchronizing streams
+ transmitted from separate hosts.
+
+ The NTP timestamp will wrap around to zero some time in the year
+ 2036, but for RTP purposes, only differences between pairs of NTP
+ timestamps are used. So long as the pairs of timestamps can be
+ assumed to be within 68 years of each other, using modular arithmetic
+ for subtractions and comparisons makes the wraparound irrelevant.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 12]
+
+RFC 3550 RTP July 2003
+
+
+5. RTP Data Transfer Protocol
+
+5.1 RTP Fixed Header Fields
+
+ The RTP header has the following format:
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |V=2|P|X|  CC   |M|     PT      |       sequence number         |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                           timestamp                           |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |           synchronization source (SSRC) identifier            |
+   +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+   |            contributing source (CSRC) identifiers             |
+   |                             ....                              |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The first twelve octets are present in every RTP packet, while the
+ list of CSRC identifiers is present only when inserted by a mixer.
+ The fields have the following meaning:
+
+ version (V): 2 bits
+ This field identifies the version of RTP. The version defined by
+ this specification is two (2). (The value 1 is used by the first
+ draft version of RTP and the value 0 is used by the protocol
+ initially implemented in the "vat" audio tool.)
+
+ padding (P): 1 bit
+ If the padding bit is set, the packet contains one or more
+ additional padding octets at the end which are not part of the
+ payload. The last octet of the padding contains a count of how
+ many padding octets should be ignored, including itself. Padding
+ may be needed by some encryption algorithms with fixed block sizes
+ or for carrying several RTP packets in a lower-layer protocol data
+ unit.
+
+ extension (X): 1 bit
+ If the extension bit is set, the fixed header MUST be followed by
+ exactly one header extension, with a format defined in Section
+ 5.3.1.
+
+ CSRC count (CC): 4 bits
+ The CSRC count contains the number of CSRC identifiers that follow
+ the fixed header.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 13]
+
+RFC 3550 RTP July 2003
+
+
+ marker (M): 1 bit
+ The interpretation of the marker is defined by a profile. It is
+ intended to allow significant events such as frame boundaries to
+ be marked in the packet stream. A profile MAY define additional
+ marker bits or specify that there is no marker bit by changing the
+ number of bits in the payload type field (see Section 5.3).
+
+ payload type (PT): 7 bits
+ This field identifies the format of the RTP payload and determines
+ its interpretation by the application. A profile MAY specify a
+ default static mapping of payload type codes to payload formats.
+ Additional payload type codes MAY be defined dynamically through
+ non-RTP means (see Section 3). A set of default mappings for
+ audio and video is specified in the companion RFC 3551 [1]. An
+ RTP source MAY change the payload type during a session, but this
+ field SHOULD NOT be used for multiplexing separate media streams
+ (see Section 5.2).
+
+ A receiver MUST ignore packets with payload types that it does not
+ understand.
+
+ sequence number: 16 bits
+ The sequence number increments by one for each RTP data packet
+ sent, and may be used by the receiver to detect packet loss and to
+ restore packet sequence. The initial value of the sequence number
+ SHOULD be random (unpredictable) to make known-plaintext attacks
+ on encryption more difficult, even if the source itself does not
+ encrypt according to the method in Section 9.1, because the
+ packets may flow through a translator that does. Techniques for
+ choosing unpredictable numbers are discussed in [17].
+
+ timestamp: 32 bits
+ The timestamp reflects the sampling instant of the first octet in
+ the RTP data packet. The sampling instant MUST be derived from a
+ clock that increments monotonically and linearly in time to allow
+ synchronization and jitter calculations (see Section 6.4.1). The
+ resolution of the clock MUST be sufficient for the desired
+ synchronization accuracy and for measuring packet arrival jitter
+ (one tick per video frame is typically not sufficient). The clock
+ frequency is dependent on the format of data carried as payload
+ and is specified statically in the profile or payload format
+ specification that defines the format, or MAY be specified
+ dynamically for payload formats defined through non-RTP means. If
+ RTP packets are generated periodically, the nominal sampling
+ instant as determined from the sampling clock is to be used, not a
+ reading of the system clock. As an example, for fixed-rate audio
+ the timestamp clock would likely increment by one for each
+ sampling period. If an audio application reads blocks covering
+
+
+
+Schulzrinne, et al. Standards Track [Page 14]
+
+RFC 3550 RTP July 2003
+
+
+ 160 sampling periods from the input device, the timestamp would be
+ increased by 160 for each such block, regardless of whether the
+ block is transmitted in a packet or dropped as silent.
+
+ The initial value of the timestamp SHOULD be random, as for the
+ sequence number. Several consecutive RTP packets will have equal
+ timestamps if they are (logically) generated at once, e.g., belong
+ to the same video frame. Consecutive RTP packets MAY contain
+ timestamps that are not monotonic if the data is not transmitted
+ in the order it was sampled, as in the case of MPEG interpolated
+ video frames. (The sequence numbers of the packets as transmitted
+ will still be monotonic.)
+
+ RTP timestamps from different media streams may advance at
+ different rates and usually have independent, random offsets.
+ Therefore, although these timestamps are sufficient to reconstruct
+ the timing of a single stream, directly comparing RTP timestamps
+ from different media is not effective for synchronization.
+ Instead, for each medium the RTP timestamp is related to the
+ sampling instant by pairing it with a timestamp from a reference
+ clock (wallclock) that represents the time when the data
+ corresponding to the RTP timestamp was sampled. The reference
+ clock is shared by all media to be synchronized. The timestamp
+ pairs are not transmitted in every data packet, but at a lower
+ rate in RTCP SR packets as described in Section 6.4.
+
+ The sampling instant is chosen as the point of reference for the
+ RTP timestamp because it is known to the transmitting endpoint and
+ has a common definition for all media, independent of encoding
+ delays or other processing. The purpose is to allow synchronized
+ presentation of all media sampled at the same time.
+
+ Applications transmitting stored data rather than data sampled in
+ real time typically use a virtual presentation timeline derived
+ from wallclock time to determine when the next frame or other unit
+ of each medium in the stored data should be presented. In this
+ case, the RTP timestamp would reflect the presentation time for
+ each unit. That is, the RTP timestamp for each unit would be
+ related to the wallclock time at which the unit becomes current on
+ the virtual presentation timeline. Actual presentation occurs
+ some time later as determined by the receiver.
+
+ An example describing live audio narration of prerecorded video
+ illustrates the significance of choosing the sampling instant as
+ the reference point. In this scenario, the video would be
+ presented locally for the narrator to view and would be
+ simultaneously transmitted using RTP. The "sampling instant" of a
+ video frame transmitted in RTP would be established by referencing
+
+
+
+Schulzrinne, et al. Standards Track [Page 15]
+
+RFC 3550 RTP July 2003
+
+
+ its timestamp to the wallclock time when that video frame was
+ presented to the narrator. The sampling instant for the audio RTP
+ packets containing the narrator's speech would be established by
+ referencing the same wallclock time when the audio was sampled.
+ The audio and video may even be transmitted by different hosts if
+ the reference clocks on the two hosts are synchronized by some
+ means such as NTP. A receiver can then synchronize presentation
+ of the audio and video packets by relating their RTP timestamps
+ using the timestamp pairs in RTCP SR packets.
+
+ SSRC: 32 bits
+ The SSRC field identifies the synchronization source. This
+ identifier SHOULD be chosen randomly, with the intent that no two
+ synchronization sources within the same RTP session will have the
+ same SSRC identifier. An example algorithm for generating a
+ random identifier is presented in Appendix A.6. Although the
+ probability of multiple sources choosing the same identifier is
+ low, all RTP implementations must be prepared to detect and
+ resolve collisions. Section 8 describes the probability of
+ collision along with a mechanism for resolving collisions and
+ detecting RTP-level forwarding loops based on the uniqueness of
+ the SSRC identifier. If a source changes its source transport
+ address, it must also choose a new SSRC identifier to avoid being
+ interpreted as a looped source (see Section 8.2).
+
+ CSRC list: 0 to 15 items, 32 bits each
+ The CSRC list identifies the contributing sources for the payload
+ contained in this packet. The number of identifiers is given by
+ the CC field. If there are more than 15 contributing sources,
+ only 15 can be identified. CSRC identifiers are inserted by
+ mixers (see Section 7.1), using the SSRC identifiers of
+ contributing sources. For example, for audio packets the SSRC
+ identifiers of all sources that were mixed together to create a
+ packet are listed, allowing correct talker indication at the
+ receiver.
+
+5.2 Multiplexing RTP Sessions
+
+ For efficient protocol processing, the number of multiplexing points
+ should be minimized, as described in the integrated layer processing
+ design principle [10]. In RTP, multiplexing is provided by the
+ destination transport address (network address and port number) which
+ is different for each RTP session. For example, in a teleconference
+ composed of audio and video media encoded separately, each medium
+ SHOULD be carried in a separate RTP session with its own destination
+ transport address.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 16]
+
+RFC 3550 RTP July 2003
+
+
+ Separate audio and video streams SHOULD NOT be carried in a single
+ RTP session and demultiplexed based on the payload type or SSRC
+ fields. Interleaving packets with different RTP media types but
+ using the same SSRC would introduce several problems:
+
+ 1. If, say, two audio streams shared the same RTP session and the
+ same SSRC value, and one were to change encodings and thus acquire
+ a different RTP payload type, there would be no general way of
+ identifying which stream had changed encodings.
+
+ 2. An SSRC is defined to identify a single timing and sequence number
+ space. Interleaving multiple payload types would require
+ different timing spaces if the media clock rates differ and would
+ require different sequence number spaces to tell which payload
+ type suffered packet loss.
+
+ 3. The RTCP sender and receiver reports (see Section 6.4) can only
+ describe one timing and sequence number space per SSRC and do not
+ carry a payload type field.
+
+ 4. An RTP mixer would not be able to combine interleaved streams of
+ incompatible media into one stream.
+
+ 5. Carrying multiple media in one RTP session precludes: the use of
+ different network paths or network resource allocations if
+ appropriate; reception of a subset of the media if desired, for
+ example just audio if video would exceed the available bandwidth;
+ and receiver implementations that use separate processes for the
+ different media, whereas using separate RTP sessions permits
+ either single- or multiple-process implementations.
+
+ Using a different SSRC for each medium but sending them in the same
+ RTP session would avoid the first three problems but not the last
+ two.
+
+ On the other hand, multiplexing multiple related sources of the same
+ medium in one RTP session using different SSRC values is the norm for
+ multicast sessions. The problems listed above don't apply: an RTP
+ mixer can combine multiple audio sources, for example, and the same
+ treatment is applicable for all of them. It may also be appropriate
+ to multiplex streams of the same medium using different SSRC values
+ in other scenarios where the last two problems do not apply.
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 17]
+
+RFC 3550 RTP July 2003
+
+
+5.3 Profile-Specific Modifications to the RTP Header
+
+ The existing RTP data packet header is believed to be complete for
+ the set of functions required in common across all the application
+ classes that RTP might support. However, in keeping with the ALF
+ design principle, the header MAY be tailored through modifications or
+ additions defined in a profile specification while still allowing
+ profile-independent monitoring and recording tools to function.
+
+ o The marker bit and payload type field carry profile-specific
+ information, but they are allocated in the fixed header since many
+ applications are expected to need them and might otherwise have to
+ add another 32-bit word just to hold them. The octet containing
+ these fields MAY be redefined by a profile to suit different
+ requirements, for example with more or fewer marker bits. If
+ there are any marker bits, one SHOULD be located in the most
+ significant bit of the octet since profile-independent monitors
+ may be able to observe a correlation between packet loss patterns
+ and the marker bit.
+
+ o Additional information that is required for a particular payload
+ format, such as a video encoding, SHOULD be carried in the payload
+ section of the packet. This might be in a header that is always
+ present at the start of the payload section, or might be indicated
+ by a reserved value in the data pattern.
+
+ o If a particular class of applications needs additional
+ functionality independent of payload format, the profile under
+ which those applications operate SHOULD define additional fixed
+ fields to follow immediately after the SSRC field of the existing
+ fixed header. Those applications will be able to quickly and
+ directly access the additional fields while profile-independent
+ monitors or recorders can still process the RTP packets by
+ interpreting only the first twelve octets.
+
+ If it turns out that additional functionality is needed in common
+ across all profiles, then a new version of RTP should be defined to
+ make a permanent change to the fixed header.
+
+5.3.1 RTP Header Extension
+
+ An extension mechanism is provided to allow individual
+ implementations to experiment with new payload-format-independent
+ functions that require additional information to be carried in the
+ RTP data packet header. This mechanism is designed so that the
+ header extension may be ignored by other interoperating
+ implementations that have not been extended.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 18]
+
+RFC 3550 RTP July 2003
+
+
+ Note that this header extension is intended only for limited use.
+ Most potential uses of this mechanism would be better done another
+ way, using the methods described in the previous section. For
+ example, a profile-specific extension to the fixed header is less
+ expensive to process because it is not conditional nor in a variable
+ location. Additional information required for a particular payload
+ format SHOULD NOT use this header extension, but SHOULD be carried in
+ the payload section of the packet.
+
+    0                   1                   2                   3
+    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |      defined by profile       |           length              |
+   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+   |                        header extension                       |
+   |                             ....                              |
+
+ If the X bit in the RTP header is one, a variable-length header
+ extension MUST be appended to the RTP header, following the CSRC list
+ if present. The header extension contains a 16-bit length field that
+ counts the number of 32-bit words in the extension, excluding the
+ four-octet extension header (therefore zero is a valid length). Only
+ a single extension can be appended to the RTP data header. To allow
+ multiple interoperating implementations to each experiment
+ independently with different header extensions, or to allow a
+ particular implementation to experiment with more than one type of
+ header extension, the first 16 bits of the header extension are left
+ open for distinguishing identifiers or parameters. The format of
+ these 16 bits is to be defined by the profile specification under
+ which the implementations are operating. This RTP specification does
+ not define any header extensions itself.
+
+6. RTP Control Protocol -- RTCP
+
+ The RTP control protocol (RTCP) is based on the periodic transmission
+ of control packets to all participants in the session, using the same
+ distribution mechanism as the data packets. The underlying protocol
+ MUST provide multiplexing of the data and control packets, for
+ example using separate port numbers with UDP. RTCP performs four
+ functions:
+
+ 1. The primary function is to provide feedback on the quality of the
+ data distribution. This is an integral part of the RTP's role as
+ a transport protocol and is related to the flow and congestion
+ control functions of other transport protocols (see Section 10 on
+ the requirement for congestion control). The feedback may be
+ directly useful for control of adaptive encodings [18,19], but
+ experiments with IP multicasting have shown that it is also
+
+
+
+Schulzrinne, et al. Standards Track [Page 19]
+
+RFC 3550 RTP July 2003
+
+
+ critical to get feedback from the receivers to diagnose faults in
+ the distribution. Sending reception feedback reports to all
+ participants allows one who is observing problems to evaluate
+ whether those problems are local or global. With a distribution
+ mechanism like IP multicast, it is also possible for an entity
+ such as a network service provider who is not otherwise involved
+ in the session to receive the feedback information and act as a
+ third-party monitor to diagnose network problems. This feedback
+ function is performed by the RTCP sender and receiver reports,
+ described below in Section 6.4.
+
+ 2. RTCP carries a persistent transport-level identifier for an RTP
+ source called the canonical name or CNAME, Section 6.5.1. Since
+ the SSRC identifier may change if a conflict is discovered or a
+ program is restarted, receivers require the CNAME to keep track of
+ each participant. Receivers may also require the CNAME to
+ associate multiple data streams from a given participant in a set
+ of related RTP sessions, for example to synchronize audio and
+ video. Inter-media synchronization also requires the NTP and RTP
+ timestamps included in RTCP packets by data senders.
+
+ 3. The first two functions require that all participants send RTCP
+ packets, therefore the rate must be controlled in order for RTP to
+ scale up to a large number of participants. By having each
+ participant send its control packets to all the others, each can
+ independently observe the number of participants. This number is
+ used to calculate the rate at which the packets are sent, as
+ explained in Section 6.2.
+
+ 4. A fourth, OPTIONAL function is to convey minimal session control
+ information, for example participant identification to be
+ displayed in the user interface. This is most likely to be useful
+ in "loosely controlled" sessions where participants enter and
+ leave without membership control or parameter negotiation. RTCP
+ serves as a convenient channel to reach all the participants, but
+ it is not necessarily expected to support all the control
+ communication requirements of an application. A higher-level
+ session control protocol, which is beyond the scope of this
+ document, may be needed.
+
+ Functions 1-3 SHOULD be used in all environments, but particularly in
+ the IP multicast environment. RTP application designers SHOULD avoid
+ mechanisms that can only work in unicast mode and will not scale to
+ larger numbers. Transmission of RTCP MAY be controlled separately
+ for senders and receivers, as described in Section 6.2, for cases
+ such as unidirectional links where feedback from receivers is not
+ possible.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 20]
+
+RFC 3550 RTP July 2003
+
+
+ Non-normative note: In the multicast routing approach
+ called Source-Specific Multicast (SSM), there is only one sender
+ per "channel" (a source address, group address pair), and
+ receivers (except for the channel source) cannot use multicast to
+ communicate directly with other channel members. The
+ recommendations here accommodate SSM only through Section 6.2's
+ option of turning off receivers' RTCP entirely. Future work will
+ specify adaptation of RTCP for SSM so that feedback from receivers
+ can be maintained.
+
+6.1 RTCP Packet Format
+
+ This specification defines several RTCP packet types to carry a
+ variety of control information:
+
+ SR: Sender report, for transmission and reception statistics from
+ participants that are active senders
+
+ RR: Receiver report, for reception statistics from participants
+ that are not active senders and in combination with SR for
+ active senders reporting on more than 31 sources
+
+ SDES: Source description items, including CNAME
+
+ BYE: Indicates end of participation
+
+ APP: Application-specific functions
+
+ Each RTCP packet begins with a fixed part similar to that of RTP data
+ packets, followed by structured elements that MAY be of variable
+ length according to the packet type but MUST end on a 32-bit
+ boundary. The alignment requirement and a length field in the fixed
+ part of each packet are included to make RTCP packets "stackable".
+ Multiple RTCP packets can be concatenated without any intervening
+ separators to form a compound RTCP packet that is sent in a single
+ packet of the lower layer protocol, for example UDP. There is no
+ explicit count of individual RTCP packets in the compound packet
+ since the lower layer protocols are expected to provide an overall
+ length to determine the end of the compound packet.
+
+ Each individual RTCP packet in the compound packet may be processed
+ independently with no requirements upon the order or combination of
+ packets. However, in order to perform the functions of the protocol,
+ the following constraints are imposed:
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 21]
+
+RFC 3550 RTP July 2003
+
+
+ o Reception statistics (in SR or RR) should be sent as often as
+ bandwidth constraints will allow to maximize the resolution of the
+ statistics, therefore each periodically transmitted compound RTCP
+ packet MUST include a report packet.
+
+ o New receivers need to receive the CNAME for a source as soon as
+ possible to identify the source and to begin associating media for
+ purposes such as lip-sync, so each compound RTCP packet MUST also
+ include the SDES CNAME except when the compound RTCP packet is
+ split for partial encryption as described in Section 9.1.
+
+ o The number of packet types that may appear first in the compound
+ packet needs to be limited to increase the number of constant bits
+ in the first word and the probability of successfully validating
+ RTCP packets against misaddressed RTP data packets or other
+ unrelated packets.
+
+ Thus, all RTCP packets MUST be sent in a compound packet of at least
+ two individual packets, with the following format:
+
+ Encryption prefix: If and only if the compound packet is to be
+ encrypted according to the method in Section 9.1, it MUST be
+ prefixed by a random 32-bit quantity redrawn for every compound
+ packet transmitted. If padding is required for the encryption, it
+ MUST be added to the last packet of the compound packet.
+
+ SR or RR: The first RTCP packet in the compound packet MUST
+ always be a report packet to facilitate header validation as
+ described in Appendix A.2. This is true even if no data has been
+ sent or received, in which case an empty RR MUST be sent, and even
+ if the only other RTCP packet in the compound packet is a BYE.
+
+ Additional RRs: If the number of sources for which reception
+ statistics are being reported exceeds 31, the number that will fit
+ into one SR or RR packet, then additional RR packets SHOULD follow
+ the initial report packet.
+
+ SDES: An SDES packet containing a CNAME item MUST be included
+ in each compound RTCP packet, except as noted in Section 9.1.
+ Other source description items MAY optionally be included if
+ required by a particular application, subject to bandwidth
+ constraints (see Section 6.3.9).
+
+ BYE or APP: Other RTCP packet types, including those yet to be
+ defined, MAY follow in any order, except that BYE SHOULD be the
+ last packet sent with a given SSRC/CSRC. Packet types MAY appear
+ more than once.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 22]
+
+RFC 3550 RTP July 2003
+
+
+ An individual RTP participant SHOULD send only one compound RTCP
+ packet per report interval in order for the RTCP bandwidth per
+ participant to be estimated correctly (see Section 6.2), except when
+ the compound RTCP packet is split for partial encryption as described
+ in Section 9.1. If there are too many sources to fit all the
+ necessary RR packets into one compound RTCP packet without exceeding
+ the maximum transmission unit (MTU) of the network path, then only
+ the subset that will fit into one MTU SHOULD be included in each
+ interval. The subsets SHOULD be selected round-robin across multiple
+ intervals so that all sources are reported.
+
+ It is RECOMMENDED that translators and mixers combine individual RTCP
+ packets from the multiple sources they are forwarding into one
+ compound packet whenever feasible in order to amortize the packet
+ overhead (see Section 7). An example RTCP compound packet as might
+ be produced by a mixer is shown in Fig. 1. If the overall length of
+ a compound packet would exceed the MTU of the network path, it SHOULD
+ be segmented into multiple shorter compound packets to be transmitted
+ in separate packets of the underlying protocol. This does not impair
+ the RTCP bandwidth estimation because each compound packet represents
+ at least one distinct participant. Note that each of the compound
+ packets MUST begin with an SR or RR packet.
+
+ An implementation SHOULD ignore incoming RTCP packets with types
+ unknown to it. Additional RTCP packet types may be registered with
+ the Internet Assigned Numbers Authority (IANA) as described in
+ Section 15.
+
+   if encrypted: random 32-bit integer
+   |
+   |[--------- packet --------][---------- packet ----------][-packet-]
+   |
+   |                receiver            chunk        chunk
+   V                reports           item  item   item  item
+   --------------------------------------------------------------------
+   R[SR #sendinfo #site1#site2][SDES #CNAME PHONE #CNAME LOC][BYE##why]
+   --------------------------------------------------------------------
+   |                                                                  |
+   |<-----------------------  compound packet ----------------------->|
+   |<--------------------------  UDP packet ------------------------->|
+
+   #: SSRC/CSRC identifier
+
+               Figure 1: Example of an RTCP compound packet
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 23]
+
+RFC 3550 RTP July 2003
+
+
+6.2 RTCP Transmission Interval
+
+ RTP is designed to allow an application to scale automatically over
+ session sizes ranging from a few participants to thousands. For
+ example, in an audio conference the data traffic is inherently self-
+ limiting because only one or two people will speak at a time, so with
+ multicast distribution the data rate on any given link remains
+ relatively constant independent of the number of participants.
+ However, the control traffic is not self-limiting. If the reception
+ reports from each participant were sent at a constant rate, the
+ control traffic would grow linearly with the number of participants.
+ Therefore, the rate must be scaled down by dynamically calculating
+ the interval between RTCP packet transmissions.
+
+ For each session, it is assumed that the data traffic is subject to
+ an aggregate limit called the "session bandwidth" to be divided among
+ the participants. This bandwidth might be reserved and the limit
+ enforced by the network. If there is no reservation, there may be
+ other constraints, depending on the environment, that establish the
+ "reasonable" maximum for the session to use, and that would be the
+ session bandwidth. The session bandwidth may be chosen based on some
+ cost or a priori knowledge of the available network bandwidth for the
+ session. It is somewhat independent of the media encoding, but the
+ encoding choice may be limited by the session bandwidth. Often, the
+ session bandwidth is the sum of the nominal bandwidths of the senders
+ expected to be concurrently active. For teleconference audio, this
+ number would typically be one sender's bandwidth. For layered
+ encodings, each layer is a separate RTP session with its own session
+ bandwidth parameter.
+
+ The session bandwidth parameter is expected to be supplied by a
+ session management application when it invokes a media application,
+ but media applications MAY set a default based on the single-sender
+ data bandwidth for the encoding selected for the session. The
+ application MAY also enforce bandwidth limits based on multicast
+ scope rules or other criteria. All participants MUST use the same
+ value for the session bandwidth so that the same RTCP interval will
+ be calculated.
+
+ Bandwidth calculations for control and data traffic include lower-
+ layer transport and network protocols (e.g., UDP and IP) since that
+ is what the resource reservation system would need to know. The
+ application can also be expected to know which of these protocols are
+ in use. Link level headers are not included in the calculation since
+ the packet will be encapsulated with different link level headers as
+ it travels.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 24]
+
+RFC 3550 RTP July 2003
+
+
+ The control traffic should be limited to a small and known fraction
+ of the session bandwidth: small so that the primary function of the
+ transport protocol to carry data is not impaired; known so that the
+ control traffic can be included in the bandwidth specification given
+ to a resource reservation protocol, and so that each participant can
+ independently calculate its share. The control traffic bandwidth is
+ in addition to the session bandwidth for the data traffic. It is
+ RECOMMENDED that the fraction of the session bandwidth added for RTCP
+ be fixed at 5%. It is also RECOMMENDED that 1/4 of the RTCP
+ bandwidth be dedicated to participants that are sending data so that
+ in sessions with a large number of receivers but a small number of
+ senders, newly joining participants will more quickly receive the
+ CNAME for the sending sites. When the proportion of senders is
+ greater than 1/4 of the participants, the senders get their
+ proportion of the full RTCP bandwidth. While the values of these and
+ other constants in the interval calculation are not critical, all
+ participants in the session MUST use the same values so the same
+ interval will be calculated. Therefore, these constants SHOULD be
+ fixed for a particular profile.
+
+ A profile MAY specify that the control traffic bandwidth may be a
+ separate parameter of the session rather than a strict percentage of
+ the session bandwidth. Using a separate parameter allows rate-
+ adaptive applications to set an RTCP bandwidth consistent with a
+ "typical" data bandwidth that is lower than the maximum bandwidth
+ specified by the session bandwidth parameter.
+
+ The profile MAY further specify that the control traffic bandwidth
+ may be divided into two separate session parameters for those
+ participants which are active data senders and those which are not;
+ let us call the parameters S and R. Following the recommendation
+ that 1/4 of the RTCP bandwidth be dedicated to data senders, the
+ RECOMMENDED default values for these two parameters would be 1.25%
+ and 3.75%, respectively. When the proportion of senders is greater
+ than S/(S+R) of the participants, the senders get their proportion of
+ the sum of these parameters. Using two parameters allows RTCP
+ reception reports to be turned off entirely for a particular session
+ by setting the RTCP bandwidth for non-data-senders to zero while
+ keeping the RTCP bandwidth for data senders non-zero so that sender
+ reports can still be sent for inter-media synchronization. Turning
+ off RTCP reception reports is NOT RECOMMENDED because they are needed
+ for the functions listed at the beginning of Section 6, particularly
+ reception quality feedback and congestion control. However, doing so
+ may be appropriate for systems operating on unidirectional links or
+ for sessions that don't require feedback on the quality of reception
+ or liveness of receivers and that have other means to avoid
+ congestion.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 25]
+
+RFC 3550 RTP July 2003
+
+
+ The calculated interval between transmissions of compound RTCP
+ packets SHOULD also have a lower bound to avoid having bursts of
+ packets exceed the allowed bandwidth when the number of participants
+ is small and the traffic isn't smoothed according to the law of large
+ numbers. It also keeps the report interval from becoming too small
+ during transient outages like a network partition such that
+ adaptation is delayed when the partition heals. At application
+ startup, a delay SHOULD be imposed before the first compound RTCP
+ packet is sent to allow time for RTCP packets to be received from
+ other participants so the report interval will converge to the
+ correct value more quickly. This delay MAY be set to half the
+ minimum interval to allow quicker notification that the new
+ participant is present. The RECOMMENDED value for a fixed minimum
+ interval is 5 seconds.
+
+ An implementation MAY scale the minimum RTCP interval to a smaller
+ value inversely proportional to the session bandwidth parameter with
+ the following limitations:
+
+ o For multicast sessions, only active data senders MAY use the
+ reduced minimum value to calculate the interval for transmission
+ of compound RTCP packets.
+
+ o For unicast sessions, the reduced value MAY be used by
+ participants that are not active data senders as well, and the
+ delay before sending the initial compound RTCP packet MAY be zero.
+
+ o For all sessions, the fixed minimum SHOULD be used when
+ calculating the participant timeout interval (see Section 6.3.5)
+ so that implementations which do not use the reduced value for
+ transmitting RTCP packets are not timed out by other participants
+ prematurely.
+
+ o The RECOMMENDED value for the reduced minimum in seconds is 360
+ divided by the session bandwidth in kilobits/second. This minimum
+ is smaller than 5 seconds for bandwidths greater than 72 kb/s.
+
+ The algorithm described in Section 6.3 and Appendix A.7 was designed
+ to meet the goals outlined in this section. It calculates the
+ interval between sending compound RTCP packets to divide the allowed
+ control traffic bandwidth among the participants. This allows an
+ application to provide fast response for small sessions where, for
+ example, identification of all participants is important, yet
+ automatically adapt to large sessions. The algorithm incorporates
+ the following characteristics:
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 26]
+
+RFC 3550 RTP July 2003
+
+
+ o The calculated interval between RTCP packets scales linearly with
+ the number of members in the group. It is this linear factor
+ which allows for a constant amount of control traffic when summed
+ across all members.
+
+ o The interval between RTCP packets is varied randomly over the
+ range [0.5,1.5] times the calculated interval to avoid unintended
+ synchronization of all participants [20]. The first RTCP packet
+ sent after joining a session is also delayed by a random variation
+ of half the minimum RTCP interval.
+
+ o A dynamic estimate of the average compound RTCP packet size is
+ calculated, including all those packets received and sent, to
+ automatically adapt to changes in the amount of control
+ information carried.
+
+ o Since the calculated interval is dependent on the number of
+ observed group members, there may be undesirable startup effects
+ when a new user joins an existing session, or many users
+ simultaneously join a new session. These new users will initially
+ have incorrect estimates of the group membership, and thus their
+ RTCP transmission interval will be too short. This problem can be
+ significant if many users join the session simultaneously. To
+ deal with this, an algorithm called "timer reconsideration" is
+ employed. This algorithm implements a simple back-off mechanism
+ which causes users to hold back RTCP packet transmission if the
+ group sizes are increasing.
+
+ o When users leave a session, either with a BYE or by timeout, the
+ group membership decreases, and thus the calculated interval
+ should decrease. A "reverse reconsideration" algorithm is used to
+ allow members to more quickly reduce their intervals in response
+ to group membership decreases.
+
+ o BYE packets are given different treatment than other RTCP packets.
+ When a user leaves a group, and wishes to send a BYE packet, it
+ may do so before its next scheduled RTCP packet. However,
+ transmission of BYEs follows a back-off algorithm which avoids
+ floods of BYE packets should a large number of members
+ simultaneously leave the session.
+
+ This algorithm may be used for sessions in which all participants are
+ allowed to send. In that case, the session bandwidth parameter is
+ the product of the individual sender's bandwidth times the number of
+ participants, and the RTCP bandwidth is 5% of that.
+
+ Details of the algorithm's operation are given in the sections that
+ follow. Appendix A.7 gives an example implementation.
+
+
+
+Schulzrinne, et al. Standards Track [Page 27]
+
+RFC 3550 RTP July 2003
+
+
+6.2.1 Maintaining the Number of Session Members
+
+ Calculation of the RTCP packet interval depends upon an estimate of
+ the number of sites participating in the session. New sites are
+ added to the count when they are heard, and an entry for each SHOULD
+ be created in a table indexed by the SSRC or CSRC identifier (see
+ Section 8.2) to keep track of them. New entries MAY be considered
+ not valid until multiple packets carrying the new SSRC have been
+ received (see Appendix A.1), or until an SDES RTCP packet containing
+ a CNAME for that SSRC has been received. Entries MAY be deleted from
+ the table when an RTCP BYE packet with the corresponding SSRC
+ identifier is received, except that some straggler data packets might
+ arrive after the BYE and cause the entry to be recreated. Instead,
+ the entry SHOULD be marked as having received a BYE and then deleted
+ after an appropriate delay.
+
+ A participant MAY mark another site inactive, or delete it if not yet
+ valid, if no RTP or RTCP packet has been received for a small number
+ of RTCP report intervals (5 is RECOMMENDED). This provides some
+ robustness against packet loss. All sites must have the same value
+ for this multiplier and must calculate roughly the same value for the
+ RTCP report interval in order for this timeout to work properly.
+ Therefore, this multiplier SHOULD be fixed for a particular profile.
+
+ For sessions with a very large number of participants, it may be
+ impractical to maintain a table to store the SSRC identifier and
+ state information for all of them. An implementation MAY use SSRC
+ sampling, as described in [21], to reduce the storage requirements.
+ An implementation MAY use any other algorithm with similar
+ performance. A key requirement is that any algorithm considered
+ SHOULD NOT substantially underestimate the group size, although it
+ MAY overestimate.
+
+6.3 RTCP Packet Send and Receive Rules
+
+ The rules for how to send, and what to do when receiving an RTCP
+ packet are outlined here. An implementation that allows operation in
+ a multicast environment or a multipoint unicast environment MUST meet
+ the requirements in Section 6.2. Such an implementation MAY use the
+ algorithm defined in this section to meet those requirements, or MAY
+ use some other algorithm so long as it provides equivalent or better
+ performance. An implementation which is constrained to two-party
+ unicast operation SHOULD still use randomization of the RTCP
+ transmission interval to avoid unintended synchronization of multiple
+ instances operating in the same environment, but MAY omit the "timer
+ reconsideration" and "reverse reconsideration" algorithms in Sections
+ 6.3.3, 6.3.6 and 6.3.7.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 28]
+
+RFC 3550 RTP July 2003
+
+
+ To execute these rules, a session participant must maintain several
+ pieces of state:
+
+ tp: the last time an RTCP packet was transmitted;
+
+ tc: the current time;
+
+ tn: the next scheduled transmission time of an RTCP packet;
+
+ pmembers: the estimated number of session members at the time tn
+ was last recomputed;
+
+ members: the most current estimate for the number of session
+ members;
+
+ senders: the most current estimate for the number of senders in
+ the session;
+
+ rtcp_bw: The target RTCP bandwidth, i.e., the total bandwidth
+ that will be used for RTCP packets by all members of this session,
+ in octets per second. This will be a specified fraction of the
+ "session bandwidth" parameter supplied to the application at
+ startup.
+
+ we_sent: Flag that is true if the application has sent data
+ since the 2nd previous RTCP report was transmitted.
+
+ avg_rtcp_size: The average compound RTCP packet size, in octets,
+ over all RTCP packets sent and received by this participant. The
+ size includes lower-layer transport and network protocol headers
+ (e.g., UDP and IP) as explained in Section 6.2.
+
+ initial: Flag that is true if the application has not yet sent
+ an RTCP packet.
+
+ Many of these rules make use of the "calculated interval" between
+ packet transmissions. This interval is described in the following
+ section.
+
+6.3.1 Computing the RTCP Transmission Interval
+
+ To maintain scalability, the average interval between packets from a
+ session participant should scale with the group size. This interval
+ is called the calculated interval. It is obtained by combining a
+ number of the pieces of state described above. The calculated
+ interval T is then determined as follows:
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 29]
+
+RFC 3550 RTP July 2003
+
+
+ 1. If the number of senders is less than or equal to 25% of the
+ membership (members), the interval depends on whether the
+ participant is a sender or not (based on the value of we_sent).
+ If the participant is a sender (we_sent true), the constant C is
+ set to the average RTCP packet size (avg_rtcp_size) divided by 25%
+ of the RTCP bandwidth (rtcp_bw), and the constant n is set to the
+ number of senders. If we_sent is not true, the constant C is set
+ to the average RTCP packet size divided by 75% of the RTCP
+ bandwidth. The constant n is set to the number of receivers
+ (members - senders). If the number of senders is greater than
+ 25%, senders and receivers are treated together. The constant C
+ is set to the average RTCP packet size divided by the total RTCP
+ bandwidth and n is set to the total number of members. As stated
+ in Section 6.2, an RTP profile MAY specify that the RTCP bandwidth
+ may be explicitly defined by two separate parameters (call them S
+ and R) for those participants which are senders and those which
+ are not. In that case, the 25% fraction becomes S/(S+R) and the
+ 75% fraction becomes R/(S+R). Note that if R is zero, the
+ percentage of senders is never greater than S/(S+R), and the
+ implementation must avoid division by zero.
+
+ 2. If the participant has not yet sent an RTCP packet (the variable
+ initial is true), the constant Tmin is set to 2.5 seconds, else it
+ is set to 5 seconds.
+
+ 3. The deterministic calculated interval Td is set to max(Tmin, n*C).
+
+ 4. The calculated interval T is set to a number uniformly distributed
+ between 0.5 and 1.5 times the deterministic calculated interval.
+
+ 5. The resulting value of T is divided by e - 3/2 = 1.21828 to compensate
+ for the fact that the timer reconsideration algorithm converges to
+ a value of the RTCP bandwidth below the intended average.
+
+ This procedure results in an interval which is random, but which, on
+ average, gives at least 25% of the RTCP bandwidth to senders and the
+ rest to receivers. If the senders constitute more than one quarter
+ of the membership, this procedure splits the bandwidth equally among
+ all participants, on average.
+
+6.3.2 Initialization
+
+ Upon joining the session, the participant initializes tp to 0, tc to
+ 0, senders to 0, pmembers to 1, members to 1, we_sent to false,
+ rtcp_bw to the specified fraction of the session bandwidth, initial
+ to true, and avg_rtcp_size to the probable size of the first RTCP
+ packet that the application will later construct. The calculated
+ interval T is then computed, and the first packet is scheduled for
+
+
+
+Schulzrinne, et al. Standards Track [Page 30]
+
+RFC 3550 RTP July 2003
+
+
+ time tn = T. This means that a transmission timer is set which
+ expires at time T. Note that an application MAY use any desired
+ approach for implementing this timer.
+
+ The participant adds its own SSRC to the member table.
+
+6.3.3 Receiving an RTP or Non-BYE RTCP Packet
+
+ When an RTP or RTCP packet is received from a participant whose SSRC
+ is not in the member table, the SSRC is added to the table, and the
+ value for members is updated once the participant has been validated
+ as described in Section 6.2.1. The same processing occurs for each
+ CSRC in a validated RTP packet.
+
+ When an RTP packet is received from a participant whose SSRC is not
+ in the sender table, the SSRC is added to the table, and the value
+ for senders is updated.
+
+ For each compound RTCP packet received, the value of avg_rtcp_size is
+ updated:
+
+ avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size
+
+ where packet_size is the size of the RTCP packet just received.
+
+6.3.4 Receiving an RTCP BYE Packet
+
+ Except as described in Section 6.3.7 for the case when an RTCP BYE is
+ to be transmitted, if the received packet is an RTCP BYE packet, the
+ SSRC is checked against the member table. If present, the entry is
+ removed from the table, and the value for members is updated. The
+ SSRC is then checked against the sender table. If present, the entry
+ is removed from the table, and the value for senders is updated.
+
+ Furthermore, to make the transmission rate of RTCP packets more
+ adaptive to changes in group membership, the following "reverse
+ reconsideration" algorithm SHOULD be executed when a BYE packet is
+ received that reduces members to a value less than pmembers:
+
+ o The value for tn is updated according to the following formula:
+
+ tn = tc + (members/pmembers) * (tn - tc)
+
+ o The value for tp is updated according to the following formula:
+
+ tp = tc - (members/pmembers) * (tc - tp).
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 31]
+
+RFC 3550 RTP July 2003
+
+
+ o The next RTCP packet is rescheduled for transmission at time tn,
+ which is now earlier.
+
+ o The value of pmembers is set equal to members.
+
+ This algorithm does not prevent the group size estimate from
+ incorrectly dropping to zero for a short time due to premature
+ timeouts when most participants of a large session leave at once but
+ some remain. The algorithm does make the estimate return to the
+ correct value more rapidly. This situation is unusual enough and the
+ consequences are sufficiently harmless that this problem is deemed
+ only a secondary concern.
+
+6.3.5 Timing Out an SSRC
+
+ At occasional intervals, the participant MUST check to see if any of
+ the other participants time out. To do this, the participant
+ computes the deterministic (without the randomization factor)
+ calculated interval Td for a receiver, that is, with we_sent false.
+ Any other session member who has not sent an RTP or RTCP packet since
+ time tc - MTd (M is the timeout multiplier, and defaults to 5) is
+ timed out. This means that its SSRC is removed from the member list,
+ and members is updated. A similar check is performed on the sender
+ list. Any member on the sender list who has not sent an RTP packet
+ since time tc - 2T (within the last two RTCP report intervals) is
+ removed from the sender list, and senders is updated.
+
+ If any members time out, the reverse reconsideration algorithm
+ described in Section 6.3.4 SHOULD be performed.
+
+ The participant MUST perform this check at least once per RTCP
+ transmission interval.
+
+6.3.6 Expiration of Transmission Timer
+
+ When the packet transmission timer expires, the participant performs
+ the following operations:
+
+ o The transmission interval T is computed as described in Section
+ 6.3.1, including the randomization factor.
+
+ o If tp + T is less than or equal to tc, an RTCP packet is
+ transmitted. tp is set to tc, then another value for T is
+ calculated as in the previous step and tn is set to tc + T. The
+ transmission timer is set to expire again at time tn. If tp + T
+ is greater than tc, tn is set to tp + T. No RTCP packet is
+ transmitted. The transmission timer is set to expire at time tn.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 32]
+
+RFC 3550 RTP July 2003
+
+
+ o pmembers is set to members.
+
+ If an RTCP packet is transmitted, the value of initial is set to
+ FALSE. Furthermore, the value of avg_rtcp_size is updated:
+
+ avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size
+
+ where packet_size is the size of the RTCP packet just transmitted.
+
+6.3.7 Transmitting a BYE Packet
+
+ When a participant wishes to leave a session, a BYE packet is
+ transmitted to inform the other participants of the event. In order
+ to avoid a flood of BYE packets when many participants leave the
+ system, a participant MUST execute the following algorithm if the
+ number of members is more than 50 when the participant chooses to
+ leave. This algorithm usurps the normal role of the members variable
+ to count BYE packets instead:
+
+ o When the participant decides to leave the system, tp is reset to
+ tc, the current time, members and pmembers are initialized to 1,
+ initial is set to 1, we_sent is set to false, senders is set to 0,
+ and avg_rtcp_size is set to the size of the compound BYE packet.
+ The calculated interval T is computed. The BYE packet is then
+ scheduled for time tn = tc + T.
+
+ o Every time a BYE packet from another participant is received,
+ members is incremented by 1 regardless of whether that participant
+ exists in the member table or not, and when SSRC sampling is in
+ use, regardless of whether or not the BYE SSRC would be included
+ in the sample. members is NOT incremented when other RTCP packets
+ or RTP packets are received, but only for BYE packets. Similarly,
+ avg_rtcp_size is updated only for received BYE packets. senders
+ is NOT updated when RTP packets arrive; it remains 0.
+
+ o Transmission of the BYE packet then follows the rules for
+ transmitting a regular RTCP packet, as above.
+
+ This allows BYE packets to be sent right away, yet controls their
+ total bandwidth usage. In the worst case, this could cause RTCP
+ control packets to use twice the bandwidth as normal (10%) -- 5% for
+ non-BYE RTCP packets and 5% for BYE.
+
+ A participant that does not want to wait for the above mechanism to
+ allow transmission of a BYE packet MAY leave the group without
+ sending a BYE at all. That participant will eventually be timed out
+ by the other group members.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 33]
+
+RFC 3550 RTP July 2003
+
+
+ If the group size estimate members is less than 50 when the
+ participant decides to leave, the participant MAY send a BYE packet
+ immediately. Alternatively, the participant MAY choose to execute
+ the above BYE backoff algorithm.
+
+ In either case, a participant which never sent an RTP or RTCP packet
+ MUST NOT send a BYE packet when they leave the group.
+
+6.3.8 Updating we_sent
+
+ The variable we_sent contains true if the participant has sent an RTP
+ packet recently, false otherwise. This determination is made by
+ using the same mechanisms as for managing the set of other
+ participants listed in the senders table. If the participant sends
+ an RTP packet when we_sent is false, it adds itself to the sender
+ table and sets we_sent to true. The reverse reconsideration
+ algorithm described in Section 6.3.4 SHOULD be performed to possibly
+ reduce the delay before sending an SR packet. Every time another RTP
+ packet is sent, the time of transmission of that packet is maintained
+ in the table. The normal sender timeout algorithm is then applied to
+ the participant -- if an RTP packet has not been transmitted since
+ time tc - 2T, the participant removes itself from the sender table,
+ decrements the sender count, and sets we_sent to false.
+
+6.3.9 Allocation of Source Description Bandwidth
+
+ This specification defines several source description (SDES) items in
+ addition to the mandatory CNAME item, such as NAME (personal name)
+ and EMAIL (email address). It also provides a means to define new
+ application-specific RTCP packet types. Applications should exercise
+ caution in allocating control bandwidth to this additional
+ information because it will slow down the rate at which reception
+ reports and CNAME are sent, thus impairing the performance of the
+ protocol. It is RECOMMENDED that no more than 20% of the RTCP
+ bandwidth allocated to a single participant be used to carry the
+ additional information. Furthermore, it is not intended that all
+ SDES items will be included in every application. Those that are
+ included SHOULD be assigned a fraction of the bandwidth according to
+ their utility. Rather than estimate these fractions dynamically, it
+ is recommended that the percentages be translated statically into
+ report interval counts based on the typical length of an item.
+
+ For example, an application may be designed to send only CNAME, NAME
+ and EMAIL and not any others. NAME might be given much higher
+ priority than EMAIL because the NAME would be displayed continuously
+ in the application's user interface, whereas EMAIL would be displayed
+ only when requested. At every RTCP interval, an RR packet and an
+ SDES packet with the CNAME item would be sent. For a small session
+
+
+
+Schulzrinne, et al. Standards Track [Page 34]
+
+RFC 3550 RTP July 2003
+
+
+ operating at the minimum interval, that would be every 5 seconds on
+ the average. Every third interval (15 seconds), one extra item would
+ be included in the SDES packet. Seven out of eight times this would
+ be the NAME item, and every eighth time (2 minutes) it would be the
+ EMAIL item.
+
+ When multiple applications operate in concert using cross-application
+ binding through a common CNAME for each participant, for example in a
+ multimedia conference composed of an RTP session for each medium, the
+ additional SDES information MAY be sent in only one RTP session. The
+ other sessions would carry only the CNAME item. In particular, this
+ approach should be applied to the multiple sessions of a layered
+ encoding scheme (see Section 2.4).
+
+6.4 Sender and Receiver Reports
+
+ RTP receivers provide reception quality feedback using RTCP report
+ packets which may take one of two forms depending upon whether or not
+ the receiver is also a sender. The only difference between the
+ sender report (SR) and receiver report (RR) forms, besides the packet
+ type code, is that the sender report includes a 20-byte sender
+ information section for use by active senders. The SR is issued if a
+ site has sent any data packets during the interval since issuing the
+ last report or the previous one, otherwise the RR is issued.
+
+ Both the SR and RR forms include zero or more reception report
+ blocks, one for each of the synchronization sources from which this
+ receiver has received RTP data packets since the last report.
+ Reports are not issued for contributing sources listed in the CSRC
+ list. Each reception report block provides statistics about the data
+ received from the particular source indicated in that block. Since a
+ maximum of 31 reception report blocks will fit in an SR or RR packet,
+ additional RR packets SHOULD be stacked after the initial SR or RR
+ packet as needed to contain the reception reports for all sources
+ heard during the interval since the last report. If there are too
+ many sources to fit all the necessary RR packets into one compound
+ RTCP packet without exceeding the MTU of the network path, then only
+ the subset that will fit into one MTU SHOULD be included in each
+ interval. The subsets SHOULD be selected round-robin across multiple
+ intervals so that all sources are reported.
+
+ The next sections define the formats of the two reports, how they may
+ be extended in a profile-specific manner if an application requires
+ additional feedback information, and how the reports may be used.
+ Details of reception reporting by translators and mixers are given in
+ Section 7.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 35]
+
+RFC 3550 RTP July 2003
+
+
+6.4.1 SR: Sender Report RTCP Packet
+
+        0                   1                   2                   3
+        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+header |V=2|P|    RC   |   PT=SR=200   |             length            |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         SSRC of sender                        |
+       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+sender |              NTP timestamp, most significant word             |
+info   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |             NTP timestamp, least significant word             |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         RTP timestamp                         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                     sender's packet count                     |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      sender's octet count                     |
+       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+report |                 SSRC_1 (SSRC of first source)                 |
+block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  1    | fraction lost |       cumulative number of packets lost       |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |           extended highest sequence number received           |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                      interarrival jitter                      |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                         last SR (LSR)                         |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+       |                   delay since last SR (DLSR)                  |
+       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+report |                 SSRC_2 (SSRC of second source)                |
+block  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  2    :                               ...                             :
+       +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+       |                  profile-specific extensions                  |
+       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The sender report packet consists of three sections, possibly
+ followed by a fourth profile-specific extension section if defined.
+ The first section, the header, is 8 octets long. The fields have the
+ following meaning:
+
+ version (V): 2 bits
+ Identifies the version of RTP, which is the same in RTCP packets
+ as in RTP data packets. The version defined by this specification
+ is two (2).
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 36]
+
+RFC 3550 RTP July 2003
+
+
+ padding (P): 1 bit
+ If the padding bit is set, this individual RTCP packet contains
+ some additional padding octets at the end which are not part of
+ the control information but are included in the length field. The
+ last octet of the padding is a count of how many padding octets
+ should be ignored, including itself (it will be a multiple of
+ four). Padding may be needed by some encryption algorithms with
+ fixed block sizes. In a compound RTCP packet, padding is only
+ required on one individual packet because the compound packet is
+ encrypted as a whole for the method in Section 9.1. Thus, padding
+ MUST only be added to the last individual packet, and if padding
+ is added to that packet, the padding bit MUST be set only on that
+ packet. This convention aids the header validity checks described
+ in Appendix A.2 and allows detection of packets from some early
+ implementations that incorrectly set the padding bit on the first
+ individual packet and add padding to the last individual packet.
+
+ reception report count (RC): 5 bits
+ The number of reception report blocks contained in this packet. A
+ value of zero is valid.
+
+ packet type (PT): 8 bits
+ Contains the constant 200 to identify this as an RTCP SR packet.
+
+ length: 16 bits
+ The length of this RTCP packet in 32-bit words minus one,
+ including the header and any padding. (The offset of one makes
+ zero a valid length and avoids a possible infinite loop in
+ scanning a compound RTCP packet, while counting 32-bit words
+ avoids a validity check for a multiple of 4.)
+
+ SSRC: 32 bits
+ The synchronization source identifier for the originator of this
+ SR packet.
+
+ The second section, the sender information, is 20 octets long and is
+ present in every sender report packet. It summarizes the data
+ transmissions from this sender. The fields have the following
+ meaning:
+
+ NTP timestamp: 64 bits
+ Indicates the wallclock time (see Section 4) when this report was
+ sent so that it may be used in combination with timestamps
+ returned in reception reports from other receivers to measure
+ round-trip propagation to those receivers. Receivers should
+ expect that the measurement accuracy of the timestamp may be
+ limited to far less than the resolution of the NTP timestamp. The
+ measurement uncertainty of the timestamp is not indicated as it
+
+
+
+Schulzrinne, et al. Standards Track [Page 37]
+
+RFC 3550 RTP July 2003
+
+
+ may not be known. On a system that has no notion of wallclock
+ time but does have some system-specific clock such as "system
+ uptime", a sender MAY use that clock as a reference to calculate
+ relative NTP timestamps. It is important to choose a commonly
+ used clock so that if separate implementations are used to produce
+ the individual streams of a multimedia session, all
+ implementations will use the same clock. Until the year 2036,
+ relative and absolute timestamps will differ in the high bit so
+ (invalid) comparisons will show a large difference; by then one
+ hopes relative timestamps will no longer be needed. A sender that
+ has no notion of wallclock or elapsed time MAY set the NTP
+ timestamp to zero.
+
+ RTP timestamp: 32 bits
+ Corresponds to the same time as the NTP timestamp (above), but in
+ the same units and with the same random offset as the RTP
+ timestamps in data packets. This correspondence may be used for
+ intra- and inter-media synchronization for sources whose NTP
+ timestamps are synchronized, and may be used by media-independent
+ receivers to estimate the nominal RTP clock frequency. Note that
+ in most cases this timestamp will not be equal to the RTP
+ timestamp in any adjacent data packet. Rather, it MUST be
+ calculated from the corresponding NTP timestamp using the
+ relationship between the RTP timestamp counter and real time as
+ maintained by periodically checking the wallclock time at a
+ sampling instant.
+
+ sender's packet count: 32 bits
+ The total number of RTP data packets transmitted by the sender
+ since starting transmission up until the time this SR packet was
+ generated. The count SHOULD be reset if the sender changes its
+ SSRC identifier.
+
+ sender's octet count: 32 bits
+ The total number of payload octets (i.e., not including header or
+ padding) transmitted in RTP data packets by the sender since
+ starting transmission up until the time this SR packet was
+ generated. The count SHOULD be reset if the sender changes its
+ SSRC identifier. This field can be used to estimate the average
+ payload data rate.
+
+ The third section contains zero or more reception report blocks
+ depending on the number of other sources heard by this sender since
+ the last report. Each reception report block conveys statistics on
+ the reception of RTP packets from a single synchronization source.
+ Receivers SHOULD NOT carry over statistics when a source changes its
+ SSRC identifier due to a collision. These statistics are:
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 38]
+
+RFC 3550 RTP July 2003
+
+
+ SSRC_n (source identifier): 32 bits
+ The SSRC identifier of the source to which the information in this
+ reception report block pertains.
+
+ fraction lost: 8 bits
+ The fraction of RTP data packets from source SSRC_n lost since the
+ previous SR or RR packet was sent, expressed as a fixed point
+ number with the binary point at the left edge of the field. (That
+ is equivalent to taking the integer part after multiplying the
+ loss fraction by 256.) This fraction is defined to be the number
+ of packets lost divided by the number of packets expected, as
+ defined in the next paragraph. An implementation is shown in
+ Appendix A.3. If the loss is negative due to duplicates, the
+ fraction lost is set to zero. Note that a receiver cannot tell
+ whether any packets were lost after the last one received, and
+ that there will be no reception report block issued for a source
+ if all packets from that source sent during the last reporting
+ interval have been lost.
+
+ cumulative number of packets lost: 24 bits
+ The total number of RTP data packets from source SSRC_n that have
+ been lost since the beginning of reception. This number is
+ defined to be the number of packets expected less the number of
+ packets actually received, where the number of packets received
+ includes any which are late or duplicates. Thus, packets that
+ arrive late are not counted as lost, and the loss may be negative
+ if there are duplicates. The number of packets expected is
+ defined to be the extended last sequence number received, as
+ defined next, less the initial sequence number received. This may
+ be calculated as shown in Appendix A.3.
+
+ extended highest sequence number received: 32 bits
+ The low 16 bits contain the highest sequence number received in an
+ RTP data packet from source SSRC_n, and the most significant 16
+ bits extend that sequence number with the corresponding count of
+ sequence number cycles, which may be maintained according to the
+ algorithm in Appendix A.1. Note that different receivers within
+ the same session will generate different extensions to the
+ sequence number if their start times differ significantly.
+
+ interarrival jitter: 32 bits
+ An estimate of the statistical variance of the RTP data packet
+ interarrival time, measured in timestamp units and expressed as an
+ unsigned integer. The interarrival jitter J is defined to be the
+ mean deviation (smoothed absolute value) of the difference D in
+ packet spacing at the receiver compared to the sender for a pair
+ of packets. As shown in the equation below, this is equivalent to
+ the difference in the "relative transit time" for the two packets;
+
+
+
+Schulzrinne, et al. Standards Track [Page 39]
+
+RFC 3550 RTP July 2003
+
+
+ the relative transit time is the difference between a packet's RTP
+ timestamp and the receiver's clock at the time of arrival,
+ measured in the same units.
+
+ If Si is the RTP timestamp from packet i, and Ri is the time of
+ arrival in RTP timestamp units for packet i, then for two packets
+ i and j, D may be expressed as
+
+ D(i,j) = (Rj - Ri) - (Sj - Si) = (Rj - Sj) - (Ri - Si)
+
+ The interarrival jitter SHOULD be calculated continuously as each
+ data packet i is received from source SSRC_n, using this
+ difference D for that packet and the previous packet i-1 in order
+ of arrival (not necessarily in sequence), according to the formula
+
+ J(i) = J(i-1) + (|D(i-1,i)| - J(i-1))/16
+
+ Whenever a reception report is issued, the current value of J is
+ sampled.
+
+ The jitter calculation MUST conform to the formula specified here
+ in order to allow profile-independent monitors to make valid
+ interpretations of reports coming from different implementations.
+ This algorithm is the optimal first-order estimator and the gain
+ parameter 1/16 gives a good noise reduction ratio while
+ maintaining a reasonable rate of convergence [22]. A sample
+ implementation is shown in Appendix A.8. See Section 6.4.4 for a
+ discussion of the effects of varying packet duration and delay
+ before transmission.
+
+ last SR timestamp (LSR): 32 bits
+ The middle 32 bits out of 64 in the NTP timestamp (as explained in
+ Section 4) received as part of the most recent RTCP sender report
+ (SR) packet from source SSRC_n. If no SR has been received yet,
+ the field is set to zero.
+
+ delay since last SR (DLSR): 32 bits
+ The delay, expressed in units of 1/65536 seconds, between
+ receiving the last SR packet from source SSRC_n and sending this
+ reception report block. If no SR packet has been received yet
+ from SSRC_n, the DLSR field is set to zero.
+
+ Let SSRC_r denote the receiver issuing this receiver report.
+ Source SSRC_n can compute the round-trip propagation delay to
+ SSRC_r by recording the time A when this reception report block is
+ received. It calculates the total round-trip time A-LSR using the
+ last SR timestamp (LSR) field, and then subtracts the DLSR field to
+ leave the round-trip propagation delay as (A - LSR - DLSR). This
+
+
+
+Schulzrinne, et al. Standards Track [Page 40]
+
+RFC 3550 RTP July 2003
+
+
+ is illustrated in Fig. 2. Times are shown in both a hexadecimal
+ representation of the 32-bit fields and the equivalent floating-
+ point decimal representation. Colons indicate a 32-bit field
+ divided into a 16-bit integer part and 16-bit fraction part.
+
+ This may be used as an approximate measure of distance to cluster
+ receivers, although some links have very asymmetric delays.
+
+   [10 Nov 1995 11:33:25.125 UTC]       [10 Nov 1995 11:33:36.5 UTC]
+   n                 SR(n)              A=b710:8000 (46864.500 s)
+   ---------------------------------------------------------------->
+                      v                 ^
+   ntp_sec =0xb44db705 v               ^ dlsr=0x0005:4000 (    5.250s)
+   ntp_frac=0x20000000  v             ^  lsr =0xb705:2000 (46853.125s)
+     (3024992005.125 s)  v           ^
+   r                      v         ^ RR(n)
+   ---------------------------------------------------------------->
+                          |<-DLSR->|
+                           (5.250 s)
+
+   A     0xb710:8000 (46864.500 s)
+   DLSR -0x0005:4000 (    5.250 s)
+   LSR  -0xb705:2000 (46853.125 s)
+   -------------------------------
+   delay 0x0006:2000 (    6.125 s)
+
+ Figure 2: Example for round-trip time computation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 41]
+
+RFC 3550 RTP July 2003
+
+
+6.4.2 RR: Receiver Report RTCP Packet
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+header |V=2|P| RC | PT=RR=201 | length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | SSRC of packet sender |
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+report | SSRC_1 (SSRC of first source) |
+block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ 1 | fraction lost | cumulative number of packets lost |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | extended highest sequence number received |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | interarrival jitter |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | last SR (LSR) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | delay since last SR (DLSR) |
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+report | SSRC_2 (SSRC of second source) |
+block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ 2 : ... :
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+ | profile-specific extensions |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The format of the receiver report (RR) packet is the same as that of
+ the SR packet except that the packet type field contains the constant
+ 201 and the five words of sender information are omitted (these are
+ the NTP and RTP timestamps and sender's packet and octet counts).
+ The remaining fields have the same meaning as for the SR packet.
+
+ An empty RR packet (RC = 0) MUST be put at the head of a compound
+ RTCP packet when there is no data transmission or reception to
+ report.
+
+6.4.3 Extending the Sender and Receiver Reports
+
+ A profile SHOULD define profile-specific extensions to the sender
+ report and receiver report if there is additional information that
+ needs to be reported regularly about the sender or receivers. This
+ method SHOULD be used in preference to defining another RTCP packet
+ type because it requires less overhead:
+
+ o fewer octets in the packet (no RTCP header or SSRC field);
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 42]
+
+RFC 3550 RTP July 2003
+
+
+ o simpler and faster parsing because applications running under that
+ profile would be programmed to always expect the extension fields
+ in the directly accessible location after the reception reports.
+
+ The extension is a fourth section in the sender- or receiver-report
+ packet which comes at the end after the reception report blocks, if
+ any. If additional sender information is required, then for sender
+ reports it would be included first in the extension section, but for
+ receiver reports it would not be present. If information about
+ receivers is to be included, that data SHOULD be structured as an
+ array of blocks parallel to the existing array of reception report
+ blocks; that is, the number of blocks would be indicated by the RC
+ field.
+
+6.4.4 Analyzing Sender and Receiver Reports
+
+ It is expected that reception quality feedback will be useful not
+ only for the sender but also for other receivers and third-party
+ monitors. The sender may modify its transmissions based on the
+ feedback; receivers can determine whether problems are local,
+ regional or global; network managers may use profile-independent
+ monitors that receive only the RTCP packets and not the corresponding
+ RTP data packets to evaluate the performance of their networks for
+ multicast distribution.
+
+ Cumulative counts are used in both the sender information and
+ receiver report blocks so that differences may be calculated between
+ any two reports to make measurements over both short and long time
+ periods, and to provide resilience against the loss of a report. The
+ difference between the last two reports received can be used to
+ estimate the recent quality of the distribution. The NTP timestamp
+ is included so that rates may be calculated from these differences
+ over the interval between two reports. Since that timestamp is
+ independent of the clock rate for the data encoding, it is possible
+ to implement encoding- and profile-independent quality monitors.
+
+ An example calculation is the packet loss rate over the interval
+ between two reception reports. The difference in the cumulative
+ number of packets lost gives the number lost during that interval.
+ The difference in the extended last sequence numbers received gives
+ the number of packets expected during the interval. The ratio of
+ these two is the packet loss fraction over the interval. This ratio
+ should equal the fraction lost field if the two reports are
+ consecutive, but otherwise it may not. The loss rate per second can
+ be obtained by dividing the loss fraction by the difference in NTP
+ timestamps, expressed in seconds. The number of packets received is
+ the number of packets expected minus the number lost. The number of
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 43]
+
+RFC 3550 RTP July 2003
+
+
+ packets expected may also be used to judge the statistical validity
+ of any loss estimates. For example, 1 out of 5 packets lost has a
+ lower significance than 200 out of 1000.
+
+ From the sender information, a third-party monitor can calculate the
+ average payload data rate and the average packet rate over an
+ interval without receiving the data. Taking the ratio of the two
+ gives the average payload size. If it can be assumed that packet
+ loss is independent of packet size, then the number of packets
+ received by a particular receiver times the average payload size (or
+ the corresponding packet size) gives the apparent throughput
+ available to that receiver.
+
+ In addition to the cumulative counts which allow long-term packet
+ loss measurements using differences between reports, the fraction
+ lost field provides a short-term measurement from a single report.
+ This becomes more important as the size of a session scales up enough
+ that reception state information might not be kept for all receivers
+ or the interval between reports becomes long enough that only one
+ report might have been received from a particular receiver.
+
+ The interarrival jitter field provides a second short-term measure of
+ network congestion. Packet loss tracks persistent congestion while
+ the jitter measure tracks transient congestion. The jitter measure
+ may indicate congestion before it leads to packet loss. The
+ interarrival jitter field is only a snapshot of the jitter at the
+ time of a report and is not intended to be taken quantitatively.
+ Rather, it is intended for comparison across a number of reports from
+ one receiver over time or from multiple receivers, e.g., within a
+ single network, at the same time. To allow comparison across
+ receivers, it is important that the jitter be calculated according to
+ the same formula by all receivers.
+
+ Because the jitter calculation is based on the RTP timestamp which
+ represents the instant when the first data in the packet was sampled,
+ any variation in the delay between that sampling instant and the time
+ the packet is transmitted will affect the resulting jitter that is
+ calculated. Such a variation in delay would occur for audio packets
+ of varying duration. It will also occur for video encodings because
+ the timestamp is the same for all the packets of one frame but those
+ packets are not all transmitted at the same time. The variation in
+ delay until transmission does reduce the accuracy of the jitter
+ calculation as a measure of the behavior of the network by itself,
+ but it is appropriate to include considering that the receiver buffer
+ must accommodate it. When the jitter calculation is used as a
+ comparative measure, the (constant) component due to variation in
+ delay until transmission subtracts out so that a change in the
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 44]
+
+RFC 3550 RTP July 2003
+
+
+ network jitter component can then be observed unless it is relatively
+ small. If the change is small, then it is likely to be
+ inconsequential.
+
+6.5 SDES: Source Description RTCP Packet
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+header |V=2|P| SC | PT=SDES=202 | length |
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+chunk | SSRC/CSRC_1 |
+ 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | SDES items |
+ | ... |
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+chunk | SSRC/CSRC_2 |
+ 2 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | SDES items |
+ | ... |
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+
+ The SDES packet is a three-level structure composed of a header and
+ zero or more chunks, each of which is composed of items describing
+ the source identified in that chunk. The items are described
+ individually in subsequent sections.
+
+ version (V), padding (P), length:
+ As described for the SR packet (see Section 6.4.1).
+
+ packet type (PT): 8 bits
+ Contains the constant 202 to identify this as an RTCP SDES packet.
+
+ source count (SC): 5 bits
+ The number of SSRC/CSRC chunks contained in this SDES packet. A
+ value of zero is valid but useless.
+
+ Each chunk consists of an SSRC/CSRC identifier followed by a list of
+ zero or more items, which carry information about the SSRC/CSRC.
+ Each chunk starts on a 32-bit boundary. Each item consists of an 8-
+ bit type field, an 8-bit octet count describing the length of the
+ text (thus, not including this two-octet header), and the text
+ itself. Note that the text can be no longer than 255 octets, but
+ this is consistent with the need to limit RTCP bandwidth consumption.
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 45]
+
+RFC 3550 RTP July 2003
+
+
+ The text is encoded according to the UTF-8 encoding specified in RFC
+ 2279 [5]. US-ASCII is a subset of this encoding and requires no
+ additional encoding. The presence of multi-octet encodings is
+ indicated by setting the most significant bit of a character to a
+ value of one.
+
+ Items are contiguous, i.e., items are not individually padded to a
+ 32-bit boundary. Text is not null terminated because some multi-
+ octet encodings include null octets. The list of items in each chunk
+ MUST be terminated by one or more null octets, the first of which is
+ interpreted as an item type of zero to denote the end of the list.
+ No length octet follows the null item type octet, but additional null
+ octets MUST be included if needed to pad until the next 32-bit
+ boundary. Note that this padding is separate from that indicated by
+ the P bit in the RTCP header. A chunk with zero items (four null
+ octets) is valid but useless.
+
+ End systems send one SDES packet containing their own source
+ identifier (the same as the SSRC in the fixed RTP header). A mixer
+ sends one SDES packet containing a chunk for each contributing source
+ from which it is receiving SDES information, or multiple complete
+ SDES packets in the format above if there are more than 31 such
+ sources (see Section 7).
+
+ The SDES items currently defined are described in the next sections.
+ Only the CNAME item is mandatory. Some items shown here may be
+ useful only for particular profiles, but the item types are all
+ assigned from one common space to promote shared use and to simplify
+ profile-independent applications. Additional items may be defined in
+ a profile by registering the type numbers with IANA as described in
+ Section 15.
+
+6.5.1 CNAME: Canonical End-Point Identifier SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | CNAME=1 | length | user and domain name ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The CNAME identifier has the following properties:
+
+ o Because the randomly allocated SSRC identifier may change if a
+ conflict is discovered or if a program is restarted, the CNAME
+ item MUST be included to provide the binding from the SSRC
+ identifier to an identifier for the source (sender or receiver)
+ that remains constant.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 46]
+
+RFC 3550 RTP July 2003
+
+
+ o Like the SSRC identifier, the CNAME identifier SHOULD also be
+ unique among all participants within one RTP session.
+
+ o To provide a binding across multiple media tools used by one
+ participant in a set of related RTP sessions, the CNAME SHOULD be
+ fixed for that participant.
+
+ o To facilitate third-party monitoring, the CNAME SHOULD be suitable
+ for either a program or a person to locate the source.
+
+ Therefore, the CNAME SHOULD be derived algorithmically and not
+ entered manually, when possible. To meet these requirements, the
+ following format SHOULD be used unless a profile specifies an
+ alternate syntax or semantics. The CNAME item SHOULD have the format
+ "user@host", or "host" if a user name is not available as on single-
+ user systems. For both formats, "host" is either the fully qualified
+ domain name of the host from which the real-time data originates,
+ formatted according to the rules specified in RFC 1034 [6], RFC 1035
+ [7] and Section 2.1 of RFC 1123 [8]; or the standard ASCII
+ representation of the host's numeric address on the interface used
+ for the RTP communication. For example, the standard ASCII
+ representation of an IP Version 4 address is "dotted decimal", also
+ known as dotted quad, and for IP Version 6, addresses are textually
+ represented as groups of hexadecimal digits separated by colons (with
+ variations as detailed in RFC 3513 [23]). Other address types are
+ expected to have ASCII representations that are mutually unique. The
+ fully qualified domain name is more convenient for a human observer
+ and may avoid the need to send a NAME item in addition, but it may be
+ difficult or impossible to obtain reliably in some operating
+ environments. Applications that may be run in such environments
+ SHOULD use the ASCII representation of the address instead.
+
+ Examples are "doe@sleepy.example.com", "doe@192.0.2.89" or
+ "doe@2201:056D::112E:144A:1E24" for a multi-user system. On a system
+ with no user name, examples would be "sleepy.example.com",
+ "192.0.2.89" or "2201:056D::112E:144A:1E24".
+
+ The user name SHOULD be in a form that a program such as "finger" or
+ "talk" could use, i.e., it typically is the login name rather than
+ the personal name. The host name is not necessarily identical to the
+ one in the participant's electronic mail address.
+
+ This syntax will not provide unique identifiers for each source if an
+ application permits a user to generate multiple sources from one
+ host. Such an application would have to rely on the SSRC to further
+ identify the source, or the profile for that application would have
+ to specify additional syntax for the CNAME identifier.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 47]
+
+RFC 3550 RTP July 2003
+
+
+ If each application creates its CNAME independently, the resulting
+ CNAMEs may not be identical as would be required to provide a binding
+ across multiple media tools belonging to one participant in a set of
+ related RTP sessions. If cross-media binding is required, it may be
+ necessary for the CNAME of each tool to be externally configured with
+ the same value by a coordination tool.
+
+ Application writers should be aware that private network address
+ assignments such as the Net-10 assignment proposed in RFC 1918 [24]
+ may create network addresses that are not globally unique. This
+ would lead to non-unique CNAMEs if hosts with private addresses and
+ no direct IP connectivity to the public Internet have their RTP
+ packets forwarded to the public Internet through an RTP-level
+ translator. (See also RFC 1627 [25].) To handle this case,
+ applications MAY provide a means to configure a unique CNAME, but the
+ burden is on the translator to translate CNAMEs from private
+ addresses to public addresses if necessary to keep private addresses
+ from being exposed.
+
+6.5.2 NAME: User Name SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | NAME=2 | length | common name of source ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ This is the real name used to describe the source, e.g., "John Doe,
+ Bit Recycler". It may be in any form desired by the user. For
+ applications such as conferencing, this form of name may be the most
+ desirable for display in participant lists, and therefore might be
+ sent most frequently of those items other than CNAME. Profiles MAY
+ establish such priorities. The NAME value is expected to remain
+ constant at least for the duration of a session. It SHOULD NOT be
+ relied upon to be unique among all participants in the session.
+
+6.5.3 EMAIL: Electronic Mail Address SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | EMAIL=3 | length | email address of source ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The email address is formatted according to RFC 2822 [9], for
+ example, "John.Doe@example.com". The EMAIL value is expected to
+ remain constant for the duration of a session.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 48]
+
+RFC 3550 RTP July 2003
+
+
+6.5.4 PHONE: Phone Number SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | PHONE=4 | length | phone number of source ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The phone number SHOULD be formatted with the plus sign replacing the
+ international access code. For example, "+1 908 555 1212" for a
+ number in the United States.
+
+6.5.5 LOC: Geographic User Location SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | LOC=5 | length | geographic location of site ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Depending on the application, different degrees of detail are
+ appropriate for this item. For conference applications, a string
+ like "Murray Hill, New Jersey" may be sufficient, while, for an
+ active badge system, strings like "Room 2A244, AT&T BL MH" might be
+ appropriate. The degree of detail is left to the implementation
+ and/or user, but format and content MAY be prescribed by a profile.
+ The LOC value is expected to remain constant for the duration of a
+ session, except for mobile hosts.
+
+6.5.6 TOOL: Application or Tool Name SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | TOOL=6 | length |name/version of source appl. ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ A string giving the name and possibly version of the application
+ generating the stream, e.g., "videotool 1.2". This information may
+ be useful for debugging purposes and is similar to the Mailer or
+ Mail-System-Version SMTP headers. The TOOL value is expected to
+ remain constant for the duration of the session.
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 49]
+
+RFC 3550 RTP July 2003
+
+
+6.5.7 NOTE: Notice/Status SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | NOTE=7 | length | note about the source ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The following semantics are suggested for this item, but these or
+ other semantics MAY be explicitly defined by a profile. The NOTE
+ item is intended for transient messages describing the current state
+ of the source, e.g., "on the phone, can't talk". Or, during a
+ seminar, this item might be used to convey the title of the talk. It
+ should be used only to carry exceptional information and SHOULD NOT
+ be included routinely by all participants because this would slow
+ down the rate at which reception reports and CNAME are sent, thus
+ impairing the performance of the protocol. In particular, it SHOULD
+ NOT be included as an item in a user's configuration file nor
+ automatically generated as in a quote-of-the-day.
+
+ Since the NOTE item may be important to display while it is active,
+ the rate at which other non-CNAME items such as NAME are transmitted
+ might be reduced so that the NOTE item can take that part of the RTCP
+ bandwidth. When the transient message becomes inactive, the NOTE
+ item SHOULD continue to be transmitted a few times at the same
+ repetition rate but with a string of length zero to signal the
+ receivers. However, receivers SHOULD also consider the NOTE item
+ inactive if it is not received for a small multiple of the repetition
+ rate, or perhaps 20-30 RTCP intervals.
+
+6.5.8 PRIV: Private Extensions SDES Item
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | PRIV=8 | length | prefix length |prefix string...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ ... | value string ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ This item is used to define experimental or application-specific SDES
+ extensions. The item contains a prefix consisting of a length-string
+ pair, followed by the value string filling the remainder of the item
+ and carrying the desired information. The prefix length field is 8
+ bits long. The prefix string is a name chosen by the person defining
+ the PRIV item to be unique with respect to other PRIV items this
+ application might receive. The application creator might choose to
+ use the application name plus an additional subtype identification if
+
+
+
+Schulzrinne, et al. Standards Track [Page 50]
+
+RFC 3550 RTP July 2003
+
+
+ needed. Alternatively, it is RECOMMENDED that others choose a name
+ based on the entity they represent, then coordinate the use of the
+ name within that entity.
+
+ Note that the prefix consumes some space within the item's total
+ length of 255 octets, so the prefix should be kept as short as
+ possible. This facility and the constrained RTCP bandwidth SHOULD
+ NOT be overloaded; it is not intended to satisfy all the control
+ communication requirements of all applications.
+
+ SDES PRIV prefixes will not be registered by IANA. If some form of
+ the PRIV item proves to be of general utility, it SHOULD instead be
+ assigned a regular SDES item type registered with IANA so that no
+ prefix is required. This simplifies use and increases transmission
+ efficiency.
+
+6.6 BYE: Goodbye RTCP Packet
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |V=2|P| SC | PT=BYE=203 | length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | SSRC/CSRC |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ : ... :
+ +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
+(opt) | length | reason for leaving ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The BYE packet indicates that one or more sources are no longer
+ active.
+
+ version (V), padding (P), length:
+ As described for the SR packet (see Section 6.4.1).
+
+ packet type (PT): 8 bits
+ Contains the constant 203 to identify this as an RTCP BYE packet.
+
+ source count (SC): 5 bits
+ The number of SSRC/CSRC identifiers included in this BYE packet.
+ A count value of zero is valid, but useless.
+
+ The rules for when a BYE packet should be sent are specified in
+ Sections 6.3.7 and 8.2.
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 51]
+
+RFC 3550 RTP July 2003
+
+
+ If a BYE packet is received by a mixer, the mixer SHOULD forward the
+ BYE packet with the SSRC/CSRC identifier(s) unchanged. If a mixer
+ shuts down, it SHOULD send a BYE packet listing all contributing
+ sources it handles, as well as its own SSRC identifier. Optionally,
+ the BYE packet MAY include an 8-bit octet count followed by that many
+ octets of text indicating the reason for leaving, e.g., "camera
+ malfunction" or "RTP loop detected". The string has the same
+ encoding as that described for SDES. If the string fills the packet
+ to the next 32-bit boundary, the string is not null terminated. If
+ not, the BYE packet MUST be padded with null octets to the next 32-
+ bit boundary. This padding is separate from that indicated by the P
+ bit in the RTCP header.
+
+6.7 APP: Application-Defined RTCP Packet
+
+  0                   1                   2                   3
+  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |V=2|P| subtype |   PT=APP=204  |             length            |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |                           SSRC/CSRC                           |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |                          name (ASCII)                         |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |                   application-dependent data                ...
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ The APP packet is intended for experimental use as new applications
+ and new features are developed, without requiring packet type value
+ registration. APP packets with unrecognized names SHOULD be ignored.
+ After testing and if wider use is justified, it is RECOMMENDED that
+ each APP packet be redefined without the subtype and name fields and
+ registered with IANA using an RTCP packet type.
+
+ version (V), padding (P), length:
+ As described for the SR packet (see Section 6.4.1).
+
+ subtype: 5 bits
+ May be used as a subtype to allow a set of APP packets to be
+ defined under one unique name, or for any application-dependent
+ data.
+
+ packet type (PT): 8 bits
+ Contains the constant 204 to identify this as an RTCP APP packet.
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 52]
+
+RFC 3550 RTP July 2003
+
+
+ name: 4 octets
+ A name chosen by the person defining the set of APP packets to be
+ unique with respect to other APP packets this application might
+ receive. The application creator might choose to use the
+ application name, and then coordinate the allocation of subtype
+ values to others who want to define new packet types for the
+ application. Alternatively, it is RECOMMENDED that others choose
+ a name based on the entity they represent, then coordinate the use
+ of the name within that entity. The name is interpreted as a
+ sequence of four ASCII characters, with uppercase and lowercase
+ characters treated as distinct.
+
+ application-dependent data: variable length
+ Application-dependent data may or may not appear in an APP packet.
+ It is interpreted by the application and not RTP itself. It MUST
+ be a multiple of 32 bits long.
+
+7. RTP Translators and Mixers
+
+ In addition to end systems, RTP supports the notion of "translators"
+ and "mixers", which could be considered as "intermediate systems" at
+ the RTP level. Although this support adds some complexity to the
+ protocol, the need for these functions has been clearly established
+ by experiments with multicast audio and video applications in the
+ Internet. Example uses of translators and mixers given in Section
+ 2.3 stem from the presence of firewalls and low bandwidth
+ connections, both of which are likely to remain.
+
+7.1 General Description
+
+ An RTP translator/mixer connects two or more transport-level
+ "clouds". Typically, each cloud is defined by a common network and
+ transport protocol (e.g., IP/UDP) plus a multicast address and
+ transport level destination port or a pair of unicast addresses and
+ ports. (Network-level protocol translators, such as IP version 4 to
+ IP version 6, may be present within a cloud invisibly to RTP.) One
+ system may serve as a translator or mixer for a number of RTP
+ sessions, but each is considered a logically separate entity.
+
+ In order to avoid creating a loop when a translator or mixer is
+ installed, the following rules MUST be observed:
+
+ o Each of the clouds connected by translators and mixers
+ participating in one RTP session either MUST be distinct from all
+ the others in at least one of these parameters (protocol, address,
+ port), or MUST be isolated at the network level from the others.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 53]
+
+RFC 3550 RTP July 2003
+
+
+ o A derivative of the first rule is that there MUST NOT be multiple
+ translators or mixers connected in parallel unless by some
+ arrangement they partition the set of sources to be forwarded.
+
+ Similarly, all RTP end systems that can communicate through one or
+ more RTP translators or mixers share the same SSRC space, that is,
+ the SSRC identifiers MUST be unique among all these end systems.
+ Section 8.2 describes the collision resolution algorithm by which
+ SSRC identifiers are kept unique and loops are detected.
+
+ There may be many varieties of translators and mixers designed for
+ different purposes and applications. Some examples are to add or
+ remove encryption, change the encoding of the data or the underlying
+ protocols, or replicate between a multicast address and one or more
+ unicast addresses. The distinction between translators and mixers is
+ that a translator passes through the data streams from different
+ sources separately, whereas a mixer combines them to form one new
+ stream:
+
+ Translator: Forwards RTP packets with their SSRC identifier
+ intact; this makes it possible for receivers to identify
+ individual sources even though packets from all the sources pass
+ through the same translator and carry the translator's network
+ source address. Some kinds of translators will pass through the
+ data untouched, but others MAY change the encoding of the data and
+ thus the RTP data payload type and timestamp. If multiple data
+ packets are re-encoded into one, or vice versa, a translator MUST
+ assign new sequence numbers to the outgoing packets. Losses in
+ the incoming packet stream may induce corresponding gaps in the
+ outgoing sequence numbers. Receivers cannot detect the presence
+ of a translator unless they know by some other means what payload
+ type or transport address was used by the original source.
+
+ Mixer: Receives streams of RTP data packets from one or more
+ sources, possibly changes the data format, combines the streams in
+ some manner and then forwards the combined stream. Since the
+ timing among multiple input sources will not generally be
+ synchronized, the mixer will make timing adjustments among the
+ streams and generate its own timing for the combined stream, so it
+ is the synchronization source. Thus, all data packets forwarded
+ by a mixer MUST be marked with the mixer's own SSRC identifier.
+ In order to preserve the identity of the original sources
+ contributing to the mixed packet, the mixer SHOULD insert their
+ SSRC identifiers into the CSRC identifier list following the fixed
+ RTP header of the packet. A mixer that is also itself a
+ contributing source for some packet SHOULD explicitly include its
+ own SSRC identifier in the CSRC list for that packet.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 54]
+
+RFC 3550 RTP July 2003
+
+
+ For some applications, it MAY be acceptable for a mixer not to
+ identify sources in the CSRC list. However, this introduces the
+ danger that loops involving those sources could not be detected.
+
+ The advantage of a mixer over a translator for applications like
+ audio is that the output bandwidth is limited to that of one source
+ even when multiple sources are active on the input side. This may be
+ important for low-bandwidth links. The disadvantage is that
+ receivers on the output side don't have any control over which
+ sources are passed through or muted, unless some mechanism is
+ implemented for remote control of the mixer. The regeneration of
+ synchronization information by mixers also means that receivers can't
+ do inter-media synchronization of the original streams. A multi-
+ media mixer could do it.
+
+        [E1]                                    [E6]
+         |                                       |
+   E1:17 |                                 E6:15 |
+         |                                       |   E6:15
+         V  M1:48 (1,17)         M1:48 (1,17)    V   M1:48 (1,17)
+        (M1)-------------><T1>-----------------><T2>-------------->[E7]
+         ^                 ^     E4:47           ^   E4:47
+    E2:1 |           E4:47 |                     |   M3:89 (64,45)
+         |                 |                     |
+        [E2]              [E4]     M3:89 (64,45) |
+                                                 |        legend:
+  [E3] --------->(M2)----------->(M3)------------|        [End system]
+         E3:64        M2:12 (64)  ^                       (Mixer)
+                                  | E5:45                 <Translator>
+                                  |
+                                 [E5]          source: SSRC (CSRCs)
+                                               ------------------->
+
+ Figure 3: Sample RTP network with end systems, mixers and translators
+
+ A collection of mixers and translators is shown in Fig. 3 to
+ illustrate their effect on SSRC and CSRC identifiers. In the figure,
+ end systems are shown as rectangles (named E), translators as
+ triangles (named T) and mixers as ovals (named M). The notation "M1:
+ 48(1,17)" designates a packet originating at mixer M1, identified by
+ M1's (random) SSRC value of 48 and two CSRC identifiers, 1 and 17,
+ copied from the SSRC identifiers of packets from E1 and E2.
+
+7.2 RTCP Processing in Translators
+
+ In addition to forwarding data packets, perhaps modified, translators
+ and mixers MUST also process RTCP packets. In many cases, they will
+ take apart the compound RTCP packets received from end systems to
+
+
+
+Schulzrinne, et al. Standards Track [Page 55]
+
+RFC 3550 RTP July 2003
+
+
+ aggregate SDES information and to modify the SR or RR packets.
+ Retransmission of this information may be triggered by the packet
+ arrival or by the RTCP interval timer of the translator or mixer
+ itself.
+
+ A translator that does not modify the data packets, for example one
+ that just replicates between a multicast address and a unicast
+ address, MAY simply forward RTCP packets unmodified as well. A
+ translator that transforms the payload in some way MUST make
+ corresponding transformations in the SR and RR information so that it
+ still reflects the characteristics of the data and the reception
+ quality. These translators MUST NOT simply forward RTCP packets. In
+ general, a translator SHOULD NOT aggregate SR and RR packets from
+ different sources into one packet since that would reduce the
+ accuracy of the propagation delay measurements based on the LSR and
+ DLSR fields.
+
+ SR sender information: A translator does not generate its own
+ sender information, but forwards the SR packets received from one
+ cloud to the others. The SSRC is left intact but the sender
+ information MUST be modified if required by the translation. If a
+ translator changes the data encoding, it MUST change the "sender's
+ byte count" field. If it also combines several data packets into
+ one output packet, it MUST change the "sender's packet count"
+ field. If it changes the timestamp frequency, it MUST change the
+ "RTP timestamp" field in the SR packet.
+
+ SR/RR reception report blocks: A translator forwards reception
+ reports received from one cloud to the others. Note that these
+ flow in the direction opposite to the data. The SSRC is left
+ intact. If a translator combines several data packets into one
+ output packet, and therefore changes the sequence numbers, it MUST
+ make the inverse manipulation for the packet loss fields and the
+ "extended last sequence number" field. This may be complex. In
+ the extreme case, there may be no meaningful way to translate the
+ reception reports, so the translator MAY pass on no reception
+ report at all or a synthetic report based on its own reception.
+ The general rule is to do what makes sense for a particular
+ translation.
+
+ A translator does not require an SSRC identifier of its own, but
+ MAY choose to allocate one for the purpose of sending reports
+ about what it has received. These would be sent to all the
+ connected clouds, each corresponding to the translation of the
+ data stream as sent to that cloud, since reception reports are
+ normally multicast to all participants.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 56]
+
+RFC 3550 RTP July 2003
+
+
+ SDES: Translators typically forward without change the SDES
+ information they receive from one cloud to the others, but MAY,
+ for example, decide to filter non-CNAME SDES information if
+ bandwidth is limited. The CNAMEs MUST be forwarded to allow SSRC
+ identifier collision detection to work. A translator that
+ generates its own RR packets MUST send SDES CNAME information
+ about itself to the same clouds that it sends those RR packets.
+
+ BYE: Translators forward BYE packets unchanged. A translator
+ that is about to cease forwarding packets SHOULD send a BYE packet
+ to each connected cloud containing all the SSRC identifiers that
+ were previously being forwarded to that cloud, including the
+ translator's own SSRC identifier if it sent reports of its own.
+
+ APP: Translators forward APP packets unchanged.
+
+7.3 RTCP Processing in Mixers
+
+ Since a mixer generates a new data stream of its own, it does not
+ pass through SR or RR packets at all and instead generates new
+ information for both sides.
+
+ SR sender information: A mixer does not pass through sender
+ information from the sources it mixes because the characteristics
+ of the source streams are lost in the mix. As a synchronization
+ source, the mixer SHOULD generate its own SR packets with sender
+ information about the mixed data stream and send them in the same
+ direction as the mixed stream.
+
+ SR/RR reception report blocks: A mixer generates its own
+ reception reports for sources in each cloud and sends them out
+ only to the same cloud. It MUST NOT send these reception reports
+ to the other clouds and MUST NOT forward reception reports from
+ one cloud to the others because the sources would not be SSRCs
+ there (only CSRCs).
+
+ SDES: Mixers typically forward without change the SDES
+ information they receive from one cloud to the others, but MAY,
+ for example, decide to filter non-CNAME SDES information if
+ bandwidth is limited. The CNAMEs MUST be forwarded to allow SSRC
+ identifier collision detection to work. (An identifier in a CSRC
+ list generated by a mixer might collide with an SSRC identifier
+ generated by an end system.) A mixer MUST send SDES CNAME
+ information about itself to the same clouds that it sends SR or RR
+ packets.
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 57]
+
+RFC 3550 RTP July 2003
+
+
+ Since mixers do not forward SR or RR packets, they will typically
+ be extracting SDES packets from a compound RTCP packet. To
+ minimize overhead, chunks from the SDES packets MAY be aggregated
+ into a single SDES packet which is then stacked on an SR or RR
+ packet originating from the mixer. A mixer which aggregates SDES
+ packets will use more RTCP bandwidth than an individual source
+ because the compound packets will be longer, but that is
+ appropriate since the mixer represents multiple sources.
+ Similarly, a mixer which passes through SDES packets as they are
+ received will be transmitting RTCP packets at higher than the
+ single source rate, but again that is correct since the packets
+ come from multiple sources. The RTCP packet rate may be different
+ on each side of the mixer.
+
+ A mixer that does not insert CSRC identifiers MAY also refrain
+ from forwarding SDES CNAMEs. In this case, the SSRC identifier
+ spaces in the two clouds are independent. As mentioned earlier,
+ this mode of operation creates a danger that loops can't be
+ detected.
+
+ BYE: Mixers MUST forward BYE packets. A mixer that is about to
+ cease forwarding packets SHOULD send a BYE packet to each
+ connected cloud containing all the SSRC identifiers that were
+ previously being forwarded to that cloud, including the mixer's
+ own SSRC identifier if it sent reports of its own.
+
+ APP: The treatment of APP packets by mixers is application-specific.
+
+7.4 Cascaded Mixers
+
+ An RTP session may involve a collection of mixers and translators as
+ shown in Fig. 3. If two mixers are cascaded, such as M2 and M3 in
+ the figure, packets received by a mixer may already have been mixed
+ and may include a CSRC list with multiple identifiers. The second
+ mixer SHOULD build the CSRC list for the outgoing packet using the
+ CSRC identifiers from already-mixed input packets and the SSRC
+ identifiers from unmixed input packets. This is shown in the output
+ arc from mixer M3 labeled M3:89(64,45) in the figure. As in the case
+ of mixers that are not cascaded, if the resulting CSRC list has more
+ than 15 identifiers, the remainder cannot be included.
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 58]
+
+RFC 3550 RTP July 2003
+
+
+8. SSRC Identifier Allocation and Use
+
+ The SSRC identifier carried in the RTP header and in various fields
+ of RTCP packets is a random 32-bit number that is required to be
+ globally unique within an RTP session. It is crucial that the number
+ be chosen with care in order that participants on the same network or
+ starting at the same time are not likely to choose the same number.
+
+ It is not sufficient to use the local network address (such as an
+ IPv4 address) for the identifier because the address may not be
+ unique. Since RTP translators and mixers enable interoperation among
+ multiple networks with different address spaces, the allocation
+ patterns for addresses within two spaces might result in a much
+ higher rate of collision than would occur with random allocation.
+
+ Multiple sources running on one host would also conflict.
+
+ It is also not sufficient to obtain an SSRC identifier simply by
+ calling random() without carefully initializing the state. An
+ example of how to generate a random identifier is presented in
+ Appendix A.6.
+
+8.1 Probability of Collision
+
+ Since the identifiers are chosen randomly, it is possible that two or
+ more sources will choose the same number. Collision occurs with the
+ highest probability when all sources are started simultaneously, for
+ example when triggered automatically by some session management
+ event. If N is the number of sources and L the length of the
+ identifier (here, 32 bits), the probability that two sources
+ independently pick the same value can be approximated for large N
+ [26] as 1 - exp(-N**2 / 2**(L+1)). For N=1000, the probability is
+ roughly 10**-4.
+
+ The typical collision probability is much lower than the worst-case
+ above. When one new source joins an RTP session in which all the
+ other sources already have unique identifiers, the probability of
+ collision is just the fraction of numbers used out of the space.
+ Again, if N is the number of sources and L the length of the
+ identifier, the probability of collision is N / 2**L. For N=1000,
+ the probability is roughly 2*10**-7.
+
+ The probability of collision is further reduced by the opportunity
+ for a new source to receive packets from other participants before
+ sending its first packet (either data or control). If the new source
+ keeps track of the other participants (by SSRC identifier), then
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 59]
+
+RFC 3550 RTP July 2003
+
+
+ before transmitting its first packet the new source can verify that
+ its identifier does not conflict with any that have been received, or
+ else choose again.
+
+8.2 Collision Resolution and Loop Detection
+
+ Although the probability of SSRC identifier collision is low, all RTP
+ implementations MUST be prepared to detect collisions and take the
+ appropriate actions to resolve them. If a source discovers at any
+ time that another source is using the same SSRC identifier as its
+ own, it MUST send an RTCP BYE packet for the old identifier and
+ choose another random one. (As explained below, this step is taken
+ only once in case of a loop.) If a receiver discovers that two other
+ sources are colliding, it MAY keep the packets from one and discard
+ the packets from the other when this can be detected by different
+ source transport addresses or CNAMEs. The two sources are expected
+ to resolve the collision so that the situation doesn't last.
+
+ Because the random SSRC identifiers are kept globally unique for each
+ RTP session, they can also be used to detect loops that may be
+ introduced by mixers or translators. A loop causes duplication of
+ data and control information, either unmodified or possibly mixed, as
+ in the following examples:
+
+ o A translator may incorrectly forward a packet to the same
+ multicast group from which it has received the packet, either
+ directly or through a chain of translators. In that case, the
+ same packet appears several times, originating from different
+ network sources.
+
+ o Two translators incorrectly set up in parallel, i.e., with the
+ same multicast groups on both sides, would both forward packets
+ from one multicast group to the other. Unidirectional translators
+ would produce two copies; bidirectional translators would form a
+ loop.
+
+ o A mixer can close a loop by sending to the same transport
+ destination upon which it receives packets, either directly or
+ through another mixer or translator. In this case a source might
+ show up both as an SSRC on a data packet and a CSRC in a mixed
+ data packet.
+
+ A source may discover that its own packets are being looped, or that
+ packets from another source are being looped (a third-party loop).
+ Both loops and collisions in the random selection of a source
+ identifier result in packets arriving with the same SSRC identifier
+ but a different source transport address, which may be that of the
+ end system originating the packet or an intermediate system.
+
+
+
+Schulzrinne, et al. Standards Track [Page 60]
+
+RFC 3550 RTP July 2003
+
+
+ Therefore, if a source changes its source transport address, it MAY
+ also choose a new SSRC identifier to avoid being interpreted as a
+ looped source. (This is not a MUST because in some applications of RTP
+ sources may be expected to change addresses during a session.) Note
+ that if a translator restarts and consequently changes the source
+ transport address (e.g., changes the UDP source port number) on which
+ it forwards packets, then all those packets will appear to receivers
+ to be looped because the SSRC identifiers are applied by the original
+ source and will not change. This problem can be avoided by keeping
+ the source transport address fixed across restarts, but in any case
+ will be resolved after a timeout at the receivers.
+
+ Loops or collisions occurring on the far side of a translator or
+ mixer cannot be detected using the source transport address if all
+ copies of the packets go through the translator or mixer; however,
+ collisions may still be detected when chunks from two RTCP SDES
+ packets contain the same SSRC identifier but different CNAMEs.
+
+ To detect and resolve these conflicts, an RTP implementation MUST
+ include an algorithm similar to the one described below, though the
+ implementation MAY choose a different policy for which packets from
+ colliding third-party sources are kept. The algorithm described
+ below ignores packets from a new source or loop that collide with an
+ established source. It resolves collisions with the participant's
+ own SSRC identifier by sending an RTCP BYE for the old identifier and
+ choosing a new one. However, when the collision was induced by a
+ loop of the participant's own packets, the algorithm will choose a
+ new identifier only once and thereafter ignore packets from the
+ looping source transport address. This is required to avoid a flood
+ of BYE packets.
+
+ This algorithm requires keeping a table indexed by the source
+ identifier and containing the source transport addresses from the
+ first RTP packet and first RTCP packet received with that identifier,
+ along with other state for that source. Two source transport
+ addresses are required since, for example, the UDP source port
+ numbers may be different on RTP and RTCP packets. However, it may be
+ assumed that the network address is the same in both source transport
+ addresses.
+
+ Each SSRC or CSRC identifier received in an RTP or RTCP packet is
+ looked up in the source identifier table in order to process that
+ data or control information. The source transport address from the
+ packet is compared to the corresponding source transport address in
+ the table to detect a loop or collision if they don't match. For
+ control packets, each element with its own SSRC identifier, for
+ example an SDES chunk, requires a separate lookup. (The SSRC
+ identifier in a reception report block is an exception because it
+
+
+
+Schulzrinne, et al. Standards Track [Page 61]
+
+RFC 3550 RTP July 2003
+
+
+ identifies a source heard by the reporter, and that SSRC identifier
+ is unrelated to the source transport address of the RTCP packet sent
+ by the reporter.) If the SSRC or CSRC is not found, a new entry is
+ created. These table entries are removed when an RTCP BYE packet is
+ received with the corresponding SSRC identifier and validated by a
+ matching source transport address, or after no packets have arrived
+ for a relatively long time (see Section 6.2.1).
+
+ Note that if two sources on the same host are transmitting with the
+ same source identifier at the time a receiver begins operation, it
+ would be possible that the first RTP packet received came from one of
+ the sources while the first RTCP packet received came from the other.
+ This would cause the wrong RTCP information to be associated with the
+ RTP data, but this situation should be sufficiently rare and harmless
+ that it may be disregarded.
+
+ In order to track loops of the participant's own data packets, the
+ implementation MUST also keep a separate list of source transport
+ addresses (not identifiers) that have been found to be conflicting.
+ As in the source identifier table, two source transport addresses
+ MUST be kept to separately track conflicting RTP and RTCP packets.
+ Note that the conflicting address list should be short, usually
+ empty. Each element in this list stores the source addresses plus
+ the time when the most recent conflicting packet was received. An
+ element MAY be removed from the list when no conflicting packet has
+ arrived from that source for a time on the order of 10 RTCP report
+ intervals (see Section 6.2).
+
+ For the algorithm as shown, it is assumed that the participant's own
+ source identifier and state are included in the source identifier
+ table. The algorithm could be restructured to first make a separate
+ comparison against the participant's own source identifier.
+
+ if (SSRC or CSRC identifier is not found in the source
+     identifier table) {
+     create a new entry storing the data or control source
+         transport address, the SSRC or CSRC and other state;
+ }
+
+ /* Identifier is found in the table */
+
+ else if (table entry was created on receipt of a control packet
+          and this is the first data packet or vice versa) {
+     store the source transport address from this packet;
+ }
+ else if (source transport address from the packet does not match
+          the one saved in the table entry for this identifier) {
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 62]
+
+RFC 3550 RTP July 2003
+
+
+     /* An identifier collision or a loop is indicated */
+
+     if (source identifier is not the participant's own) {
+         /* OPTIONAL error counter step */
+         if (source identifier is from an RTCP SDES chunk
+             containing a CNAME item that differs from the CNAME
+             in the table entry) {
+             count a third-party collision;
+         } else {
+             count a third-party loop;
+         }
+         abort processing of data packet or control element;
+         /* MAY choose a different policy to keep new source */
+     }
+
+     /* A collision or loop of the participant's own packets */
+
+     else if (source transport address is found in the list of
+              conflicting data or control source transport
+              addresses) {
+         /* OPTIONAL error counter step */
+         if (source identifier is not from an RTCP SDES chunk
+             containing a CNAME item or CNAME is the
+             participant's own) {
+             count occurrence of own traffic looped;
+         }
+         mark current time in conflicting address list entry;
+         abort processing of data packet or control element;
+     }
+
+     /* New collision, change SSRC identifier */
+
+     else {
+         log occurrence of a collision;
+         create a new entry in the conflicting data or control
+             source transport address list and mark current time;
+         send an RTCP BYE packet with the old SSRC identifier;
+         choose a new SSRC identifier;
+         create a new entry in the source identifier table with
+             the old SSRC plus the source transport address from
+             the data or control packet being processed;
+     }
+ }
+
+ In this algorithm, packets from a newly conflicting source address
+ will be ignored and packets from the original source address will be
+ kept. If no packets arrive from the original source for an extended
+ period, the table entry will be timed out and the new source will be
+
+
+
+Schulzrinne, et al. Standards Track [Page 63]
+
+RFC 3550 RTP July 2003
+
+
+ able to take over. This might occur if the original source detects
+ the collision and moves to a new source identifier, but in the usual
+ case an RTCP BYE packet will be received from the original source to
+ delete the state without having to wait for a timeout.
+
+ If the original source address was received through a mixer (i.e.,
+ learned as a CSRC) and later the same source is received directly,
+ the receiver may be well advised to switch to the new source address
+ unless other sources in the mix would be lost. Furthermore, for
+ applications such as telephony in which some sources such as mobile
+ entities may change addresses during the course of an RTP session,
+ the RTP implementation SHOULD modify the collision detection
+ algorithm to accept packets from the new source transport address.
+ To guard against flip-flopping between addresses if a genuine
+ collision does occur, the algorithm SHOULD include some means to
+ detect this case and avoid switching.
+
+ When a new SSRC identifier is chosen due to a collision, the
+ candidate identifier SHOULD first be looked up in the source
+ identifier table to see if it was already in use by some other
+ source. If so, another candidate MUST be generated and the process
+ repeated.
+
+ A loop of data packets to a multicast destination can cause severe
+ network flooding. All mixers and translators MUST implement a loop
+ detection algorithm like the one here so that they can break loops.
+ This should limit the excess traffic to no more than one duplicate
+ copy of the original traffic, which may allow the session to continue
+ so that the cause of the loop can be found and fixed. However, in
+ extreme cases where a mixer or translator does not properly break the
+ loop and high traffic levels result, it may be necessary for end
+ systems to cease transmitting data or control packets entirely. This
+ decision may depend upon the application. An error condition SHOULD
+ be indicated as appropriate. Transmission MAY be attempted again
+ periodically after a long, random time (on the order of minutes).
+
+8.3 Use with Layered Encodings
+
+ For layered encodings transmitted on separate RTP sessions (see
+ Section 2.4), a single SSRC identifier space SHOULD be used across
+ the sessions of all layers and the core (base) layer SHOULD be used
+ for SSRC identifier allocation and collision resolution. When a
+ source discovers that it has collided, it transmits an RTCP BYE
+ packet on only the base layer but changes the SSRC identifier to the
+ new value in all layers.
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 64]
+
+RFC 3550 RTP July 2003
+
+
+9. Security
+
+ Lower layer protocols may eventually provide all the security
+ services that may be desired for applications of RTP, including
+ authentication, integrity, and confidentiality. These services have
+ been specified for IP in [27]. Since the initial audio and video
+ applications using RTP needed a confidentiality service before such
+ services were available for the IP layer, the confidentiality service
+ described in the next section was defined for use with RTP and RTCP.
+ That description is included here to codify existing practice. New
+ applications of RTP MAY implement this RTP-specific confidentiality
+ service for backward compatibility, and/or they MAY implement
+ alternative security services. The overhead on the RTP protocol for
+ this confidentiality service is low, so the penalty will be minimal
+ if this service is obsoleted by other services in the future.
+
+ Alternatively, other services, other implementations of services and
+ other algorithms may be defined for RTP in the future. In
+ particular, an RTP profile called Secure Real-time Transport Protocol
+ (SRTP) [28] is being developed to provide confidentiality of the RTP
+ payload while leaving the RTP header in the clear so that link-level
+ header compression algorithms can still operate. It is expected that
+ SRTP will be the correct choice for many applications. SRTP is based
+ on the Advanced Encryption Standard (AES) and provides stronger
+ security than the service described here. No claim is made that the
+ methods presented here are appropriate for a particular security
+ need. A profile may specify which services and algorithms should be
+ offered by applications, and may provide guidance as to their
+ appropriate use.
+
+ Key distribution and certificates are outside the scope of this
+ document.
+
+9.1 Confidentiality
+
+ Confidentiality means that only the intended receiver(s) can decode
+ the received packets; for others, the packet contains no useful
+ information. Confidentiality of the content is achieved by
+ encryption.
+
+ When it is desired to encrypt RTP or RTCP according to the method
+ specified in this section, all the octets that will be encapsulated
+ for transmission in a single lower-layer packet are encrypted as a
+ unit. For RTCP, a 32-bit random number redrawn for each unit MUST be
+ prepended to the unit before encryption. For RTP, no prefix is
+ prepended; instead, the sequence number and timestamp fields are
+ initialized with random offsets. This is considered to be a weak
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 65]
+
+RFC 3550 RTP July 2003
+
+
+ initialization vector (IV) because of poor randomness properties. In
+ addition, if the subsequent field, the SSRC, can be manipulated by an
+ enemy, there is further weakness of the encryption method.
+
+ For RTCP, an implementation MAY segregate the individual RTCP packets
+ in a compound RTCP packet into two separate compound RTCP packets,
+ one to be encrypted and one to be sent in the clear. For example,
+ SDES information might be encrypted while reception reports were sent
+ in the clear to accommodate third-party monitors that are not privy
+ to the encryption key. In this example, depicted in Fig. 4, the SDES
+ information MUST be appended to an RR packet with no reports (and the
+ random number) to satisfy the requirement that all compound RTCP
+ packets begin with an SR or RR packet. The SDES CNAME item is
+ required in either the encrypted or unencrypted packet, but not both.
+ The same SDES information SHOULD NOT be carried in both packets as
+ this may compromise the encryption.
+
+           UDP packet                    UDP packet
+ ----------------------------- ------------------------------
+ [random][RR][SDES #CNAME ...] [SR #senderinfo #site1 #site2]
+ ----------------------------- ------------------------------
+           encrypted                   not encrypted
+
+ #: SSRC identifier
+
+ Figure 4: Encrypted and non-encrypted RTCP packets
+
+ The presence of encryption and the use of the correct key are
+ confirmed by the receiver through header or payload validity checks.
+ Examples of such validity checks for RTP and RTCP headers are given
+ in Appendices A.1 and A.2.
+
+ To be consistent with existing implementations of the initial
+ specification of RTP in RFC 1889, the default encryption algorithm is
+ the Data Encryption Standard (DES) algorithm in cipher block chaining
+ (CBC) mode, as described in Section 1.1 of RFC 1423 [29], except that
+ padding to a multiple of 8 octets is indicated as described for the P
+ bit in Section 5.1. The initialization vector is zero because random
+ values are supplied in the RTP header or by the random prefix for
+ compound RTCP packets. For details on the use of CBC initialization
+ vectors, see [30].
+
+ Implementations that support the encryption method specified here
+ SHOULD always support the DES algorithm in CBC mode as the default
+ cipher for this method to maximize interoperability. This method was
+ chosen because it has been demonstrated to be easy and practical to
+ use in experimental audio and video tools in operation on the
+ Internet. However, DES has since been found to be too easily broken.
+
+
+
+Schulzrinne, et al. Standards Track [Page 66]
+
+RFC 3550 RTP July 2003
+
+
+ It is RECOMMENDED that stronger encryption algorithms such as
+ Triple-DES be used in place of the default algorithm. Furthermore,
+ secure CBC mode requires that the first block of each packet be XORed
+ with a random, independent IV of the same size as the cipher's block
+ size. For RTCP, this is (partially) achieved by prepending each
+ packet with a 32-bit random number, independently chosen for each
+ packet. For RTP, the timestamp and sequence number start from random
+ values, but consecutive packets will not be independently randomized.
+ It should be noted that the randomness in both cases (RTP and RTCP)
+ is limited. High-security applications SHOULD consider other, more
+ conventional, protection means. Other encryption algorithms MAY be
+ specified dynamically for a session by non-RTP means. In particular,
+ the SRTP profile [28] based on AES is being developed to take into
+ account known plaintext and CBC plaintext manipulation concerns, and
+ will be the correct choice in the future.
+
+ As an alternative to encryption at the IP level or at the RTP level
+ as described above, profiles MAY define additional payload types for
+ encrypted encodings. Those encodings MUST specify how padding and
+ other aspects of the encryption are to be handled. This method
+ allows encrypting only the data while leaving the headers in the
+ clear for applications where that is desired. It may be particularly
+ useful for hardware devices that will handle both decryption and
+ decoding. It is also valuable for applications where link-level
+ compression of RTP and lower-layer headers is desired and
+ confidentiality of the payload (but not addresses) is sufficient
+ since encryption of the headers precludes compression.
+
+9.2 Authentication and Message Integrity
+
+ Authentication and message integrity services are not defined at the
+ RTP level since these services would not be directly feasible without
+ a key management infrastructure. It is expected that authentication
+ and integrity services will be provided by lower layer protocols.
+
+10. Congestion Control
+
+ All transport protocols used on the Internet need to address
+ congestion control in some way [31]. RTP is not an exception, but
+ because the data transported over RTP is often inelastic (generated
+ at a fixed or controlled rate), the means to control congestion in
+ RTP may be quite different from those for other transport protocols
+ such as TCP. In one sense, inelasticity reduces the risk of
+ congestion because the RTP stream will not expand to consume all
+ available bandwidth as a TCP stream can. However, inelasticity also
+ means that the RTP stream cannot arbitrarily reduce its load on the
+ network to eliminate congestion when it occurs.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 67]
+
+RFC 3550 RTP July 2003
+
+
+ Since RTP may be used for a wide variety of applications in many
+ different contexts, there is no single congestion control mechanism
+ that will work for all. Therefore, congestion control SHOULD be
+ defined in each RTP profile as appropriate. For some profiles, it
+ may be sufficient to include an applicability statement restricting
+ the use of that profile to environments where congestion is avoided
+ by engineering. For other profiles, specific methods such as data
+ rate adaptation based on RTCP feedback may be required.
+
+11. RTP over Network and Transport Protocols
+
+ This section describes issues specific to carrying RTP packets within
+ particular network and transport protocols. The following rules
+ apply unless superseded by protocol-specific definitions outside this
+ specification.
+
+ RTP relies on the underlying protocol(s) to provide demultiplexing of
+ RTP data and RTCP control streams. For UDP and similar protocols,
+ RTP SHOULD use an even destination port number and the corresponding
+ RTCP stream SHOULD use the next higher (odd) destination port number.
+ For applications that take a single port number as a parameter and
+ derive the RTP and RTCP port pair from that number, if an odd number
+ is supplied then the application SHOULD replace that number with the
+ next lower (even) number to use as the base of the port pair. For
+ applications in which the RTP and RTCP destination port numbers are
+ specified via explicit, separate parameters (using a signaling
+ protocol or other means), the application MAY disregard the
+ restrictions that the port numbers be even/odd and consecutive
+ although the use of an even/odd port pair is still encouraged. The
+ RTP and RTCP port numbers MUST NOT be the same since RTP relies on
+ the port numbers to demultiplex the RTP data and RTCP control
+ streams.
+
+ In a unicast session, both participants need to identify a port pair
+ for receiving RTP and RTCP packets. Both participants MAY use the
+ same port pair. A participant MUST NOT assume that the source port
+ of the incoming RTP or RTCP packet can be used as the destination
+ port for outgoing RTP or RTCP packets. When RTP data packets are
+ being sent in both directions, each participant's RTCP SR packets
+ MUST be sent to the port that the other participant has specified for
+ reception of RTCP. The RTCP SR packets combine sender information
+ for the outgoing data plus reception report information for the
+ incoming data. If a side is not actively sending data (see Section
+ 6.4), an RTCP RR packet is sent instead.
+
+ It is RECOMMENDED that layered encoding applications (see Section
+ 2.4) use a set of contiguous port numbers. The port numbers MUST be
+ distinct because of a widespread deficiency in existing operating
+
+
+
+Schulzrinne, et al. Standards Track [Page 68]
+
+RFC 3550 RTP July 2003
+
+
+ systems that prevents use of the same port with multiple multicast
+ addresses, and for unicast, there is only one permissible address.
+ Thus for layer n, the data port is P + 2n, and the control port is P
+ + 2n + 1. When IP multicast is used, the addresses MUST also be
+ distinct because multicast routing and group membership are managed
+ on an address granularity. However, allocation of contiguous IP
+ multicast addresses cannot be assumed because some groups may require
+ different scopes and may therefore be allocated from different
+ address ranges.
+
+ The previous paragraph conflicts with the SDP specification, RFC 2327
+ [15], which says that it is illegal for both multiple addresses and
+ multiple ports to be specified in the same session description
+ because the association of addresses with ports could be ambiguous.
+ It is intended that this restriction will be relaxed in a revision of
+ RFC 2327 to allow an equal number of addresses and ports to be
+ specified with a one-to-one mapping implied.
+
+ RTP data packets contain no length field or other delineation,
+ therefore RTP relies on the underlying protocol(s) to provide a
+ length indication. The maximum length of RTP packets is limited only
+ by the underlying protocols.
+
+ If RTP packets are to be carried in an underlying protocol that
+ provides the abstraction of a continuous octet stream rather than
+ messages (packets), an encapsulation of the RTP packets MUST be
+ defined to provide a framing mechanism. Framing is also needed if
+ the underlying protocol may contain padding so that the extent of the
+ RTP payload cannot be determined. The framing mechanism is not
+ defined here.
+
+ A profile MAY specify a framing method to be used even when RTP is
+ carried in protocols that do provide framing in order to allow
+ carrying several RTP packets in one lower-layer protocol data unit,
+ such as a UDP packet. Carrying several RTP packets in one network or
+ transport packet reduces header overhead and may simplify
+ synchronization between different streams.
+
+12. Summary of Protocol Constants
+
+ This section contains a summary listing of the constants defined in
+ this specification.
+
+ The RTP payload type (PT) constants are defined in profiles rather
+ than this document. However, the octet of the RTP header which
+ contains the marker bit(s) and payload type MUST avoid the reserved
+ values 200 and 201 (decimal) to distinguish RTP packets from the RTCP
+ SR and RR packet types for the header validation procedure described
+
+
+
+Schulzrinne, et al. Standards Track [Page 69]
+
+RFC 3550 RTP July 2003
+
+
+ in Appendix A.1. For the standard definition of one marker bit and a
+ 7-bit payload type field as shown in this specification, this
+ restriction means that payload types 72 and 73 are reserved.
+
+12.1 RTCP Packet Types
+
+ abbrev.  name                  value
+ SR       sender report           200
+ RR       receiver report         201
+ SDES     source description      202
+ BYE      goodbye                 203
+ APP      application-defined     204
+
+ These type values were chosen in the range 200-204 for improved
+ header validity checking of RTCP packets compared to RTP packets or
+ other unrelated packets. When the RTCP packet type field is compared
+ to the corresponding octet of the RTP header, this range corresponds
+ to the marker bit being 1 (which it usually is not in data packets)
+ and to the high bit of the standard payload type field being 1 (since
+ the static payload types are typically defined in the low half).
+ This range was also chosen to be some distance numerically from 0 and
+ 255 since all-zeros and all-ones are common data patterns.
+
+ Since all compound RTCP packets MUST begin with SR or RR, these codes
+ were chosen as an even/odd pair to allow the RTCP validity check to
+ test the maximum number of bits with mask and value.
+
+ Additional RTCP packet types may be registered through IANA (see
+ Section 15).
+
+12.2 SDES Types
+
+ abbrev.  name                             value
+ END      end of SDES list                     0
+ CNAME    canonical name                       1
+ NAME     user name                            2
+ EMAIL    user's electronic mail address       3
+ PHONE    user's phone number                  4
+ LOC      geographic user location             5
+ TOOL     name of application or tool          6
+ NOTE     notice about the source              7
+ PRIV     private extensions                   8
+
+ Additional SDES types may be registered through IANA (see Section
+ 15).
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 70]
+
+RFC 3550 RTP July 2003
+
+
+13. RTP Profiles and Payload Format Specifications
+
+ A complete specification of RTP for a particular application will
+ require one or more companion documents of two types described here:
+ profiles, and payload format specifications.
+
+ RTP may be used for a variety of applications with somewhat differing
+ requirements. The flexibility to adapt to those requirements is
+ provided by allowing multiple choices in the main protocol
+ specification, then selecting the appropriate choices or defining
+ extensions for a particular environment and class of applications in
+ a separate profile document. Typically an application will operate
+ under only one profile in a particular RTP session, so there is no
+ explicit indication within the RTP protocol itself as to which
+ profile is in use. A profile for audio and video applications may be
+ found in the companion RFC 3551. Profiles are typically titled "RTP
+ Profile for ...".
+
+ The second type of companion document is a payload format
+ specification, which defines how a particular kind of payload data,
+ such as H.261 encoded video, should be carried in RTP. These
+ documents are typically titled "RTP Payload Format for XYZ
+ Audio/Video Encoding". Payload formats may be useful under multiple
+ profiles and may therefore be defined independently of any particular
+ profile. The profile documents are then responsible for assigning a
+ default mapping of that format to a payload type value if needed.
+
+ Within this specification, the following items have been identified
+ for possible definition within a profile, but this list is not meant
+ to be exhaustive:
+
+ RTP data header: The octet in the RTP data header that contains
+ the marker bit and payload type field MAY be redefined by a
+ profile to suit different requirements, for example with more or
+ fewer marker bits (Section 5.3, p. 18).
+
+ Payload types: Assuming that a payload type field is included,
+ the profile will usually define a set of payload formats (e.g.,
+ media encodings) and a default static mapping of those formats to
+ payload type values. Some of the payload formats may be defined
+ by reference to separate payload format specifications. For each
+ payload type defined, the profile MUST specify the RTP timestamp
+ clock rate to be used (Section 5.1, p. 14).
+
+ RTP data header additions: Additional fields MAY be appended to
+ the fixed RTP data header if some additional functionality is
+ required across the profile's class of applications independent of
+ payload type (Section 5.3, p. 18).
+
+
+
+Schulzrinne, et al. Standards Track [Page 71]
+
+RFC 3550 RTP July 2003
+
+
+ RTP data header extensions: The contents of the first 16 bits of
+ the RTP data header extension structure MUST be defined if use of
+ that mechanism is to be allowed under the profile for
+ implementation-specific extensions (Section 5.3.1, p. 18).
+
+ RTCP packet types: New application-class-specific RTCP packet
+ types MAY be defined and registered with IANA.
+
+ RTCP report interval: A profile SHOULD specify that the values
+ suggested in Section 6.2 for the constants employed in the
+ calculation of the RTCP report interval will be used. Those are
+ the RTCP fraction of session bandwidth, the minimum report
+ interval, and the bandwidth split between senders and receivers.
+ A profile MAY specify alternate values if they have been
+ demonstrated to work in a scalable manner.
+
+ SR/RR extension: An extension section MAY be defined for the
+ RTCP SR and RR packets if there is additional information that
+ should be reported regularly about the sender or receivers
+ (Section 6.4.3, p. 42 and 43).
+
+ SDES use: The profile MAY specify the relative priorities for
+ RTCP SDES items to be transmitted or excluded entirely (Section
+ 6.3.9); an alternate syntax or semantics for the CNAME item
+ (Section 6.5.1); the format of the LOC item (Section 6.5.5); the
+ semantics and use of the NOTE item (Section 6.5.7); or new SDES
+ item types to be registered with IANA.
+
+ Security: A profile MAY specify which security services and
+ algorithms should be offered by applications, and MAY provide
+ guidance as to their appropriate use (Section 9, p. 65).
+
+ String-to-key mapping: A profile MAY specify how a user-provided
+ password or pass phrase is mapped into an encryption key.
+
+ Congestion: A profile SHOULD specify the congestion control
+ behavior appropriate for that profile.
+
+ Underlying protocol: Use of a particular underlying network or
+ transport layer protocol to carry RTP packets MAY be required.
+
+ Transport mapping: A mapping of RTP and RTCP to transport-level
+ addresses, e.g., UDP ports, other than the standard mapping
+ defined in Section 11, p. 68 may be specified.
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 72]
+
+RFC 3550 RTP July 2003
+
+
+ Encapsulation: An encapsulation of RTP packets may be defined to
+ allow multiple RTP data packets to be carried in one lower-layer
+ packet or to provide framing over underlying protocols that do not
+ already do so (Section 11, p. 69).
+
+ It is not expected that a new profile will be required for every
+ application. Within one application class, it would be better to
+ extend an existing profile rather than make a new one in order to
+ facilitate interoperation among the applications since each will
+ typically run under only one profile. Simple extensions such as the
+ definition of additional payload type values or RTCP packet types may
+ be accomplished by registering them through IANA and publishing their
+ descriptions in an addendum to the profile or in a payload format
+ specification.
+
+14. Security Considerations
+
+ RTP suffers from the same security liabilities as the underlying
+ protocols. For example, an impostor can fake source or destination
+ network addresses, or change the header or payload. Within RTCP, the
+ CNAME and NAME information may be used to impersonate another
+ participant. In addition, RTP may be sent via IP multicast, which
+ provides no direct means for a sender to know all the receivers of
+ the data sent and therefore no measure of privacy. Rightly or not,
+ users may be more sensitive to privacy concerns with audio and video
+ communication than they have been with more traditional forms of
+ network communication [33]. Therefore, the use of security
+ mechanisms with RTP is important. These mechanisms are discussed in
+ Section 9.
+
+ RTP-level translators or mixers may be used to allow RTP traffic to
+ reach hosts behind firewalls. Appropriate firewall security
+ principles and practices, which are beyond the scope of this
+ document, should be followed in the design and installation of these
+ devices and in the admission of RTP applications for use behind the
+ firewall.
+
+15. IANA Considerations
+
+ Additional RTCP packet types and SDES item types may be registered
+ through the Internet Assigned Numbers Authority (IANA). Since these
+ number spaces are small, allowing unconstrained registration of new
+ values would not be prudent. To facilitate review of requests and to
+ promote shared use of new types among multiple applications, requests
+ for registration of new values must be documented in an RFC or other
+ permanent and readily available reference such as the product of
+ another cooperative standards body (e.g., ITU-T). Other requests may
+ also be accepted, under the advice of a "designated expert."
+
+
+
+Schulzrinne, et al. Standards Track [Page 73]
+
+RFC 3550 RTP July 2003
+
+
+ (Contact the IANA for the contact information of the current expert.)
+
+ RTP profile specifications SHOULD register with IANA a name for the
+ profile in the form "RTP/xxx", where xxx is a short abbreviation of
+ the profile title. These names are for use by higher-level control
+ protocols, such as the Session Description Protocol (SDP), RFC 2327
+ [15], to refer to transport methods.
+
+16. Intellectual Property Rights Statement
+
+ The IETF takes no position regarding the validity or scope of any
+ intellectual property or other rights that might be claimed to
+ pertain to the implementation or use of the technology described in
+ this document or the extent to which any license under such rights
+ might or might not be available; neither does it represent that it
+ has made any effort to identify any such rights. Information on the
+ IETF's procedures with respect to rights in standards-track and
+ standards-related documentation can be found in BCP-11. Copies of
+ claims of rights made available for publication and any assurances of
+ licenses to be made available, or the result of an attempt made to
+ obtain a general license or permission for the use of such
+ proprietary rights by implementors or users of this specification can
+ be obtained from the IETF Secretariat.
+
+ The IETF invites any interested party to bring to its attention any
+ copyrights, patents or patent applications, or other proprietary
+ rights which may cover technology that may be required to practice
+ this standard. Please address the information to the IETF Executive
+ Director.
+
+17. Acknowledgments
+
+ This memorandum is based on discussions within the IETF Audio/Video
+ Transport working group chaired by Stephen Casner and Colin Perkins.
+ The current protocol has its origins in the Network Voice Protocol
+ and the Packet Video Protocol (Danny Cohen and Randy Cole) and the
+ protocol implemented by the vat application (Van Jacobson and Steve
+ McCanne). Christian Huitema provided ideas for the random identifier
+ generator. Extensive analysis and simulation of the timer
+ reconsideration algorithm was done by Jonathan Rosenberg. The
+ additions for layered encodings were specified by Michael Speer and
+ Steve McCanne.
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 74]
+
+RFC 3550 RTP July 2003
+
+
+Appendix A - Algorithms
+
+ We provide examples of C code for aspects of RTP sender and receiver
+ algorithms. There may be other implementation methods that are
+ faster in particular operating environments or have other advantages.
+ These implementation notes are for informational purposes only and
+ are meant to clarify the RTP specification.
+
+ The following definitions are used for all examples; for clarity and
+ brevity, the structure definitions are only valid for 32-bit big-
+ endian (most significant octet first) architectures. Bit fields are
+ assumed to be packed tightly in big-endian bit order, with no
+ additional padding. Modifications would be required to construct a
+ portable implementation.
+
+ /*
+ * rtp.h -- RTP header file
+ */
+ #include <sys/types.h>
+
+ /*
+ * The type definitions below are valid for 32-bit architectures and
+ * may have to be adjusted for 16- or 64-bit architectures.
+ */
+ typedef unsigned char u_int8;
+ typedef unsigned short u_int16;
+ typedef unsigned int u_int32;
+ typedef short int16;
+
+ /*
+ * Current protocol version.
+ */
+ #define RTP_VERSION 2
+
+ #define RTP_SEQ_MOD (1<<16)
+ #define RTP_MAX_SDES 255 /* maximum text length for SDES */
+
+ typedef enum {
+ RTCP_SR = 200,
+ RTCP_RR = 201,
+ RTCP_SDES = 202,
+ RTCP_BYE = 203,
+ RTCP_APP = 204
+ } rtcp_type_t;
+
+ typedef enum {
+ RTCP_SDES_END = 0,
+ RTCP_SDES_CNAME = 1,
+
+
+
+Schulzrinne, et al. Standards Track [Page 75]
+
+RFC 3550 RTP July 2003
+
+
+ RTCP_SDES_NAME = 2,
+ RTCP_SDES_EMAIL = 3,
+ RTCP_SDES_PHONE = 4,
+ RTCP_SDES_LOC = 5,
+ RTCP_SDES_TOOL = 6,
+ RTCP_SDES_NOTE = 7,
+ RTCP_SDES_PRIV = 8
+ } rtcp_sdes_type_t;
+
+ /*
+ * RTP data header
+ */
+ typedef struct {
+ unsigned int version:2; /* protocol version */
+ unsigned int p:1; /* padding flag */
+ unsigned int x:1; /* header extension flag */
+ unsigned int cc:4; /* CSRC count */
+ unsigned int m:1; /* marker bit */
+ unsigned int pt:7; /* payload type */
+ unsigned int seq:16; /* sequence number */
+ u_int32 ts; /* timestamp */
+ u_int32 ssrc; /* synchronization source */
+ u_int32 csrc[1]; /* optional CSRC list */
+ } rtp_hdr_t;
+
+ /*
+ * RTCP common header word
+ */
+ typedef struct {
+ unsigned int version:2; /* protocol version */
+ unsigned int p:1; /* padding flag */
+ unsigned int count:5; /* varies by packet type */
+ unsigned int pt:8; /* RTCP packet type */
+ u_int16 length; /* pkt len in words, w/o this word */
+ } rtcp_common_t;
+
+ /*
+ * Big-endian mask for version, padding bit and packet type pair
+ */
+ #define RTCP_VALID_MASK (0xc000 | 0x2000 | 0xfe)
+ #define RTCP_VALID_VALUE ((RTP_VERSION << 14) | RTCP_SR)
+
+ /*
+ * Reception report block
+ */
+ typedef struct {
+ u_int32 ssrc; /* data source being reported */
+ unsigned int fraction:8; /* fraction lost since last SR/RR */
+
+
+
+Schulzrinne, et al. Standards Track [Page 76]
+
+RFC 3550 RTP July 2003
+
+
+ int lost:24; /* cumul. no. pkts lost (signed!) */
+ u_int32 last_seq; /* extended last seq. no. received */
+ u_int32 jitter; /* interarrival jitter */
+ u_int32 lsr; /* last SR packet from this source */
+ u_int32 dlsr; /* delay since last SR packet */
+ } rtcp_rr_t;
+
+ /*
+ * SDES item
+ */
+ typedef struct {
+ u_int8 type; /* type of item (rtcp_sdes_type_t) */
+ u_int8 length; /* length of item (in octets) */
+ char data[1]; /* text, not null-terminated */
+ } rtcp_sdes_item_t;
+
+ /*
+ * One RTCP packet
+ */
+ typedef struct {
+ rtcp_common_t common; /* common header */
+ union {
+ /* sender report (SR) */
+ struct {
+ u_int32 ssrc; /* sender generating this report */
+ u_int32 ntp_sec; /* NTP timestamp */
+ u_int32 ntp_frac;
+ u_int32 rtp_ts; /* RTP timestamp */
+ u_int32 psent; /* packets sent */
+ u_int32 osent; /* octets sent */
+ rtcp_rr_t rr[1]; /* variable-length list */
+ } sr;
+
+ /* reception report (RR) */
+ struct {
+ u_int32 ssrc; /* receiver generating this report */
+ rtcp_rr_t rr[1]; /* variable-length list */
+ } rr;
+
+ /* source description (SDES) */
+ struct rtcp_sdes {
+ u_int32 src; /* first SSRC/CSRC */
+ rtcp_sdes_item_t item[1]; /* list of SDES items */
+ } sdes;
+
+ /* BYE */
+ struct {
+ u_int32 src[1]; /* list of sources */
+
+
+
+Schulzrinne, et al. Standards Track [Page 77]
+
+RFC 3550 RTP July 2003
+
+
+ /* can't express trailing text for reason */
+ } bye;
+ } r;
+ } rtcp_t;
+
+ typedef struct rtcp_sdes rtcp_sdes_t;
+
+ /*
+ * Per-source state information
+ */
+ typedef struct {
+ u_int16 max_seq; /* highest seq. number seen */
+ u_int32 cycles; /* shifted count of seq. number cycles */
+ u_int32 base_seq; /* base seq number */
+ u_int32 bad_seq; /* last 'bad' seq number + 1 */
+ u_int32 probation; /* sequ. packets till source is valid */
+ u_int32 received; /* packets received */
+ u_int32 expected_prior; /* packet expected at last interval */
+ u_int32 received_prior; /* packet received at last interval */
+ u_int32 transit; /* relative trans time for prev pkt */
+ u_int32 jitter; /* estimated jitter */
+ /* ... */
+ } source;
+
+A.1 RTP Data Header Validity Checks
+
+ An RTP receiver should check the validity of the RTP header on
+ incoming packets since they might be encrypted or might be from a
+ different application that happens to be misaddressed. Similarly, if
+ encryption according to the method described in Section 9 is enabled,
+ the header validity check is needed to verify that incoming packets
+ have been correctly decrypted, although a failure of the header
+ validity check (e.g., unknown payload type) may not necessarily
+ indicate decryption failure.
+
+ Only weak validity checks are possible on an RTP data packet from a
+ source that has not been heard before:
+
+ o RTP version field must equal 2.
+
+ o The payload type must be known, and in particular it must not be
+ equal to SR or RR.
+
+ o If the P bit is set, then the last octet of the packet must
+ contain a valid octet count, in particular, less than the total
+ packet length minus the header size.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 78]
+
+RFC 3550 RTP July 2003
+
+
+ o The X bit must be zero if the profile does not specify that the
+ header extension mechanism may be used. Otherwise, the extension
+ length field must be less than the total packet size minus the
+ fixed header length and padding.
+
+ o The length of the packet must be consistent with CC and payload
+ type (if payloads have a known length).
+
+ The last three checks are somewhat complex and not always possible,
+ leaving only the first two which total just a few bits. If the SSRC
+ identifier in the packet is one that has been received before, then
+ the packet is probably valid and checking if the sequence number is
+ in the expected range provides further validation. If the SSRC
+ identifier has not been seen before, then data packets carrying that
+ identifier may be considered invalid until a small number of them
+ arrive with consecutive sequence numbers. Those invalid packets MAY
+ be discarded or they MAY be stored and delivered once validation has
+ been achieved if the resulting delay is acceptable.
+
+ The routine update_seq shown below ensures that a source is declared
+ valid only after MIN_SEQUENTIAL packets have been received in
+ sequence. It also validates the sequence number seq of a newly
+ received packet and updates the sequence state for the packet's
+ source in the structure to which s points.
+
+ When a new source is heard for the first time, that is, its SSRC
+ identifier is not in the table (see Section 8.2), and the per-source
+ state is allocated for it, s->probation is set to the number of
+ sequential packets required before declaring a source valid
+ (parameter MIN_SEQUENTIAL) and other variables are initialized:
+
+ init_seq(s, seq);
+ s->max_seq = seq - 1;
+ s->probation = MIN_SEQUENTIAL;
+
+ A non-zero s->probation marks the source as not yet valid so the
+ state may be discarded after a short timeout rather than a long one,
+ as discussed in Section 6.2.1.
+
+ After a source is considered valid, the sequence number is considered
+ valid if it is no more than MAX_DROPOUT ahead of s->max_seq nor more
+ than MAX_MISORDER behind. If the new sequence number is ahead of
+ max_seq modulo the RTP sequence number range (16 bits), but is
+ smaller than max_seq, it has wrapped around and the (shifted) count
+ of sequence number cycles is incremented. A value of one is returned
+ to indicate a valid sequence number.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 79]
+
+RFC 3550 RTP July 2003
+
+
+ Otherwise, the value zero is returned to indicate that the validation
+ failed, and the bad sequence number plus 1 is stored. If the next
+ packet received carries the next higher sequence number, it is
+ considered the valid start of a new packet sequence presumably caused
+ by an extended dropout or a source restart. Since multiple complete
+ sequence number cycles may have been missed, the packet loss
+ statistics are reset.
+
+ Typical values for the parameters are shown, based on a maximum
+ misordering time of 2 seconds at 50 packets/second and a maximum
+ dropout of 1 minute. The dropout parameter MAX_DROPOUT should be a
+ small fraction of the 16-bit sequence number space to give a
+ reasonable probability that new sequence numbers after a restart will
+ not fall in the acceptable range for sequence numbers from before the
+ restart.
+
+ void init_seq(source *s, u_int16 seq)
+ {
+ s->base_seq = seq;
+ s->max_seq = seq;
+ s->bad_seq = RTP_SEQ_MOD + 1; /* so seq == bad_seq is false */
+ s->cycles = 0;
+ s->received = 0;
+ s->received_prior = 0;
+ s->expected_prior = 0;
+ /* other initialization */
+ }
+
+ int update_seq(source *s, u_int16 seq)
+ {
+ u_int16 udelta = seq - s->max_seq;
+ const int MAX_DROPOUT = 3000;
+ const int MAX_MISORDER = 100;
+ const int MIN_SEQUENTIAL = 2;
+
+ /*
+ * Source is not valid until MIN_SEQUENTIAL packets with
+ * sequential sequence numbers have been received.
+ */
+ if (s->probation) {
+ /* packet is in sequence */
+ if (seq == s->max_seq + 1) {
+ s->probation--;
+ s->max_seq = seq;
+ if (s->probation == 0) {
+ init_seq(s, seq);
+ s->received++;
+ return 1;
+
+
+
+Schulzrinne, et al. Standards Track [Page 80]
+
+RFC 3550 RTP July 2003
+
+
+ }
+ } else {
+ s->probation = MIN_SEQUENTIAL - 1;
+ s->max_seq = seq;
+ }
+ return 0;
+ } else if (udelta < MAX_DROPOUT) {
+ /* in order, with permissible gap */
+ if (seq < s->max_seq) {
+ /*
+ * Sequence number wrapped - count another 64K cycle.
+ */
+ s->cycles += RTP_SEQ_MOD;
+ }
+ s->max_seq = seq;
+ } else if (udelta <= RTP_SEQ_MOD - MAX_MISORDER) {
+ /* the sequence number made a very large jump */
+ if (seq == s->bad_seq) {
+ /*
+ * Two sequential packets -- assume that the other side
+ * restarted without telling us so just re-sync
+ * (i.e., pretend this was the first packet).
+ */
+ init_seq(s, seq);
+ }
+ else {
+ s->bad_seq = (seq + 1) & (RTP_SEQ_MOD-1);
+ return 0;
+ }
+ } else {
+ /* duplicate or reordered packet */
+ }
+ s->received++;
+ return 1;
+ }
+
+ The validity check can be made stronger by requiring more than two
+ packets in sequence. The disadvantages are that a larger number of
+ initial packets will be discarded (or delayed in a queue) and that
+ high packet loss rates could prevent validation. However, because
+ the RTCP header validation is relatively strong, if an RTCP packet is
+ received from a source before the data packets, the count could be
+ adjusted so that only two packets are required in sequence. If
+ initial data loss for a few seconds can be tolerated, an application
+ MAY choose to discard all data packets from a source until a valid
+ RTCP packet has been received from that source.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 81]
+
+RFC 3550 RTP July 2003
+
+
+ Depending on the application and encoding, algorithms may exploit
+ additional knowledge about the payload format for further validation.
+ For payload types where the timestamp increment is the same for all
+ packets, the timestamp values can be predicted from the previous
+ packet received from the same source using the sequence number
+ difference (assuming no change in payload type).
+
+ A strong "fast-path" check is possible since with high probability
+ the first four octets in the header of a newly received RTP data
+ packet will be just the same as that of the previous packet from the
+ same SSRC except that the sequence number will have increased by one.
+ Similarly, a single-entry cache may be used for faster SSRC lookups
+ in applications where data is typically received from one source at a
+ time.
+
+A.2 RTCP Header Validity Checks
+
+ The following checks should be applied to RTCP packets.
+
+ o RTP version field must equal 2.
+
+ o The payload type field of the first RTCP packet in a compound
+ packet must be equal to SR or RR.
+
+ o The padding bit (P) should be zero for the first packet of a
+ compound RTCP packet because padding should only be applied, if it
+ is needed, to the last packet.
+
+ o The length fields of the individual RTCP packets must add up to
+ the overall length of the compound RTCP packet as received. This
+ is a fairly strong check.
+
+ The code fragment below performs all of these checks. The packet
+ type is not checked for subsequent packets since unknown packet types
+ may be present and should be ignored.
+
+ u_int32 len; /* length of compound RTCP packet in words */
+ rtcp_t *r; /* RTCP header */
+ rtcp_t *end; /* end of compound RTCP packet */
+
+ if ((*(u_int16 *)r & RTCP_VALID_MASK) != RTCP_VALID_VALUE) {
+ /* something wrong with packet format */
+ }
+ end = (rtcp_t *)((u_int32 *)r + len);
+
+ do r = (rtcp_t *)((u_int32 *)r + r->common.length + 1);
+ while (r < end && r->common.version == 2);
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 82]
+
+RFC 3550 RTP July 2003
+
+
+ if (r != end) {
+ /* something wrong with packet format */
+ }
+
+A.3 Determining Number of Packets Expected and Lost
+
+ In order to compute packet loss rates, the number of RTP packets
+ expected and actually received from each source needs to be known,
+ using per-source state information defined in struct source
+ referenced via pointer s in the code below. The number of packets
+ received is simply the count of packets as they arrive, including any
+ late or duplicate packets. The number of packets expected can be
+ computed by the receiver as the difference between the highest
+ sequence number received (s->max_seq) and the first sequence number
+ received (s->base_seq). Since the sequence number is only 16 bits
+ and will wrap around, it is necessary to extend the highest sequence
+ number with the (shifted) count of sequence number wraparounds
+ (s->cycles). Both the received packet count and the count of cycles
+ are maintained by the RTP header validity check routine in Appendix A.1.
+
+ extended_max = s->cycles + s->max_seq;
+ expected = extended_max - s->base_seq + 1;
+
+ The number of packets lost is defined to be the number of packets
+ expected less the number of packets actually received:
+
+ lost = expected - s->received;
+
+ Since this signed number is carried in 24 bits, it should be clamped
+ at 0x7fffff for positive loss or 0x800000 for negative loss rather
+ than wrapping around.
+
+ The fraction of packets lost during the last reporting interval
+ (since the previous SR or RR packet was sent) is calculated from
+ differences in the expected and received packet counts across the
+ interval, where expected_prior and received_prior are the values
+ saved when the previous reception report was generated:
+
+ expected_interval = expected - s->expected_prior;
+ s->expected_prior = expected;
+ received_interval = s->received - s->received_prior;
+ s->received_prior = s->received;
+ lost_interval = expected_interval - received_interval;
+ if (expected_interval == 0 || lost_interval <= 0) fraction = 0;
+ else fraction = (lost_interval << 8) / expected_interval;
+
+ The resulting fraction is an 8-bit fixed point number with the binary
+ point at the left edge.
+
+
+
+Schulzrinne, et al. Standards Track [Page 83]
+
+RFC 3550 RTP July 2003
+
+
+A.4 Generating RTCP SDES Packets
+
+ This function builds one SDES chunk into buffer b composed of argc
+ items supplied in arrays type, value and length. It returns a
+ pointer to the next available location within b.
+
+ char *rtp_write_sdes(char *b, u_int32 src, int argc,
+ rtcp_sdes_type_t type[], char *value[],
+ int length[])
+ {
+ rtcp_sdes_t *s = (rtcp_sdes_t *)b;
+ rtcp_sdes_item_t *rsp;
+ int i;
+ int len;
+ int pad;
+
+ /* SSRC header */
+ s->src = src;
+ rsp = &s->item[0];
+
+ /* SDES items */
+ for (i = 0; i < argc; i++) {
+ rsp->type = type[i];
+ len = length[i];
+ if (len > RTP_MAX_SDES) {
+ /* invalid length, may want to take other action */
+ len = RTP_MAX_SDES;
+ }
+ rsp->length = len;
+ memcpy(rsp->data, value[i], len);
+ rsp = (rtcp_sdes_item_t *)&rsp->data[len];
+ }
+
+ /* terminate with end marker and pad to next 4-octet boundary */
+ len = ((char *) rsp) - b;
+ pad = 4 - (len & 0x3);
+ b = (char *) rsp;
+ while (pad--) *b++ = RTCP_SDES_END;
+
+ return b;
+ }
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 84]
+
+RFC 3550 RTP July 2003
+
+
+A.5 Parsing RTCP SDES Packets
+
+ This function parses an SDES packet, calling functions find_member()
+ to find a pointer to the information for a session member given the
+ SSRC identifier and member_sdes() to store the new SDES information
+ for that member. This function expects a pointer to the header of
+ the RTCP packet.
+
+ void rtp_read_sdes(rtcp_t *r)
+ {
+ int count = r->common.count;
+ rtcp_sdes_t *sd = &r->r.sdes;
+ rtcp_sdes_item_t *rsp, *rspn;
+ rtcp_sdes_item_t *end = (rtcp_sdes_item_t *)
+ ((u_int32 *)r + r->common.length + 1);
+ source *s;
+
+ while (--count >= 0) {
+ rsp = &sd->item[0];
+ if (rsp >= end) break;
+ s = find_member(sd->src);
+
+ for (; rsp->type; rsp = rspn ) {
+ rspn = (rtcp_sdes_item_t *)((char*)rsp+rsp->length+2);
+ if (rspn >= end) {
+ rsp = rspn;
+ break;
+ }
+ member_sdes(s, rsp->type, rsp->data, rsp->length);
+ }
+ sd = (rtcp_sdes_t *)
+ ((u_int32 *)sd + (((char *)rsp - (char *)sd) >> 2)+1);
+ }
+ if (count >= 0) {
+ /* invalid packet format */
+ }
+ }
+
+A.6 Generating a Random 32-bit Identifier
+
+ The following subroutine generates a random 32-bit identifier using
+ the MD5 routines published in RFC 1321 [32]. The system routines may
+ not be present on all operating systems, but they should serve as
+ hints as to what kinds of information may be used. Other system
+ calls that may be appropriate include
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 85]
+
+RFC 3550 RTP July 2003
+
+
+ o getdomainname(),
+
+ o getwd(), or
+
+ o getrusage().
+
+ "Live" video or audio samples are also a good source of random
+ numbers, but care must be taken to avoid using a turned-off
+ microphone or blinded camera as a source [17].
+
+ Use of this or a similar routine is recommended to generate the
+ initial seed for the random number generator producing the RTCP
+ period (as shown in Appendix A.7), to generate the initial values for
+ the sequence number and timestamp, and to generate SSRC values.
+ Since this routine is likely to be CPU-intensive, its direct use to
+ generate RTCP periods is inappropriate because predictability is not
+ an issue. Note that this routine produces the same result on
+ repeated calls until the value of the system clock changes unless
+ different values are supplied for the type argument.
+
+ /*
+ * Generate a random 32-bit quantity.
+ */
+ #include <sys/types.h> /* u_long */
+ #include <sys/time.h> /* gettimeofday() */
+ #include <unistd.h> /* get..() */
+ #include <stdio.h> /* printf() */
+ #include <time.h> /* clock() */
+ #include <sys/utsname.h> /* uname() */
+ #include "global.h" /* from RFC 1321 */
+ #include "md5.h" /* from RFC 1321 */
+
+ #define MD_CTX MD5_CTX
+ #define MDInit MD5Init
+ #define MDUpdate MD5Update
+ #define MDFinal MD5Final
+
+ static u_long md_32(char *string, int length)
+ {
+ MD_CTX context;
+ union {
+ char c[16];
+ u_long x[4];
+ } digest;
+ u_long r;
+ int i;
+
+ MDInit (&context);
+
+
+
+Schulzrinne, et al. Standards Track [Page 86]
+
+RFC 3550 RTP July 2003
+
+
+ MDUpdate (&context, string, length);
+ MDFinal ((unsigned char *)&digest, &context);
+ r = 0;
+ for (i = 0; i < 3; i++) {
+ r ^= digest.x[i];
+ }
+ return r;
+ } /* md_32 */
+
+ /*
+ * Return random unsigned 32-bit quantity. Use 'type' argument if
+ * you need to generate several different values in close succession.
+ */
+ u_int32 random32(int type)
+ {
+ struct {
+ int type;
+ struct timeval tv;
+ clock_t cpu;
+ pid_t pid;
+ u_long hid;
+ uid_t uid;
+ gid_t gid;
+ struct utsname name;
+ } s;
+
+ gettimeofday(&s.tv, 0);
+ uname(&s.name);
+ s.type = type;
+ s.cpu = clock();
+ s.pid = getpid();
+ s.hid = gethostid();
+ s.uid = getuid();
+ s.gid = getgid();
+ /* also: system uptime */
+
+ return md_32((char *)&s, sizeof(s));
+ } /* random32 */
+
+A.7 Computing the RTCP Transmission Interval
+
+ The following functions implement the RTCP transmission and reception
+ rules described in Section 6.2. These rules are coded in several
+ functions:
+
+ o rtcp_interval() computes the deterministic calculated interval,
+ measured in seconds. The parameters are defined in Section 6.3.
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 87]
+
+RFC 3550 RTP July 2003
+
+
+ o OnExpire() is called when the RTCP transmission timer expires.
+
+ o OnReceive() is called whenever an RTCP packet is received.
+
+ Both OnExpire() and OnReceive() have event e as an argument. This is
+ the next scheduled event for that participant, either an RTCP report
+ or a BYE packet. It is assumed that the following functions are
+ available:
+
+ o Schedule(time t, event e) schedules an event e to occur at time t.
+ When time t arrives, the function OnExpire is called with e as an
+ argument.
+
+ o Reschedule(time t, event e) reschedules a previously scheduled
+ event e for time t.
+
+ o SendRTCPReport(event e) sends an RTCP report.
+
+ o SendBYEPacket(event e) sends a BYE packet.
+
+ o TypeOfEvent(event e) returns EVENT_BYE if the event being
+ processed is for a BYE packet to be sent, else it returns
+ EVENT_REPORT.
+
+ o PacketType(p) returns PACKET_RTCP_REPORT if packet p is an RTCP
+ report (not BYE), PACKET_BYE if it is a BYE RTCP packet, and
+ PACKET_RTP if it is a regular RTP data packet.
+
+ o ReceivedPacketSize() and SentPacketSize() return the size of the
+ referenced packet in octets.
+
+ o NewMember(p) returns a 1 if the participant who sent packet p is
+ not currently in the member list, 0 otherwise. Note this function
+ is not sufficient for a complete implementation because each CSRC
+ identifier in an RTP packet and each SSRC in a BYE packet should
+ be processed.
+
+ o NewSender(p) returns a 1 if the participant who sent packet p is
+ not currently in the sender sublist of the member list, 0
+ otherwise.
+
+ o AddMember() and RemoveMember() to add and remove participants from
+ the member list.
+
+ o AddSender() and RemoveSender() to add and remove participants from
+ the sender sublist of the member list.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 88]
+
+RFC 3550 RTP July 2003
+
+
+ These functions would have to be extended for an implementation that
+ allows the RTCP bandwidth fractions for senders and non-senders to be
+ specified as explicit parameters rather than fixed values of 25% and
+ 75%. The extended implementation of rtcp_interval() would need to
+ avoid division by zero if one of the parameters was zero.
+
+ double rtcp_interval(int members,
+ int senders,
+ double rtcp_bw,
+ int we_sent,
+ double avg_rtcp_size,
+ int initial)
+ {
+ /*
+ * Minimum average time between RTCP packets from this site (in
+ * seconds). This time prevents the reports from `clumping' when
+ * sessions are small and the law of large numbers isn't helping
+ * to smooth out the traffic. It also keeps the report interval
+ * from becoming ridiculously small during transient outages like
+ * a network partition.
+ */
+ double const RTCP_MIN_TIME = 5.;
+ /*
+ * Fraction of the RTCP bandwidth to be shared among active
+ * senders. (This fraction was chosen so that in a typical
+ * session with one or two active senders, the computed report
+ * time would be roughly equal to the minimum report time so that
+ * we don't unnecessarily slow down receiver reports.) The
+ * receiver fraction must be 1 - the sender fraction.
+ */
+ double const RTCP_SENDER_BW_FRACTION = 0.25;
+ double const RTCP_RCVR_BW_FRACTION = (1-RTCP_SENDER_BW_FRACTION);
+ /*
+ * To compensate for "timer reconsideration" converging to a
+ * value below the intended average.
+ */
+ double const COMPENSATION = 2.71828 - 1.5;
+
+ double t; /* interval */
+ double rtcp_min_time = RTCP_MIN_TIME;
+ int n; /* no. of members for computation */
+
+ /*
+ * Very first call at application start-up uses half the min
+ * delay for quicker notification while still allowing some time
+ * before reporting for randomization and to learn about other
+ * sources so the report interval will converge to the correct
+ * interval more quickly.
+
+
+
+Schulzrinne, et al. Standards Track [Page 89]
+
+RFC 3550 RTP July 2003
+
+
+ */
+ if (initial) {
+ rtcp_min_time /= 2;
+ }
+ /*
+ * Dedicate a fraction of the RTCP bandwidth to senders unless
+ * the number of senders is large enough that their share is
+ * more than that fraction.
+ */
+ n = members;
+ if (senders <= members * RTCP_SENDER_BW_FRACTION) {
+ if (we_sent) {
+ rtcp_bw *= RTCP_SENDER_BW_FRACTION;
+ n = senders;
+ } else {
+ rtcp_bw *= RTCP_RCVR_BW_FRACTION;
+ n -= senders;
+ }
+ }
+
+ /*
+ * The effective number of sites times the average packet size is
+ * the total number of octets sent when each site sends a report.
+ * Dividing this by the effective bandwidth gives the time
+ * interval over which those packets must be sent in order to
+ * meet the bandwidth target, with a minimum enforced. In that
+ * time interval we send one report so this time is also our
+ * average time between reports.
+ */
+ t = avg_rtcp_size * n / rtcp_bw;
+ if (t < rtcp_min_time) t = rtcp_min_time;
+
+ /*
+ * To avoid traffic bursts from unintended synchronization with
+ * other sites, we then pick our actual next report interval as a
+ * random number uniformly distributed between 0.5*t and 1.5*t.
+ */
+ t = t * (drand48() + 0.5);
+ t = t / COMPENSATION;
+ return t;
+ }
+
+ void OnExpire(event e,
+ int members,
+ int senders,
+ double rtcp_bw,
+ int we_sent,
+ double *avg_rtcp_size,
+
+
+
+Schulzrinne, et al. Standards Track [Page 90]
+
+RFC 3550 RTP July 2003
+
+
+ int *initial,
+ time_tp tc,
+ time_tp *tp,
+ int *pmembers)
+ {
+ /* This function is responsible for deciding whether to send an
+ * RTCP report or BYE packet now, or to reschedule transmission.
+ * It is also responsible for updating the pmembers, initial, tp,
+ * and avg_rtcp_size state variables. This function should be
+ * called upon expiration of the event timer used by Schedule().
+ */
+
+ double t; /* Interval */
+ double tn; /* Next transmit time */
+
+ /* In the case of a BYE, we use "timer reconsideration" to
+ * reschedule the transmission of the BYE if necessary */
+
+ if (TypeOfEvent(e) == EVENT_BYE) {
+ t = rtcp_interval(members,
+ senders,
+ rtcp_bw,
+ we_sent,
+ *avg_rtcp_size,
+ *initial);
+ tn = *tp + t;
+ if (tn <= tc) {
+ SendBYEPacket(e);
+ exit(1);
+ } else {
+ Schedule(tn, e);
+ }
+
+ } else if (TypeOfEvent(e) == EVENT_REPORT) {
+ t = rtcp_interval(members,
+ senders,
+ rtcp_bw,
+ we_sent,
+ *avg_rtcp_size,
+ *initial);
+ tn = *tp + t;
+ if (tn <= tc) {
+ SendRTCPReport(e);
+ *avg_rtcp_size = (1./16.)*SentPacketSize(e) +
+ (15./16.)*(*avg_rtcp_size);
+ *tp = tc;
+
+ /* We must redraw the interval. Don't reuse the
+
+
+
+Schulzrinne, et al. Standards Track [Page 91]
+
+RFC 3550 RTP July 2003
+
+
+ one computed above, since it is not actually
+ distributed the same, as we are conditioned
+ on it being small enough to cause a packet to
+ be sent */
+
+ t = rtcp_interval(members,
+ senders,
+ rtcp_bw,
+ we_sent,
+ *avg_rtcp_size,
+ *initial);
+
+ Schedule(t+tc,e);
+ *initial = 0;
+ } else {
+ Schedule(tn, e);
+ }
+ *pmembers = members;
+ }
+ }
+
+ void OnReceive(packet p,
+ event e,
+ int *members,
+ int *pmembers,
+ int *senders,
+ double *avg_rtcp_size,
+ double *tp,
+ double tc,
+ double tn)
+ {
+ /* What we do depends on whether we have left the group, and are
+ * waiting to send a BYE (TypeOfEvent(e) == EVENT_BYE) or an RTCP
+ * report. p represents the packet that was just received. */
+
+ if (PacketType(p) == PACKET_RTCP_REPORT) {
+ if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) {
+ AddMember(p);
+ *members += 1;
+ }
+ *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) +
+ (15./16.)*(*avg_rtcp_size);
+ } else if (PacketType(p) == PACKET_RTP) {
+ if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) {
+ AddMember(p);
+ *members += 1;
+ }
+ if (NewSender(p) && (TypeOfEvent(e) == EVENT_REPORT)) {
+
+
+
+Schulzrinne, et al. Standards Track [Page 92]
+
+RFC 3550 RTP July 2003
+
+
+ AddSender(p);
+ *senders += 1;
+ }
+ } else if (PacketType(p) == PACKET_BYE) {
+ *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) +
+ (15./16.)*(*avg_rtcp_size);
+
+ if (TypeOfEvent(e) == EVENT_REPORT) {
+ if (NewSender(p) == FALSE) {
+ RemoveSender(p);
+ *senders -= 1;
+ }
+
+ if (NewMember(p) == FALSE) {
+ RemoveMember(p);
+ *members -= 1;
+ }
+
+ if (*members < *pmembers) {
+ tn = tc +
+ (((double) *members)/(*pmembers))*(tn - tc);
+ *tp = tc -
+ (((double) *members)/(*pmembers))*(tc - *tp);
+
+ /* Reschedule the next report for time tn */
+
+ Reschedule(tn, e);
+ *pmembers = *members;
+ }
+
+ } else if (TypeOfEvent(e) == EVENT_BYE) {
+ *members += 1;
+ }
+ }
+ }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 93]
+
+RFC 3550 RTP July 2003
+
+
+A.8 Estimating the Interarrival Jitter
+
+ The code fragments below implement the algorithm given in Section
+ 6.4.1 for calculating an estimate of the statistical variance of the
+ RTP data interarrival time to be inserted in the interarrival jitter
+ field of reception reports. The inputs are r->ts, the timestamp from
+ the incoming packet, and arrival, the current time in the same units.
+ Here s points to state for the source; s->transit holds the relative
+ transit time for the previous packet, and s->jitter holds the
+ estimated jitter. The jitter field of the reception report is
+ measured in timestamp units and expressed as an unsigned integer, but
+ the jitter estimate is kept in a floating point. As each data packet
+ arrives, the jitter estimate is updated:
+
+ int transit = arrival - r->ts;
+ int d = transit - s->transit;
+ s->transit = transit;
+ if (d < 0) d = -d;
+ s->jitter += (1./16.) * ((double)d - s->jitter);
+
+ When a reception report block (to which rr points) is generated for
+ this member, the current jitter estimate is returned:
+
+ rr->jitter = (u_int32) s->jitter;
+
+ Alternatively, the jitter estimate can be kept as an integer, but
+ scaled to reduce round-off error. The calculation is the same except
+ for the last line:
+
+ s->jitter += d - ((s->jitter + 8) >> 4);
+
+ In this case, the estimate is sampled for the reception report as:
+
+ rr->jitter = s->jitter >> 4;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 94]
+
+RFC 3550 RTP July 2003
+
+
+Appendix B - Changes from RFC 1889
+
+ Most of this RFC is identical to RFC 1889. There are no changes in
+ the packet formats on the wire, only changes to the rules and
+ algorithms governing how the protocol is used. The biggest change is
+ an enhancement to the scalable timer algorithm for calculating when
+ to send RTCP packets:
+
+ o The algorithm for calculating the RTCP transmission interval
+ specified in Sections 6.2 and 6.3 and illustrated in Appendix A.7
+ is augmented to include "reconsideration" to minimize transmission
+ in excess of the intended rate when many participants join a
+ session simultaneously, and "reverse reconsideration" to reduce
+ the incidence and duration of false participant timeouts when the
+ number of participants drops rapidly. Reverse reconsideration is
+ also used to possibly shorten the delay before sending RTCP SR
+ when transitioning from passive receiver to active sender mode.
+
+ o Section 6.3.7 specifies new rules controlling when an RTCP BYE
+ packet should be sent in order to avoid a flood of packets when
+ many participants leave a session simultaneously.
+
+ o The requirement to retain state for inactive participants for a
+ period long enough to span typical network partitions was removed
+ from Section 6.2.1. In a session where many participants join for
+ a brief time and fail to send BYE, this requirement would cause a
+ significant overestimate of the number of participants. The
+ reconsideration algorithm added in this revision compensates for
+ the large number of new participants joining simultaneously when a
+ partition heals.
+
+ It should be noted that these enhancements only have a significant
+ effect when the number of session participants is large (thousands)
+ and most of the participants join or leave at the same time. This
+ makes testing in a live network difficult. However, the algorithm
+ was subjected to a thorough analysis and simulation to verify its
+ performance. Furthermore, the enhanced algorithm was designed to
+ interoperate with the algorithm in RFC 1889 such that the degree of
+ reduction in excess RTCP bandwidth during a step join is proportional
+ to the fraction of participants that implement the enhanced
+ algorithm. Interoperation of the two algorithms has been verified
+ experimentally on live networks.
+
+ Other functional changes were:
+
+ o Section 6.2.1 specifies that implementations may store only a
+ sampling of the participants' SSRC identifiers to allow scaling to
+ very large sessions. Algorithms are specified in RFC 2762 [21].
+
+
+
+Schulzrinne, et al. Standards Track [Page 95]
+
+RFC 3550 RTP July 2003
+
+
+ o In Section 6.2 it is specified that RTCP sender and non-sender
+ bandwidths may be set as separate parameters of the session rather
+ than a strict percentage of the session bandwidth, and may be set
+ to zero. The requirement that RTCP was mandatory for RTP sessions
+ using IP multicast was relaxed. However, a clarification was also
+ added that turning off RTCP is NOT RECOMMENDED.
+
+ o In Sections 6.2, 6.3.1 and Appendix A.7, it is specified that the
+ fraction of participants below which senders get dedicated RTCP
+ bandwidth changes from the fixed 1/4 to a ratio based on the RTCP
+ sender and non-sender bandwidth parameters when those are given.
+ The condition that no bandwidth is dedicated to senders when there
+ are no senders was removed since that is expected to be a
+ transitory state. It also keeps non-senders from using sender
+ RTCP bandwidth when that is not intended.
+
+ o Also in Section 6.2 it is specified that the minimum RTCP interval
+ may be scaled to smaller values for high bandwidth sessions, and
+ that the initial RTCP delay may be set to zero for unicast
+ sessions.
+
+ o Timing out a participant is to be based on inactivity for a number
+ of RTCP report intervals calculated using the receiver RTCP
+ bandwidth fraction even for active senders.
+
+ o Sections 7.2 and 7.3 specify that translators and mixers should
+ send BYE packets for the sources they are no longer forwarding.
+
+ o Rule changes for layered encodings are defined in Sections 2.4,
+ 6.3.9, 8.3 and 11. In the last of these, it is noted that the
+ address and port assignment rule conflicts with the SDP
+ specification, RFC 2327 [15], but it is intended that this
+ restriction will be relaxed in a revision of RFC 2327.
+
+ o The convention for using even/odd port pairs for RTP and RTCP in
+ Section 11 was clarified to refer to destination ports. The
+ requirement to use an even/odd port pair was removed if the two
+ ports are specified explicitly. For unicast RTP sessions,
+ distinct port pairs may be used for the two ends (Sections 3, 7.1
+ and 11).
+
+ o A new Section 10 was added to explain the requirement for
+ congestion control in applications using RTP.
+
+ o In Section 8.2, the requirement that a new SSRC identifier MUST be
+ chosen whenever the source transport address is changed has been
+ relaxed to say that a new SSRC identifier MAY be chosen.
+ Correspondingly, it was clarified that an implementation MAY
+
+
+
+Schulzrinne, et al. Standards Track [Page 96]
+
+RFC 3550 RTP July 2003
+
+
+ choose to keep packets from the new source address rather than the
+ existing source address when an SSRC collision occurs between two
+ other participants, and SHOULD do so for applications such as
+ telephony in which some sources such as mobile entities may change
+ addresses during the course of an RTP session.
+
+ o An indentation bug in the RFC 1889 printing of the pseudo-code for
+ the collision detection and resolution algorithm in Section 8.2
+ has been corrected by translating the syntax to pseudo C language,
+ and the algorithm has been modified to remove the restriction that
+ both RTP and RTCP must be sent from the same source port number.
+
+ o The description of the padding mechanism for RTCP packets was
+ clarified and it is specified that padding MUST only be applied to
+ the last packet of a compound RTCP packet.
+
+ o In Section A.1, initialization of base_seq was corrected to be seq
+ rather than seq - 1, and the text was corrected to say the bad
+ sequence number plus 1 is stored. The initialization of max_seq
+ and other variables for the algorithm was separated from the text
+ to make clear that this initialization must be done in addition to
+ calling the init_seq() function (and a few words lost in RFC 1889
+ when processing the document from source to output form were
+ restored).
+
+ o Clamping of number of packets lost in Section A.3 was corrected to
+ use both positive and negative limits.
+
+ o The specification of "relative" NTP timestamp in the RTCP SR
+ section now defines these timestamps to be based on the most
+ common system-specific clock, such as system uptime, rather than
+ on session elapsed time which would not be the same for multiple
+ applications started on the same machine at different times.
+
+ Non-functional changes:
+
+ o It is specified that a receiver MUST ignore packets with payload
+ types it does not understand.
+
+ o In Fig. 2, the floating point NTP timestamp value was corrected,
+ some missing leading zeros were added in a hex number, and the UTC
+ timezone was specified.
+
+ o The inconsequence of NTP timestamps wrapping around in the year
+ 2036 is explained.
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 97]
+
+RFC 3550 RTP July 2003
+
+
+ o The policy for registration of RTCP packet types and SDES types
+ was clarified in a new Section 15, IANA Considerations. The
+ suggestion that experimenters register the numbers they need and
+ then unregister those which prove to be unneeded has been removed
+ in favor of using APP and PRIV. Registration of profile names was
+ also specified.
+
+ o The reference for the UTF-8 character set was changed from an
+ X/Open Preliminary Specification to be RFC 2279.
+
+ o The reference for RFC 1597 was updated to RFC 1918 and the
+ reference for RFC 2543 was updated to RFC 3261.
+
+ o The last paragraph of the introduction in RFC 1889, which
+ cautioned implementors to limit deployment in the Internet, was
+ removed because it was deemed no longer relevant.
+
+ o A non-normative note regarding the use of RTP with Source-Specific
+ Multicast (SSM) was added in Section 6.
+
+ o The definition of "RTP session" in Section 3 was expanded to
+ acknowledge that a single session may use multiple destination
+ transport addresses (as was always the case for a translator or
+ mixer) and to explain that the distinguishing feature of an RTP
+ session is that each corresponds to a separate SSRC identifier
+ space. A new definition of "multimedia session" was added to
+ reduce confusion about the word "session".
+
+ o The meaning of "sampling instant" was explained in more detail as
+ part of the definition of the timestamp field of the RTP header in
+ Section 5.1.
+
+ o Small clarifications of the text have been made in several places,
+ some in response to questions from readers. In particular:
+
+ - In RFC 1889, the first five words of the second sentence of
+ Section 2.2 were lost in processing the document from source to
+ output form, but are now restored.
+
+ - A definition for "RTP media type" was added in Section 3 to
+ allow the explanation of multiplexing RTP sessions in Section
+ 5.2 to be more clear regarding the multiplexing of multiple
+ media. That section also now explains that multiplexing
+ multiple sources of the same medium based on SSRC identifiers
+ may be appropriate and is the norm for multicast sessions.
+
+ - The definition for "non-RTP means" was expanded to include
+ examples of other protocols constituting non-RTP means.
+
+
+
+Schulzrinne, et al. Standards Track [Page 98]
+
+RFC 3550 RTP July 2003
+
+
+ - The description of the session bandwidth parameter is expanded
+ in Section 6.2, including a clarification that the control
+ traffic bandwidth is in addition to the session bandwidth for
+ the data traffic.
+
+ - The effect of varying packet duration on the jitter calculation
+ was explained in Section 6.4.4.
+
+ - The method for terminating and padding a sequence of SDES items
+ was clarified in Section 6.5.
+
+ - IPv6 address examples were added in the description of SDES
+ CNAME in Section 6.5.1, and "example.com" was used in place of
+ other example domain names.
+
+ - The Security section added a formal reference to IPSEC now that
+ it is available, and says that the confidentiality method
+ defined in this specification is primarily to codify existing
+ practice. It is RECOMMENDED that stronger encryption
+ algorithms such as Triple-DES be used in place of the default
+ algorithm, and noted that the SRTP profile based on AES will be
+ the correct choice in the future. A caution about the weakness
+ of the RTP header as an initialization vector was added. It
+ was also noted that payload-only encryption is necessary to
+ allow for header compression.
+
+ - The method for partial encryption of RTCP was clarified; in
+ particular, SDES CNAME is carried in only one part when the
+ compound RTCP packet is split.
+
+ - It is clarified that only one compound RTCP packet should be
+ sent per reporting interval and that if there are too many
+ active sources for the reports to fit in the MTU, then a subset
+ of the sources should be selected round-robin over multiple
+ intervals.
+
+ - A note was added in Appendix A.1 that packets may be saved
+ during RTP header validation and delivered upon success.
+
+ - Section 7.3 now explains that a mixer aggregating SDES packets
+ uses more RTCP bandwidth due to longer packets, and a mixer
+ passing through RTCP naturally sends packets at higher than the
+ single source rate, but both behaviors are valid.
+
+ - Section 13 clarifies that an RTP application may use multiple
+ profiles but typically only one in a given session.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 99]
+
+RFC 3550 RTP July 2003
+
+
+ - The terms MUST, SHOULD, MAY, etc. are used as defined in RFC
+ 2119.
+
+ - The bibliography was divided into normative and informative
+ references.
+
+References
+
+Normative References
+
+ [1] Schulzrinne, H. and S. Casner, "RTP Profile for Audio and Video
+ Conferences with Minimal Control", RFC 3551, July 2003.
+
+ [2] Bradner, S., "Key Words for Use in RFCs to Indicate Requirement
+ Levels", BCP 14, RFC 2119, March 1997.
+
+ [3] Postel, J., "Internet Protocol", STD 5, RFC 791, September 1981.
+
+ [4] Mills, D., "Network Time Protocol (Version 3) Specification,
+ Implementation and Analysis", RFC 1305, March 1992.
+
+ [5] Yergeau, F., "UTF-8, a Transformation Format of ISO 10646", RFC
+ 2279, January 1998.
+
+ [6] Mockapetris, P., "Domain Names - Concepts and Facilities", STD
+ 13, RFC 1034, November 1987.
+
+ [7] Mockapetris, P., "Domain Names - Implementation and
+ Specification", STD 13, RFC 1035, November 1987.
+
+ [8] Braden, R., "Requirements for Internet Hosts - Application and
+ Support", STD 3, RFC 1123, October 1989.
+
+ [9] Resnick, P., "Internet Message Format", RFC 2822, April 2001.
+
+Informative References
+
+ [10] Clark, D. and D. Tennenhouse, "Architectural Considerations for
+ a New Generation of Protocols," in SIGCOMM Symposium on
+ Communications Architectures and Protocols, (Philadelphia,
+ Pennsylvania), pp. 200--208, IEEE Computer Communications
+ Review, Vol. 20(4), September 1990.
+
+ [11] Schulzrinne, H., "Issues in designing a transport protocol for
+ audio and video conferences and other multiparticipant real-time
+ applications," expired Internet Draft, October 1993.
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 100]
+
+RFC 3550 RTP July 2003
+
+
+ [12] Comer, D., Internetworking with TCP/IP, vol. 1. Englewood
+ Cliffs, New Jersey: Prentice Hall, 1991.
+
+ [13] Rosenberg, J., Schulzrinne, H., Camarillo, G., Johnston, A.,
+ Peterson, J., Sparks, R., Handley, M. and E. Schooler, "SIP:
+ Session Initiation Protocol", RFC 3261, June 2002.
+
+ [14] International Telecommunication Union, "Visual telephone systems
+ and equipment for local area networks which provide a non-
+ guaranteed quality of service", Recommendation H.323,
+ Telecommunication Standardization Sector of ITU, Geneva,
+ Switzerland, July 2003.
+
+ [15] Handley, M. and V. Jacobson, "SDP: Session Description
+ Protocol", RFC 2327, April 1998.
+
+ [16] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming
+ Protocol (RTSP)", RFC 2326, April 1998.
+
+ [17] Eastlake 3rd, D., Crocker, S. and J. Schiller, "Randomness
+ Recommendations for Security", RFC 1750, December 1994.
+
+ [18] Bolot, J.-C., Turletti, T. and I. Wakeman, "Scalable Feedback
+ Control for Multicast Video Distribution in the Internet", in
+ SIGCOMM Symposium on Communications Architectures and Protocols,
+ (London, England), pp. 58--67, ACM, August 1994.
+
+ [19] Busse, I., Deffner, B. and H. Schulzrinne, "Dynamic QoS Control
+ of Multimedia Applications Based on RTP", Computer
+ Communications, vol. 19, pp. 49--58, January 1996.
+
+ [20] Floyd, S. and V. Jacobson, "The Synchronization of Periodic
+ Routing Messages", in SIGCOMM Symposium on Communications
+ Architectures and Protocols (D. P. Sidhu, ed.), (San Francisco,
+ California), pp. 33--44, ACM, September 1993. Also in [34].
+
+ [21] Rosenberg, J. and H. Schulzrinne, "Sampling of the Group
+ Membership in RTP", RFC 2762, February 2000.
+
+ [22] Cadzow, J., Foundations of Digital Signal Processing and Data
+ Analysis. New York, New York: Macmillan, 1987.
+
+ [23] Hinden, R. and S. Deering, "Internet Protocol Version 6 (IPv6)
+ Addressing Architecture", RFC 3513, April 2003.
+
+ [24] Rekhter, Y., Moskowitz, B., Karrenberg, D., de Groot, G. and E.
+ Lear, "Address Allocation for Private Internets", RFC 1918,
+ February 1996.
+
+
+
+Schulzrinne, et al. Standards Track [Page 101]
+
+RFC 3550 RTP July 2003
+
+
+ [25] Lear, E., Fair, E., Crocker, D. and T. Kessler, "Network 10
+ Considered Harmful (Some Practices Shouldn't be Codified)", RFC
+ 1627, July 1994.
+
+ [26] Feller, W., An Introduction to Probability Theory and its
+ Applications, vol. 1. New York, New York: John Wiley and Sons,
+ third ed., 1968.
+
+ [27] Kent, S. and R. Atkinson, "Security Architecture for the
+ Internet Protocol", RFC 2401, November 1998.
+
+ [28] Baugher, M., Blom, R., Carrara, E., McGrew, D., Naslund, M.,
+ Norrman, K. and D. Oran, "Secure Real-time Transport Protocol",
+ Work in Progress, April 2003.
+
+ [29] Balenson, D., "Privacy Enhancement for Internet Electronic Mail:
+ Part III", RFC 1423, February 1993.
+
+ [30] Voydock, V. and S. Kent, "Security Mechanisms in High-Level
+ Network Protocols", ACM Computing Surveys, vol. 15, pp. 135-171,
+ June 1983.
+
+ [31] Floyd, S., "Congestion Control Principles", BCP 41, RFC 2914,
+ September 2000.
+
+ [32] Rivest, R., "The MD5 Message-Digest Algorithm", RFC 1321, April
+ 1992.
+
+ [33] Stubblebine, S., "Security Services for Multimedia
+ Conferencing", in 16th National Computer Security Conference,
+ (Baltimore, Maryland), pp. 391--395, September 1993.
+
+ [34] Floyd, S. and V. Jacobson, "The Synchronization of Periodic
+ Routing Messages", IEEE/ACM Transactions on Networking, vol. 2,
+ pp. 122--136, April 1994.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 102]
+
+RFC 3550 RTP July 2003
+
+
+Authors' Addresses
+
+ Henning Schulzrinne
+ Department of Computer Science
+ Columbia University
+ 1214 Amsterdam Avenue
+ New York, NY 10027
+ United States
+
+ EMail: schulzrinne@cs.columbia.edu
+
+
+ Stephen L. Casner
+ Packet Design
+ 3400 Hillview Avenue, Building 3
+ Palo Alto, CA 94304
+ United States
+
+ EMail: casner@acm.org
+
+
+ Ron Frederick
+ Blue Coat Systems Inc.
+ 650 Almanor Avenue
+ Sunnyvale, CA 94085
+ United States
+
+ EMail: ronf@bluecoat.com
+
+
+ Van Jacobson
+ Packet Design
+ 3400 Hillview Avenue, Building 3
+ Palo Alto, CA 94304
+ United States
+
+ EMail: van@packetdesign.com
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 103]
+
+RFC 3550 RTP July 2003
+
+
+Full Copyright Statement
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne, et al. Standards Track [Page 104]
+
diff --git a/src/modules/rtp/rfc3551.txt b/src/modules/rtp/rfc3551.txt
new file mode 100644
index 00000000..c43ff34d
--- /dev/null
+++ b/src/modules/rtp/rfc3551.txt
@@ -0,0 +1,2467 @@
+
+
+
+
+
+
+Network Working Group H. Schulzrinne
+Request for Comments: 3551 Columbia University
+Obsoletes: 1890 S. Casner
+Category: Standards Track Packet Design
+ July 2003
+
+
+ RTP Profile for Audio and Video Conferences
+ with Minimal Control
+
+Status of this Memo
+
+ This document specifies an Internet standards track protocol for the
+ Internet community, and requests discussion and suggestions for
+ improvements. Please refer to the current edition of the "Internet
+ Official Protocol Standards" (STD 1) for the standardization state
+ and status of this protocol. Distribution of this memo is unlimited.
+
+Copyright Notice
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+Abstract
+
+ This document describes a profile called "RTP/AVP" for the use of the
+ real-time transport protocol (RTP), version 2, and the associated
+ control protocol, RTCP, within audio and video multiparticipant
+ conferences with minimal control. It provides interpretations of
+ generic fields within the RTP specification suitable for audio and
+ video conferences. In particular, this document defines a set of
+ default mappings from payload type numbers to encodings.
+
+ This document also describes how audio and video data may be carried
+ within RTP. It defines a set of standard encodings and their names
+ when used within RTP. The descriptions provide pointers to reference
+ implementations and the detailed standards. This document is meant
+ as an aid for implementors of audio, video and other real-time
+ multimedia applications.
+
+ This memorandum obsoletes RFC 1890. It is mostly backwards-
+ compatible except for functions removed because two interoperable
+ implementations were not found. The additions to RFC 1890 codify
+ existing practice in the use of payload formats under this profile
+ and include new payload formats defined since RFC 1890 was published.
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 1]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+Table of Contents
+
+ 1. Introduction ................................................. 3
+ 1.1 Terminology ............................................. 3
+ 2. RTP and RTCP Packet Forms and Protocol Behavior .............. 4
+ 3. Registering Additional Encodings ............................. 6
+ 4. Audio ........................................................ 8
+ 4.1 Encoding-Independent Rules .............................. 8
+ 4.2 Operating Recommendations ............................... 9
+ 4.3 Guidelines for Sample-Based Audio Encodings ............. 10
+ 4.4 Guidelines for Frame-Based Audio Encodings .............. 11
+ 4.5 Audio Encodings ......................................... 12
+ 4.5.1 DVI4 ............................................ 13
+ 4.5.2 G722 ............................................ 14
+ 4.5.3 G723 ............................................ 14
+ 4.5.4 G726-40, G726-32, G726-24, and G726-16 .......... 18
+ 4.5.5 G728 ............................................ 19
+ 4.5.6 G729 ............................................ 20
+ 4.5.7 G729D and G729E ................................. 22
+ 4.5.8 GSM ............................................. 24
+ 4.5.9 GSM-EFR ......................................... 27
+ 4.5.10 L8 .............................................. 27
+ 4.5.11 L16 ............................................. 27
+ 4.5.12 LPC ............................................. 27
+ 4.5.13 MPA ............................................. 28
+ 4.5.14 PCMA and PCMU ................................... 28
+ 4.5.15 QCELP ........................................... 28
+ 4.5.16 RED ............................................. 29
+ 4.5.17 VDVI ............................................ 29
+ 5. Video ........................................................ 30
+ 5.1 CelB .................................................... 30
+ 5.2 JPEG .................................................... 30
+ 5.3 H261 .................................................... 30
+ 5.4 H263 .................................................... 31
+ 5.5 H263-1998 ............................................... 31
+ 5.6 MPV ..................................................... 31
+ 5.7 MP2T .................................................... 31
+ 5.8 nv ...................................................... 32
+ 6. Payload Type Definitions ..................................... 32
+ 7. RTP over TCP and Similar Byte Stream Protocols ............... 34
+ 8. Port Assignment .............................................. 34
+ 9. Changes from RFC 1890 ........................................ 35
+ 10. Security Considerations ...................................... 38
+ 11. IANA Considerations .......................................... 39
+ 12. References ................................................... 39
+ 12.1 Normative References .................................... 39
+ 12.2 Informative References .................................. 39
+ 13. Current Locations of Related Resources ....................... 41
+
+
+
+Schulzrinne & Casner Standards Track [Page 2]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 14. Acknowledgments .............................................. 42
+ 15. Intellectual Property Rights Statement ....................... 43
+ 16. Authors' Addresses ........................................... 43
+ 17. Full Copyright Statement ..................................... 44
+
+1. Introduction
+
+ This profile defines aspects of RTP left unspecified in the RTP
+ Version 2 protocol definition (RFC 3550) [1]. This profile is
+ intended for the use within audio and video conferences with minimal
+ session control. In particular, no support for the negotiation of
+ parameters or membership control is provided. The profile is
+ expected to be useful in sessions where no negotiation or membership
+ control are used (e.g., using the static payload types and the
+ membership indications provided by RTCP), but this profile may also
+ be useful in conjunction with a higher-level control protocol.
+
+ Use of this profile may be implicit in the use of the appropriate
+ applications; there may be no explicit indication by port number,
+ protocol identifier or the like. Applications such as session
+ directories may use the name for this profile specified in Section
+ 11.
+
+ Other profiles may make different choices for the items specified
+ here.
+
+ This document also defines a set of encodings and payload formats for
+ audio and video. These payload format descriptions are included here
+ only as a matter of convenience since they are too small to warrant
+ separate documents. Use of these payload formats is NOT REQUIRED to
+ use this profile. Only the binding of some of the payload formats to
+ static payload type numbers in Tables 4 and 5 is normative.
+
+1.1 Terminology
+
+ The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
+ "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+ document are to be interpreted as described in RFC 2119 [2] and
+ indicate requirement levels for implementations compliant with this
+ RTP profile.
+
+ This document defines the term media type as dividing encodings of
+ audio and video content into three classes: audio, video and
+ audio/video (interleaved).
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 3]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+2. RTP and RTCP Packet Forms and Protocol Behavior
+
+ The section "RTP Profiles and Payload Format Specifications" of RFC
+ 3550 enumerates a number of items that can be specified or modified
+ in a profile. This section addresses these items. Generally, this
+ profile follows the default and/or recommended aspects of the RTP
+ specification.
+
+ RTP data header: The standard format of the fixed RTP data
+ header is used (one marker bit).
+
+ Payload types: Static payload types are defined in Section 6.
+
+ RTP data header additions: No additional fixed fields are
+ appended to the RTP data header.
+
+ RTP data header extensions: No RTP header extensions are
+ defined, but applications operating under this profile MAY use
+ such extensions. Thus, applications SHOULD NOT assume that the
+ RTP header X bit is always zero and SHOULD be prepared to ignore
+ the header extension. If a header extension is defined in the
+ future, that definition MUST specify the contents of the first 16
+ bits in such a way that multiple different extensions can be
+ identified.
+
+ RTCP packet types: No additional RTCP packet types are defined
+ by this profile specification.
+
+ RTCP report interval: The suggested constants are to be used for
+ the RTCP report interval calculation. Sessions operating under
+ this profile MAY specify a separate parameter for the RTCP traffic
+ bandwidth rather than using the default fraction of the session
+ bandwidth. The RTCP traffic bandwidth MAY be divided into two
+ separate session parameters for those participants which are
+ active data senders and those which are not. Following the
+ recommendation in the RTP specification [1] that 1/4 of the RTCP
+ bandwidth be dedicated to data senders, the RECOMMENDED default
+ values for these two parameters would be 1.25% and 3.75%,
+ respectively. For a particular session, the RTCP bandwidth for
+ non-data-senders MAY be set to zero when operating on
+ unidirectional links or for sessions that don't require feedback
+ on the quality of reception. The RTCP bandwidth for data senders
+ SHOULD be kept non-zero so that sender reports can still be sent
+ for inter-media synchronization and to identify the source by
+ CNAME. The means by which the one or two session parameters for
+ RTCP bandwidth are specified is beyond the scope of this memo.
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 4]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ SR/RR extension: No extension section is defined for the RTCP SR
+ or RR packet.
+
+ SDES use: Applications MAY use any of the SDES items described
+ in the RTP specification. While CNAME information MUST be sent
+ every reporting interval, other items SHOULD only be sent every
+ third reporting interval, with NAME sent seven out of eight times
+ within that slot and the remaining SDES items cyclically taking up
+ the eighth slot, as defined in Section 6.2.2 of the RTP
+ specification. In other words, NAME is sent in RTCP packets 1, 4,
+ 7, 10, 13, 16, 19, while, say, EMAIL is used in RTCP packet 22.
+
+ Security: The RTP default security services are also the default
+ under this profile.
+
+ String-to-key mapping: No mapping is specified by this profile.
+
+ Congestion: RTP and this profile may be used in the context of
+ enhanced network service, for example, through Integrated Services
+ (RFC 1633) [4] or Differentiated Services (RFC 2475) [5], or they
+ may be used with best effort service.
+
+ If enhanced service is being used, RTP receivers SHOULD monitor
+ packet loss to ensure that the service that was requested is
+ actually being delivered. If it is not, then they SHOULD assume
+ that they are receiving best-effort service and behave
+ accordingly.
+
+ If best-effort service is being used, RTP receivers SHOULD monitor
+ packet loss to ensure that the packet loss rate is within
+ acceptable parameters. Packet loss is considered acceptable if a
+ TCP flow across the same network path and experiencing the same
+ network conditions would achieve an average throughput, measured
+ on a reasonable timescale, that is not less than the RTP flow is
+ achieving. This condition can be satisfied by implementing
+ congestion control mechanisms to adapt the transmission rate (or
+ the number of layers subscribed for a layered multicast session),
+ or by arranging for a receiver to leave the session if the loss
+ rate is unacceptably high.
+
+ The comparison to TCP cannot be specified exactly, but is intended
+ as an "order-of-magnitude" comparison in timescale and throughput.
+ The timescale on which TCP throughput is measured is the round-
+ trip time of the connection. In essence, this requirement states
+ that it is not acceptable to deploy an application (using RTP or
+ any other transport protocol) on the best-effort Internet which
+ consumes bandwidth arbitrarily and does not compete fairly with
+ TCP within an order of magnitude.
+
+
+
+Schulzrinne & Casner Standards Track [Page 5]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ Underlying protocol: The profile specifies the use of RTP over
+ unicast and multicast UDP as well as TCP. (This does not preclude
+ the use of these definitions when RTP is carried by other lower-
+ layer protocols.)
+
+ Transport mapping: The standard mapping of RTP and RTCP to
+ transport-level addresses is used.
+
+ Encapsulation: This profile leaves to applications the
+ specification of RTP encapsulation in protocols other than UDP.
+
+3. Registering Additional Encodings
+
+ This profile lists a set of encodings, each of which is comprised of
+ a particular media data compression or representation plus a payload
+ format for encapsulation within RTP. Some of those payload formats
+ are specified here, while others are specified in separate RFCs. It
+ is expected that additional encodings beyond the set listed here will
+ be created in the future and specified in additional payload format
+ RFCs.
+
+ This profile also assigns to each encoding a short name which MAY be
+ used by higher-level control protocols, such as the Session
+ Description Protocol (SDP), RFC 2327 [6], to identify encodings
+ selected for a particular RTP session.
+
+ In some contexts it may be useful to refer to these encodings in the
+ form of a MIME content-type. To facilitate this, RFC 3555 [7]
+ provides registrations for all of the encodings names listed here as
+ MIME subtype names under the "audio" and "video" MIME types through
+ the MIME registration procedure as specified in RFC 2048 [8].
+
+ Any additional encodings specified for use under this profile (or
+ others) may also be assigned names registered as MIME subtypes with
+ the Internet Assigned Numbers Authority (IANA). This registry
+ provides a means to ensure that the names assigned to the additional
+ encodings are kept unique. RFC 3555 specifies the information that
+ is required for the registration of RTP encodings.
+
+ In addition to assigning names to encodings, this profile also
+ assigns static RTP payload type numbers to some of them. However,
+ the payload type number space is relatively small and cannot
+ accommodate assignments for all existing and future encodings.
+ During the early stages of RTP development, it was necessary to use
+ statically assigned payload types because no other mechanism had been
+ specified to bind encodings to payload types. It was anticipated
+ that non-RTP means beyond the scope of this memo (such as directory
+ services or invitation protocols) would be specified to establish a
+
+
+
+Schulzrinne & Casner Standards Track [Page 6]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ dynamic mapping between a payload type and an encoding. Now,
+ mechanisms for defining dynamic payload type bindings have been
+ specified in the Session Description Protocol (SDP) and in other
+ protocols such as ITU-T Recommendation H.323/H.245. These mechanisms
+ associate the registered name of the encoding/payload format, along
+ with any additional required parameters, such as the RTP timestamp
+ clock rate and number of channels, with a payload type number. This
+ association is effective only for the duration of the RTP session in
+ which the dynamic payload type binding is made. This association
+ applies only to the RTP session for which it is made, thus the
+ numbers can be re-used for different encodings in different sessions
+ so the number space limitation is avoided.
+
+ This profile reserves payload type numbers in the range 96-127
+ exclusively for dynamic assignment. Applications SHOULD first use
+ values in this range for dynamic payload types. Those applications
+ which need to define more than 32 dynamic payload types MAY bind
+ codes below 96, in which case it is RECOMMENDED that unassigned
+ payload type numbers be used first. However, the statically assigned
+ payload types are default bindings and MAY be dynamically bound to
+ new encodings if needed. Redefining payload types below 96 may cause
+ incorrect operation if an attempt is made to join a session without
+ obtaining session description information that defines the dynamic
+ payload types.
+
+ Dynamic payload types SHOULD NOT be used without a well-defined
+ mechanism to indicate the mapping. Systems that expect to
+ interoperate with others operating under this profile SHOULD NOT make
+ their own assignments of proprietary encodings to particular, fixed
+ payload types.
+
+ This specification establishes the policy that no additional static
+ payload types will be assigned beyond the ones defined in this
+ document. Establishing this policy avoids the problem of trying to
+ create a set of criteria for accepting static assignments and
+ encourages the implementation and deployment of the dynamic payload
+ type mechanisms.
+
+ The final set of static payload type assignments is provided in
+ Tables 4 and 5.
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 7]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4. Audio
+
+4.1 Encoding-Independent Rules
+
+ Since the ability to suppress silence is one of the primary
+ motivations for using packets to transmit voice, the RTP header
+ carries both a sequence number and a timestamp to allow a receiver to
+ distinguish between lost packets and periods of time when no data was
+ transmitted. Discontiguous transmission (silence suppression) MAY be
+ used with any audio payload format. Receivers MUST assume that
+ senders may suppress silence unless this is restricted by signaling
+ specified elsewhere. (Even if the transmitter does not suppress
+ silence, the receiver should be prepared to handle periods when no
+ data is present since packets may be lost.)
+
+ Some payload formats (see Sections 4.5.3 and 4.5.6) define a "silence
+ insertion descriptor" or "comfort noise" frame to specify parameters
+ for artificial noise that may be generated during a period of silence
+ to approximate the background noise at the source. For other payload
+ formats, a generic Comfort Noise (CN) payload format is specified in
+ RFC 3389 [9]. When the CN payload format is used with another
+ payload format, different values in the RTP payload type field
+ distinguish comfort-noise packets from those of the selected payload
+ format.
+
+ For applications which send either no packets or occasional comfort-
+ noise packets during silence, the first packet of a talkspurt, that
+ is, the first packet after a silence period during which packets have
+ not been transmitted contiguously, SHOULD be distinguished by setting
+ the marker bit in the RTP data header to one. The marker bit in all
+ other packets is zero. The beginning of a talkspurt MAY be used to
+ adjust the playout delay to reflect changing network delays.
+ Applications without silence suppression MUST set the marker bit to
+ zero.
+
+ The RTP clock rate used for generating the RTP timestamp is
+ independent of the number of channels and the encoding; it usually
+ equals the number of sampling periods per second. For N-channel
+ encodings, each sampling period (say, 1/8,000 of a second) generates
+ N samples. (This terminology is standard, but somewhat confusing, as
+ the total number of samples generated per second is then the sampling
+ rate times the channel count.)
+
+ If multiple audio channels are used, channels are numbered left-to-
+ right, starting at one. In RTP audio packets, information from
+ lower-numbered channels precedes that from higher-numbered channels.
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 8]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ For more than two channels, the convention followed by the AIFF-C
+ audio interchange format SHOULD be followed [3], using the following
+ notation, unless some other convention is specified for a particular
+ encoding or payload format:
+
+ l left
+ r right
+ c center
+ S surround
+ F front
+ R rear
+
+ channels  description  channel
+                           1     2     3     4     5     6
+ _________________________________________________________
+ 2         stereo          l     r
+ 3                         l     r     c
+ 4                         l     c     r     S
+ 5                         Fl    Fr    Fc    Sl    Sr
+ 6                         l     lc    c     r     rc    S
+
+ Note: RFC 1890 defined two conventions for the ordering of four
+ audio channels. Since the ordering is indicated implicitly by
+ the number of channels, this was ambiguous. In this revision,
+ the order described as "quadrophonic" has been eliminated to
+ remove the ambiguity. This choice was based on the observation
+ that quadrophonic consumer audio format did not become popular
+ whereas surround-sound subsequently has.
+
+ Samples for all channels belonging to a single sampling instant MUST
+ be within the same packet. The interleaving of samples from
+ different channels depends on the encoding. General guidelines are
+ given in Sections 4.3 and 4.4.
+
+ The sampling frequency SHOULD be drawn from the set: 8,000, 11,025,
+ 16,000, 22,050, 24,000, 32,000, 44,100 and 48,000 Hz. (Older Apple
+ Macintosh computers had a native sample rate of 22,254.54 Hz, which
+ can be converted to 22,050 with acceptable quality by dropping 4
+ samples in a 20 ms frame.) However, most audio encodings are defined
+ for a more restricted set of sampling frequencies. Receivers SHOULD
+ be prepared to accept multi-channel audio, but MAY choose to only
+ play a single channel.
+
+4.2 Operating Recommendations
+
+ The following recommendations are default operating parameters.
+ Applications SHOULD be prepared to handle other values. The ranges
+ given are meant to give guidance to application writers, allowing a
+
+
+
+Schulzrinne & Casner Standards Track [Page 9]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ set of applications conforming to these guidelines to interoperate
+ without additional negotiation. These guidelines are not intended to
+ restrict operating parameters for applications that can negotiate a
+ set of interoperable parameters, e.g., through a conference control
+ protocol.
+
+ For packetized audio, the default packetization interval SHOULD have
+ a duration of 20 ms or one frame, whichever is longer, unless
+ otherwise noted in Table 1 (column "ms/packet"). The packetization
+ interval determines the minimum end-to-end delay; longer packets
+ introduce less header overhead but higher delay and make packet loss
+ more noticeable. For non-interactive applications such as lectures
+ or for links with severe bandwidth constraints, a higher
+ packetization delay MAY be used. A receiver SHOULD accept packets
+ representing between 0 and 200 ms of audio data. (For framed audio
+ encodings, a receiver SHOULD accept packets with a number of frames
+ equal to 200 ms divided by the frame duration, rounded up.) This
+ restriction allows reasonable buffer sizing for the receiver.
+
+4.3 Guidelines for Sample-Based Audio Encodings
+
+ In sample-based encodings, each audio sample is represented by a
+ fixed number of bits. Within the compressed audio data, codes for
+ individual samples may span octet boundaries. An RTP audio packet
+ may contain any number of audio samples, subject to the constraint
+ that the number of bits per sample times the number of samples per
+ packet yields an integral octet count. Fractional encodings produce
+ less than one octet per sample.
+
+ The duration of an audio packet is determined by the number of
+ samples in the packet.
+
+ For sample-based encodings producing one or more octets per sample,
+ samples from different channels sampled at the same sampling instant
+ SHOULD be packed in consecutive octets. For example, for a two-
+ channel encoding, the octet sequence is (left channel, first sample),
+ (right channel, first sample), (left channel, second sample), (right
+ channel, second sample), .... For multi-octet encodings, octets
+ SHOULD be transmitted in network byte order (i.e., most significant
+ octet first).
+
+ The packing of sample-based encodings producing less than one octet
+ per sample is encoding-specific.
+
+ The RTP timestamp reflects the instant at which the first sample in
+ the packet was sampled, that is, the oldest information in the
+ packet.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 10]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.4 Guidelines for Frame-Based Audio Encodings
+
+ Frame-based encodings encode a fixed-length block of audio into
+ another block of compressed data, typically also of fixed length.
+ For frame-based encodings, the sender MAY choose to combine several
+ such frames into a single RTP packet. The receiver can tell the
+ number of frames contained in an RTP packet, if all the frames have
+ the same length, by dividing the RTP payload length by the audio
+ frame size which is defined as part of the encoding. This does not
+ work when carrying frames of different sizes unless the frame sizes
+ are relatively prime. If not, the frames MUST indicate their size.
+
+ For frame-based codecs, the channel order is defined for the whole
+ block. That is, for two-channel audio, right and left samples SHOULD
+ be coded independently, with the encoded frame for the left channel
+ preceding that for the right channel.
+
+ All frame-oriented audio codecs SHOULD be able to encode and decode
+ several consecutive frames within a single packet. Since the frame
+ size for the frame-oriented codecs is given, there is no need to use
+ a separate designation for the same encoding, but with different
+ number of frames per packet.
+
+ RTP packets SHALL contain a whole number of frames, with frames
+ inserted according to age within a packet, so that the oldest frame
+ (to be played first) occurs immediately after the RTP packet header.
+ The RTP timestamp reflects the instant at which the first sample in
+ the first frame was sampled, that is, the oldest information in the
+ packet.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 11]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5 Audio Encodings
+
+ name of sampling default
+ encoding sample/frame bits/sample rate ms/frame ms/packet
+ __________________________________________________________________
+ DVI4 sample 4 var. 20
+ G722 sample 8 16,000 20
+ G723 frame N/A 8,000 30 30
+ G726-40 sample 5 8,000 20
+ G726-32 sample 4 8,000 20
+ G726-24 sample 3 8,000 20
+ G726-16 sample 2 8,000 20
+ G728 frame N/A 8,000 2.5 20
+ G729 frame N/A 8,000 10 20
+ G729D frame N/A 8,000 10 20
+ G729E frame N/A 8,000 10 20
+ GSM frame N/A 8,000 20 20
+ GSM-EFR frame N/A 8,000 20 20
+ L8 sample 8 var. 20
+ L16 sample 16 var. 20
+ LPC frame N/A 8,000 20 20
+ MPA frame N/A var. var.
+ PCMA sample 8 var. 20
+ PCMU sample 8 var. 20
+ QCELP frame N/A 8,000 20 20
+ VDVI sample var. var. 20
+
+ Table 1: Properties of Audio Encodings (N/A: not applicable; var.:
+ variable)
+
+ The characteristics of the audio encodings described in this document
+ are shown in Table 1; they are listed in order of their payload type
+ in Table 4. While most audio codecs are only specified for a fixed
+ sampling rate, some sample-based algorithms (indicated by an entry of
+ "var." in the sampling rate column of Table 1) may be used with
+ different sampling rates, resulting in different coded bit rates.
+ When used with a sampling rate other than that for which a static
+ payload type is defined, non-RTP means beyond the scope of this memo
+ MUST be used to define a dynamic payload type and MUST indicate the
+ selected RTP timestamp clock rate, which is usually the same as the
+ sampling rate for audio.
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 12]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.1 DVI4
+
+ DVI4 uses an adaptive delta pulse code modulation (ADPCM) encoding
+ scheme that was specified by the Interactive Multimedia Association
+ (IMA) as the "IMA ADPCM wave type". However, the encoding defined
+ here as DVI4 differs in three respects from the IMA specification:
+
+ o The RTP DVI4 header contains the predicted value rather than the
+ first sample value contained in the IMA ADPCM block header.
+
+ o IMA ADPCM blocks contain an odd number of samples, since the first
+ sample of a block is contained just in the header (uncompressed),
+ followed by an even number of compressed samples. DVI4 has an
+ even number of compressed samples only, using the `predict' word
+ from the header to decode the first sample.
+
+ o For DVI4, the 4-bit samples are packed with the first sample in
+ the four most significant bits and the second sample in the four
+ least significant bits. In the IMA ADPCM codec, the samples are
+ packed in the opposite order.
+
+ Each packet contains a single DVI block. This profile only defines
+ the 4-bit-per-sample version, while IMA also specified a 3-bit-per-
+ sample encoding.
+
+ The "header" word for each channel has the following structure:
+
+ int16 predict; /* predicted value of first sample
+ from the previous block (L16 format) */
+ u_int8 index; /* current index into stepsize table */
+ u_int8 reserved; /* set to zero by sender, ignored by receiver */
+
+ Each octet following the header contains two 4-bit samples, thus the
+ number of samples per packet MUST be even because there is no means
+ to indicate a partially filled last octet.
+
+ Packing of samples for multiple channels is for further study.
+
+ The IMA ADPCM algorithm was described in the document IMA Recommended
+ Practices for Enhancing Digital Audio Compatibility in Multimedia
+ Systems (version 3.0). However, the Interactive Multimedia
+ Association ceased operations in 1997. Resources for an archived
+ copy of that document and a software implementation of the RTP DVI4
+ encoding are listed in Section 13.
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 13]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.2 G722
+
+ G722 is specified in ITU-T Recommendation G.722, "7 kHz audio-coding
+ within 64 kbit/s". The G.722 encoder produces a stream of octets,
+ each of which SHALL be octet-aligned in an RTP packet. The first bit
+ transmitted in the G.722 octet, which is the most significant bit of
+ the higher sub-band sample, SHALL correspond to the most significant
+ bit of the octet in the RTP packet.
+
+ Even though the actual sampling rate for G.722 audio is 16,000 Hz,
+ the RTP clock rate for the G722 payload format is 8,000 Hz because
+ that value was erroneously assigned in RFC 1890 and must remain
+ unchanged for backward compatibility. The octet rate or sample-pair
+ rate is 8,000 Hz.
+
+4.5.3 G723
+
+ G723 is specified in ITU Recommendation G.723.1, "Dual-rate speech
+ coder for multimedia communications transmitting at 5.3 and 6.3
+ kbit/s". The G.723.1 5.3/6.3 kbit/s codec was defined by the ITU-T
+ as a mandatory codec for ITU-T H.324 GSTN videophone terminal
+ applications. The algorithm has a floating point specification in
+ Annex B to G.723.1, a silence compression algorithm in Annex A to
+ G.723.1 and a scalable channel coding scheme for wireless
+ applications in G.723.1 Annex C.
+
+ This Recommendation specifies a coded representation that can be used
+ for compressing the speech signal component of multi-media services
+ at a very low bit rate. Audio is encoded in 30 ms frames, with an
+ additional delay of 7.5 ms due to look-ahead. A G.723.1 frame can be
+ one of three sizes: 24 octets (6.3 kb/s frame), 20 octets (5.3 kb/s
+ frame), or 4 octets. These 4-octet frames are called SID frames
+ (Silence Insertion Descriptor) and are used to specify comfort noise
+ parameters. There is no restriction on how 4, 20, and 24 octet
+ frames are intermixed. The least significant two bits of the first
+ octet in the frame determine the frame size and codec type:
+
+ bits content octets/frame
+ 00 high-rate speech (6.3 kb/s) 24
+ 01 low-rate speech (5.3 kb/s) 20
+ 10 SID frame 4
+ 11 reserved
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 14]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ It is possible to switch between the two rates at any 30 ms frame
+ boundary. Both (5.3 kb/s and 6.3 kb/s) rates are a mandatory part of
+ the encoder and decoder. Receivers MUST accept both data rates and
+ MUST accept SID frames unless restriction of these capabilities has
+ been signaled. The MIME registration for G723 in RFC 3555 [7]
+ specifies parameters that MAY be used with MIME or SDP to restrict to
+ a single data rate or to restrict the use of SID frames. This coder
+ was optimized to represent speech with near-toll quality at the above
+ rates using a limited amount of complexity.
+
+ The packing of the encoded bit stream into octets and the
+ transmission order of the octets is specified in Rec. G.723.1 and is
+ the same as that produced by the G.723 C code reference
+ implementation. For the 6.3 kb/s data rate, this packing is
+ illustrated as follows, where the header (HDR) bits are always "0 0"
+ as shown in Fig. 1 to indicate operation at 6.3 kb/s, and the Z bit
+ is always set to zero. The diagrams show the bit packing in "network
+ byte order", also known as big-endian order. The bits of each 32-bit
+ word are numbered 0 to 31, with the most significant bit on the left
+ and numbered 0. The octets (bytes) of each word are transmitted most
+ significant octet first. The bits of each data field are numbered in
+ the order of the bit stream representation of the encoding (least
+ significant bit first). The vertical bars indicate the boundaries
+ between field fragments.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 15]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | LPC |HDR| LPC | LPC | ACL0 |LPC|
+ | | | | | | |
+ |0 0 0 0 0 0|0 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2|
+ |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ACL2 |ACL|A| GAIN0 |ACL|ACL| GAIN0 | GAIN1 |
+ | | 1 |C| | 3 | 2 | | |
+ |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|
+ |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | GAIN2 | GAIN1 | GAIN2 | GAIN3 | GRID | GAIN3 |
+ | | | | | | |
+ |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0|
+ |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | MSBPOS |Z|POS| MSBPOS | POS0 |POS| POS0 |
+ | | | 0 | | | 1 | |
+ |0 0 0 0 0 0 0|0|0 0|1 1 1 0 0 0|0 0 0 0 0 0 0 0|0 0|1 1 1 1 1 1|
+ |6 5 4 3 2 1 0| |1 0|2 1 0 9 8 7|9 8 7 6 5 4 3 2|1 0|5 4 3 2 1 0|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | POS1 | POS2 | POS1 | POS2 | POS3 | POS2 |
+ | | | | | | |
+ |0 0 0 0 0 0 0 0|0 0 0 0|1 1 1 1|1 1 0 0 0 0 0 0|0 0 0 0|1 1 1 1|
+ |9 8 7 6 5 4 3 2|3 2 1 0|3 2 1 0|1 0 9 8 7 6 5 4|3 2 1 0|5 4 3 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | POS3 | PSIG0 |POS|PSIG2| PSIG1 | PSIG3 |PSIG2|
+ | | | 3 | | | | |
+ |1 1 0 0 0 0 0 0|0 0 0 0 0 0|1 1|0 0 0|0 0 0 0 0|0 0 0 0 0|0 0 0|
+ |1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|2 1 0|4 3 2 1 0|4 3 2 1 0|5 4 3|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 1: G.723 (6.3 kb/s) bit packing
+
+ For the 5.3 kb/s data rate, the header (HDR) bits are always "0 1",
+ as shown in Fig. 2, to indicate operation at 5.3 kb/s.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 16]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | LPC |HDR| LPC | LPC | ACL0 |LPC|
+ | | | | | | |
+ |0 0 0 0 0 0|0 1|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2|
+ |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ACL2 |ACL|A| GAIN0 |ACL|ACL| GAIN0 | GAIN1 |
+ | | 1 |C| | 3 | 2 | | |
+ |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|
+ |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | GAIN2 | GAIN1 | GAIN2 | GAIN3 | GRID | GAIN3 |
+ | | | | | | |
+ |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0|
+ |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | POS0 | POS1 | POS0 | POS1 | POS2 |
+ | | | | | |
+ |0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|
+ |7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | POS3 | POS2 | POS3 | PSIG1 | PSIG0 | PSIG3 | PSIG2 |
+ | | | | | | | |
+ |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0|
+ |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|3 2 1 0|3 2 1 0|3 2 1 0|3 2 1 0|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 2: G.723 (5.3 kb/s) bit packing
+
+ The packing of G.723.1 SID (silence) frames, which are indicated by
+ the header (HDR) bits having the pattern "1 0", is depicted in Fig.
+ 3.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | LPC |HDR| LPC | LPC | GAIN |LPC|
+ | | | | | | |
+ |0 0 0 0 0 0|1 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2|
+ |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 3: G.723 SID mode bit packing
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 17]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.4 G726-40, G726-32, G726-24, and G726-16
+
+ ITU-T Recommendation G.726 describes, among others, the algorithm
+ recommended for conversion of a single 64 kbit/s A-law or mu-law PCM
+ channel encoded at 8,000 samples/sec to and from a 40, 32, 24, or 16
+ kbit/s channel. The conversion is applied to the PCM stream using an
+ Adaptive Differential Pulse Code Modulation (ADPCM) transcoding
+ technique. The ADPCM representation consists of a series of
+ codewords with a one-to-one correspondence to the samples in the PCM
+ stream. The G726 data rates of 40, 32, 24, and 16 kbit/s have
+ codewords of 5, 4, 3, and 2 bits, respectively.
+
+ The 16 and 24 kbit/s encodings do not provide toll quality speech.
+ They are designed for use in overloaded Digital Circuit
+ Multiplication Equipment (DCME). ITU-T G.726 recommends that the 16
+ and 24 kbit/s encodings should be alternated with higher data rate
+ encodings to provide an average sample size of between 3.5 and 3.7
+ bits per sample.
+
+ The encodings of G.726 are here denoted as G726-40, G726-32, G726-24,
+ and G726-16. Prior to 1990, G721 described the 32 kbit/s ADPCM
+ encoding, and G723 described the 40, 32, and 16 kbit/s encodings.
+ Thus, G726-32 designates the same algorithm as G721 in RFC 1890.
+
+ A stream of G726 codewords contains no information on the encoding
+ being used, therefore transitions between G726 encoding types are not
+ permitted within a sequence of packed codewords. Applications MUST
+ determine the encoding type of packed codewords from the RTP payload
+ identifier.
+
+ No payload-specific header information SHALL be included as part of
+ the audio data. A stream of G726 codewords MUST be packed into
+ octets as follows: the first codeword is placed into the first octet
+ such that the least significant bit of the codeword aligns with the
+ least significant bit in the octet, the second codeword is then
+ packed so that its least significant bit coincides with the least
+ significant unoccupied bit in the octet. When a complete codeword
+ cannot be placed into an octet, the bits overlapping the octet
+ boundary are placed into the least significant bits of the next
+ octet. Packing MUST end with a completely packed final octet. The
+ number of codewords packed will therefore be a multiple of 8, 2, 8,
+ and 4 for G726-40, G726-32, G726-24, and G726-16, respectively. An
+ example of the packing scheme for G726-32 codewords is as shown,
+ where bit 7 is the least significant bit of the first octet, and bit
+ A3 is the least significant bit of the first codeword:
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 18]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+ |B B B B|A A A A|D D D D|C C C C| ...
+ |0 1 2 3|0 1 2 3|0 1 2 3|0 1 2 3|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+
+ An example of the packing scheme for G726-24 codewords follows, where
+ again bit 7 is the least significant bit of the first octet, and bit
+ A2 is the least significant bit of the first codeword:
+
+ 0 1 2
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+ |C C|B B B|A A A|F|E E E|D D D|C|H H H|G G G|F F| ...
+ |1 2|0 1 2|0 1 2|2|0 1 2|0 1 2|0|0 1 2|0 1 2|0 1|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
+
+ Note that the "little-endian" direction in which samples are packed
+ into octets in the G726-16, -24, -32 and -40 payload formats
+ specified here is consistent with ITU-T Recommendation X.420, but is
+ the opposite of what is specified in ITU-T Recommendation I.366.2
+ Annex E for ATM AAL2 transport. A second set of RTP payload formats
+ matching the packetization of I.366.2 Annex E and identified by MIME
+ subtypes AAL2-G726-16, -24, -32 and -40 will be specified in a
+ separate document.
+
+4.5.5 G728
+
+ G728 is specified in ITU-T Recommendation G.728, "Coding of speech at
+ 16 kbit/s using low-delay code excited linear prediction".
+
+ A G.728 encoder translates 5 consecutive audio samples into a 10-bit
+ codebook index, resulting in a bit rate of 16 kb/s for audio sampled
+ at 8,000 samples per second. The group of five consecutive samples
+ is called a vector. Four consecutive vectors, labeled V1 to V4
+ (where V1 is to be played first by the receiver), build one G.728
+ frame. The four vectors of 40 bits are packed into 5 octets, labeled
+ B1 through B5. B1 SHALL be placed first in the RTP packet.
+
+ Referring to the figure below, the principle for bit order is
+ "maintenance of bit significance". Bits from an older vector are
+ more significant than bits from newer vectors. The MSB of the frame
+ goes to the MSB of B1 and the LSB of the frame goes to LSB of B5.
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 19]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 1 2 3 3
+ 0 0 0 0 9
+ ++++++++++++++++++++++++++++++++++++++++
+ <---V1---><---V2---><---V3---><---V4---> vectors
+ <--B1--><--B2--><--B3--><--B4--><--B5--> octets
+ <------------- frame 1 ---------------->
+
+ In particular, B1 contains the eight most significant bits of V1,
+ with the MSB of V1 being the MSB of B1. B2 contains the two least
+ significant bits of V1, the more significant of the two in its MSB,
+ and the six most significant bits of V2. B1 SHALL be placed first in
+ the RTP packet and B5 last.
+
+4.5.6 G729
+
+ G729 is specified in ITU-T Recommendation G.729, "Coding of speech at
+ 8 kbit/s using conjugate structure-algebraic code excited linear
+ prediction (CS-ACELP)". A reduced-complexity version of the G.729
+ algorithm is specified in Annex A to Rec. G.729. The speech coding
+ algorithms in the main body of G.729 and in G.729 Annex A are fully
+ interoperable with each other, so there is no need to further
+ distinguish between them. An implementation that signals or accepts
+ use of G729 payload format may implement either G.729 or G.729A
+ unless restricted by additional signaling specified elsewhere related
+ specifically to the encoding rather than the payload format. The
+ G.729 and G.729 Annex A codecs were optimized to represent speech
+ with high quality, where G.729 Annex A trades some speech quality for
+ an approximate 50% complexity reduction [10]. See the next Section
+ (4.5.7) for other data rates added in later G.729 Annexes. For all
+ data rates, the sampling frequency (and RTP timestamp clock rate) is
+ 8,000 Hz.
+
+ A voice activity detector (VAD) and comfort noise generator (CNG)
+ algorithm in Annex B of G.729 is RECOMMENDED for digital simultaneous
+ voice and data applications and can be used in conjunction with G.729
+ or G.729 Annex A. A G.729 or G.729 Annex A frame contains 10 octets,
+ while the G.729 Annex B comfort noise frame occupies 2 octets.
+ Receivers MUST accept comfort noise frames if restriction of their
+ use has not been signaled. The MIME registration for G729 in RFC
+ 3555 [7] specifies a parameter that MAY be used with MIME or SDP to
+ restrict the use of comfort noise frames.
+
+ A G729 RTP packet may consist of zero or more G.729 or G.729 Annex A
+ frames, followed by zero or one G.729 Annex B frames. The presence
+ of a comfort noise frame can be deduced from the length of the RTP
+ payload. The default packetization interval is 20 ms (two frames),
+ but in some situations it may be desirable to send 10 ms packets. An
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 20]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ example would be a transition from speech to comfort noise in the
+ first 10 ms of the packet. For some applications, a longer
+ packetization interval may be required to reduce the packet rate.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |L| L1 | L2 | L3 | P1 |P| C1 |
+ |0| | | | |0| |
+ | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2 3 4|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | C1 | S1 | GA1 | GB1 | P2 | C2 |
+ | 1 1 1| | | | | |
+ |5 6 7 8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6 7|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | C2 | S2 | GA2 | GB2 |
+ | 1 1 1| | | |
+ |8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 4: G.729 and G.729A bit packing
+
+ The transmitted parameters of a G.729/G.729A 10-ms frame, consisting
+ of 80 bits, are defined in Recommendation G.729, Table 8/G.729. The
+ mapping of these parameters is given above in Fig. 4. The
+ diagrams show the bit packing in "network byte order", also known as
+ big-endian order. The bits of each 32-bit word are numbered 0 to 31,
+ with the most significant bit on the left and numbered 0. The octets
+ (bytes) of each word are transmitted most significant octet first.
+ The bits of each data field are numbered in the order as produced by
+ the G.729 C code reference implementation.
+
+ The packing of the G.729 Annex B comfort noise frame is shown in Fig.
+ 5.
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |L| LSF1 | LSF2 | GAIN |R|
+ |S| | | |E|
+ |F| | | |S|
+ |0|0 1 2 3 4|0 1 2 3|0 1 2 3 4|V| RESV = Reserved (zero)
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 5: G.729 Annex B bit packing
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 21]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.7 G729D and G729E
+
+ Annexes D and E to ITU-T Recommendation G.729 provide additional data
+ rates. Because the data rate is not signaled in the bitstream, the
+ different data rates are given distinct RTP encoding names which are
+ mapped to distinct payload type numbers. G729D indicates a 6.4
+ kbit/s coding mode (G.729 Annex D, for momentary reduction in channel
+ capacity), while G729E indicates an 11.8 kbit/s mode (G.729 Annex E,
+ for improved performance with a wide range of narrow-band input
+ signals, e.g., music and background noise). Annex E has two
+ operating modes, backward adaptive and forward adaptive, which are
+ signaled by the first two bits in each frame (the most significant
+ two bits of the first octet).
+
+ The voice activity detector (VAD) and comfort noise generator (CNG)
+ algorithm specified in Annex B of G.729 may be used with Annex D and
+ Annex E frames in addition to G.729 and G.729 Annex A frames. The
+ algorithm details for the operation of Annexes D and E with the Annex
+ B CNG are specified in G.729 Annexes F and G. Note that Annexes F
+ and G do not introduce any new encodings. Receivers MUST accept
+ comfort noise frames if restriction of their use has not been
+ signaled. The MIME registrations for G729D and G729E in RFC 3555 [7]
+ specify a parameter that MAY be used with MIME or SDP to restrict the
+ use of comfort noise frames.
+
+ For G729D, an RTP packet may consist of zero or more G.729 Annex D
+ frames, followed by zero or one G.729 Annex B frame. Similarly, for
+ G729E, an RTP packet may consist of zero or more G.729 Annex E
+ frames, followed by zero or one G.729 Annex B frame. The presence of
+ a comfort noise frame can be deduced from the length of the RTP
+ payload.
+
+ A single RTP packet must contain frames of only one data rate,
+ optionally followed by one comfort noise frame. The data rate may be
+ changed from packet to packet by changing the payload type number.
+ G.729 Annexes D, E and H describe what the encoding and decoding
+ algorithms must do to accommodate a change in data rate.
+
+ For G729D, the bits of a G.729 Annex D frame are formatted as shown
+ below in Fig. 6 (cf. Table D.1/G.729). The frame length is 64 bits.
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 22]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |L| L1 | L2 | L3 | P1 | C1 |
+ |0| | | | | |
+ | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7|0 1 2 3 4 5|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | C1 |S1 | GA1 | GB1 | P2 | C2 |S2 | GA2 | GB2 |
+ | | | | | | | | | |
+ |6 7 8|0 1|0 1 2|0 1 2|0 1 2 3|0 1 2 3 4 5 6 7 8|0 1|0 1 2|0 1 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 6: G.729 Annex D bit packing
+
+ The net bit rate for the G.729 Annex E algorithm is 11.8 kbit/s and a
+ total of 118 bits are used. Two bits are appended as "don't care"
+ bits to complete an integer number of octets for the frame. For
+ G729E, the bits of a data frame are formatted as shown in the next
+ two diagrams (cf. Table E.1/G.729). The fields for the G729E forward
+ adaptive mode are packed as shown in Fig. 7.
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |0 0|L| L1 | L2 | L3 | P1 |P| C0_1|
+ | |0| | | | |0| |
+ | | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | C1_1 | C2_1 | C3_1 | C4_1 |
+ | | | | | |
+ |3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | GA1 | GB1 | P2 | C0_2 | C1_2 | C2_2 |
+ | | | | | | |
+ |0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | C3_2 | C4_2 | GA2 | GB2 |DC |
+ | | | | | | |
+ |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 7: G.729 Annex E (forward adaptive mode) bit packing
+
+ The fields for the G729E backward adaptive mode are packed as shown
+ in Fig. 8.
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 23]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ |1 1| P1 |P| C0_1 | C1_1 |
+ | | |0| 1 1 1| |
+ | |0 1 2 3 4 5 6 7|0|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | C2_1 | C3_1 | C4_1 |GA1 | GB1 |P2 |
+ | | | | | | | |
+ |8 9|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | C0_2 | C1_2 | C2_2 |
+ | | 1 1 1| | |
+ |2 3 4|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7 8 9|0 1 2 3 4 5|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | C3_2 | C4_2 | GA2 | GB2 |DC |
+ | | | | | | |
+ |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1|
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Figure 8: G.729 Annex E (backward adaptive mode) bit packing
+
+4.5.8 GSM
+
+ GSM (Groupe Special Mobile) denotes the European GSM 06.10 standard
+ for full-rate speech transcoding, ETS 300 961, which is based on
+ RPE/LTP (residual pulse excitation/long term prediction) coding at a
+ rate of 13 kb/s [11,12,13]. The text of the standard can be obtained
+ from:
+
+ ETSI (European Telecommunications Standards Institute)
+ ETSI Secretariat: B.P.152
+ F-06561 Valbonne Cedex
+ France
+ Phone: +33 92 94 42 00
+ Fax: +33 93 65 47 16
+
+ Blocks of 160 audio samples are compressed into 33 octets, for an
+ effective data rate of 13,200 b/s.
+
+4.5.8.1 General Packaging Issues
+
+ The GSM standard (ETS 300 961) specifies the bit stream produced by
+ the codec, but does not specify how these bits should be packed for
+ transmission. The packetization specified here has subsequently been
+ adopted in ETSI Technical Specification TS 101 318. Some software
+ implementations of the GSM codec use a different packing than that
+ specified here.
+
+
+
+Schulzrinne & Casner Standards Track [Page 24]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ field field name bits field field name bits
+ ________________________________________________
+ 1 LARc[0] 6 39 xmc[22] 3
+ 2 LARc[1] 6 40 xmc[23] 3
+ 3 LARc[2] 5 41 xmc[24] 3
+ 4 LARc[3] 5 42 xmc[25] 3
+ 5 LARc[4] 4 43 Nc[2] 7
+ 6 LARc[5] 4 44 bc[2] 2
+ 7 LARc[6] 3 45 Mc[2] 2
+ 8 LARc[7] 3 46 xmaxc[2] 6
+ 9 Nc[0] 7 47 xmc[26] 3
+ 10 bc[0] 2 48 xmc[27] 3
+ 11 Mc[0] 2 49 xmc[28] 3
+ 12 xmaxc[0] 6 50 xmc[29] 3
+ 13 xmc[0] 3 51 xmc[30] 3
+ 14 xmc[1] 3 52 xmc[31] 3
+ 15 xmc[2] 3 53 xmc[32] 3
+ 16 xmc[3] 3 54 xmc[33] 3
+ 17 xmc[4] 3 55 xmc[34] 3
+ 18 xmc[5] 3 56 xmc[35] 3
+ 19 xmc[6] 3 57 xmc[36] 3
+ 20 xmc[7] 3 58 xmc[37] 3
+ 21 xmc[8] 3 59 xmc[38] 3
+ 22 xmc[9] 3 60 Nc[3] 7
+ 23 xmc[10] 3 61 bc[3] 2
+ 24 xmc[11] 3 62 Mc[3] 2
+ 25 xmc[12] 3 63 xmaxc[3] 6
+ 26 Nc[1] 7 64 xmc[39] 3
+ 27 bc[1] 2 65 xmc[40] 3
+ 28 Mc[1] 2 66 xmc[41] 3
+ 29 xmaxc[1] 6 67 xmc[42] 3
+ 30 xmc[13] 3 68 xmc[43] 3
+ 31 xmc[14] 3 69 xmc[44] 3
+ 32 xmc[15] 3 70 xmc[45] 3
+ 33 xmc[16] 3 71 xmc[46] 3
+ 34 xmc[17] 3 72 xmc[47] 3
+ 35 xmc[18] 3 73 xmc[48] 3
+ 36 xmc[19] 3 74 xmc[49] 3
+ 37 xmc[20] 3 75 xmc[50] 3
+ 38 xmc[21] 3 76 xmc[51] 3
+
+ Table 2: Ordering of GSM variables
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 25]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ Octet Bit 0 Bit 1 Bit 2 Bit 3 Bit 4 Bit 5 Bit 6 Bit 7
+ _____________________________________________________________________
+ 0 1 1 0 1 LARc0.0 LARc0.1 LARc0.2 LARc0.3
+ 1 LARc0.4 LARc0.5 LARc1.0 LARc1.1 LARc1.2 LARc1.3 LARc1.4 LARc1.5
+ 2 LARc2.0 LARc2.1 LARc2.2 LARc2.3 LARc2.4 LARc3.0 LARc3.1 LARc3.2
+ 3 LARc3.3 LARc3.4 LARc4.0 LARc4.1 LARc4.2 LARc4.3 LARc5.0 LARc5.1
+ 4 LARc5.2 LARc5.3 LARc6.0 LARc6.1 LARc6.2 LARc7.0 LARc7.1 LARc7.2
+ 5 Nc0.0 Nc0.1 Nc0.2 Nc0.3 Nc0.4 Nc0.5 Nc0.6 bc0.0
+ 6 bc0.1 Mc0.0 Mc0.1 xmaxc00 xmaxc01 xmaxc02 xmaxc03 xmaxc04
+ 7 xmaxc05 xmc0.0 xmc0.1 xmc0.2 xmc1.0 xmc1.1 xmc1.2 xmc2.0
+ 8 xmc2.1 xmc2.2 xmc3.0 xmc3.1 xmc3.2 xmc4.0 xmc4.1 xmc4.2
+ 9 xmc5.0 xmc5.1 xmc5.2 xmc6.0 xmc6.1 xmc6.2 xmc7.0 xmc7.1
+ 10 xmc7.2 xmc8.0 xmc8.1 xmc8.2 xmc9.0 xmc9.1 xmc9.2 xmc10.0
+ 11 xmc10.1 xmc10.2 xmc11.0 xmc11.1 xmc11.2 xmc12.0 xmc12.1 xmc12.2
+ 12 Nc1.0 Nc1.1 Nc1.2 Nc1.3 Nc1.4 Nc1.5 Nc1.6 bc1.0
+ 13 bc1.1 Mc1.0 Mc1.1 xmaxc10 xmaxc11 xmaxc12 xmaxc13 xmaxc14
+ 14 xmaxc15 xmc13.0 xmc13.1 xmc13.2 xmc14.0 xmc14.1 xmc14.2 xmc15.0
+ 15 xmc15.1 xmc15.2 xmc16.0 xmc16.1 xmc16.2 xmc17.0 xmc17.1 xmc17.2
+ 16 xmc18.0 xmc18.1 xmc18.2 xmc19.0 xmc19.1 xmc19.2 xmc20.0 xmc20.1
+ 17 xmc20.2 xmc21.0 xmc21.1 xmc21.2 xmc22.0 xmc22.1 xmc22.2 xmc23.0
+ 18 xmc23.1 xmc23.2 xmc24.0 xmc24.1 xmc24.2 xmc25.0 xmc25.1 xmc25.2
+ 19 Nc2.0 Nc2.1 Nc2.2 Nc2.3 Nc2.4 Nc2.5 Nc2.6 bc2.0
+ 20 bc2.1 Mc2.0 Mc2.1 xmaxc20 xmaxc21 xmaxc22 xmaxc23 xmaxc24
+ 21 xmaxc25 xmc26.0 xmc26.1 xmc26.2 xmc27.0 xmc27.1 xmc27.2 xmc28.0
+ 22 xmc28.1 xmc28.2 xmc29.0 xmc29.1 xmc29.2 xmc30.0 xmc30.1 xmc30.2
+ 23 xmc31.0 xmc31.1 xmc31.2 xmc32.0 xmc32.1 xmc32.2 xmc33.0 xmc33.1
+ 24 xmc33.2 xmc34.0 xmc34.1 xmc34.2 xmc35.0 xmc35.1 xmc35.2 xmc36.0
+ 25 xmc36.1 xmc36.2 xmc37.0 xmc37.1 xmc37.2 xmc38.0 xmc38.1 xmc38.2
+ 26 Nc3.0 Nc3.1 Nc3.2 Nc3.3 Nc3.4 Nc3.5 Nc3.6 bc3.0
+ 27 bc3.1 Mc3.0 Mc3.1 xmaxc30 xmaxc31 xmaxc32 xmaxc33 xmaxc34
+ 28 xmaxc35 xmc39.0 xmc39.1 xmc39.2 xmc40.0 xmc40.1 xmc40.2 xmc41.0
+ 29 xmc41.1 xmc41.2 xmc42.0 xmc42.1 xmc42.2 xmc43.0 xmc43.1 xmc43.2
+ 30 xmc44.0 xmc44.1 xmc44.2 xmc45.0 xmc45.1 xmc45.2 xmc46.0 xmc46.1
+ 31 xmc46.2 xmc47.0 xmc47.1 xmc47.2 xmc48.0 xmc48.1 xmc48.2 xmc49.0
+ 32 xmc49.1 xmc49.2 xmc50.0 xmc50.1 xmc50.2 xmc51.0 xmc51.1 xmc51.2
+
+ Table 3: GSM payload format
+
+ In the GSM packing used by RTP, the bits SHALL be packed beginning
+ from the most significant bit. Every 160 sample GSM frame is coded
+ into one 33 octet (264 bit) buffer. Every such buffer begins with a
+ 4 bit signature (0xD), followed by the MSB encoding of the fields of
+ the frame. The first octet thus contains 1101 in the 4 most
+ significant bits (0-3) and the 4 most significant bits of F1 (0-3) in
+ the 4 least significant bits (4-7). The second octet contains the 2
+ least significant bits of F1 in bits 0-1, and F2 in bits 2-7, and so
+ on. The order of the fields in the frame is described in Table 2.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 26]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.8.2 GSM Variable Names and Numbers
+
+ In the RTP encoding we have the bit pattern described in Table 3,
+ where F.i signifies the ith bit of the field F, bit 0 is the most
+ significant bit, and the bits of every octet are numbered from 0 to 7
+ from most to least significant.
+
+4.5.9 GSM-EFR
+
+ GSM-EFR denotes GSM 06.60 enhanced full rate speech transcoding,
+ specified in ETS 300 726 which is available from ETSI at the address
+ given in Section 4.5.8. This codec has a frame length of 244 bits.
+ For transmission in RTP, each codec frame is packed into a 31 octet
+ (248 bit) buffer beginning with a 4-bit signature 0xC in a manner
+ similar to that specified here for the original GSM 06.10 codec. The
+ packing is specified in ETSI Technical Specification TS 101 318.
+
+4.5.10 L8
+
+ L8 denotes linear audio data samples, using 8-bits of precision with
+ an offset of 128, that is, the most negative signal is encoded as
+ zero.
+
+4.5.11 L16
+
+ L16 denotes uncompressed audio data samples, using 16-bit signed
+ representation with 65,535 equally divided steps between minimum and
+ maximum signal level, ranging from -32,768 to 32,767. The value is
+ represented in two's complement notation and transmitted in network
+ byte order (most significant byte first).
+
+ The MIME registration for L16 in RFC 3555 [7] specifies parameters
+ that MAY be used with MIME or SDP to indicate that analog pre-
+ emphasis was applied to the signal before quantization or to indicate
+ that a multiple-channel audio stream follows a different channel
+ ordering convention than is specified in Section 4.1.
+
+4.5.12 LPC
+
+ LPC designates an experimental linear predictive encoding contributed
+ by Ron Frederick, which is based on an implementation written by Ron
+ Zuckerman posted to the Usenet group comp.dsp on June 26, 1992. The
+ codec generates 14 octets for every frame. The framesize is set to
+ 20 ms, resulting in a bit rate of 5,600 b/s.
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 27]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.13 MPA
+
+ MPA denotes MPEG-1 or MPEG-2 audio encapsulated as elementary
+ streams. The encoding is defined in ISO standards ISO/IEC 11172-3
+ and 13818-3. The encapsulation is specified in RFC 2250 [14].
+
+ The encoding may be at any of three levels of complexity, called
+ Layer I, II and III. The selected layer as well as the sampling rate
+ and channel count are indicated in the payload. The RTP timestamp
+ clock rate is always 90,000, independent of the sampling rate.
+ MPEG-1 audio supports sampling rates of 32, 44.1, and 48 kHz (ISO/IEC
+ 11172-3, section 1.1; "Scope"). MPEG-2 supports sampling rates of
+ 16, 22.05 and 24 kHz. The number of samples per frame is fixed, but
+ the frame size will vary with the sampling rate and bit rate.
+
+ The MIME registration for MPA in RFC 3555 [7] specifies parameters
+ that MAY be used with MIME or SDP to restrict the selection of layer,
+ channel count, sampling rate, and bit rate.
+
+4.5.14 PCMA and PCMU
+
+ PCMA and PCMU are specified in ITU-T Recommendation G.711. Audio
+ data is encoded as eight bits per sample, after logarithmic scaling.
+ PCMU denotes mu-law scaling, PCMA A-law scaling. A detailed
+ description is given by Jayant and Noll [15]. Each G.711 octet SHALL
+ be octet-aligned in an RTP packet. The sign bit of each G.711 octet
+ SHALL correspond to the most significant bit of the octet in the RTP
+ packet (i.e., assuming the G.711 samples are handled as octets on the
+ host machine, the sign bit SHALL be the most significant bit of the
+ octet as defined by the host machine format). The 56 kb/s and 48
+ kb/s modes of G.711 are not applicable to RTP, since PCMA and PCMU
+ MUST always be transmitted as 8-bit samples.
+
+ See Section 4.1 regarding silence suppression.
+
+4.5.15 QCELP
+
+ The Electronic Industries Association (EIA) & Telecommunications
+ Industry Association (TIA) standard IS-733, "TR45: High Rate Speech
+ Service Option for Wideband Spread Spectrum Communications Systems",
+ defines the QCELP audio compression algorithm for use in wireless
+ CDMA applications. The QCELP CODEC compresses each 20 milliseconds
+ of 8,000 Hz, 16-bit sampled input speech into one of four different
+ size output frames: Rate 1 (266 bits), Rate 1/2 (124 bits), Rate 1/4
+ (54 bits) or Rate 1/8 (20 bits). For typical speech patterns, this
+ results in an average output of 6.8 kb/s for normal mode and 4.7 kb/s
+ for reduced rate mode. The packetization of the QCELP audio codec is
+ described in [16].
+
+
+
+Schulzrinne & Casner Standards Track [Page 28]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+4.5.16 RED
+
+ The redundant audio payload format "RED" is specified by RFC 2198
+ [17]. It defines a means by which multiple redundant copies of an
+ audio packet may be transmitted in a single RTP stream. Each packet
+ in such a stream contains, in addition to the audio data for that
+ packetization interval, a (more heavily compressed) copy of the data
+ from a previous packetization interval. This allows an approximation
+ of the data from lost packets to be recovered upon decoding of a
+ subsequent packet, giving much improved sound quality when compared
+ with silence substitution for lost packets.
+
+4.5.17 VDVI
+
+ VDVI is a variable-rate version of DVI4, yielding speech bit rates of
+ between 10 and 25 kb/s. It is specified for single-channel operation
+ only. Samples are packed into octets starting at the most-
+ significant bit. The last octet is padded with 1 bits if the last
+ sample does not fill the last octet. This padding is distinct from
+ the valid codewords. The receiver needs to detect the padding
+ because there is no explicit count of samples in the packet.
+
+ It uses the following encoding:
+
+ DVI4 codeword VDVI bit pattern
+ _______________________________
+ 0 00
+ 1 010
+ 2 1100
+ 3 11100
+ 4 111100
+ 5 1111100
+ 6 11111100
+ 7 11111110
+ 8 10
+ 9 011
+ 10 1101
+ 11 11101
+ 12 111101
+ 13 1111101
+ 14 11111101
+ 15 11111111
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 29]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+5. Video
+
+ The following sections describe the video encodings that are defined
+ in this memo and give their abbreviated names used for
+ identification. These video encodings and their payload types are
+ listed in Table 5.
+
+ All of these video encodings use an RTP timestamp frequency of 90,000
+ Hz, the same as the MPEG presentation time stamp frequency. This
+ frequency yields exact integer timestamp increments for the typical
+ 24 (HDTV), 25 (PAL), 29.97 (NTSC) and 30 Hz (HDTV) frame rates
+ and 50, 59.94 and 60 Hz field rates. While 90 kHz is the RECOMMENDED
+ rate for future video encodings used within this profile, other rates
+ MAY be used. However, it is not sufficient to use the video frame
+ rate (typically between 15 and 30 Hz) because that does not provide
+ adequate resolution for typical synchronization requirements when
+ calculating the RTP timestamp corresponding to the NTP timestamp in
+ an RTCP SR packet. The timestamp resolution MUST also be sufficient
+ for the jitter estimate contained in the receiver reports.
+
+ For most of these video encodings, the RTP timestamp encodes the
+ sampling instant of the video image contained in the RTP data packet.
+ If a video image occupies more than one packet, the timestamp is the
+ same on all of those packets. Packets from different video images
+ are distinguished by their different timestamps.
+
+ Most of these video encodings also specify that the marker bit of the
+ RTP header SHOULD be set to one in the last packet of a video frame
+ and otherwise set to zero. Thus, it is not necessary to wait for a
+ following packet with a different timestamp to detect that a new
+ frame should be displayed.
+
+5.1 CelB
+
+ The CELL-B encoding is a proprietary encoding proposed by Sun
+ Microsystems. The byte stream format is described in RFC 2029 [18].
+
+5.2 JPEG
+
+ The encoding is specified in ISO Standards 10918-1 and 10918-2. The
+ RTP payload format is as specified in RFC 2435 [19].
+
+5.3 H261
+
+ The encoding is specified in ITU-T Recommendation H.261, "Video codec
+ for audiovisual services at p x 64 kbit/s". The packetization and
+ RTP-specific properties are described in RFC 2032 [20].
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 30]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+5.4 H263
+
+ The encoding is specified in the 1996 version of ITU-T Recommendation
+ H.263, "Video coding for low bit rate communication". The
+ packetization and RTP-specific properties are described in RFC 2190
+ [21]. The H263-1998 payload format is RECOMMENDED over this one for
+ use by new implementations.
+
+5.5 H263-1998
+
+ The encoding is specified in the 1998 version of ITU-T Recommendation
+ H.263, "Video coding for low bit rate communication". The
+ packetization and RTP-specific properties are described in RFC 2429
+ [22]. Because the 1998 version of H.263 is a superset of the 1996
+ syntax, this payload format can also be used with the 1996 version of
+ H.263, and is RECOMMENDED for this use by new implementations. This
+ payload format does not replace RFC 2190, which continues to be used
+ by existing implementations, and may be required for backward
+ compatibility in new implementations. Implementations using the new
+ features of the 1998 version of H.263 MUST use the payload format
+ described in RFC 2429.
+
+5.6 MPV
+
+ MPV designates the use of MPEG-1 and MPEG-2 video encoding elementary
+ streams as specified in ISO Standards ISO/IEC 11172 and 13818-2,
+ respectively. The RTP payload format is as specified in RFC 2250
+ [14], Section 3.
+
+ The MIME registration for MPV in RFC 3555 [7] specifies a parameter
+ that MAY be used with MIME or SDP to restrict the selection of the
+ type of MPEG video.
+
+5.7 MP2T
+
+ MP2T designates the use of MPEG-2 transport streams, for either audio
+ or video. The RTP payload format is described in RFC 2250 [14],
+ Section 2.
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 31]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+5.8 nv
+
+ The encoding is implemented in the program `nv', version 4, developed
+ at Xerox PARC by Ron Frederick. Further information is available
+ from the author:
+
+ Ron Frederick
+ Blue Coat Systems Inc.
+ 650 Almanor Avenue
+ Sunnyvale, CA 94085
+ United States
+ EMail: ronf@bluecoat.com
+
+6. Payload Type Definitions
+
+ Tables 4 and 5 define this profile's static payload type values for
+ the PT field of the RTP data header. In addition, payload type
+ values in the range 96-127 MAY be defined dynamically through a
+ conference control protocol, which is beyond the scope of this
+ document. For example, a session directory could specify that for a
+ given session, payload type 96 indicates PCMU encoding, 8,000 Hz
+ sampling rate, 2 channels. Entries in Tables 4 and 5 with payload
+ type "dyn" have no static payload type assigned and are only used
+ with a dynamic payload type. Payload type 2 was assigned to G721 in
+ RFC 1890 and to its equivalent successor G726-32 in draft versions of
+ this specification, but its use is now deprecated and that static
+ payload type is marked reserved due to conflicting use for the
+ payload formats G726-32 and AAL2-G726-32 (see Section 4.5.4).
+ Payload type 13 indicates the Comfort Noise (CN) payload format
+ specified in RFC 3389 [9]. Payload type 19 is marked "reserved"
+ because some draft versions of this specification assigned that
+ number to an earlier version of the comfort noise payload format.
+ The payload type range 72-76 is marked "reserved" so that RTCP and
+ RTP packets can be reliably distinguished (see Section "Summary of
+ Protocol Constants" of the RTP protocol specification).
+
+ The payload types currently defined in this profile are assigned to
+ exactly one of three categories or media types: audio only, video
+ only and those combining audio and video. The media types are marked
+ in Tables 4 and 5 as "A", "V" and "AV", respectively. Payload types
+ of different media types SHALL NOT be interleaved or multiplexed
+ within a single RTP session, but multiple RTP sessions MAY be used in
+ parallel to send multiple media types. An RTP source MAY change
+ payload types within the same media type during a session. See the
+ section "Multiplexing RTP Sessions" of RFC 3550 for additional
+ explanation.
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 32]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ PT encoding media type clock rate channels
+ name (Hz)
+ ___________________________________________________
+ 0 PCMU A 8,000 1
+ 1 reserved A
+ 2 reserved A
+ 3 GSM A 8,000 1
+ 4 G723 A 8,000 1
+ 5 DVI4 A 8,000 1
+ 6 DVI4 A 16,000 1
+ 7 LPC A 8,000 1
+ 8 PCMA A 8,000 1
+ 9 G722 A 8,000 1
+ 10 L16 A 44,100 2
+ 11 L16 A 44,100 1
+ 12 QCELP A 8,000 1
+ 13 CN A 8,000 1
+ 14 MPA A 90,000 (see text)
+ 15 G728 A 8,000 1
+ 16 DVI4 A 11,025 1
+ 17 DVI4 A 22,050 1
+ 18 G729 A 8,000 1
+ 19 reserved A
+ 20 unassigned A
+ 21 unassigned A
+ 22 unassigned A
+ 23 unassigned A
+ dyn G726-40 A 8,000 1
+ dyn G726-32 A 8,000 1
+ dyn G726-24 A 8,000 1
+ dyn G726-16 A 8,000 1
+ dyn G729D A 8,000 1
+ dyn G729E A 8,000 1
+ dyn GSM-EFR A 8,000 1
+ dyn L8 A var. var.
+ dyn RED A (see text)
+ dyn VDVI A var. 1
+
+ Table 4: Payload types (PT) for audio encodings
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 33]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ PT encoding media type clock rate
+ name (Hz)
+ _____________________________________________
+ 24 unassigned V
+ 25 CelB V 90,000
+ 26 JPEG V 90,000
+ 27 unassigned V
+ 28 nv V 90,000
+ 29 unassigned V
+ 30 unassigned V
+ 31 H261 V 90,000
+ 32 MPV V 90,000
+ 33 MP2T AV 90,000
+ 34 H263 V 90,000
+ 35-71 unassigned ?
+ 72-76 reserved N/A N/A
+ 77-95 unassigned ?
+ 96-127 dynamic ?
+ dyn H263-1998 V 90,000
+
+ Table 5: Payload types (PT) for video and combined
+ encodings
+
+ Session participants agree through mechanisms beyond the scope of
+ this specification on the set of payload types allowed in a given
+ session. This set MAY, for example, be defined by the capabilities
+ of the applications used, negotiated by a conference control protocol
+ or established by agreement between the human participants.
+
+ Audio applications operating under this profile SHOULD, at a minimum,
+ be able to send and/or receive payload types 0 (PCMU) and 5 (DVI4).
+ This allows interoperability without format negotiation and ensures
+ successful negotiation with a conference control protocol.
+
+7. RTP over TCP and Similar Byte Stream Protocols
+
+ Under special circumstances, it may be necessary to carry RTP in
+ protocols offering a byte stream abstraction, such as TCP, possibly
+ multiplexed with other data. The application MUST define its own
+ method of delineating RTP and RTCP packets (RTSP [23] provides an
+ example of such an encapsulation specification).
+
+8. Port Assignment
+
+ As specified in the RTP protocol definition, RTP data SHOULD be
+ carried on an even UDP port number and the corresponding RTCP packets
+ SHOULD be carried on the next higher (odd) port number.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 34]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ Applications operating under this profile MAY use any such UDP port
+ pair. For example, the port pair MAY be allocated randomly by a
+ session management program. A single fixed port number pair cannot
+ be required because multiple applications using this profile are
+ likely to run on the same host, and there are some operating systems
+ that do not allow multiple processes to use the same UDP port with
+ different multicast addresses.
+
+ However, port numbers 5004 and 5005 have been registered for use with
+ this profile for those applications that choose to use them as the
+ default pair. Applications that operate under multiple profiles MAY
+ use this port pair as an indication to select this profile if they
+ are not subject to the constraint of the previous paragraph.
+ Applications need not have a default and MAY require that the port
+ pair be explicitly specified. The particular port numbers were
+ chosen to lie in the range above 5000 to accommodate port number
+ allocation practice within some versions of the Unix operating
+ system, where port numbers below 1024 can only be used by privileged
+ processes and port numbers between 1024 and 5000 are automatically
+ assigned by the operating system.
+
+9. Changes from RFC 1890
+
+ This RFC revises RFC 1890. It is mostly backwards-compatible with
+ RFC 1890 except for functions removed because two interoperable
+ implementations were not found. The additions to RFC 1890 codify
+ existing practice in the use of payload formats under this profile.
+ Since this profile may be used without using any of the payload
+ formats listed here, the addition of new payload formats in this
+ revision does not affect backwards compatibility. The changes are
+ listed below, categorized into functional and non-functional changes.
+
+ Functional changes:
+
+ o Section 11, "IANA Considerations" was added to specify the
+ registration of the name for this profile. That section also
+ references a new Section 3 "Registering Additional Encodings"
+ which establishes a policy that no additional registration of
+ static payload types for this profile will be made beyond those
+ added in this revision and included in Tables 4 and 5. Instead,
+ additional encoding names may be registered as MIME subtypes for
+ binding to dynamic payload types. Non-normative references were
+ added to RFC 3555 [7] where MIME subtypes for all the listed
+ payload formats are registered, some with optional parameters for
+ use of the payload formats.
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 35]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ o Static payload types 4, 16, 17 and 34 were added to incorporate
+ IANA registrations made since the publication of RFC 1890, along
+ with the corresponding payload format descriptions for G723 and
+ H263.
+
+ o Following working group discussion, static payload types 12 and 18
+ were added along with the corresponding payload format
+ descriptions for QCELP and G729. Static payload type 13 was
+ assigned to the Comfort Noise (CN) payload format defined in RFC
+ 3389. Payload type 19 was marked reserved because it had been
+ temporarily allocated to an earlier version of Comfort Noise
+ present in some draft revisions of this document.
+
+ o The payload format for G721 was renamed to G726-32 following the
+ ITU-T renumbering, and the payload format description for G726 was
+ expanded to include the -16, -24 and -40 data rates. Because of
+ confusion regarding draft revisions of this document, some
+ implementations of these G726 payload formats packed samples into
+ octets starting with the most significant bit rather than the
+ least significant bit as specified here. To partially resolve
+ this incompatibility, new payload formats named AAL2-G726-16, -24,
+ -32 and -40 will be specified in a separate document (see note in
+ Section 4.5.4), and use of static payload type 2 is deprecated as
+ explained in Section 6.
+
+ o Payload formats G729D and G729E were added following the ITU-T
+ addition of Annexes D and E to Recommendation G.729. Listings
+ were added for payload formats GSM-EFR, RED, and H263-1998
+ published in other documents subsequent to RFC 1890. These
+ additional payload formats are referenced only by dynamic payload
+ type numbers.
+
+ o The descriptions of the payload formats for G722, G728, GSM, VDVI
+ were expanded.
+
+ o The payload format for 1016 audio was removed and its static
+ payload type assignment 1 was marked "reserved" because two
+ interoperable implementations were not found.
+
+ o Requirements for congestion control were added in Section 2.
+
+ o This profile follows the suggestion in the revised RTP spec that
+ RTCP bandwidth may be specified separately from the session
+ bandwidth and separately for active senders and passive receivers.
+
+ o The mapping of a user pass-phrase string into an encryption key
+ was deleted from Section 2 because two interoperable
+ implementations were not found.
+
+
+
+Schulzrinne & Casner Standards Track [Page 36]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ o The "quadrophonic" sample ordering convention for four-channel
+ audio was removed to eliminate an ambiguity as noted in Section
+ 4.1.
+
+ Non-functional changes:
+
+ o In Section 4.1, it is now explicitly stated that silence
+ suppression is allowed for all audio payload formats. (This has
+ always been the case and derives from a fundamental aspect of
+ RTP's design and the motivations for packet audio, but was not
+ explicitly stated before.) The use of comfort noise is also
+ explained.
+
+ o In Section 4.1, the requirement level for setting of the marker
+ bit on the first packet after silence for audio was changed from
+ "is" to "SHOULD be", and clarified that the marker bit is set only
+ when packets are intentionally not sent.
+
+ o Similarly, text was added to specify that the marker bit SHOULD be
+ set to one on the last packet of a video frame, and that video
+ frames are distinguished by their timestamps.
+
+ o RFC references are added for payload formats published after RFC
+ 1890.
+
+ o The security considerations and full copyright sections were
+ added.
+
+ o According to Peter Hoddie of Apple, only pre-1994 Macintosh used
+ the 22254.54 rate and none the 11127.27 rate, so the latter was
+ dropped from the discussion of suggested sampling frequencies.
+
+ o Table 1 was corrected to move some values from the "ms/packet"
+ column to the "default ms/packet" column where they belonged.
+
+ o Since the Interactive Multimedia Association ceased operations, an
+ alternate resource was provided for a referenced IMA document.
+
+ o A note has been added for G722 to clarify a discrepancy between
+ the actual sampling rate and the RTP timestamp clock rate.
+
+ o Small clarifications of the text have been made in several places,
+ some in response to questions from readers. In particular:
+
+ - A definition for "media type" is given in Section 1.1 to allow
+ the explanation of multiplexing RTP sessions in Section 6 to be
+ more clear regarding the multiplexing of multiple media.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 37]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ - The explanation of how to determine the number of audio frames
+ in a packet from the length was expanded.
+
+ - More description of the allocation of bandwidth to SDES items
+ is given.
+
+ - A note was added that the convention for the order of channels
+ specified in Section 4.1 may be overridden by a particular
+ encoding or payload format specification.
+
+ - The terms MUST, SHOULD, MAY, etc. are used as defined in RFC
+ 2119.
+
+ o A second author for this document was added.
+
+10. Security Considerations
+
+ Implementations using the profile defined in this specification are
+ subject to the security considerations discussed in the RTP
+ specification [1]. This profile does not specify any different
+ security services. The primary function of this profile is to list a
+ set of data compression encodings for audio and video media.
+
+ Confidentiality of the media streams is achieved by encryption.
+ Because the data compression used with the payload formats described
+ in this profile is applied end-to-end, encryption may be performed
+ after compression so there is no conflict between the two operations.
+
+ A potential denial-of-service threat exists for data encodings using
+ compression techniques that have non-uniform receiver-end
+ computational load. The attacker can inject pathological datagrams
+ into the stream which are complex to decode and cause the receiver to
+ be overloaded.
+
+ As with any IP-based protocol, in some circumstances a receiver may
+ be overloaded simply by the receipt of too many packets, either
+ desired or undesired. Network-layer authentication MAY be used to
+ discard packets from undesired sources, but the processing cost of
+ the authentication itself may be too high. In a multicast
+ environment, source pruning is implemented in IGMPv3 (RFC 3376) [24]
+ and in multicast routing protocols to allow a receiver to select
+ which sources are allowed to reach it.
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 38]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+11. IANA Considerations
+
+ The RTP specification establishes a registry of profile names for use
+ by higher-level control protocols, such as the Session Description
+ Protocol (SDP), RFC 2327 [6], to refer to transport methods. This
+ profile registers the name "RTP/AVP".
+
+ Section 3 establishes the policy that no additional registration of
+ static RTP payload types for this profile will be made beyond those
+ added in this document revision and included in Tables 4 and 5. IANA
+ may reference that section in declining to accept any additional
+ registration requests. In Tables 4 and 5, note that types 1 and 2
+ have been marked reserved and the set of "dyn" payload types included
+ has been updated. These changes are explained in Sections 6 and 9.
+
+12. References
+
+12.1 Normative References
+
+ [1] Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson,
+ "RTP: A Transport Protocol for Real-Time Applications", RFC
+ 3550, July 2003.
+
+ [2] Bradner, S., "Key Words for Use in RFCs to Indicate Requirement
+ Levels", BCP 14, RFC 2119, March 1997.
+
+ [3] Apple Computer, "Audio Interchange File Format AIFF-C", August
+ 1991. (also ftp://ftp.sgi.com/sgi/aiff-c.9.26.91.ps.Z).
+
+12.2 Informative References
+
+ [4] Braden, R., Clark, D. and S. Shenker, "Integrated Services in
+ the Internet Architecture: an Overview", RFC 1633, June 1994.
+
+ [5] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. and W.
+ Weiss, "An Architecture for Differentiated Service", RFC 2475,
+ December 1998.
+
+ [6] Handley, M. and V. Jacobson, "SDP: Session Description
+ Protocol", RFC 2327, April 1998.
+
+ [7] Casner, S. and P. Hoschka, "MIME Type Registration of RTP
+ Payload Types", RFC 3555, July 2003.
+
+ [8] Freed, N., Klensin, J. and J. Postel, "Multipurpose Internet
+ Mail Extensions (MIME) Part Four: Registration Procedures", BCP
+ 13, RFC 2048, November 1996.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 39]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ [9] Zopf, R., "Real-time Transport Protocol (RTP) Payload for
+ Comfort Noise (CN)", RFC 3389, September 2002.
+
+ [10] Deleam, D. and J.-P. Petit, "Real-time implementations of the
+ recent ITU-T low bit rate speech coders on the TI TMS320C54X
+ DSP: results, methodology, and applications", in Proc. of
+ International Conference on Signal Processing, Technology, and
+ Applications (ICSPAT) , (Boston, Massachusetts), pp. 1656--1660,
+ October 1996.
+
+ [11] Mouly, M. and M.-B. Pautet, The GSM system for mobile
+ communications Lassay-les-Chateaux, France: Europe Media
+ Duplication, 1993.
+
+ [12] Degener, J., "Digital Speech Compression", Dr. Dobb's Journal,
+ December 1994.
+
+ [13] Redl, S., Weber, M. and M. Oliphant, An Introduction to GSM
+ Boston: Artech House, 1995.
+
+ [14] Hoffman, D., Fernando, G., Goyal, V. and M. Civanlar, "RTP
+ Payload Format for MPEG1/MPEG2 Video", RFC 2250, January 1998.
+
+ [15] Jayant, N. and P. Noll, Digital Coding of Waveforms--Principles
+ and Applications to Speech and Video Englewood Cliffs, New
+ Jersey: Prentice-Hall, 1984.
+
+ [16] McKay, K., "RTP Payload Format for PureVoice(tm) Audio", RFC
+ 2658, August 1999.
+
+ [17] Perkins, C., Kouvelas, I., Hodson, O., Hardman, V., Handley, M.,
+ Bolot, J.-C., Vega-Garcia, A. and S. Fosse-Parisis, "RTP Payload
+ for Redundant Audio Data", RFC 2198, September 1997.
+
+ [18] Speer, M. and D. Hoffman, "RTP Payload Format of Sun's CellB
+ Video Encoding", RFC 2029, October 1996.
+
+ [19] Berc, L., Fenner, W., Frederick, R., McCanne, S. and P. Stewart,
+ "RTP Payload Format for JPEG-Compressed Video", RFC 2435,
+ October 1998.
+
+ [20] Turletti, T. and C. Huitema, "RTP Payload Format for H.261 Video
+ Streams", RFC 2032, October 1996.
+
+ [21] Zhu, C., "RTP Payload Format for H.263 Video Streams", RFC 2190,
+ September 1997.
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 40]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ [22] Bormann, C., Cline, L., Deisher, G., Gardos, T., Maciocco, C.,
+ Newell, D., Ott, J., Sullivan, G., Wenger, S. and C. Zhu, "RTP
+ Payload Format for the 1998 Version of ITU-T Rec. H.263 Video
+ (H.263+)", RFC 2429, October 1998.
+
+ [23] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming
+ Protocol (RTSP)", RFC 2326, April 1998.
+
+ [24] Cain, B., Deering, S., Kouvelas, I., Fenner, B. and A.
+ Thyagarajan, "Internet Group Management Protocol, Version 3",
+ RFC 3376, October 2002.
+
+13. Current Locations of Related Resources
+
+ Note: Several sections below refer to the ITU-T Software Tool
+ Library (STL). It is available from the ITU Sales Service, Place des
+ Nations, CH-1211 Geneve 20, Switzerland (also check
+ http://www.itu.int). The ITU-T STL is covered by a license defined
+ in ITU-T Recommendation G.191, "Software tools for speech and audio
+ coding standardization".
+
+ DVI4
+
+ An archived copy of the document IMA Recommended Practices for
+ Enhancing Digital Audio Compatibility in Multimedia Systems (version
+ 3.0), which describes the IMA ADPCM algorithm, is available at:
+
+ http://www.cs.columbia.edu/~hgs/audio/dvi/
+
+ An implementation is available from Jack Jansen at
+
+ ftp://ftp.cwi.nl/local/pub/audio/adpcm.shar
+
+ G722
+
+ An implementation of the G.722 algorithm is available as part of the
+ ITU-T STL, described above.
+
+ G723
+
+ The reference C code implementation defining the G.723.1 algorithm
+ and its Annexes A, B, and C are available as an integral part of
+ Recommendation G.723.1 from the ITU Sales Service, address listed
+ above. Both the algorithm and C code are covered by a specific
+ license. The ITU-T Secretariat should be contacted to obtain such
+ licensing information.
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 41]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+ G726
+
+ G726 is specified in the ITU-T Recommendation G.726, "40, 32, 24, and
+ 16 kb/s Adaptive Differential Pulse Code Modulation (ADPCM)". An
+ implementation of the G.726 algorithm is available as part of the
+ ITU-T STL, described above.
+
+ G729
+
+ The reference C code implementation defining the G.729 algorithm and
+ its Annexes A through I are available as an integral part of
+ Recommendation G.729 from the ITU Sales Service, listed above. Annex
+ I contains the integrated C source code for all G.729 operating
+ modes. The G.729 algorithm and associated C code are covered by a
+ specific license. The contact information for obtaining the license
+ is available from the ITU-T Secretariat.
+
+ GSM
+
+ A reference implementation was written by Carsten Bormann and Jutta
+ Degener (then at TU Berlin, Germany). It is available at
+
+ http://www.dmn.tzi.org/software/gsm/
+
+ Although the RPE-LTP algorithm is not an ITU-T standard, there is a C
+ code implementation of the RPE-LTP algorithm available as part of the
+ ITU-T STL. The STL implementation is an adaptation of the TU Berlin
+ version.
+
+ LPC
+
+ An implementation is available at
+
+ ftp://parcftp.xerox.com/pub/net-research/lpc.tar.Z
+
+ PCMU, PCMA
+
+ An implementation of these algorithms is available as part of the
+ ITU-T STL, described above.
+
+14. Acknowledgments
+
+ The comments and careful review of Simao Campos, Richard Cox and AVT
+ Working Group participants are gratefully acknowledged. The GSM
+ description was adopted from the IMTC Voice over IP Forum Service
+ Interoperability Implementation Agreement (January 1997). Fred Burg
+ and Terry Lyons helped with the G.729 description.
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 42]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+15. Intellectual Property Rights Statement
+
+ The IETF takes no position regarding the validity or scope of any
+ intellectual property or other rights that might be claimed to
+ pertain to the implementation or use of the technology described in
+ this document or the extent to which any license under such rights
+ might or might not be available; neither does it represent that it
+ has made any effort to identify any such rights. Information on the
+ IETF's procedures with respect to rights in standards-track and
+ standards-related documentation can be found in BCP-11. Copies of
+ claims of rights made available for publication and any assurances of
+ licenses to be made available, or the result of an attempt made to
+ obtain a general license or permission for the use of such
+ proprietary rights by implementors or users of this specification can
+ be obtained from the IETF Secretariat.
+
+ The IETF invites any interested party to bring to its attention any
+ copyrights, patents or patent applications, or other proprietary
+ rights which may cover technology that may be required to practice
+ this standard. Please address the information to the IETF Executive
+ Director.
+
+16. Authors' Addresses
+
+ Henning Schulzrinne
+ Department of Computer Science
+ Columbia University
+ 1214 Amsterdam Avenue
+ New York, NY 10027
+ United States
+
+ EMail: schulzrinne@cs.columbia.edu
+
+
+ Stephen L. Casner
+ Packet Design
+ 3400 Hillview Avenue, Building 3
+ Palo Alto, CA 94304
+ United States
+
+ EMail: casner@acm.org
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 43]
+
+RFC 3551 RTP A/V Profile July 2003
+
+
+17. Full Copyright Statement
+
+ Copyright (C) The Internet Society (2003). All Rights Reserved.
+
+ This document and translations of it may be copied and furnished to
+ others, and derivative works that comment on or otherwise explain it
+ or assist in its implementation may be prepared, copied, published
+ and distributed, in whole or in part, without restriction of any
+ kind, provided that the above copyright notice and this paragraph are
+ included on all such copies and derivative works. However, this
+ document itself may not be modified in any way, such as by removing
+ the copyright notice or references to the Internet Society or other
+ Internet organizations, except as needed for the purpose of
+ developing Internet standards in which case the procedures for
+ copyrights defined in the Internet Standards process must be
+ followed, or as required to translate it into languages other than
+ English.
+
+ The limited permissions granted above are perpetual and will not be
+ revoked by the Internet Society or its successors or assigns.
+
+ This document and the information contained herein is provided on an
+ "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
+ TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
+ BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
+ HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+
+Acknowledgement
+
+ Funding for the RFC Editor function is currently provided by the
+ Internet Society.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Schulzrinne & Casner Standards Track [Page 44]
+
diff --git a/src/modules/rtp/rtp.c b/src/modules/rtp/rtp.c
new file mode 100644
index 00000000..a3e78d84
--- /dev/null
+++ b/src/modules/rtp/rtp.c
@@ -0,0 +1,193 @@
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#include <polypcore/log.h>
+
+#include "rtp.h"
+
+/* Prepare c for sending RTP packets on the socket fd.
+ *
+ * The initial sequence number is randomized and the timestamp starts at 0.
+ * If ssrc is 0 a random SSRC is picked, otherwise the given value is used.
+ * Only the low 7 bits of payload are kept. Returns c. */
+pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload) {
+    assert(c);
+    assert(fd >= 0);
+
+    c->fd = fd;
+    /* Multiply as unsigned: the product of two rand() results can exceed
+     * INT_MAX, and signed overflow is undefined behaviour. */
+    c->sequence = (uint16_t) ((unsigned) rand() * (unsigned) rand());
+    c->timestamp = 0;
+    c->ssrc = ssrc ? ssrc : (uint32_t) ((unsigned) rand() * (unsigned) rand());
+    c->payload = payload & 127;
+
+    return c;
+}
+
+#define MAX_IOVECS 16
+
+/* Send the data queued in q as RTP packets carrying at most size bytes each.
+ *
+ * Nothing happens unless q holds at least size bytes. Packets are then
+ * assembled and sent until fewer than size bytes remain in q or q can no
+ * longer be read. The payload is handed to sendmsg() straight from the
+ * queued memblocks, and data is dropped from q before the send is
+ * attempted, i.e. it is gone even if sending fails.
+ *
+ * Returns 0 on success and -1 if sendmsg() failed (EAGAIN included, it is
+ * merely not logged); errno then holds the sendmsg() error. */
+int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q) {
+    struct iovec iov[MAX_IOVECS];
+    pa_memblock* mb[MAX_IOVECS];
+    int iov_idx = 1; /* Slot 0 is reserved for the RTP header */
+    size_t n = 0, skip = 0;
+
+    assert(c);
+    assert(size > 0);
+    assert(q);
+
+    if (pa_memblockq_get_length(q) < size)
+        return 0;
+
+    for (;;) {
+        int r;
+        pa_memchunk chunk;
+
+        if ((r = pa_memblockq_peek(q, &chunk)) >= 0) {
+
+            /* Never take more than what is still missing in this packet */
+            size_t k = n + chunk.length > size ? size - n : chunk.length;
+
+            if (chunk.memblock) {
+                iov[iov_idx].iov_base = (uint8_t*) chunk.memblock->data + chunk.index;
+                iov[iov_idx].iov_len = k;
+                mb[iov_idx] = chunk.memblock;
+                iov_idx ++;
+
+                n += k;
+            }
+
+            /* A chunk without a memblock adds no payload, but what it
+             * covers still advances the timestamp via skip. */
+            skip += k;
+            pa_memblockq_drop(q, &chunk, k);
+        }
+
+        /* Flush when the queue gave nothing, a chunk without data was hit,
+         * the packet is full or we ran out of iovec slots. */
+        if (r < 0 || !chunk.memblock || n >= size || iov_idx >= MAX_IOVECS) {
+            uint32_t header[3];
+            struct msghdr m;
+            int k, i, saved_errno = 0;
+
+            if (n > 0) {
+                /* Version 2 in the top two bits, payload type from bit 16
+                 * on, sequence number in the low 16 bits. */
+                header[0] = htonl(((uint32_t) 2 << 30) | ((uint32_t) c->payload << 16) | ((uint32_t) c->sequence));
+                header[1] = htonl(c->timestamp);
+                header[2] = htonl(c->ssrc);
+
+                iov[0].iov_base = header;
+                iov[0].iov_len = sizeof(header);
+
+                m.msg_name = NULL;
+                m.msg_namelen = 0;
+                m.msg_iov = iov;
+                m.msg_iovlen = iov_idx;
+                m.msg_control = NULL;
+                m.msg_controllen = 0;
+                m.msg_flags = 0;
+
+                k = sendmsg(c->fd, &m, MSG_DONTWAIT);
+                /* Capture errno right away, the cleanup below may clobber it */
+                saved_errno = errno;
+
+                /* Release the memblocks collected for this packet */
+                for (i = 1; i < iov_idx; i++)
+                    pa_memblock_unref(mb[i]);
+
+                c->sequence++;
+            } else
+                k = 0;
+
+            c->timestamp += skip;
+
+            if (k < 0) {
+                if (saved_errno != EAGAIN) /* If the queue is full, just ignore it */
+                    pa_log(__FILE__": sendmsg() failed: %s", strerror(saved_errno));
+                errno = saved_errno;
+                return -1;
+            }
+
+            if (r < 0 || pa_memblockq_get_length(q) < size)
+                break;
+
+            /* Start assembling the next packet */
+            n = 0;
+            skip = 0;
+            iov_idx = 1;
+        }
+    }
+
+    return 0;
+}
+
+/* Prepare c for receiving on the socket fd. Only the descriptor is
+ * recorded; the remaining fields are left untouched. Returns c. */
+pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd) {
+    assert(c);
+    c->fd = fd;
+
+    return c;
+}
+
+/* Placeholder for the receive path: checks its arguments and returns 0
+ * without reading from the socket or writing to chunk. */
+int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk) {
+    assert(c);
+    assert(chunk);
+    return 0;
+}
+
+/* Map a sample spec to the static RTP payload type assigned to it by the
+ * RTP A/V profile (RFC 3551, Table 4). Specs without a static assignment
+ * yield 127, a value from the dynamic range. */
+uint8_t pa_rtp_payload_type(const pa_sample_spec *ss) {
+    assert(ss);
+
+    if (ss->format == PA_SAMPLE_ULAW && ss->rate == 8000 && ss->channels == 1)
+        return 0; /* PCMU */
+    if (ss->format == PA_SAMPLE_ALAW && ss->rate == 8000 && ss->channels == 1)
+        return 8; /* PCMA; 0 would announce the stream as mu-law */
+    if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 2)
+        return 10; /* L16, stereo */
+    if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 1)
+        return 11; /* L16, mono */
+
+    return 127;
+}
+
+/* Make ss acceptable to pa_rtp_sample_spec_valid() by switching its sample
+ * format to S16BE if it is rejected as is. Rate and channel count are not
+ * modified. Returns ss. */
+pa_sample_spec *pa_rtp_sample_spec_fixup(pa_sample_spec * ss) {
+    assert(ss);
+
+    if (pa_rtp_sample_spec_valid(ss) == 0)
+        ss->format = PA_SAMPLE_S16BE;
+
+    /* Changing the format is the only repair attempted here */
+    assert(pa_rtp_sample_spec_valid(ss));
+
+    return ss;
+}
+
+/* Return 1 if ss passes pa_sample_spec_valid() and uses one of the sample
+ * formats this module handles (U8, A-law, mu-law, S16BE), 0 otherwise. */
+int pa_rtp_sample_spec_valid(const pa_sample_spec *ss) {
+    assert(ss);
+
+    if (!pa_sample_spec_valid(ss))
+        return 0;
+
+    switch (ss->format) {
+        case PA_SAMPLE_U8:
+        case PA_SAMPLE_ALAW:
+        case PA_SAMPLE_ULAW:
+        case PA_SAMPLE_S16BE:
+            return 1;
+
+        default:
+            return 0;
+    }
+}
+
+/* Tear down c by closing its socket. The context structure itself is not
+ * freed here. */
+void pa_rtp_context_destroy(pa_rtp_context *c) {
+    assert(c);
+    close(c->fd);
+}
diff --git a/src/modules/rtp/rtp.h b/src/modules/rtp/rtp.h
new file mode 100644
index 00000000..e925cc0e
--- /dev/null
+++ b/src/modules/rtp/rtp.h
@@ -0,0 +1,51 @@
+#ifndef foortphfoo
+#define foortphfoo
+
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <polypcore/memblockq.h>
+#include <polypcore/memchunk.h>
+
+typedef struct pa_rtp_context { /* State of one RTP endpoint */
+ int fd; /* Socket used for sending; closed by pa_rtp_context_destroy() */
+ uint16_t sequence; /* Sequence number put into the next outgoing packet */
+ uint32_t timestamp; /* Timestamp put into the next outgoing packet */
+ uint32_t ssrc; /* Synchronization source identifier written into every header */
+ uint8_t payload; /* RTP payload type, limited to 7 bits on init */
+} pa_rtp_context;
+
+pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload);
+int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q);
+
+pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd);
+int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk);
+
+uint8_t pa_rtp_payload_type(const pa_sample_spec *ss);
+pa_sample_spec* pa_rtp_sample_spec_fixup(pa_sample_spec *ss);
+int pa_rtp_sample_spec_valid(const pa_sample_spec *ss);
+
+void pa_rtp_context_destroy(pa_rtp_context *c);
+
+#endif
diff --git a/src/modules/rtp/sap.c b/src/modules/rtp/sap.c
new file mode 100644
index 00000000..ebf20bc4
--- /dev/null
+++ b/src/modules/rtp/sap.c
@@ -0,0 +1,107 @@
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <polypcore/util.h>
+#include <polypcore/log.h>
+#include <polypcore/xmalloc.h>
+
+#include "sap.h"
+
+/* Prepare c for sending SAP announcements of sdp_data on the socket fd.
+ *
+ * The pointer sdp_data is stored as is (not copied) and is released with
+ * pa_xfree() by pa_sap_context_destroy(). A random message ID hash is
+ * chosen. Returns c. */
+pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data) {
+    assert(c);
+    assert(fd >= 0);
+    assert(sdp_data);
+
+    c->fd = fd;
+    c->sdp_data = sdp_data;
+    /* Multiply as unsigned: the product of two rand() results can exceed
+     * INT_MAX, and signed overflow is undefined behaviour. */
+    c->msg_id_hash = (uint16_t) ((unsigned) rand() * (unsigned) rand());
+
+    return c;
+}
+
+/* Tear down c: closes its socket and frees the SDP data it holds. The
+ * context structure itself is not freed here. */
+void pa_sap_context_destroy(pa_sap_context *c) {
+    assert(c);
+
+    close(c->fd);
+
+    pa_xfree(c->sdp_data);
+}
+
+/* Send one SAP packet for the session described by c->sdp_data.
+ *
+ * The packet consists of the 32 bit SAP header, the local address of the
+ * socket (obtained with getsockname()) as originating source, the
+ * NUL-terminated payload type "application/sdp" and the SDP text. A
+ * non-zero goodbye sets the bit at position 26 of the header.
+ *
+ * Returns the result of sendmsg(), or -1 if getsockname() failed. */
+int pa_sap_send(pa_sap_context *c, int goodbye) {
+    const char mime[] = "application/sdp";
+    struct sockaddr_storage storage;
+    struct sockaddr *addr = (struct sockaddr*) &storage;
+    socklen_t addr_len = sizeof(storage);
+    struct iovec iov[4];
+    struct msghdr m;
+    uint32_t bits, header;
+    int sent;
+
+    if (getsockname(c->fd, addr, &addr_len) < 0) {
+        pa_log("getsockname() failed: %s\n", strerror(errno));
+        return -1;
+    }
+
+    assert(addr->sa_family == AF_INET || addr->sa_family == AF_INET6);
+
+    /* Version 1, then the address type and goodbye flags as needed */
+    bits = (uint32_t) 1 << 29;
+    if (addr->sa_family == AF_INET6)
+        bits |= (uint32_t) 1 << 28;
+    if (goodbye)
+        bits |= (uint32_t) 1 << 26;
+
+    header = htonl(bits | c->msg_id_hash);
+
+    iov[0].iov_base = &header;
+    iov[0].iov_len = sizeof(header);
+
+    /* Originating source: the raw 4 or 16 byte address */
+    if (addr->sa_family == AF_INET) {
+        iov[1].iov_base = (void*) &((struct sockaddr_in*) addr)->sin_addr;
+        iov[1].iov_len = 4;
+    } else {
+        iov[1].iov_base = (void*) &((struct sockaddr_in6*) addr)->sin6_addr;
+        iov[1].iov_len = 16;
+    }
+
+    /* sizeof() so that the terminating NUL byte is sent as well */
+    iov[2].iov_base = (char*) mime;
+    iov[2].iov_len = sizeof(mime);
+
+    iov[3].iov_base = c->sdp_data;
+    iov[3].iov_len = strlen(c->sdp_data);
+
+    m.msg_name = NULL;
+    m.msg_namelen = 0;
+    m.msg_iov = iov;
+    m.msg_iovlen = 4;
+    m.msg_control = NULL;
+    m.msg_controllen = 0;
+    m.msg_flags = 0;
+
+    sent = sendmsg(c->fd, &m, MSG_DONTWAIT);
+    if (sent < 0)
+        pa_log("sendmsg() failed: %s\n", strerror(errno));
+
+    return sent;
+}
diff --git a/src/modules/rtp/sap.h b/src/modules/rtp/sap.h
new file mode 100644
index 00000000..787b39f7
--- /dev/null
+++ b/src/modules/rtp/sap.h
@@ -0,0 +1,43 @@
+#ifndef foosaphfoo
+#define foosaphfoo
+
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <polypcore/memblockq.h>
+#include <polypcore/memchunk.h>
+
+typedef struct pa_sap_context { /* State of one SAP announcer */
+ int fd; /* Socket the packets are sent on; closed by pa_sap_context_destroy() */
+ char *sdp_data; /* SDP text sent in every packet; freed with pa_xfree() on destroy */
+
+ uint16_t msg_id_hash; /* Value placed in the low 16 bits of the SAP header; chosen randomly on init */
+} pa_sap_context;
+
+pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data);
+void pa_sap_context_destroy(pa_sap_context *c);
+
+int pa_sap_send(pa_sap_context *c, int goodbye);
+
+#endif
diff --git a/src/modules/rtp/sdp.c b/src/modules/rtp/sdp.c
new file mode 100644
index 00000000..99e8c12b
--- /dev/null
+++ b/src/modules/rtp/sdp.c
@@ -0,0 +1,87 @@
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <polypcore/util.h>
+
+#include "sdp.h"
+
+/* Translate a sample format into the encoding name used in the SDP rtpmap
+ * attribute. Returns NULL for formats that have no mapping here. */
+static const char* map_format(pa_sample_format_t f) {
+    if (f == PA_SAMPLE_S16BE)
+        return "L16";
+    if (f == PA_SAMPLE_U8)
+        return "L8";
+    if (f == PA_SAMPLE_ALAW)
+        return "PCMA";
+    if (f == PA_SAMPLE_ULAW)
+        return "PCMU";
+
+    return NULL;
+}
+
+/* Build the SDP description of a receive-only audio broadcast session.
+ *
+ * af is AF_INET or AF_INET6; src and dst point to the raw origin and
+ * connection addresses of that family. name becomes the session name,
+ * port and payload describe the RTP stream and ss supplies encoding,
+ * rate and channel count. The user name is taken from $USER, then
+ * $USERNAME, falling back to "-". The current time, shifted to the NTP
+ * epoch (1900), serves as session id and start time.
+ *
+ * Returns the string produced by pa_sprintf_malloc(); presumably the
+ * caller has to release it with pa_xfree() -- TODO confirm. */
+char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss) {
+    char origin[64], connection[64];
+    const char *user, *encoding, *family, *ok;
+    uint32_t ntp;
+
+    assert(src);
+    assert(dst);
+    assert(af == AF_INET || af == AF_INET6);
+
+    encoding = map_format(ss->format);
+    assert(encoding);
+
+    user = getenv("USER");
+    if (!user)
+        user = getenv("USERNAME");
+    if (!user)
+        user = "-";
+
+    /* 2208988800 is the number of seconds between 1900 and 1970 */
+    ntp = time(NULL) + 2208988800;
+
+    ok = inet_ntop(af, src, origin, sizeof(origin));
+    assert(ok);
+    ok = inet_ntop(af, dst, connection, sizeof(connection));
+    assert(ok);
+
+    family = af == AF_INET ? "IP4" : "IP6";
+
+    return pa_sprintf_malloc(
+            "v=0\n"
+            "o=%s %lu 0 IN %s %s\n"
+            "s=%s\n"
+            "c=IN %s %s\n"
+            "t=%lu 0\n"
+            "a=recvonly\n"
+            "m=audio %u RTP/AVP %i\n"
+            "a=rtpmap:%i %s/%u/%u\n"
+            "a=type:broadcast\n",
+            user, (unsigned long) ntp, family, origin,
+            name,
+            family, connection,
+            (unsigned long) ntp,
+            port, payload,
+            payload, encoding, ss->rate, ss->channels);
+}
diff --git a/src/modules/rtp/sdp.h b/src/modules/rtp/sdp.h
new file mode 100644
index 00000000..10820067
--- /dev/null
+++ b/src/modules/rtp/sdp.h
@@ -0,0 +1,33 @@
+#ifndef foosdphfoo
+#define foosdphfoo
+
+/* $Id$ */
+
+/***
+ This file is part of polypaudio.
+
+ polypaudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ polypaudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with polypaudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <polyp/sample.h>
+
+char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss);
+
+#endif