From 9522b4484206ef3a99fb5586831a27fdfea0c373 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 14 Apr 2006 23:47:33 +0000 Subject: add an RTP sender module git-svn-id: file:///home/lennart/svn/public/pulseaudio/trunk@712 fefdeb5f-60dc-0310-8127-8f9354f1896f --- src/modules/rtp/Makefile | 13 + src/modules/rtp/module-rtp-monitor.c | 340 ++ src/modules/rtp/rfc2327.txt | 2355 ++++++++++++++ src/modules/rtp/rfc2974.txt | 1011 ++++++ src/modules/rtp/rfc3550.txt | 5827 ++++++++++++++++++++++++++++++++++ src/modules/rtp/rfc3551.txt | 2467 ++++++++++++++ src/modules/rtp/rtp.c | 193 ++ src/modules/rtp/rtp.h | 51 + src/modules/rtp/sap.c | 107 + src/modules/rtp/sap.h | 43 + src/modules/rtp/sdp.c | 87 + src/modules/rtp/sdp.h | 33 + 12 files changed, 12527 insertions(+) create mode 100644 src/modules/rtp/Makefile create mode 100644 src/modules/rtp/module-rtp-monitor.c create mode 100644 src/modules/rtp/rfc2327.txt create mode 100644 src/modules/rtp/rfc2974.txt create mode 100644 src/modules/rtp/rfc3550.txt create mode 100644 src/modules/rtp/rfc3551.txt create mode 100644 src/modules/rtp/rtp.c create mode 100644 src/modules/rtp/rtp.h create mode 100644 src/modules/rtp/sap.c create mode 100644 src/modules/rtp/sap.h create mode 100644 src/modules/rtp/sdp.c create mode 100644 src/modules/rtp/sdp.h (limited to 'src/modules') diff --git a/src/modules/rtp/Makefile b/src/modules/rtp/Makefile new file mode 100644 index 00000000..316beb72 --- /dev/null +++ b/src/modules/rtp/Makefile @@ -0,0 +1,13 @@ +# This is a dirty trick just to ease compilation with emacs +# +# This file is not intended to be distributed or anything +# +# So: don't touch it, even better ignore it! + +all: + $(MAKE) -C ../.. + +clean: + $(MAKE) -C ../.. clean + +.PHONY: all clean diff --git a/src/modules/rtp/module-rtp-monitor.c b/src/modules/rtp/module-rtp-monitor.c new file mode 100644 index 00000000..66332093 --- /dev/null +++ b/src/modules/rtp/module-rtp-monitor.c @@ -0,0 +1,340 @@ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "module-rtp-monitor-symdef.h" + +#include "rtp.h" +#include "sdp.h" +#include "sap.h" + +PA_MODULE_AUTHOR("Lennart Poettering") +PA_MODULE_DESCRIPTION("Read data from source and send it to the network via RTP") +PA_MODULE_VERSION(PACKAGE_VERSION) +PA_MODULE_USAGE( + "source= " + "format= " + "channels= " + "rate= " + "destinaton= " + "port= " + "mtu= " +) + +#define DEFAULT_PORT 5666 +#define SAP_PORT 9875 +#define DEFAULT_DESTINATION "224.0.0.252" +#define MEMBLOCKQ_MAXLENGTH (1024*170) +#define DEFAULT_MTU 1024 +#define SAP_INTERVAL 5000000 + +static const char* const valid_modargs[] = { + "source", + "format", + "channels", + "rate", + "destination", + "port", + NULL +}; + +struct userdata { + pa_module *module; + pa_core *core; + + pa_source_output *source_output; + pa_memblockq *memblockq; + + pa_rtp_context rtp_context; + pa_sap_context sap_context; + size_t mtu; + + pa_time_event *sap_event; +}; + +static void source_output_push(pa_source_output *o, const pa_memchunk *chunk) { + struct userdata *u; + assert(o); + u = o->userdata; + + if (pa_memblockq_push(u->memblockq, chunk) < 0) { + pa_log(__FILE__": Failed to push chunk into memblockq."); + return; + } + + pa_rtp_send(&u->rtp_context, u->mtu, u->memblockq); +} + +static void source_output_kill(pa_source_output* o) { + struct userdata *u; + assert(o); + u = o->userdata; + + pa_module_unload_request(u->module); + + pa_source_output_disconnect(u->source_output); + pa_source_output_unref(u->source_output); + u->source_output = NULL; +} + +static pa_usec_t source_output_get_latency (pa_source_output *o) { + struct userdata *u; + assert(o); + u = o->userdata; + + return pa_bytes_to_usec(pa_memblockq_get_length(u->memblockq), &o->sample_spec); +} + +static void sap_event(pa_mainloop_api *m, pa_time_event *t, const struct timeval *tv, void *userdata) { + struct userdata *u = userdata; + struct timeval next; + + assert(m); + assert(t); + assert(tv); + assert(u); + + pa_sap_send(&u->sap_context, 0); + + pa_log("SAP update"); + pa_gettimeofday(&next); + pa_timeval_add(&next, SAP_INTERVAL); + m->time_restart(t, &next); +} + +int pa__init(pa_core *c, pa_module*m) { + struct userdata *u; + pa_modargs *ma = NULL; + const char *dest; + uint32_t port = DEFAULT_PORT, mtu; + int af, fd = -1, sap_fd = -1; + pa_source *s; + pa_sample_spec ss; + pa_channel_map cm; + struct sockaddr_in sa4, sap_sa4; + struct sockaddr_in6 sa6, sap_sa6; + struct sockaddr_storage sa_dst; + pa_source_output *o = NULL; + uint8_t payload; + char *p; + int r; + socklen_t k; + struct timeval tv; + + assert(c); + assert(m); + + if (!(ma = pa_modargs_new(m->argument, valid_modargs))) { + pa_log(__FILE__": failed to parse module arguments"); + goto fail; + } + + if (!(s = pa_namereg_get(m->core, pa_modargs_get_value(ma, "source", NULL), PA_NAMEREG_SOURCE, 1))) { + pa_log(__FILE__": source does not exist."); + goto fail; + } + + ss = s->sample_spec; + pa_rtp_sample_spec_fixup(&ss); + cm = s->channel_map; + if (pa_modargs_get_sample_spec(ma, &ss) < 0) { + pa_log(__FILE__": failed to parse sample specification"); + goto fail; + } + + if (!pa_rtp_sample_spec_valid(&ss)) { + pa_log(__FILE__": specified sample type not compatible with RTP"); + goto fail; + } + + if (ss.channels != cm.channels) + pa_channel_map_init_auto(&cm, ss.channels); + + payload = pa_rtp_payload_type(&ss); + + mtu = (DEFAULT_MTU/pa_frame_size(&ss))*pa_frame_size(&ss); + + if (pa_modargs_get_value_u32(ma, "mtu", &mtu) < 0 || mtu < 1 || mtu % pa_frame_size(&ss) != 0) { + pa_log(__FILE__": invalid mtu."); + goto fail; + } + + if (pa_modargs_get_value_u32(ma, "port", &port) < 0 || port < 1 || port > 0xFFFF) { + pa_log(__FILE__": port= expects a numerical argument between 1 and 65535."); + goto fail; + } + + if ((dest = pa_modargs_get_value(ma, "destination", DEFAULT_DESTINATION))) { + if (inet_pton(AF_INET6, dest, &sa6.sin6_addr) > 0) { + sa6.sin6_family = af = AF_INET6; + sa6.sin6_port = htons(port); + sap_sa6 = sa6; + sap_sa6.sin6_port = htons(SAP_PORT); + } else if (inet_pton(AF_INET, dest, &sa4.sin_addr) > 0) { + sa4.sin_family = af = AF_INET; + sa4.sin_port = htons(port); + sap_sa4 = sa4; + sap_sa4.sin_port = htons(SAP_PORT); + } else { + pa_log(__FILE__": invalid destination '%s'", dest); + goto fail; + } + } + + if ((fd = socket(af, SOCK_DGRAM, 0)) < 0) { + pa_log(__FILE__": socket() failed: %s", strerror(errno)); + goto fail; + } + + if (connect(fd, af == AF_INET ? (struct sockaddr*) &sa4 : (struct sockaddr*) &sa6, af == AF_INET ? sizeof(sa4) : sizeof(sa6)) < 0) { + pa_log(__FILE__": connect() failed: %s", strerror(errno)); + goto fail; + } + + if ((sap_fd = socket(af, SOCK_DGRAM, 0)) < 0) { + pa_log(__FILE__": socket() failed: %s", strerror(errno)); + goto fail; + } + + if (connect(sap_fd, af == AF_INET ? (struct sockaddr*) &sap_sa4 : (struct sockaddr*) &sap_sa6, af == AF_INET ? sizeof(sap_sa4) : sizeof(sap_sa6)) < 0) { + pa_log(__FILE__": connect() failed: %s", strerror(errno)); + goto fail; + } + + if (!(o = pa_source_output_new(s, __FILE__, "RTP Monitor Stream", &ss, &cm, PA_RESAMPLER_INVALID))) { + pa_log(__FILE__": failed to create source output."); + goto fail; + } + + o->push = source_output_push; + o->kill = source_output_kill; + o->get_latency = source_output_get_latency; + o->owner = m; + + u = pa_xnew(struct userdata, 1); + m->userdata = u; + o->userdata = u; + + u->module = m; + u->core = c; + u->source_output = o; + + u->memblockq = pa_memblockq_new( + 0, + MEMBLOCKQ_MAXLENGTH, + MEMBLOCKQ_MAXLENGTH, + pa_frame_size(&ss), + 1, + 0, + NULL, + c->memblock_stat); + + u->mtu = mtu; + + k = sizeof(sa_dst); + r = getsockname(fd, (struct sockaddr*) &sa_dst, &k); + assert(r >= 0); + + p = pa_sdp_build(af, + af == AF_INET ? (void*) &((struct sockaddr_in*) &sa_dst)->sin_addr : (void*) &((struct sockaddr_in6*) &sa_dst)->sin6_addr, + af == AF_INET ? (void*) &sa4.sin_addr : (void*) &sa6.sin6_addr, + "Polypaudio RTP Stream", port, payload, &ss); + + pa_rtp_context_init_send(&u->rtp_context, fd, 0, payload); + pa_sap_context_init_send(&u->sap_context, sap_fd, p); + + pa_log_info("RTP stream initialized with mtu %u on %s:%u, SSRC=0x%08x, payload=%u, initial sequence #%u", mtu, dest, port, u->rtp_context.ssrc, payload, u->rtp_context.sequence); + pa_log_info("SDP-Data:\n%s\nEOF", p); + + pa_sap_send(&u->sap_context, 0); + + pa_gettimeofday(&tv); + pa_timeval_add(&tv, SAP_INTERVAL); + u->sap_event = c->mainloop->time_new(c->mainloop, &tv, sap_event, u); + + pa_modargs_free(ma); + + return 0; + +fail: + if (ma) + pa_modargs_free(ma); + + if (fd >= 0) + close(fd); + + if (sap_fd >= 0) + close(sap_fd); + + if (o) { + pa_source_output_disconnect(o); + pa_source_output_unref(o); + } + + return -1; +} + +void pa__done(pa_core *c, pa_module*m) { + struct userdata *u; + assert(c); + assert(m); + + if (!(u = m->userdata)) + return; + + c->mainloop->time_free(u->sap_event); + + if (u->source_output) { + pa_source_output_disconnect(u->source_output); + pa_source_output_unref(u->source_output); + } + + pa_rtp_context_destroy(&u->rtp_context); + + pa_sap_send(&u->sap_context, 1); + pa_sap_context_destroy(&u->sap_context); + + pa_memblockq_free(u->memblockq); + + pa_xfree(u); +} diff --git a/src/modules/rtp/rfc2327.txt b/src/modules/rtp/rfc2327.txt new file mode 100644 index 00000000..ce77de61 --- /dev/null +++ b/src/modules/rtp/rfc2327.txt @@ -0,0 +1,2355 @@ + + + + + + +Network Working Group M. Handley +Request for Comments: 2327 V. Jacobson +Category: Standards Track ISI/LBNL + April 1998 + + + SDP: Session Description Protocol + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1998). All Rights Reserved. + +Abstract + + This document defines the Session Description Protocol, SDP. SDP is + intended for describing multimedia sessions for the purposes of + session announcement, session invitation, and other forms of + multimedia session initiation. + + This document is a product of the Multiparty Multimedia Session + Control (MMUSIC) working group of the Internet Engineering Task + Force. Comments are solicited and should be addressed to the working + group's mailing list at confctrl@isi.edu and/or the authors. + +1. Introduction + + On the Internet multicast backbone (Mbone), a session directory tool + is used to advertise multimedia conferences and communicate the + conference addresses and conference tool-specific information + necessary for participation. This document defines a session + description protocol for this purpose, and for general real-time + multimedia session description purposes. This memo does not describe + multicast address allocation or the distribution of SDP messages in + detail. These are described in accompanying memos. SDP is not + intended for negotiation of media encodings. + + + + + + + + +Handley & Jacobson Standards Track [Page 1] + +RFC 2327 SDP April 1998 + + +2. Background + + The Mbone is the part of the internet that supports IP multicast, and + thus permits efficient many-to-many communication. It is used + extensively for multimedia conferencing. Such conferences usually + have the property that tight coordination of conference membership is + not necessary; to receive a conference, a user at an Mbone site only + has to know the conference's multicast group address and the UDP + ports for the conference data streams. + + Session directories assist the advertisement of conference sessions + and communicate the relevant conference setup information to + prospective participants. SDP is designed to convey such information + to recipients. SDP is purely a format for session description - it + does not incorporate a transport protocol, and is intended to use + different transport protocols as appropriate including the Session + Announcement Protocol [4], Session Initiation Protocol [11], Real- + Time Streaming Protocol [12], electronic mail using the MIME + extensions, and the Hypertext Transport Protocol. + + SDP is intended to be general purpose so that it can be used for a + wider range of network environments and applications than just + multicast session directories. However, it is not intended to + support negotiation of session content or media encodings - this is + viewed as outside the scope of session description. + +3. Glossary of Terms + + The following terms are used in this document, and have specific + meaning within the context of this document. + + Conference + A multimedia conference is a set of two or more communicating users + along with the software they are using to communicate. + + Session + A multimedia session is a set of multimedia senders and receivers + and the data streams flowing from senders to receivers. A + multimedia conference is an example of a multimedia session. + + Session Advertisement + See session announcement. + + Session Announcement + A session announcement is a mechanism by which a session + description is conveyed to users in a proactive fashion, i.e., the + session description was not explicitly requested by the user. + + + + +Handley & Jacobson Standards Track [Page 2] + +RFC 2327 SDP April 1998 + + + Session Description + A well defined format for conveying sufficient information to + discover and participate in a multimedia session. + +3.1. Terminology + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119. + +4. SDP Usage + +4.1. Multicast Announcements + + SDP is a session description protocol for multimedia sessions. A + common mode of usage is for a client to announce a conference session + by periodically multicasting an announcement packet to a well known + multicast address and port using the Session Announcement Protocol + (SAP). + + SAP packets are UDP packets with the following format: + + |--------------------| + | SAP header | + |--------------------| + | text payload | + |////////// + + + The header is the Session Announcement Protocol header. SAP is + described in more detail in a companion memo [4] + + The text payload is an SDP session description, as described in this + memo. The text payload should be no greater than 1 Kbyte in length. + If announced by SAP, only one session announcement is permitted in a + single packet. + +4.2. Email and WWW Announcements + + Alternative means of conveying session descriptions include + electronic mail and the World Wide Web. For both email and WWW + distribution, the use of the MIME content type "application/sdp" + should be used. This enables the automatic launching of applications + for participation in the session from the WWW client or mail reader + in a standard manner. + + + + + + +Handley & Jacobson Standards Track [Page 3] + +RFC 2327 SDP April 1998 + + + Note that announcements of multicast sessions made only via email or + the World Wide Web (WWW) do not have the property that the receiver + of a session announcement can necessarily receive the session because + the multicast sessions may be restricted in scope, and access to the + WWW server or reception of email is possible outside this scope. SAP + announcements do not suffer from this mismatch. + +5. Requirements and Recommendations + + The purpose of SDP is to convey information about media streams in + multimedia sessions to allow the recipients of a session description + to participate in the session. SDP is primarily intended for use in + an internetwork, although it is sufficiently general that it can + describe conferences in other network environments. + + A multimedia session, for these purposes, is defined as a set of + media streams that exist for some duration of time. Media streams + can be many-to-many. The times during which the session is active + need not be continuous. + + Thus far, multicast based sessions on the Internet have differed from + many other forms of conferencing in that anyone receiving the traffic + can join the session (unless the session traffic is encrypted). In + such an environment, SDP serves two primary purposes. It is a means + to communicate the existence of a session, and is a means to convey + sufficient information to enable joining and participating in the + session. In a unicast environment, only the latter purpose is likely + to be relevant. + + Thus SDP includes: + + o Session name and purpose + + o Time(s) the session is active + + o The media comprising the session + + o Information to receive those media (addresses, ports, formats and + so on) + + As resources necessary to participate in a session may be limited, + some additional information may also be desirable: + + o Information about the bandwidth to be used by the conference + + o Contact information for the person responsible for the session + + + + + +Handley & Jacobson Standards Track [Page 4] + +RFC 2327 SDP April 1998 + + + In general, SDP must convey sufficient information to be able to join + a session (with the possible exception of encryption keys) and to + announce the resources to be used to non-participants that may need + to know. + +5.1. Media Information + + SDP includes: + + o The type of media (video, audio, etc) + + o The transport protocol (RTP/UDP/IP, H.320, etc) + + o The format of the media (H.261 video, MPEG video, etc) + + For an IP multicast session, the following are also conveyed: + + o Multicast address for media + + o Transport Port for media + + This address and port are the destination address and destination + port of the multicast stream, whether being sent, received, or both. + + For an IP unicast session, the following are conveyed: + + o Remote address for media + + o Transport port for contact address + + The semantics of this address and port depend on the media and + transport protocol defined. By default, this is the remote address + and remote port to which data is sent, and the remote address and + local port on which to receive data. However, some media may define + to use these to establish a control channel for the actual media + flow. + +5.2. Timing Information + + Sessions may either be bounded or unbounded in time. Whether or not + they are bounded, they may be only active at specific times. + + SDP can convey: + + o An arbitrary list of start and stop times bounding the session + + o For each bound, repeat times such as "every Wednesday at 10am for + one hour" + + + +Handley & Jacobson Standards Track [Page 5] + +RFC 2327 SDP April 1998 + + + This timing information is globally consistent, irrespective of local + time zone or daylight saving time. + +5.3. Private Sessions + + It is possible to create both public sessions and private sessions. + Private sessions will typically be conveyed by encrypting the session + description to distribute it. The details of how encryption is + performed are dependent on the mechanism used to convey SDP - see [4] + for how this is done for session announcements. + + If a session announcement is private it is possible to use that + private announcement to convey encryption keys necessary to decode + each of the media in a conference, including enough information to + know which encryption scheme is used for each media. + +5.4. Obtaining Further Information about a Session + + A session description should convey enough information to decide + whether or not to participate in a session. SDP may include + additional pointers in the form of Universal Resources Identifiers + (URIs) for more information about the session. + +5.5. Categorisation + + When many session descriptions are being distributed by SAP or any + other advertisement mechanism, it may be desirable to filter + announcements that are of interest from those that are not. SDP + supports a categorisation mechanism for sessions that is capable of + being automated. + +5.6. Internationalization + + The SDP specification recommends the use of the ISO 10646 character + sets in the UTF-8 encoding (RFC 2044) to allow many different + languages to be represented. However, to assist in compact + representations, SDP also allows other character sets such as ISO + 8859-1 to be used when desired. Internationalization only applies to + free-text fields (session name and background information), and not + to SDP as a whole. + +6. SDP Specification + + SDP session descriptions are entirely textual using the ISO 10646 + character set in UTF-8 encoding. SDP field names and attributes names + use only the US-ASCII subset of UTF-8, but textual fields and + attribute values may use the full ISO 10646 character set. The + textual form, as opposed to a binary encoding such as ASN/1 or XDR, + + + +Handley & Jacobson Standards Track [Page 6] + +RFC 2327 SDP April 1998 + + + was chosen to enhance portability, to enable a variety of transports + to be used (e.g, session description in a MIME email message) and to + allow flexible, text-based toolkits (e.g., Tcl/Tk ) to be used to + generate and to process session descriptions. However, since the + total bandwidth allocated to all SAP announcements is strictly + limited, the encoding is deliberately compact. Also, since + announcements may be transported via very unreliable means (e.g., + email) or damaged by an intermediate caching server, the encoding was + designed with strict order and formatting rules so that most errors + would result in malformed announcements which could be detected + easily and discarded. This also allows rapid discarding of encrypted + announcements for which a receiver does not have the correct key. + + An SDP session description consists of a number of lines of text of + the form = is always exactly one character and is + case-significant. is a structured text string whose format + depends on . It also will be case-significant unless a + specific field defines otherwise. Whitespace is not permitted either + side of the `=' sign. In general is either a number of fields + delimited by a single space character or a free format string. + + A session description consists of a session-level description + (details that apply to the whole session and all media streams) and + optionally several media-level descriptions (details that apply onto + to a single media stream). + + An announcement consists of a session-level section followed by zero + or more media-level sections. The session-level part starts with a + `v=' line and continues to the first media-level section. The media + description starts with an `m=' line and continues to the next media + description or end of the whole session description. In general, + session-level values are the default for all media unless overridden + by an equivalent media-level value. + + When SDP is conveyed by SAP, only one session description is allowed + per packet. When SDP is conveyed by other means, many SDP session + descriptions may be concatenated together (the `v=' line indicating + the start of a session description terminates the previous + description). Some lines in each description are required and some + are optional but all must appear in exactly the order given here (the + fixed order greatly enhances error detection and allows for a simple + parser). Optional items are marked with a `*'. + +Session description + v= (protocol version) + o= (owner/creator and session identifier). + s= (session name) + i=* (session information) + + + +Handley & Jacobson Standards Track [Page 7] + +RFC 2327 SDP April 1998 + + + u=* (URI of description) + e=* (email address) + p=* (phone number) + c=* (connection information - not required if included in all media) + b=* (bandwidth information) + One or more time descriptions (see below) + z=* (time zone adjustments) + k=* (encryption key) + a=* (zero or more session attribute lines) + Zero or more media descriptions (see below) + +Time description + t= (time the session is active) + r=* (zero or more repeat times) + +Media description + m= (media name and transport address) + i=* (media title) + c=* (connection information - optional if included at session-level) + b=* (bandwidth information) + k=* (encryption key) + a=* (zero or more media attribute lines) + + The set of `type' letters is deliberately small and not intended to + be extensible -- SDP parsers must completely ignore any announcement + that contains a `type' letter that it does not understand. The + `attribute' mechanism ("a=" described below) is the primary means for + extending SDP and tailoring it to particular applications or media. + Some attributes (the ones listed in this document) have a defined + meaning but others may be added on an application-, media- or + session-specific basis. A session directory must ignore any + attribute it doesn't understand. + + The connection (`c=') and attribute (`a=') information in the + session-level section applies to all the media of that session unless + overridden by connection information or an attribute of the same name + in the media description. For instance, in the example below, each + media behaves as if it were given a `recvonly' attribute. + + An example SDP description is: + + v=0 + o=mhandley 2890844526 2890842807 IN IP4 126.16.64.4 + s=SDP Seminar + i=A Seminar on the session description protocol + u=http://www.cs.ucl.ac.uk/staff/M.Handley/sdp.03.ps + e=mjh@isi.edu (Mark Handley) + c=IN IP4 224.2.17.12/127 + + + +Handley & Jacobson Standards Track [Page 8] + +RFC 2327 SDP April 1998 + + + t=2873397496 2873404696 + a=recvonly + m=audio 49170 RTP/AVP 0 + m=video 51372 RTP/AVP 31 + m=application 32416 udp wb + a=orient:portrait + + Text records such as the session name and information are bytes + strings which may contain any byte with the exceptions of 0x00 (Nul), + 0x0a (ASCII newline) and 0x0d (ASCII carriage return). The sequence + CRLF (0x0d0a) is used to end a record, although parsers should be + tolerant and also accept records terminated with a single newline + character. By default these byte strings contain ISO-10646 + characters in UTF-8 encoding, but this default may be changed using + the `charset' attribute. + + Protocol Version + + v=0 + + The "v=" field gives the version of the Session Description Protocol. + There is no minor version number. + + Origin + + o=
+
+ + The "o=" field gives the originator of the session (their username + and the address of the user's host) plus a session id and session + version number. + + is the user's login on the originating host, or it is "-" + if the originating host does not support the concept of user ids. + must not contain spaces. is a numeric string + such that the tuple of , , , +
and
form a globally unique identifier for + the session. + + The method of allocation is up to the creating tool, but + it has been suggested that a Network Time Protocol (NTP) timestamp be + used to ensure uniqueness [1]. + + is a version number for this announcement. It is needed + for proxy announcements to detect which of several announcements for + the same session is the most recent. Again its usage is up to the + + + + + +Handley & Jacobson Standards Track [Page 9] + +RFC 2327 SDP April 1998 + + + creating tool, so long as is increased when a modification + is made to the session data. Again, it is recommended (but not + mandatory) that an NTP timestamp is used. + + is a text string giving the type of network. + Initially "IN" is defined to have the meaning "Internet".
is a text string giving the type of the address that follows. + Initially "IP4" and "IP6" are defined.
is the globally + unique address of the machine from which the session was created. + For an address type of IP4, this is either the fully-qualified domain + name of the machine, or the dotted-decimal representation of the IP + version 4 address of the machine. For an address type of IP6, this + is either the fully-qualified domain name of the machine, or the + compressed textual representation of the IP version 6 address of the + machine. For both IP4 and IP6, the fully-qualified domain name is + the form that SHOULD be given unless this is unavailable, in which + case the globally unique address may be substituted. A local IP + address MUST NOT be used in any context where the SDP description + might leave the scope in which the address is meaningful. + + In general, the "o=" field serves as a globally unique identifier for + this version of this session description, and the subfields excepting + the version taken together identify the session irrespective of any + modifications. + + Session Name + + s= + + The "s=" field is the session name. There must be one and only one + "s=" field per session description, and it must contain ISO 10646 + characters (but see also the `charset' attribute below). + + Session and Media Information + + i= + + The "i=" field is information about the session. There may be at + most one session-level "i=" field per session description, and at + most one "i=" field per media. Although it may be omitted, this is + discouraged for session announcements, and user interfaces for + composing sessions should require text to be entered. If it is + present it must contain ISO 10646 characters (but see also the + `charset' attribute below). + + A single "i=" field can also be used for each media definition. In + media definitions, "i=" fields are primarily intended for labeling + media streams. As such, they are most likely to be useful when a + + + +Handley & Jacobson Standards Track [Page 10] + +RFC 2327 SDP April 1998 + + + single session has more than one distinct media stream of the same + media type. An example would be two different whiteboards, one for + slides and one for feedback and questions. + + URI + + u= + + o A URI is a Universal Resource Identifier as used by WWW clients + + o The URI should be a pointer to additional information about the + conference + + o This field is optional, but if it is present it should be specified + before the first media field + + o No more than one URI field is allowed per session description + + + Email Address and Phone Number + + e= + p= + + o These specify contact information for the person responsible for + the conference. This is not necessarily the same person that + created the conference announcement. + + o Either an email field or a phone field must be specified. + Additional email and phone fields are allowed. + + o If these are present, they should be specified before the first + media field. + + o More than one email or phone field can be given for a session + description. + + o Phone numbers should be given in the conventional international + + format - preceded by a "+ and the international country code. + There must be a space or a hyphen ("-") between the country code + and the rest of the phone number. Spaces and hyphens may be used + to split up a phone field to aid readability if desired. For + example: + + p=+44-171-380-7777 or p=+1 617 253 6011 + + + + + +Handley & Jacobson Standards Track [Page 11] + +RFC 2327 SDP April 1998 + + + o Both email addresses and phone numbers can have an optional free + text string associated with them, normally giving the name of the + person who may be contacted. This should be enclosed in + parenthesis if it is present. For example: + + e=mjh@isi.edu (Mark Handley) + + The alternative RFC822 name quoting convention is also allowed for + both email addresses and phone numbers. For example, + + e=Mark Handley + + The free text string should be in the ISO-10646 character set with + UTF-8 encoding, or alternatively in ISO-8859-1 or other encodings + if the appropriate charset session-level attribute is set. + + Connection Data + + c=
+ + The "c=" field contains connection data. + + A session announcement must contain one "c=" field in each media + description (see below) or a "c=" field at the session-level. It may + contain a session-level "c=" field and one additional "c=" field per + media description, in which case the per-media values override the + session-level settings for the relevant media. + + The first sub-field is the network type, which is a text string + giving the type of network. Initially "IN" is defined to have the + meaning "Internet". + + The second sub-field is the address type. This allows SDP to be used + for sessions that are not IP based. Currently only IP4 is defined. + + The third sub-field is the connection address. Optional extra + subfields may be added after the connection address depending on the + value of the
field. + + For IP4 addresses, the connection address is defined as follows: + + o Typically the connection address will be a class-D IP multicast + + group address. If the session is not multicast, then the + connection address contains the fully-qualified domain name or the + unicast IP address of the expected data source or data relay or + data sink as determined by additional attribute fields. It is not + expected that fully-qualified domain names or unicast addresses + + + +Handley & Jacobson Standards Track [Page 12] + +RFC 2327 SDP April 1998 + + + will be given in a session description that is communicated by a + multicast announcement, though this is not prohibited. If a + unicast data stream is to pass through a network address + translator, the use of a fully-qualified domain name rather than an + unicast IP address is RECOMMENDED. In other cases, the use of an + IP address to specify a particular interface on a multi-homed host + might be required. Thus this specification leaves the decision as + to which to use up to the individual application, but all + applications MUST be able to cope with receiving both formats. + + o Conferences using an IP multicast connection address must also have + a time to live (TTL) value present in addition to the multicast + address. The TTL and the address together define the scope with + which multicast packets sent in this conference will be sent. TTL + values must be in the range 0-255. + + The TTL for the session is appended to the address using a slash as + a separator. An example is: + + c=IN IP4 224.2.1.1/127 + + Hierarchical or layered encoding schemes are data streams where the + encoding from a single media source is split into a number of + layers. The receiver can choose the desired quality (and hence + bandwidth) by only subscribing to a subset of these layers. Such + layered encodings are normally transmitted in multiple multicast + groups to allow multicast pruning. This technique keeps unwanted + traffic from sites only requiring certain levels of the hierarchy. + For applications requiring multiple multicast groups, we allow the + following notation to be used for the connection address: + + // + + If the number of addresses is not given it is assumed to be one. + Multicast addresses so assigned are contiguously allocated above + the base address, so that, for example: + + c=IN IP4 224.2.1.1/127/3 + + would state that addresses 224.2.1.1, 224.2.1.2 and 224.2.1.3 are + to be used at a ttl of 127. This is semantically identical to + including multiple "c=" lines in a media description: + + c=IN IP4 224.2.1.1/127 + c=IN IP4 224.2.1.2/127 + c=IN IP4 224.2.1.3/127 + + + + + +Handley & Jacobson Standards Track [Page 13] + +RFC 2327 SDP April 1998 + + + Multiple addresses or "c=" lines can only be specified on a per- + media basis, and not for a session-level "c=" field. + + It is illegal for the slash notation described above to be used for + IP unicast addresses. + + Bandwidth + + b=: + + o This specifies the proposed bandwidth to be used by the session or + media, and is optional. + + o is in kilobits per second + + o is a single alphanumeric word giving the meaning of the + bandwidth figure. + + o Two modifiers are initially defined: + + CT Conference Total: An implicit maximum bandwidth is associated with + each TTL on the Mbone or within a particular multicast + administrative scope region (the Mbone bandwidth vs. TTL limits are + given in the MBone FAQ). If the bandwidth of a session or media in + a session is different from the bandwidth implicit from the scope, + a `b=CT:...' line should be supplied for the session giving the + proposed upper limit to the bandwidth used. The primary purpose of + this is to give an approximate idea as to whether two or more + conferences can co-exist simultaneously. + + AS Application-Specific Maximum: The bandwidth is interpreted to be + application-specific, i.e., will be the application's concept of + maximum bandwidth. Normally this will coincide with what is set on + the application's "maximum bandwidth" control if applicable. + + Note that CT gives a total bandwidth figure for all the media at + all sites. AS gives a bandwidth figure for a single media at a + single site, although there may be many sites sending + simultaneously. + + o Extension Mechanism: Tool writers can define experimental bandwidth + modifiers by prefixing their modifier with "X-". For example: + + b=X-YZ:128 + + SDP parsers should ignore bandwidth fields with unknown modifiers. + Modifiers should be alpha-numeric and, although no length limit is + given, they are recommended to be short. + + + +Handley & Jacobson Standards Track [Page 14] + +RFC 2327 SDP April 1998 + + + Times, Repeat Times and Time Zones + + t= + + o "t=" fields specify the start and stop times for a conference + session. Multiple "t=" fields may be used if a session is active + at multiple irregularly spaced times; each additional "t=" field + specifies an additional period of time for which the session will + be active. If the session is active at regular times, an "r=" + field (see below) should be used in addition to and following a + "t=" field - in which case the "t=" field specifies the start and + stop times of the repeat sequence. + + o The first and second sub-fields give the start and stop times for + the conference respectively. These values are the decimal + representation of Network Time Protocol (NTP) time values in + seconds [1]. To convert these values to UNIX time, subtract + decimal 2208988800. + + o If the stop-time is set to zero, then the session is not bounded, + though it will not become active until after the start-time. If + the start-time is also zero, the session is regarded as permanent. + + User interfaces should strongly discourage the creation of + unbounded and permanent sessions as they give no information about + when the session is actually going to terminate, and so make + scheduling difficult. + + The general assumption may be made, when displaying unbounded + sessions that have not timed out to the user, that an unbounded + session will only be active until half an hour from the current + time or the session start time, whichever is the later. If + behaviour other than this is required, an end-time should be given + and modified as appropriate when new information becomes available + about when the session should really end. + + Permanent sessions may be shown to the user as never being active + unless there are associated repeat times which state precisely when + the session will be active. In general, permanent sessions should + not be created for any session expected to have a duration of less + than 2 months, and should be discouraged for sessions expected to + have a duration of less than 6 months. + + r= + + o "r=" fields specify repeat times for a session. For example, if + a session is active at 10am on Monday and 11am on Tuesday for one + + + +Handley & Jacobson Standards Track [Page 15] + +RFC 2327 SDP April 1998 + + + hour each week for three months, then the in the + corresponding "t=" field would be the NTP representation of 10am on + the first Monday, the would be 1 week, the + would be 1 hour, and the offsets would be zero + and 25 hours. The corresponding "t=" field stop time would be the + NTP representation of the end of the last session three months + later. By default all fields are in seconds, so the "r=" and "t=" + fields might be: + + t=3034423619 3042462419 + r=604800 3600 0 90000 + + To make announcements more compact, times may also be given in units + of days, hours or minutes. The syntax for these is a number + immediately followed by a single case-sensitive character. + Fractional units are not allowed - a smaller unit should be used + instead. The following unit specification characters are allowed: + + d - days (86400 seconds) + h - minutes (3600 seconds) + m - minutes (60 seconds) + s - seconds (allowed for completeness but not recommended) + + Thus, the above announcement could also have been written: + + r=7d 1h 0 25h + + Monthly and yearly repeats cannot currently be directly specified + with a single SDP repeat time - instead separate "t" fields should + be used to explicitly list the session times. + + z= .... + + o To schedule a repeated session which spans a change from daylight- + saving time to standard time or vice-versa, it is necessary to + specify offsets from the base repeat times. This is required + because different time zones change time at different times of day, + different countries change to or from daylight time on different + dates, and some countries do not have daylight saving time at all. + + Thus in order to schedule a session that is at the same time winter + and summer, it must be possible to specify unambiguously by whose + time zone a session is scheduled. To simplify this task for + receivers, we allow the sender to specify the NTP time that a time + zone adjustment happens and the offset from the time when the + session was first scheduled. The "z" field allows the sender to + specify a list of these adjustment times and offsets from the base + time. + + + +Handley & Jacobson Standards Track [Page 16] + +RFC 2327 SDP April 1998 + + + An example might be: + + z=2882844526 -1h 2898848070 0 + + This specifies that at time 2882844526 the time base by which the + session's repeat times are calculated is shifted back by 1 hour, + and that at time 2898848070 the session's original time base is + restored. Adjustments are always relative to the specified start + time - they are not cumulative. + + o If a session is likely to last several years, it is expected + that + the session announcement will be modified periodically rather than + transmit several years worth of adjustments in one announcement. + + Encryption Keys + + k= + k=: + + o The session description protocol may be used to convey encryption + keys. A key field is permitted before the first media entry (in + which case it applies to all media in the session), or for each + media entry as required. + + o The format of keys and their usage is outside the scope of this + document, but see [3]. + + o The method indicates the mechanism to be used to obtain a usable + key by external means, or from the encoded encryption key given. + + The following methods are defined: + + k=clear: + The encryption key (as described in [3] for RTP media streams + under the AV profile) is included untransformed in this key + field. + + k=base64: + The encryption key (as described in [3] for RTP media streams + under the AV profile) is included in this key field but has been + base64 encoded because it includes characters that are + prohibited in SDP. + + k=uri: + A Universal Resource Identifier as used by WWW clients is + included in this key field. The URI refers to the data + containing the key, and may require additional authentication + + + +Handley & Jacobson Standards Track [Page 17] + +RFC 2327 SDP April 1998 + + + before the key can be returned. When a request is made to the + given URI, the MIME content-type of the reply specifies the + encoding for the key in the reply. The key should not be + obtained until the user wishes to join the session to reduce + synchronisation of requests to the WWW server(s). + + k=prompt + No key is included in this SDP description, but the session or + media stream referred to by this key field is encrypted. The + user should be prompted for the key when attempting to join the + session, and this user-supplied key should then be used to + decrypt the media streams. + + Attributes + + a= + a=: + + Attributes are the primary means for extending SDP. Attributes may + be defined to be used as "session-level" attributes, "media-level" + attributes, or both. + + A media description may have any number of attributes ("a=" fields) + which are media specific. These are referred to as "media-level" + attributes and add information about the media stream. Attribute + fields can also be added before the first media field; these + "session-level" attributes convey additional information that applies + to the conference as a whole rather than to individual media; an + example might be the conference's floor control policy. + + Attribute fields may be of two forms: + + o property attributes. A property attribute is simply of the form + "a=". These are binary attributes, and the presence of the + attribute conveys that the attribute is a property of the session. + An example might be "a=recvonly". + + o value attributes. A value attribute is of the form + "a=:". An example might be that a whiteboard + could have the value attribute "a=orient:landscape" + + Attribute interpretation depends on the media tool being invoked. + Thus receivers of session descriptions should be configurable in + their interpretation of announcements in general and of attributes in + particular. + + Attribute names must be in the US-ASCII subset of ISO-10646/UTF-8. + + + + +Handley & Jacobson Standards Track [Page 18] + +RFC 2327 SDP April 1998 + + + Attribute values are byte strings, and MAY use any byte value except + 0x00 (Nul), 0x0A (LF), and 0x0D (CR). By default, attribute values + are to be interpreted as in ISO-10646 character set with UTF-8 + encoding. Unlike other text fields, attribute values are NOT + normally affected by the `charset' attribute as this would make + comparisons against known values problematic. However, when an + attribute is defined, it can be defined to be charset-dependent, in + which case it's value should be interpreted in the session charset + rather than in ISO-10646. + + Attributes that will be commonly used can be registered with IANA + (see Appendix B). Unregistered attributes should begin with "X-" to + prevent inadvertent collision with registered attributes. In either + case, if an attribute is received that is not understood, it should + simply be ignored by the receiver. + + Media Announcements + + m= + + A session description may contain a number of media descriptions. + Each media description starts with an "m=" field, and is terminated + by either the next "m=" field or by the end of the session + description. A media field also has several sub-fields: + + o The first sub-field is the media type. Currently defined media are + "audio", "video", "application", "data" and "control", though this + list may be extended as new communication modalities emerge (e.g., + telepresense). The difference between "application" and "data" is + that the former is a media flow such as whiteboard information, and + the latter is bulk-data transfer such as multicasting of program + executables which will not typically be displayed to the user. + "control" is used to specify an additional conference control + channel for the session. + + o The second sub-field is the transport port to which the media + stream will be sent. The meaning of the transport port depends on + the network being used as specified in the relevant "c" field and + on the transport protocol defined in the third sub-field. Other + ports used by the media application (such as the RTCP port, see + [2]) should be derived algorithmically from the base media port. + + Note: For transports based on UDP, the value should be in the range + 1024 to 65535 inclusive. For RTP compliance it should be an even + number. + + + + + + +Handley & Jacobson Standards Track [Page 19] + +RFC 2327 SDP April 1998 + + + For applications where hierarchically encoded streams are being + sent to a unicast address, it may be necessary to specify multiple + transport ports. This is done using a similar notation to that + used for IP multicast addresses in the "c=" field: + + m= / + + In such a case, the ports used depend on the transport protocol. + For RTP, only the even ports are used for data and the + corresponding one-higher odd port is used for RTCP. For example: + + m=video 49170/2 RTP/AVP 31 + + would specify that ports 49170 and 49171 form one RTP/RTCP pair and + 49172 and 49173 form the second RTP/RTCP pair. RTP/AVP is the + transport protocol and 31 is the format (see below). + + It is illegal for both multiple addresses to be specified in the + "c=" field and for multiple ports to be specified in the "m=" field + in the same session description. + + o The third sub-field is the transport protocol. The transport + protocol values are dependent on the address-type field in the "c=" + fields. Thus a "c=" field of IP4 defines that the transport + protocol runs over IP4. For IP4, it is normally expected that most + media traffic will be carried as RTP over UDP. The following + transport protocols are preliminarily defined, but may be extended + through registration of new protocols with IANA: + + - RTP/AVP - the IETF's Realtime Transport Protocol using the + Audio/Video profile carried over UDP. + + - udp - User Datagram Protocol + + If an application uses a single combined proprietary media format + and transport protocol over UDP, then simply specifying the + transport protocol as udp and using the format field to distinguish + the combined protocol is recommended. If a transport protocol is + used over UDP to carry several distinct media types that need to be + distinguished by a session directory, then specifying the transport + protocol and media format separately is necessary. RTP is an + example of a transport-protocol that carries multiple payload + formats that must be distinguished by the session directory for it + to know how to start appropriate tools, relays, mixers or + recorders. + + + + + + +Handley & Jacobson Standards Track [Page 20] + +RFC 2327 SDP April 1998 + + + The main reason to specify the transport-protocol in addition to + the media format is that the same standard media formats may be + carried over different transport protocols even when the network + protocol is the same - a historical example is vat PCM audio and + RTP PCM audio. In addition, relays and monitoring tools that are + transport-protocol-specific but format-independent are possible. + + For RTP media streams operating under the RTP Audio/Video Profile + [3], the protocol field is "RTP/AVP". Should other RTP profiles be + defined in the future, their profiles will be specified in the same + way. For example, the protocol field "RTP/XYZ" would specify RTP + operating under a profile whose short name is "XYZ". + + o The fourth and subsequent sub-fields are media formats. For audio + and video, these will normally be a media payload type as defined + in the RTP Audio/Video Profile. + + When a list of payload formats is given, this implies that all of + these formats may be used in the session, but the first of these + formats is the default format for the session. + + For media whose transport protocol is not RTP or UDP the format + field is protocol specific. Such formats should be defined in an + additional specification document. + + For media whose transport protocol is RTP, SDP can be used to + provide a dynamic binding of media encoding to RTP payload type. + The encoding names in the RTP AV Profile do not specify unique + audio encodings (in terms of clock rate and number of audio + channels), and so they are not used directly in SDP format fields. + Instead, the payload type number should be used to specify the + format for static payload types and the payload type number along + with additional encoding information should be used for dynamically + allocated payload types. + + An example of a static payload type is u-law PCM coded single + channel audio sampled at 8KHz. This is completely defined in the + RTP Audio/Video profile as payload type 0, so the media field for + such a stream sent to UDP port 49232 is: + + m=video 49232 RTP/AVP 0 + + An example of a dynamic payload type is 16 bit linear encoded + stereo audio sampled at 16KHz. If we wish to use dynamic RTP/AVP + payload type 98 for such a stream, additional information is + required to decode it: + + m=video 49232 RTP/AVP 98 + + + +Handley & Jacobson Standards Track [Page 21] + +RFC 2327 SDP April 1998 + + + a=rtpmap:98 L16/16000/2 + + The general form of an rtpmap attribute is: + + a=rtpmap: /[/] + + For audio streams, may specify the number of + audio channels. This parameter may be omitted if the number of + channels is one provided no additional parameters are needed. For + video streams, no encoding parameters are currently specified. + + Additional parameters may be defined in the future, but + codecspecific parameters should not be added. Parameters added to + an rtpmap attribute should only be those required for a session + directory to make the choice of appropriate media too to + participate in a session. Codec-specific parameters should be + added in other attributes. + + Up to one rtpmap attribute can be defined for each media format + specified. Thus we might have: + + m=audio 49230 RTP/AVP 96 97 98 + a=rtpmap:96 L8/8000 + a=rtpmap:97 L16/8000 + a=rtpmap:98 L16/11025/2 + + RTP profiles that specify the use of dynamic payload types must + define the set of valid encoding names and/or a means to register + encoding names if that profile is to be used with SDP. + + Experimental encoding formats can also be specified using rtpmap. + RTP formats that are not registered as standard format names must + be preceded by "X-". Thus a new experimental redundant audio + stream called GSMLPC using dynamic payload type 99 could be + specified as: + + m=video 49232 RTP/AVP 99 + a=rtpmap:99 X-GSMLPC/8000 + + Such an experimental encoding requires that any site wishing to + receive the media stream has relevant configured state in its + session directory to know which tools are appropriate. + + Note that RTP audio formats typically do not include information + about the number of samples per packet. If a non-default (as + defined in the RTP Audio/Video Profile) packetisation is required, + the "ptime" attribute is used as given below. + + + +Handley & Jacobson Standards Track [Page 22] + +RFC 2327 SDP April 1998 + + + For more details on RTP audio and video formats, see [3]. + + o Formats for non-RTP media should be registered as MIME content + types as described in Appendix B. For example, the LBL whiteboard + application might be registered as MIME content-type application/wb + with encoding considerations specifying that it operates over UDP, + with no appropriate file format. In SDP this would then be + expressed using a combination of the "media" field and the "fmt" + field, as follows: + + m=application 32416 udp wb + + Suggested Attributes + + The following attributes are suggested. Since application writers + may add new attributes as they are required, this list is not + exhaustive. + + a=cat: + This attribute gives the dot-separated hierarchical category of + the session. This is to enable a receiver to filter unwanted + sessions by category. It would probably have been a compulsory + separate field, except for its experimental nature at this time. + It is a session-level attribute, and is not dependent on charset. + + a=keywds: + Like the cat attribute, this is to assist identifying wanted + sessions at the receiver. This allows a receiver to select + interesting session based on keywords describing the purpose of + the session. It is a session-level attribute. It is a charset + dependent attribute, meaning that its value should be interpreted + in the charset specified for the session description if one is + specified, or by default in ISO 10646/UTF-8. + + a=tool: + This gives the name and version number of the tool used to create + the session description. It is a session-level attribute, and is + not dependent on charset. + + a=ptime: + This gives the length of time in milliseconds represented by the + media in a packet. This is probably only meaningful for audio + data. It should not be necessary to know ptime to decode RTP or + vat audio, and it is intended as a recommendation for the + encoding/packetisation of audio. It is a media attribute, and is + not dependent on charset. + + + + + +Handley & Jacobson Standards Track [Page 23] + +RFC 2327 SDP April 1998 + + + a=recvonly + This specifies that the tools should be started in receive-only + mode where applicable. It can be either a session or media + attribute, and is not dependent on charset. + + a=sendrecv + This specifies that the tools should be started in send and + receive mode. This is necessary for interactive conferences with + tools such as wb which defaults to receive only mode. It can be + either a session or media attribute, and is not dependent on + charset. + + a=sendonly + This specifies that the tools should be started in send-only + mode. An example may be where a different unicast address is to + be used for a traffic destination than for a traffic source. In + such a case, two media descriptions may be use, one sendonly and + one recvonly. It can be either a session or media attribute, but + would normally only be used as a media attribute, and is not + dependent on charset. + + a=orient: + Normally this is only used in a whiteboard media specification. + It specifies the orientation of a the whiteboard on the screen. + It is a media attribute. Permitted values are `portrait', + `landscape' and `seascape' (upside down landscape). It is not + dependent on charset + + a=type: + This specifies the type of the conference. Suggested values are + `broadcast', `meeting', `moderated', `test' and `H332'. + `recvonly' should be the default for `type:broadcast' sessions, + `type:meeting' should imply `sendrecv' and `type:moderated' + should indicate the use of a floor control tool and that the + media tools are started so as to "mute" new sites joining the + conference. + + Specifying the attribute type:H332 indicates that this loosely + coupled session is part of a H.332 session as defined in the ITU + H.332 specification [10]. Media tools should be started + `recvonly'. + + Specifying the attribute type:test is suggested as a hint that, + unless explicitly requested otherwise, receivers can safely avoid + displaying this session description to users. + + The type attribute is a session-level attribute, and is not + dependent on charset. + + + +Handley & Jacobson Standards Track [Page 24] + +RFC 2327 SDP April 1998 + + + a=charset: + This specifies the character set to be used to display the + session name and information data. By default, the ISO-10646 + character set in UTF-8 encoding is used. If a more compact + representation is required, other character sets may be used such + as ISO-8859-1 for Northern European languages. In particular, + the ISO 8859-1 is specified with the following SDP attribute: + + a=charset:ISO-8859-1 + + This is a session-level attribute; if this attribute is present, + it must be before the first media field. The charset specified + MUST be one of those registered with IANA, such as ISO-8859-1. + The character set identifier is a US-ASCII string and MUST be + compared against the IANA identifiers using a case-insensitive + comparison. If the identifier is not recognised or not + supported, all strings that are affected by it SHOULD be regarded + as byte strings. + + Note that a character set specified MUST still prohibit the use + of bytes 0x00 (Nul), 0x0A (LF) and 0x0d (CR). Character sets + requiring the use of these characters MUST define a quoting + mechanism that prevents these bytes appearing within text fields. + + a=sdplang: + This can be a session level attribute or a media level attribute. + As a session level attribute, it specifies the language for the + session description. As a media level attribute, it specifies + the language for any media-level SDP information field associated + with that media. Multiple sdplang attributes can be provided + either at session or media level if multiple languages in the + session description or media use multiple languages, in which + case the order of the attributes indicates the order of + importance of the various languages in the session or media from + most important to least important. + + In general, sending session descriptions consisting of multiple + languages should be discouraged. Instead, multiple descriptions + should be sent describing the session, one in each language. + However this is not possible with all transport mechanisms, and + so multiple sdplang attributes are allowed although not + recommended. + + The sdplang attribute value must be a single RFC 1766 language + tag in US-ASCII. It is not dependent on the charset attribute. + An sdplang attribute SHOULD be specified when a session is of + + + + + +Handley & Jacobson Standards Track [Page 25] + +RFC 2327 SDP April 1998 + + + sufficient scope to cross geographic boundaries where the + language of recipients cannot be assumed, or where the session is + in a different language from the locally assumed norm. + + a=lang: + This can be a session level attribute or a media level attribute. + As a session level attribute, it specifies the default language + for the session being described. As a media level attribute, it + specifies the language for that media, overriding any session- + level language specified. Multiple lang attributes can be + provided either at session or media level if multiple languages + if the session description or media use multiple languages, in + which case the order of the attributes indicates the order of + importance of the various languages in the session or media from + most important to least important. + + The lang attribute value must be a single RFC 1766 language tag + in US-ASCII. It is not dependent on the charset attribute. A + lang attribute SHOULD be specified when a session is of + sufficient scope to cross geographic boundaries where the + language of recipients cannot be assumed, or where the session is + in a different language from the locally assumed norm. + + a=framerate: + This gives the maximum video frame rate in frames/sec. It is + intended as a recommendation for the encoding of video data. + Decimal representations of fractional values using the notation + "." are allowed. It is a media attribute, is + only defined for video media, and is not dependent on charset. + + a=quality: + This gives a suggestion for the quality of the encoding as an + integer value. + + The intention of the quality attribute for video is to specify a + non-default trade-off between frame-rate and still-image quality. + For video, the value in the range 0 to 10, with the following + suggested meaning: + + 10 - the best still-image quality the compression scheme can + give. + + 5 - the default behaviour given no quality suggestion. + + 0 - the worst still-image quality the codec designer thinks is + still usable. + + It is a media attribute, and is not dependent on charset. + + + +Handley & Jacobson Standards Track [Page 26] + +RFC 2327 SDP April 1998 + + + a=fmtp: + This attribute allows parameters that are specific to a + particular format to be conveyed in a way that SDP doesn't have + to understand them. The format must be one of the formats + specified for the media. Format-specific parameters may be any + set of parameters required to be conveyed by SDP and given + unchanged to the media tool that will use this format. + + It is a media attribute, and is not dependent on charset. + +6.1. Communicating Conference Control Policy + + There is some debate over the way conference control policy should be + communicated. In general, the authors believe that an implicit + declarative style of specifying conference control is desirable where + possible. + + A simple declarative style uses a single conference attribute field + before the first media field, possibly supplemented by properties + such as `recvonly' for some of the media tools. This conference + attribute conveys the conference control policy. An example might be: + + a=type:moderated + + In some cases, however, it is possible that this may be insufficient + to communicate the details of an unusual conference control policy. + If this is the case, then a conference attribute specifying external + control might be set, and then one or more "media" fields might be + used to specify the conference control tools and configuration data + for those tools. An example is an ITU H.332 session: + + c=IN IP4 224.5.6.7 + a=type:H332 + m=audio 49230 RTP/AVP 0 + m=video 49232 RTP/AVP 31 + m=application 12349 udp wb + m=control 49234 H323 mc + c=IN IP4 134.134.157.81 + + In this example, a general conference attribute (type:H332) is + specified stating that conference control will be provided by an + external H.332 tool, and a contact addresses for the H.323 session + multipoint controller is given. + + In this document, only the declarative style of conference control + declaration is specified. Other forms of conference control should + specify an appropriate type attribute, and should define the + implications this has for control media. + + + +Handley & Jacobson Standards Track [Page 27] + +RFC 2327 SDP April 1998 + + +7. Security Considerations + + SDP is a session description format that describes multimedia + sessions. A session description should not be trusted unless it has + been obtained by an authenticated transport protocol from a trusted + source. Many different transport protocols may be used to distribute + session description, and the nature of the authentication will differ + from transport to transport. + + One transport that will frequently be used to distribute session + descriptions is the Session Announcement Protocol (SAP). SAP + provides both encryption and authentication mechanisms but due to the + nature of session announcements it is likely that there are many + occasions where the originator of a session announcement cannot be + authenticated because they are previously unknown to the receiver of + the announcement and because no common public key infrastructure is + available. + + On receiving a session description over an unauthenticated transport + mechanism or from an untrusted party, software parsing the session + should take a few precautions. Session description contain + information required to start software on the receivers system. + Software that parses a session description MUST not be able to start + other software except that which is specifically configured as + appropriate software to participate in multimedia sessions. It is + normally considered INAPPROPRIATE for software parsing a session + description to start, on a user's system, software that is + appropriate to participate in multimedia sessions, without the user + first being informed that such software will be started and giving + their consent. Thus a session description arriving by session + announcement, email, session invitation, or WWW page SHOULD not + deliver the user into an {it interactive} multimedia session without + the user being aware that this will happen. As it is not always + simple to tell whether a session is interactive or not, applications + that are unsure should assume sessions are interactive. + + In this specification, there are no attributes which would allow the + recipient of a session description to be informed to start multimedia + tools in a mode where they default to transmitting. Under some + circumstances it might be appropriate to define such attributes. If + this is done an application parsing a session description containing + such attributes SHOULD either ignore them, or inform the user that + joining this session will result in the automatic transmission of + multimedia data. The default behaviour for an unknown attribute is + to ignore it. + + + + + + +Handley & Jacobson Standards Track [Page 28] + +RFC 2327 SDP April 1998 + + + Session descriptions may be parsed at intermediate systems such as + firewalls for the purposes of opening a hole in the firewall to allow + the participation in multimedia sessions. It is considered + INAPPROPRIATE for a firewall to open such holes for unicast data + streams unless the session description comes in a request from inside + the firewall. + + For multicast sessions, it is likely that local administrators will + apply their own policies, but the exclusive use of "local" or "site- + local" administrative scope within the firewall and the refusal of + the firewall to open a hole for such scopes will provide separation + of global multicast sessions from local ones. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson Standards Track [Page 29] + +RFC 2327 SDP April 1998 + + +Appendix A: SDP Grammar + + This appendix provides an Augmented BNF grammar for SDP. ABNF is + defined in RFC 2234. + + + announcement = proto-version + origin-field + session-name-field + information-field + uri-field + email-fields + phone-fields + connection-field + bandwidth-fields + time-fields + key-field + attribute-fields + media-descriptions + + proto-version = "v=" 1*DIGIT CRLF + ;this memo describes version 0 + + origin-field = "o=" username space + sess-id space sess-version space + nettype space addrtype space + addr CRLF + + session-name-field = "s=" text CRLF + + information-field = ["i=" text CRLF] + + uri-field = ["u=" uri CRLF] + + email-fields = *("e=" email-address CRLF) + + phone-fields = *("p=" phone-number CRLF) + + + connection-field = ["c=" nettype space addrtype space + connection-address CRLF] + ;a connection field must be present + ;in every media description or at the + ;session-level + + + bandwidth-fields = *("b=" bwtype ":" bandwidth CRLF) + + + + +Handley & Jacobson Standards Track [Page 30] + +RFC 2327 SDP April 1998 + + + time-fields = 1*( "t=" start-time space stop-time + *(CRLF repeat-fields) CRLF) + [zone-adjustments CRLF] + + + repeat-fields = "r=" repeat-interval space typed-time + 1*(space typed-time) + + + zone-adjustments = time space ["-"] typed-time + *(space time space ["-"] typed-time) + + + key-field = ["k=" key-type CRLF] + + + key-type = "prompt" | + "clear:" key-data | + "base64:" key-data | + "uri:" uri + + + key-data = email-safe | "~" | " + + + attribute-fields = *("a=" attribute CRLF) + + + media-descriptions = *( media-field + information-field + *(connection-field) + bandwidth-fields + key-field + attribute-fields ) + + + media-field = "m=" media space port ["/" integer] + space proto 1*(space fmt) CRLF + + + media = 1*(alpha-numeric) + ;typically "audio", "video", "application" + ;or "data" + + fmt = 1*(alpha-numeric) + ;typically an RTP payload type for audio + ;and video media + + + + +Handley & Jacobson Standards Track [Page 31] + +RFC 2327 SDP April 1998 + + + proto = 1*(alpha-numeric) + ;typically "RTP/AVP" or "udp" for IP4 + + + port = 1*(DIGIT) + ;should in the range "1024" to "65535" inclusive + ;for UDP based media + + + attribute = (att-field ":" att-value) | att-field + + + att-field = 1*(alpha-numeric) + + + att-value = byte-string + + + sess-id = 1*(DIGIT) + ;should be unique for this originating username/host + + + sess-version = 1*(DIGIT) + ;0 is a new session + + + connection-address = multicast-address + | addr + + + multicast-address = 3*(decimal-uchar ".") decimal-uchar "/" ttl + [ "/" integer ] + ;multicast addresses may be in the range + ;224.0.0.0 to 239.255.255.255 + + ttl = decimal-uchar + + start-time = time | "0" + + stop-time = time | "0" + + time = POS-DIGIT 9*(DIGIT) + ;sufficient for 2 more centuries + + + repeat-interval = typed-time + + + + + +Handley & Jacobson Standards Track [Page 32] + +RFC 2327 SDP April 1998 + + + typed-time = 1*(DIGIT) [fixed-len-time-unit] + + + fixed-len-time-unit = "d" | "h" | "m" | "s" + + + bwtype = 1*(alpha-numeric) + + bandwidth = 1*(DIGIT) + + + username = safe + ;pretty wide definition, but doesn't include space + + + email-address = email | email "(" email-safe ")" | + email-safe "<" email ">" + + + email = ;defined in RFC822 + + + uri= ;defined in RFC1630 + + + phone-number = phone | phone "(" email-safe ")" | + email-safe "<" phone ">" + + + phone = "+" POS-DIGIT 1*(space | "-" | DIGIT) + ;there must be a space or hyphen between the + ;international code and the rest of the number. + + + nettype = "IN" + ;list to be extended + + + addrtype = "IP4" | "IP6" + ;list to be extended + + + addr = FQDN | unicast-address + + + FQDN = 4*(alpha-numeric|"-"|".") + ;fully qualified domain name as specified in RFC1035 + + + + +Handley & Jacobson Standards Track [Page 33] + +RFC 2327 SDP April 1998 + + + unicast-address = IP4-address | IP6-address + + + IP4-address = b1 "." decimal-uchar "." decimal-uchar "." b4 + b1 = decimal-uchar + ;less than "224"; not "0" or "127" + b4 = decimal-uchar + ;not "0" + + IP6-address = ;to be defined + + + text = byte-string + ;default is to interpret this as IS0-10646 UTF8 + ;ISO 8859-1 requires a "a=charset:ISO-8859-1" + ;session-level attribute to be used + + + byte-string = 1*(0x01..0x09|0x0b|0x0c|0x0e..0xff) + ;any byte except NUL, CR or LF + + + decimal-uchar = DIGIT + | POS-DIGIT DIGIT + | ("1" 2*(DIGIT)) + | ("2" ("0"|"1"|"2"|"3"|"4") DIGIT) + | ("2" "5" ("0"|"1"|"2"|"3"|"4"|"5")) + + + integer = POS-DIGIT *(DIGIT) + + + alpha-numeric = ALPHA | DIGIT + + + DIGIT = "0" | POS-DIGIT + + + POS-DIGIT = "1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9" + + + ALPHA = "a"|"b"|"c"|"d"|"e"|"f"|"g"|"h"|"i"|"j"|"k"| + "l"|"m"|"n"|"o "|"p"|"q"|"r"|"s"|"t"|"u"|"v"| + "w"|"x"|"y"|"z"|"A"|"B"|"C "|"D"|"E"|"F"|"G"| + "H"|"I"|"J"|"K"|"L"|"M"|"N"|"O"|"P"|" Q"|"R"| + "S"|"T"|"U"|"V"|"W"|"X"|"Y"|"Z" + + + + + +Handley & Jacobson Standards Track [Page 34] + +RFC 2327 SDP April 1998 + + + email-safe = safe | space | tab + + + safe = alpha-numeric | + "'" | "'" | "-" | "." | "/" | ":" | "?" | """ | + "#" | "$" | "&" | "*" | ";" | "=" | "@" | "[" | + "]" | "^" | "_" | "`" | "{" | "|" | "}" | "+" | + "~" | " + + + space = %d32 + tab = %d9 + CRLF = %d13.10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson Standards Track [Page 35] + +RFC 2327 SDP April 1998 + + +Appendix B: Guidelines for registering SDP names with IANA + + There are seven field names that may be registered with IANA. Using + the terminology in the SDP specification BNF, they are "media", + "proto", "fmt", "att-field", "bwtype", "nettype" and "addrtype". + + "media" (eg, audio, video, application, data). + + Packetized media types, such as those used by RTP, share the + namespace used by media types registry [RFC 2048] (i.e. "MIME + types"). The list of valid media names is the set of top-level + MIME content types. The set of media is intended to be small and + not to be extended except under rare circumstances. (The MIME + subtype corresponds to the "fmt" parameter below). + + "proto" + + In general this should be an IETF standards-track transport + protocol identifier such as RTP/AVP (rfc 1889 under the rfc 1890 + profile). + + However, people will want to invent their own proprietary + transport protocols. Some of these should be registered as a + "fmt" using "udp" as the protocol and some of which probably + can't be. + + Where the protocol and the application are intimately linked, + such as with the LBL whiteboard wb which used a proprietary and + special purpose protocol over UDP, the protocol name should be + "udp" and the format name that should be registered is "wb". The + rules for formats (see below) apply to such registrations. + + Where the proprietary transport protocol really carries many + different data formats, it is possible to register a new protocol + name with IANA. In such a case, an RFC MUST be produced + describing the protocol and referenced in the registration. Such + an RFC MAY be informational, although it is preferable if it is + standards-track. + + "fmt" + + The format namespace is dependent on the context of the "proto" + field, so a format cannot be registered without specifying one or + more transport protocols that it applies to. + + Formats cover all the possible encodings that might want to be + transported in a multimedia session. + + + + +Handley & Jacobson Standards Track [Page 36] + +RFC 2327 SDP April 1998 + + + For RTP formats that have been assigned static payload types, the + payload type number is used. For RTP formats using a dynamic + payload type number, the dynamic payload type number is given as + the format and an additional "rtpmap" attribute specifies the + format and parameters. + + For non-RTP formats, any unregistered format name may be + registered through the MIME-type registration process [RFC 2048]. + The type given here is the MIME subtype only (the top-level MIME + content type is specified by the media parameter). The MIME type + registration SHOULD reference a standards-track RFC which + describes the transport protocol for this media type. If there + is an existing MIME type for this format, the MIME registration + should be augmented to reference the transport specification for + this media type. If there is not an existing MIME type for this + format, and there exists no appropriate file format, this should + be noted in the encoding considerations as "no appropriate file + format". + + "att-field" (Attribute names) + + Attribute field names MAY be registered with IANA, although this + is not compulsory, and unknown attributes are simply ignored. + + When an attribute is registered, it must be accompanied by a + brief specification stating the following: + + o contact name, email address and telephone number + + o attribute-name (as it will appear in SDP) + + o long-form attribute name in English + + o type of attribute (session level, media level, or both) + + o whether the attribute value is subject to the charset + attribute. + + o a one paragraph explanation of the purpose of the attribute. + + o a specification of appropriate attribute values for this + attribute. + + IANA will not sanity check such attribute registrations except to + ensure that they do not clash with existing registrations. + + + + + + +Handley & Jacobson Standards Track [Page 37] + +RFC 2327 SDP April 1998 + + + Although the above is the minimum that IANA will accept, if the + attribute is expected to see widespread use and interoperability + is an issue, authors are encouraged to produce a standards-track + RFC that specifies the attribute more precisely. + + Submitters of registrations should ensure that the specification + is in the spirit of SDP attributes, most notably that the + attribute is platform independent in the sense that it makes no + implicit assumptions about operating systems and does not name + specific pieces of software in a manner that might inhibit + interoperability. + + "bwtype" (bandwidth specifiers) + + A proliferation of bandwidth specifiers is strongly discouraged. + + New bandwidth specifiers may be registered with IANA. The + submission MUST reference a standards-track RFC specifying the + semantics of the bandwidth specifier precisely, and indicating + when it should be used, and why the existing registered bandwidth + specifiers do not suffice. + + "nettype" (Network Type) + + New network types may be registered with IANA if SDP needs to be + used in the context of non-internet environments. Whilst these + are not normally the preserve of IANA, there may be circumstances + when an Internet application needs to interoperate with a non- + internet application, such as when gatewaying an internet + telephony call into the PSTN. The number of network types should + be small and should be rarely extended. A new network type + cannot be registered without registering at least one address + type to be used with that network type. A new network type + registration MUST reference an RFC which gives details of the + network type and address type and specifies how and when they + would be used. Such an RFC MAY be Informational. + + "addrtype" (Address Type) + + New address types may be registered with IANA. An address type + is only meaningful in the context of a network type, and any + registration of an address type MUST specify a registered network + type, or be submitted along with a network type registration. A + new address type registration MUST reference an RFC giving + details of the syntax of the address type. Such an RFC MAY be + Informational. Address types are not expected to be registered + frequently. + + + + +Handley & Jacobson Standards Track [Page 38] + +RFC 2327 SDP April 1998 + + + Registration Procedure + + To register a name the above guidelines should be followed regarding + the required level of documentation that is required. The + registration itself should be sent to IANA. Attribute registrations + should include the information given above. Other registrations + should include the following additional information: + + o contact name, email address and telephone number + + o name being registered (as it will appear in SDP) + + o long-form name in English + + o type of name ("media", "proto", "fmt", "bwtype", "nettype", or + "addrtype") + + o a one paragraph explanation of the purpose of the registered name. + + o a reference to the specification (eg RFC number) of the registered + name. + + IANA may refer any registration to the IESG or to any appropriate + IETF working group for review, and may request revisions to be made + before a registration will be made. + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson Standards Track [Page 39] + +RFC 2327 SDP April 1998 + + +Appendix C: Authors' Addresses + + Mark Handley + Information Sciences Institute + c/o MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139 + United States + electronic mail: mjh@isi.edu + + Van Jacobson + MS 46a-1121 + Lawrence Berkeley Laboratory + Berkeley, CA 94720 + United States + electronic mail: van@ee.lbl.gov + +Acknowledgments + + Many people in the IETF MMUSIC working group have made comments and + suggestions contributing to this document. In particular, we would + like to thank Eve Schooler, Steve Casner, Bill Fenner, Allison + Mankin, Ross Finlayson, Peter Parnes, Joerg Ott, Carsten Bormann, Rob + Lanphier and Steve Hanna. + +References + + [1] Mills, D., "Network Time Protocol (version 3) specification and + implementation", RFC 1305, March 1992. + + [2] Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson, "RTP: + A Transport Protocol for Real-Time Applications", RFC 1889, January + 1996. + + [3] Schulzrinne, H., "RTP Profile for Audio and Video Conferences + with Minimal Control", RFC 1890, January 1996 + + [4] Handley, M., "SAP - Session Announcement Protocol", Work in + Progress. + + [5] V. Jacobson, S. McCanne, "vat - X11-based audio teleconferencing + tool" vat manual page, Lawrence Berkeley Laboratory, 1994. + + [6] The Unicode Consortium, "The Unicode Standard -- Version 2.0", + Addison-Wesley, 1996. + + + + + + +Handley & Jacobson Standards Track [Page 40] + +RFC 2327 SDP April 1998 + + + [7] ISO/IEC 10646-1:1993. International Standard -- Information + technol- ogy -- Universal Multiple-Octet Coded Character Set (UCS) -- + Part 1: Architecture and Basic Multilingual Plane. Five amendments + and a techn- ical corrigendum have been published up to now. UTF-8 + is described in Annex R, published as Amendment 2. + + [8] Goldsmith, D., and M. Davis, "Using Unicode with MIME", RFC 1641, + July 1994. + + [9] Yergeau, F., "UTF-8, a transformation format of Unicode and ISO + 10646", RFC 2044, October 1996. + + [10] ITU-T Recommendation H.332 (1998): "Multimedia Terminal for + Receiving Internet-based H.323 Conferences", ITU, Geneva. + + [11] Handley, M., Schooler, E., and H. Schulzrinne, "Session + Initiation Protocol (SIP)", Work in Progress. + + [12] Schulzrinne, H., Rao, A., and R. Lanphier, "Real Time Streaming + Protocol (RTSP)", RFC 2326, April 1998. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson Standards Track [Page 41] + +RFC 2327 SDP April 1998 + + +Full Copyright Statement + + Copyright (C) The Internet Society (1998). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + + + + + + + + + + + + + + + + + + + + + + + + +Handley & Jacobson Standards Track [Page 42] + diff --git a/src/modules/rtp/rfc2974.txt b/src/modules/rtp/rfc2974.txt new file mode 100644 index 00000000..4a5aa626 --- /dev/null +++ b/src/modules/rtp/rfc2974.txt @@ -0,0 +1,1011 @@ + + + + + + +Network Working Group M. Handley +Request for Comments: 2974 ACIRI +Category: Experimental C. Perkins + USC/ISI + E. Whelan + UCL + October 2000 + + + Session Announcement Protocol + +Status of this Memo + + This memo defines an Experimental Protocol for the Internet + community. It does not specify an Internet standard of any kind. + Discussion and suggestions for improvement are requested. + Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + This document describes version 2 of the multicast session directory + announcement protocol, Session Announcement Protocol (SAP), and the + related issues affecting security and scalability that should be + taken into account by implementors. + +1 Introduction + + In order to assist the advertisement of multicast multimedia + conferences and other multicast sessions, and to communicate the + relevant session setup information to prospective participants, a + distributed session directory may be used. An instance of such a + session directory periodically multicasts packets containing a + description of the session, and these advertisements are received by + other session directories such that potential remote participants can + use the session description to start the tools required to + participate in the session. + + This memo describes the issues involved in the multicast announcement + of session description information and defines an announcement + protocol to be used. Sessions are described using the session + description protocol which is described in a companion memo [4]. + + + + + + +Handley, et al. Experimental [Page 1] + +RFC 2974 Session Announcement Protocol October 2000 + + +2 Terminology + + A SAP announcer periodically multicasts an announcement packet to a + well known multicast address and port. The announcement is multicast + with the same scope as the session it is announcing, ensuring that + the recipients of the announcement are within the scope of the + session the announcement describes (bandwidth and other such + constraints permitting). This is also important for the scalability + of the protocol, as it keeps local session announcements local. + + A SAP listener learns of the multicast scopes it is within (for + example, using the Multicast-Scope Zone Announcement Protocol [5]) + and listens on the well known SAP address and port for those scopes. + In this manner, it will eventually learn of all the sessions being + announced, allowing those sessions to be joined. + + The key words `MUST', `MUST NOT', `REQUIRED', `SHALL', `SHALL NOT', + `SHOULD', `SHOULD NOT', `RECOMMENDED', `MAY', and `OPTIONAL' in this + document are to be interpreted as described in [1]. + +3 Session Announcement + + As noted previously, a SAP announcer periodically sends an + announcement packet to a well known multicast address and port. + There is no rendezvous mechanism - the SAP announcer is not aware of + the presence or absence of any SAP listeners - and no additional + reliability is provided over the standard best-effort UDP/IP + semantics. + + That announcement contains a session description and SHOULD contain + an authentication header. The session description MAY be encrypted + although this is NOT RECOMMENDED (see section 7). + + A SAP announcement is multicast with the same scope as the session it + is announcing, ensuring that the recipients of the announcement are + within the scope of the session the announcement describes. There are + a number of possibilities: + + IPv4 global scope sessions use multicast addresses in the range + 224.2.128.0 - 224.2.255.255 with SAP announcements being sent to + 224.2.127.254 (note that 224.2.127.255 is used by the obsolete + SAPv0 and MUST NOT be used). + + + + + + + + + +Handley, et al. Experimental [Page 2] + +RFC 2974 Session Announcement Protocol October 2000 + + + IPv4 administrative scope sessions using administratively scoped IP + multicast as defined in [7]. The multicast address to be used for + announcements is the highest multicast address in the relevant + administrative scope zone. For example, if the scope range is + 239.16.32.0 - 239.16.33.255, then 239.16.33.255 is used for SAP + announcements. + + IPv6 sessions are announced on the address FF0X:0:0:0:0:0:2:7FFE + where X is the 4-bit scope value. For example, an announcement + for a link-local session assigned the address + FF02:0:0:0:0:0:1234:5678, should be advertised on SAP address + FF02:0:0:0:0:0:2:7FFE. + + Ensuring that a description is not used by a potential participant + outside the session scope is not addressed in this memo. + + SAP announcements MUST be sent on port 9875 and SHOULD be sent with + an IP time-to-live of 255 (the use of TTL scoping for multicast is + discouraged [7]). + + If a session uses addresses in multiple administrative scope ranges, + it is necessary for the announcer to send identical copies of the + announcement to each administrative scope range. It is up to the + listeners to parse such multiple announcements as the same session + (as identified by the SDP origin field, for example). The + announcement rate for each administrative scope range MUST be + calculated separately, as if the multiple announcements were + separate. + + Multiple announcers may announce a single session, as an aid to + robustness in the face of packet loss and failure of one or more + announcers. The rate at which each announcer repeats its + announcement MUST be scaled back such that the total announcement + rate is equal to that which a single server would choose. + Announcements made in this manner MUST be identical. + + If multiple announcements are being made for a session, then each + announcement MUST carry an authentication header signed by the same + key, or be treated as a completely separate announcement by + listeners. + + An IPv4 SAP listener SHOULD listen on the IPv4 global scope SAP + address and on the SAP addresses for each IPv4 administrative scope + zone it is within. The discovery of administrative scope zones is + outside the scope of this memo, but it is assumed that each SAP + listener within a particular scope zone is aware of that scope zone. + A SAP listener which supports IPv6 SHOULD also listen to the IPv6 SAP + addresses. + + + +Handley, et al. Experimental [Page 3] + +RFC 2974 Session Announcement Protocol October 2000 + + +3.1 Announcement Interval + + The time period between repetitions of an announcement is chosen such + that the total bandwidth used by all announcements on a single SAP + group remains below a preconfigured limit. If not otherwise + specified, the bandwidth limit SHOULD be assumed to be 4000 bits per + second. + + Each announcer is expected to listen to other announcements in order + to determine the total number of sessions being announced on a + particular group. Sessions are uniquely identified by the + combination of the message identifier hash and originating source + fields of the SAP header (note that SAP v0 announcers always set the + message identifier hash to zero, and if such an announcement is + received the entire message MUST be compared to determine + uniqueness). + + Announcements are made by periodic multicast to the group. The base + interval between announcements is derived from the number of + announcements being made in that group, the size of the announcement + and the configured bandwidth limit. The actual transmission time is + derived from this base interval as follows: + + 1. The announcer initializes the variable tp to be the last time a + particular announcement was transmitted (or the current time if + this is the first time this announcement is to be made). + + 2. Given a configured bandwidth limit in bits/second and an + announcement of ad_size bytes, the base announcement interval + in seconds is + + interval =max(300; (8*no_of_ads*ad_size)/limit) + + 3. An offset is calculated based on the base announcement interval + + offset= rand(interval* 2/3)-(interval/3) + + 4. The next transmission time for an announcement derived as + + tn =tp+ interval+ offset + + The announcer then sets a timer to expire at tn and waits. At time + tn the announcer SHOULD recalculate the next transmission time. If + the new value of tn is before the current time, the announcement is + sent immediately. Otherwise the transmission is rescheduled for the + new tn. This reconsideration prevents transient packet bursts on + startup and when a network partition heals. + + + + +Handley, et al. Experimental [Page 4] + +RFC 2974 Session Announcement Protocol October 2000 + + +4 Session Deletion + + Sessions may be deleted in one of several ways: + + Explicit Timeout The session description payload may contain + timestamp information specifying the start- and end-times of the + session. If the current time is later than the end-time of the + session, then the session SHOULD be deleted from the receiver's + session cache. + + Implicit Timeout A session announcement message should be received + periodically for each session description in a receiver's session + cache. The announcement period can be predicted by the receiver + from the set of sessions currently being announced. If a session + announcement message has not been received for ten times the + announcement period, or one hour, whichever is the greater, then + the session is deleted from the receiver's session cache. The one + hour minimum is to allow for transient network partitionings. + + Explicit Deletion A session deletion packet is received specifying + the session to be deleted. Session deletion packets SHOULD have a + valid authentication header, matching that used to authenticate + previous announcement packets. If this authentication is missing, + the deletion message SHOULD be ignored. + +5 Session Modification + + A pre-announced session can be modified by simply announcing the + modified session description. In this case, the version hash in the + SAP header MUST be changed to indicate to receivers that the packet + contents should be parsed (or decrypted and parsed if it is + encrypted). The session itself, as distinct from the session + announcement, is uniquely identified by the payload and not by the + message identifier hash in the header. + + The same rules apply for session modification as for session + deletion: + + o Either the modified announcement must contain an authentication + header signed by the same key as the cached session announcement + it is modifying, or: + + o The cached session announcement must not contain an authentication + header, and the session modification announcement must originate + from the same host as the session it is modifying. + + + + + + +Handley, et al. Experimental [Page 5] + +RFC 2974 Session Announcement Protocol October 2000 + + + If an announcement is received containing an authentication header + and the cached announcement did not contain an authentication header, + or it contained a different authentication header, then the modified + announcement MUST be treated as a new and different announcement, and + displayed in addition to the un-authenticated announcement. The same + should happen if a modified packet without an authentication header + is received from a different source than the original announcement. + + These rules prevent an announcement having an authentication header + added by a malicious user and then being deleted using that header, + and it also prevents a denial-of-service attack by someone putting + out a spoof announcement which, due to packet loss, reaches some + participants before the original announcement. Note that under such + circumstances, being able to authenticate the message originator is + the only way to discover which session is the correct session. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | V=1 |A|R|T|E|C| auth len | msg id hash | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + : originating source (32 or 128 bits) : + : : + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | optional authentication data | + : .... : + *-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* + | optional payload type | + + +-+- - - - - - - - - -+ + | |0| | + + - - - - - - - - - - - - - - - - - - - - +-+ | + | | + : payload : + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 1: Packet format + +6 Packet Format + + SAP data packets have the format described in figure 1. + + V: Version Number. The version number field MUST be set to 1 (SAPv2 + announcements which use only SAPv1 features are backwards + compatible, those which use new features can be detected by other + means, so the SAP version number doesn't need to change). + + + + +Handley, et al. Experimental [Page 6] + +RFC 2974 Session Announcement Protocol October 2000 + + + A: Address type. If the A bit is 0, the originating source field + contains a 32-bit IPv4 address. If the A bit is 1, the + originating source contains a 128-bit IPv6 address. + + R: Reserved. SAP announcers MUST set this to 0, SAP listeners MUST + ignore the contents of this field. + + T: Message Type. If the T field is set to 0 this is a session + announcement packet, if 1 this is a session deletion packet. + + E: Encryption Bit. If the encryption bit is set to 1, the payload of + the SAP packet is encrypted. If this bit is 0 the packet is not + encrypted. See section 7 for details of the encryption process. + + C: Compressed bit. If the compressed bit is set to 1, the payload is + compressed using the zlib compression algorithm [3]. If the + payload is to be compressed and encrypted, the compression MUST be + performed first. + + Authentication Length. An 8 bit unsigned quantity giving the number + of 32 bit words following the main SAP header that contain + authentication data. If it is zero, no authentication header is + present. + + Authentication data containing a digital signature of the packet, + with length as specified by the authentication length header + field. See section 8 for details of the authentication process. + + Message Identifier Hash. A 16 bit quantity that, used in combination + with the originating source, provides a globally unique identifier + indicating the precise version of this announcement. The choice + of value for this field is not specified here, except that it MUST + be unique for each session announced by a particular SAP announcer + and it MUST be changed if the session description is modified (and + a session deletion message SHOULD be sent for the old version of + the session). + + Earlier versions of SAP used a value of zero to mean that the hash + should be ignored and the payload should always be parsed. This + had the unfortunate side-effect that SAP announcers had to study + the payload data to determine how many unique sessions were being + advertised, making the calculation of the announcement interval + more complex that necessary. In order to decouple the session + announcement process from the contents of those announcements, SAP + announcers SHOULD NOT set the message identifier hash to zero. + + SAP listeners MAY silently discard messages if the message + identifier hash is set to zero. + + + +Handley, et al. Experimental [Page 7] + +RFC 2974 Session Announcement Protocol October 2000 + + + Originating Source. This gives the IP address of the original source + of the message. This is an IPv4 address if the A field is set to + zero, else it is an IPv6 address. The address is stored in + network byte order. + + SAPv0 permitted the originating source to be zero if the message + identifier hash was also zero. This practise is no longer legal, + and SAP announcers SHOULD NOT set the originating source to zero. + SAP listeners MAY silently discard packets with the originating + source set to zero. + + The header is followed by an optional payload type field and the + payload data itself. If the E or C bits are set in the header both + the payload type and payload are encrypted and/or compressed. + + The payload type field is a MIME content type specifier, describing + the format of the payload. This is a variable length ASCII text + string, followed by a single zero byte (ASCII NUL). The payload type + SHOULD be included in all packets. If the payload type is + `application/sdp' both the payload type and its terminating zero byte + MAY be omitted, although this is intended for backwards compatibility + with SAP v1 listeners only. + + The absence of a payload type field may be noted since the payload + section of such a packet will start with an SDP `v=0' field, which is + not a legal MIME content type specifier. + + All implementations MUST support payloads of type `application/sdp' + [4]. Other formats MAY be supported although since there is no + negotiation in SAP an announcer which chooses to use a session + description format other than SDP cannot know that the listeners are + able to understand the announcement. A proliferation of payload + types in announcements has the potential to lead to severe + interoperability problems, and for this reason, the use of non-SDP + payloads is NOT RECOMMENDED. + + If the packet is an announcement packet, the payload contains a + session description. + + If the packet is a session deletion packet, the payload contains a + session deletion message. If the payload format is `application/sdp' + the deletion message is a single SDP line consisting of the origin + field of the announcement to be deleted. + + It is desirable for the payload to be sufficiently small that SAP + packets do not get fragmented by the underlying network. + Fragmentation has a loss multiplier effect, which is known to + significantly affect the reliability of announcements. It is + + + +Handley, et al. Experimental [Page 8] + +RFC 2974 Session Announcement Protocol October 2000 + + + RECOMMENDED that SAP packets are smaller than 1kByte in length, + although if it is known that announcements will use a network with a + smaller MTU than this, then that SHOULD be used as the maximum + recommended packet size. + +7 Encrypted Announcements + + An announcement is received by all listeners in the scope to which it + is sent. If an announcement is encrypted, and many of the receivers + do not have the encryption key, there is a considerable waste of + bandwidth since those receivers cannot use the announcement they have + received. For this reason, the use of encrypted SAP announcements is + NOT RECOMMENDED on the global scope SAP group or on administrative + scope groups which may have many receivers which cannot decrypt those + announcements. + + The opinion of the authors is that encrypted SAP is useful in special + cases only, and that the vast majority of scenarios where encrypted + SAP has been proposed may be better served by distributing session + details using another mechanism. There are, however, certain + scenarios where encrypted announcements may be useful. For this + reason, the encryption bit is included in the SAP header to allow + experimentation with encrypted announcements. + + This memo does not specify details of the encryption algorithm to be + used or the means by which keys are generated and distributed. An + additional specification should define these, if it is desired to use + encrypted SAP. + + Note that if an encrypted announcement is being announced via a + proxy, then there may be no way for the proxy to discover that the + announcement has been superseded, and so it may continue to relay the + old announcement in addition to the new announcement. SAP provides + no mechanism to chain modified encrypted announcements, so it is + advisable to announce the unmodified session as deleted for a short + time after the modification has occurred. This does not guarantee + that all proxies have deleted the session, and so receivers of + encrypted sessions should be prepared to discard old versions of + session announcements that they may receive. In most cases however, + the only stateful proxy will be local to (and known to) the sender, + and an additional (local-area) protocol involving a handshake for + such session modifications can be used to avoid this problem. + + Session announcements that are encrypted with a symmetric algorithm + may allow a degree of privacy in the announcement of a session, but + it should be recognized that a user in possession of such a key can + pass it on to other users who should not be in possession of such a + key. Thus announcements to such a group of key holders cannot be + + + +Handley, et al. Experimental [Page 9] + +RFC 2974 Session Announcement Protocol October 2000 + + + assumed to have come from an authorized key holder unless there is an + appropriate authentication header signed by an authorized key holder. + In addition the recipients of such encrypted announcements cannot be + assumed to only be authorized key holders. Such encrypted + announcements do not provide any real security unless all of the + authorized key holders are trusted to maintain security of such + session directory keys. This property is shared by the multicast + session tools themselves, where it is possible for an un-trustworthy + member of the session to pass on encryption keys to un-authorized + users. However it is likely that keys used for the session tools + will be more short lived than those used for session directories. + + Similar considerations should apply when session announcements are + encrypted with an asymmetric algorithm, but then it is possible to + restrict the possessor(s) of the private key, so that announcements + to a key-holder group can not be made, even if one of the untrusted + members of the group proves to be un-trustworthy. + + 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | V=1 |P| Auth | | + +-+-+-+-+-+-+-+-+ | + | Format specific authentication subheader | + : .................. : + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 2: Format of the authentication data in the SAP header + +8 Authenticated Announcements + + The authentication header can be used for two purposes: + + o Verification that changes to a session description or deletion of + a session are permitted. + + o Authentication of the identity of the session creator. + + In some circumstances only verification is possible because a + certificate signed by a mutually trusted person or authority is not + available. However, under such circumstances, the session originator + may still be authenticated to be the same as the session originator + of previous sessions claiming to be from the same person. This may + or may not be sufficient depending on the purpose of the session and + the people involved. + + + + + + +Handley, et al. Experimental [Page 10] + +RFC 2974 Session Announcement Protocol October 2000 + + + Clearly the key used for the authentication should not be trusted to + belong to the session originator unless it has been separately + authenticated by some other means, such as being certified by a + trusted third party. Such certificates are not normally included in + an SAP header because they take more space than can normally be + afforded in an SAP packet, and such verification must therefore take + place by some other mechanism. However, as certified public keys are + normally locally cached, authentication of a particular key only has + to take place once, rather than every time the session directory + retransmits the announcement. + + SAP is not tied to any single authentication mechanism. + Authentication data in the header is self-describing, but the precise + format depends on the authentication mechanism in use. The generic + format of the authentication data is given in figure 2. The + structure of the format specific authentication subheader, using both + the PGP and the CMS formats, is discussed in sections 8.1 and 8.2 + respectively. Additional formats may be added in future. + + Version Number, V: The version number of the authentication format + specified by this memo is 1. + + Padding Bit, P: If necessary the authentication data is padded to be + a multiple of 32 bits and the padding bit is set. In this case + the last byte of the authentication data contains the number of + padding bytes (including the last byte) that must be discarded. + + Authentication Type, Auth: The authentication type is a 4 bit + encoded field that denotes the authentication infrastructure the + sender expects the recipients to use to check the authenticity and + integrity of the information. This defines the format of the + authentication subheader and can take the values: 0 = PGP format, + 1 = CMS format. All other values are undefined and SHOULD be + ignored. + + If a SAP packet is to be compressed or encrypted, this MUST be done + before the authentication is added. + + The digital signature in the authentication data MUST be calculated + over the entire packet, including the header. The authentication + length MUST be set to zero and the authentication data excluded when + calculating the digital signature. + + It is to be expected that sessions may be announced by a number of + different mechanisms, not only SAP. For example, a session + description may placed on a web page, sent by email or conveyed in a + + + + + +Handley, et al. Experimental [Page 11] + +RFC 2974 Session Announcement Protocol October 2000 + + + session initiation protocol. To ease interoperability with these + other mechanisms, application level security is employed, rather than + using IPsec authentication headers. + +8.1 PGP Authentication + + A full description of the PGP protocol can be found in [2]. When + using PGP for SAP authentication the basic format specific + authentication subheader comprises a digital signature packet as + described in [2]. The signature type MUST be 0x01 which means the + signature is that of a canonical text document. + +8.2 CMS Authentication + + A full description of the Cryptographic Message Syntax can be found + in [6]. The format specific authentication subheader will, in the + CMS case, have an ASN.1 ContentInfo type with the ContentType being + signedData. + + Use is made of the option available in PKCS#7 to leave the content + itself blank as the content which is signed is already present in the + packet. Inclusion of it within the SignedData type would duplicate + this data and increase the packet length unnecessarily. In addition + this allows recipients with either no interest in the authentication, + or with no mechanism for checking it, to more easily skip the + authentication information. + + There SHOULD be only one signerInfo and related fields corresponding + to the originator of the SAP announcement. The signingTime SHOULD be + present as a signedAttribute. However, due to the strict size + limitations on the size of SAP packets, certificates and CRLs SHOULD + NOT be included in the signedData structure. It is expected that + users of the protocol will have other methods for certificate and CRL + distribution. + +9 Scalability and caching + + SAP is intended to announce the existence of long-lived wide-area + multicast sessions. It is not an especially timely protocol: + sessions are announced by periodic multicast with a repeat rate on + the order of tens of minutes, and no enhanced reliability over UDP. + This leads to a long startup delay before a complete set of + announcements is heard by a listener. This delay is clearly + undesirable for interactive browsing of announced sessions. + + In order to reduce the delays inherent in SAP, it is recommended that + proxy caches are deployed. A SAP proxy cache is expected to listen + to all SAP groups in its scope, and to maintain an up-to-date list of + + + +Handley, et al. Experimental [Page 12] + +RFC 2974 Session Announcement Protocol October 2000 + + + all announced sessions along with the time each announcement was last + received. When a new SAP listeners starts, it should contact its + local proxy to download this information, which is then sufficient + for it to process future announcements directly, as if it has been + continually listening. + + The protocol by which a SAP listener contacts its local proxy cache + is not specified here. + +10 Security Considerations + + SAP contains mechanisms for ensuring integrity of session + announcements, for authenticating the origin of an announcement and + for encrypting such announcements (sections 7 and 8). + + As stated in section 5, if a session modification announcement is + received that contains a valid authentication header, but which is + not signed by the original creator of the session, then the session + must be treated as a new session in addition to the original session + with the same SDP origin information unless the originator of one of + the session descriptions can be authenticated using a certificate + signed by a trusted third party. If this were not done, there would + be a possible denial of service attack whereby a party listens for + new announcements, strips off the original authentication header, + modifies the session description, adds a new authentication header + and re-announces the session. If a rule was imposed that such spoof + announcements were ignored, then if packet loss or late starting of a + session directory instance caused the original announcement to fail + to arrive at a site, but the spoof announcement did so, this would + then prevent the original announcement from being accepted at that + site. + + A similar denial-of-service attack is possible if a session + announcement receiver relies completely on the originating source and + hash fields to indicate change, and fails to parse the remainder of + announcements for which it has seen the origin/hash combination + before. + + A denial of service attack is possible from a malicious site close to + a legitimate site which is making a session announcement. This can + happen if the malicious site floods the legitimate site with huge + numbers of (illegal) low TTL announcements describing high TTL + sessions. This may reduce the session announcement rate of the + legitimate announcement to below a tenth of the rate expected at + remote sites and therefore cause the session to time out. Such an + attack is likely to be easily detectable, and we do not provide any + mechanism here to prevent it. + + + + +Handley, et al. Experimental [Page 13] + +RFC 2974 Session Announcement Protocol October 2000 + + +A. Summary of differences between SAPv0 and SAPv1 + + For this purpose SAPv0 is defined as the protocol in use by version + 2.2 of the session directory tool, sdr. SAPv1 is the protocol + described in the 19 November 1996 version of this memo. The packet + headers of SAP messages are the same in V0 and V1 in that a V1 tool + can parse a V0 announcement header but not vice-versa. In SAPv0, the + fields have the following values: + + o Version Number: 0 + + o Message Type: 0 (Announcement) + + o Authentication Type: 0 (No Authentication) + + o Encryption Bit: 0 (No Encryption) + + o Compression Bit: 0 (No compression) + + o Message Id Hash: 0 (No Hash Specified) + + o Originating Source: 0 (No source specified, announcement has + not been relayed) + +B. Summary of differences between SAPv1 and SAPv2 + + The packet headers of SAP messages are the same in V1 and V2 in that + a V2 tool can parse a V1 announcement header but not necessarily + vice-versa. + + o The A bit has been added to the SAP header, replacing one of the + bits of the SAPv1 message type field. If set to zero the + announcement is of an IPv4 session, and the packet is backwards + compatible with SAPv1. If set to one the announcement is of an + IPv6 session, and SAPv1 listeners (which do not support IPv6) will + see this as an illegal message type (MT) field. + + o The second bit of the message type field in SAPv1 has been + replaced by a reserved, must-be-zero, bit. This bit was unused in + SAPv1, so this change just codifies existing usage. + + o SAPv1 specified encryption of the payload. SAPv2 includes the E + bit in the SAP header to indicate that the payload is encrypted, + but does not specify any details of the encryption. + + o SAPv1 allowed the message identifier hash and originating source + fields to be set to zero, for backwards compatibility. This is no + longer legal. + + + +Handley, et al. Experimental [Page 14] + +RFC 2974 Session Announcement Protocol October 2000 + + + o SAPv1 specified gzip compression. SAPv2 uses zlib (the only known + implementation of SAP compression used zlib, and gzip compression + was a mistake). + + o SAPv2 provides a more complete specification for authentication. + + o SAPv2 allows for non-SDP payloads to be transported. SAPv1 + required that the payload was SDP. + + o SAPv1 included a timeout field for encrypted announcement, SAPv2 + does not (and relies of explicit deletion messages or implicit + timeouts). + +C. Acknowledgements + + SAP and SDP were originally based on the protocol used by the sd + session directory from Van Jacobson at LBNL. Version 1 of SAP was + designed by Mark Handley as part of the European Commission MICE + (Esprit 7602) and MERCI (Telematics 1007) projects. Version 2 + includes authentication features developed by Edmund Whelan, Goli + Montasser-Kohsari and Peter Kirstein as part of the European + Commission ICE-TEL project (Telematics 1005), and support for IPv6 + developed by Maryann P. Maher and Colin Perkins. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al. Experimental [Page 15] + +RFC 2974 Session Announcement Protocol October 2000 + + +D. Authors' Addresses + + Mark Handley + AT&T Center for Internet Research at ICSI, + International Computer Science Institute, + 1947 Center Street, Suite 600, + Berkeley, CA 94704, USA + + EMail: mjh@aciri.org + + + Colin Perkins + USC Information Sciences Institute + 4350 N. Fairfax Drive, Suite 620 + Arlington, VA 22203, USA + + EMail: csp@isi.edu + + + Edmund Whelan + Department of Computer Science, + University College London, + Gower Street, + London, WC1E 6BT, UK + + EMail: e.whelan@cs.ucl.ac.uk + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al. Experimental [Page 16] + +RFC 2974 Session Announcement Protocol October 2000 + + +References + + [1] Bradner, S., "Key words for use in RFCs to indicate requirement + levels", BCP 14, RFC 2119, March 1997. + + [2] Callas, J., Donnerhacke, L., Finney, H. and R. Thayer. "OpenPGP + message format", RFC 2440, November 1998. + + [3] Deutsch, P. and J.-L. Gailly, "Zlib compressed data format + specification version 3.3", RFC 1950, May 1996. + + [4] Handley, M. and V. Jacobson, "SDP: Session Description Protocol", + RFC 2327, April 1998. + + [5] Handley, M., Thaler, D. and R. Kermode, "Multicast-scope zone + announcement protocol (MZAP)", RFC 2776, February 2000. + + [6] Housley, R., "Cryptographic message syntax", RFC 2630, June 1999. + + [7] Mayer, D., "Administratively scoped IP multicast", RFC 2365, July + 1998. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Handley, et al. Experimental [Page 17] + +RFC 2974 Session Announcement Protocol October 2000 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Handley, et al. Experimental [Page 18] + diff --git a/src/modules/rtp/rfc3550.txt b/src/modules/rtp/rfc3550.txt new file mode 100644 index 00000000..165736cf --- /dev/null +++ b/src/modules/rtp/rfc3550.txt @@ -0,0 +1,5827 @@ + + + + + + +Network Working Group H. Schulzrinne +Request for Comments: 3550 Columbia University +Obsoletes: 1889 S. Casner +Category: Standards Track Packet Design + R. Frederick + Blue Coat Systems Inc. + V. Jacobson + Packet Design + July 2003 + + + RTP: A Transport Protocol for Real-Time Applications + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2003). All Rights Reserved. + +Abstract + + This memorandum describes RTP, the real-time transport protocol. RTP + provides end-to-end network transport functions suitable for + applications transmitting real-time data, such as audio, video or + simulation data, over multicast or unicast network services. RTP + does not address resource reservation and does not guarantee + quality-of-service for real-time services. The data transport is + augmented by a control protocol (RTCP) to allow monitoring of the + data delivery in a manner scalable to large multicast networks, and + to provide minimal control and identification functionality. RTP and + RTCP are designed to be independent of the underlying transport and + network layers. The protocol supports the use of RTP-level + translators and mixers. + + Most of the text in this memorandum is identical to RFC 1889 which it + obsoletes. There are no changes in the packet formats on the wire, + only changes to the rules and algorithms governing how the protocol + is used. The biggest change is an enhancement to the scalable timer + algorithm for calculating when to send RTCP packets in order to + minimize transmission in excess of the intended rate when many + participants join a session simultaneously. + + + + +Schulzrinne, et al. Standards Track [Page 1] + +RFC 3550 RTP July 2003 + + +Table of Contents + + 1. Introduction ................................................ 4 + 1.1 Terminology ............................................ 5 + 2. RTP Use Scenarios ........................................... 5 + 2.1 Simple Multicast Audio Conference ...................... 6 + 2.2 Audio and Video Conference ............................. 7 + 2.3 Mixers and Translators ................................. 7 + 2.4 Layered Encodings ...................................... 8 + 3. Definitions ................................................. 8 + 4. Byte Order, Alignment, and Time Format ...................... 12 + 5. RTP Data Transfer Protocol .................................. 13 + 5.1 RTP Fixed Header Fields ................................ 13 + 5.2 Multiplexing RTP Sessions .............................. 16 + 5.3 Profile-Specific Modifications to the RTP Header ....... 18 + 5.3.1 RTP Header Extension ............................ 18 + 6. RTP Control Protocol -- RTCP ................................ 19 + 6.1 RTCP Packet Format ..................................... 21 + 6.2 RTCP Transmission Interval ............................. 24 + 6.2.1 Maintaining the Number of Session Members ....... 28 + 6.3 RTCP Packet Send and Receive Rules ..................... 28 + 6.3.1 Computing the RTCP Transmission Interval ........ 29 + 6.3.2 Initialization .................................. 30 + 6.3.3 Receiving an RTP or Non-BYE RTCP Packet ......... 31 + 6.3.4 Receiving an RTCP BYE Packet .................... 31 + 6.3.5 Timing Out an SSRC .............................. 32 + 6.3.6 Expiration of Transmission Timer ................ 32 + 6.3.7 Transmitting a BYE Packet ....................... 33 + 6.3.8 Updating we_sent ................................ 34 + 6.3.9 Allocation of Source Description Bandwidth ...... 34 + 6.4 Sender and Receiver Reports ............................ 35 + 6.4.1 SR: Sender Report RTCP Packet ................... 36 + 6.4.2 RR: Receiver Report RTCP Packet ................. 42 + 6.4.3 Extending the Sender and Receiver Reports ....... 42 + 6.4.4 Analyzing Sender and Receiver Reports ........... 43 + 6.5 SDES: Source Description RTCP Packet ................... 45 + 6.5.1 CNAME: Canonical End-Point Identifier SDES Item . 46 + 6.5.2 NAME: User Name SDES Item ....................... 48 + 6.5.3 EMAIL: Electronic Mail Address SDES Item ........ 48 + 6.5.4 PHONE: Phone Number SDES Item ................... 49 + 6.5.5 LOC: Geographic User Location SDES Item ......... 49 + 6.5.6 TOOL: Application or Tool Name SDES Item ........ 49 + 6.5.7 NOTE: Notice/Status SDES Item ................... 50 + 6.5.8 PRIV: Private Extensions SDES Item .............. 50 + 6.6 BYE: Goodbye RTCP Packet ............................... 51 + 6.7 APP: Application-Defined RTCP Packet ................... 52 + 7. RTP Translators and Mixers .................................. 53 + 7.1 General Description .................................... 53 + + + +Schulzrinne, et al. Standards Track [Page 2] + +RFC 3550 RTP July 2003 + + + 7.2 RTCP Processing in Translators ......................... 55 + 7.3 RTCP Processing in Mixers .............................. 57 + 7.4 Cascaded Mixers ........................................ 58 + 8. SSRC Identifier Allocation and Use .......................... 59 + 8.1 Probability of Collision ............................... 59 + 8.2 Collision Resolution and Loop Detection ................ 60 + 8.3 Use with Layered Encodings ............................. 64 + 9. Security .................................................... 65 + 9.1 Confidentiality ........................................ 65 + 9.2 Authentication and Message Integrity ................... 67 + 10. Congestion Control .......................................... 67 + 11. RTP over Network and Transport Protocols .................... 68 + 12. Summary of Protocol Constants ............................... 69 + 12.1 RTCP Packet Types ...................................... 70 + 12.2 SDES Types ............................................. 70 + 13. RTP Profiles and Payload Format Specifications .............. 71 + 14. Security Considerations ..................................... 73 + 15. IANA Considerations ......................................... 73 + 16. Intellectual Property Rights Statement ...................... 74 + 17. Acknowledgments ............................................. 74 + Appendix A. Algorithms ........................................ 75 + Appendix A.1 RTP Data Header Validity Checks ................... 78 + Appendix A.2 RTCP Header Validity Checks ....................... 82 + Appendix A.3 Determining Number of Packets Expected and Lost ... 83 + Appendix A.4 Generating RTCP SDES Packets ...................... 84 + Appendix A.5 Parsing RTCP SDES Packets ......................... 85 + Appendix A.6 Generating a Random 32-bit Identifier ............. 85 + Appendix A.7 Computing the RTCP Transmission Interval .......... 87 + Appendix A.8 Estimating the Interarrival Jitter ................ 94 + Appendix B. Changes from RFC 1889 ............................. 95 + References ...................................................... 100 + Normative References ............................................ 100 + Informative References .......................................... 100 + Authors' Addresses .............................................. 103 + Full Copyright Statement ........................................ 104 + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 3] + +RFC 3550 RTP July 2003 + + +1. Introduction + + This memorandum specifies the real-time transport protocol (RTP), + which provides end-to-end delivery services for data with real-time + characteristics, such as interactive audio and video. Those services + include payload type identification, sequence numbering, timestamping + and delivery monitoring. Applications typically run RTP on top of + UDP to make use of its multiplexing and checksum services; both + protocols contribute parts of the transport protocol functionality. + However, RTP may be used with other suitable underlying network or + transport protocols (see Section 11). RTP supports data transfer to + multiple destinations using multicast distribution if provided by the + underlying network. + + Note that RTP itself does not provide any mechanism to ensure timely + delivery or provide other quality-of-service guarantees, but relies + on lower-layer services to do so. It does not guarantee delivery or + prevent out-of-order delivery, nor does it assume that the underlying + network is reliable and delivers packets in sequence. The sequence + numbers included in RTP allow the receiver to reconstruct the + sender's packet sequence, but sequence numbers might also be used to + determine the proper location of a packet, for example in video + decoding, without necessarily decoding packets in sequence. + + While RTP is primarily designed to satisfy the needs of multi- + participant multimedia conferences, it is not limited to that + particular application. Storage of continuous data, interactive + distributed simulation, active badge, and control and measurement + applications may also find RTP applicable. + + This document defines RTP, consisting of two closely-linked parts: + + o the real-time transport protocol (RTP), to carry data that has + real-time properties. + + o the RTP control protocol (RTCP), to monitor the quality of service + and to convey information about the participants in an on-going + session. The latter aspect of RTCP may be sufficient for "loosely + controlled" sessions, i.e., where there is no explicit membership + control and set-up, but it is not necessarily intended to support + all of an application's control communication requirements. This + functionality may be fully or partially subsumed by a separate + session control protocol, which is beyond the scope of this + document. + + RTP represents a new style of protocol following the principles of + application level framing and integrated layer processing proposed by + Clark and Tennenhouse [10]. That is, RTP is intended to be malleable + + + +Schulzrinne, et al. Standards Track [Page 4] + +RFC 3550 RTP July 2003 + + + to provide the information required by a particular application and + will often be integrated into the application processing rather than + being implemented as a separate layer. RTP is a protocol framework + that is deliberately not complete. This document specifies those + functions expected to be common across all the applications for which + RTP would be appropriate. Unlike conventional protocols in which + additional functions might be accommodated by making the protocol + more general or by adding an option mechanism that would require + parsing, RTP is intended to be tailored through modifications and/or + additions to the headers as needed. Examples are given in Sections + 5.3 and 6.4.3. + + Therefore, in addition to this document, a complete specification of + RTP for a particular application will require one or more companion + documents (see Section 13): + + o a profile specification document, which defines a set of payload + type codes and their mapping to payload formats (e.g., media + encodings). A profile may also define extensions or modifications + to RTP that are specific to a particular class of applications. + Typically an application will operate under only one profile. A + profile for audio and video data may be found in the companion RFC + 3551 [1]. + + o payload format specification documents, which define how a + particular payload, such as an audio or video encoding, is to be + carried in RTP. + + A discussion of real-time services and algorithms for their + implementation as well as background discussion on some of the RTP + design decisions can be found in [11]. + +1.1 Terminology + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in BCP 14, RFC 2119 [2] + and indicate requirement levels for compliant RTP implementations. + +2. RTP Use Scenarios + + The following sections describe some aspects of the use of RTP. The + examples were chosen to illustrate the basic operation of + applications using RTP, not to limit what RTP may be used for. In + these examples, RTP is carried on top of IP and UDP, and follows the + conventions established by the profile for audio and video specified + in the companion RFC 3551. + + + + +Schulzrinne, et al. Standards Track [Page 5] + +RFC 3550 RTP July 2003 + + +2.1 Simple Multicast Audio Conference + + A working group of the IETF meets to discuss the latest protocol + document, using the IP multicast services of the Internet for voice + communications. Through some allocation mechanism the working group + chair obtains a multicast group address and pair of ports. One port + is used for audio data, and the other is used for control (RTCP) + packets. This address and port information is distributed to the + intended participants. If privacy is desired, the data and control + packets may be encrypted as specified in Section 9.1, in which case + an encryption key must also be generated and distributed. The exact + details of these allocation and distribution mechanisms are beyond + the scope of RTP. + + The audio conferencing application used by each conference + participant sends audio data in small chunks of, say, 20 ms duration. + Each chunk of audio data is preceded by an RTP header; RTP header and + data are in turn contained in a UDP packet. The RTP header indicates + what type of audio encoding (such as PCM, ADPCM or LPC) is contained + in each packet so that senders can change the encoding during a + conference, for example, to accommodate a new participant that is + connected through a low-bandwidth link or react to indications of + network congestion. + + The Internet, like other packet networks, occasionally loses and + reorders packets and delays them by variable amounts of time. To + cope with these impairments, the RTP header contains timing + information and a sequence number that allow the receivers to + reconstruct the timing produced by the source, so that in this + example, chunks of audio are contiguously played out the speaker + every 20 ms. This timing reconstruction is performed separately for + each source of RTP packets in the conference. The sequence number + can also be used by the receiver to estimate how many packets are + being lost. + + Since members of the working group join and leave during the + conference, it is useful to know who is participating at any moment + and how well they are receiving the audio data. For that purpose, + each instance of the audio application in the conference periodically + multicasts a reception report plus the name of its user on the RTCP + (control) port. The reception report indicates how well the current + speaker is being received and may be used to control adaptive + encodings. In addition to the user name, other identifying + information may also be included subject to control bandwidth limits. + A site sends the RTCP BYE packet (Section 6.6) when it leaves the + conference. + + + + + +Schulzrinne, et al. Standards Track [Page 6] + +RFC 3550 RTP July 2003 + + +2.2 Audio and Video Conference + + If both audio and video media are used in a conference, they are + transmitted as separate RTP sessions. That is, separate RTP and RTCP + packets are transmitted for each medium using two different UDP port + pairs and/or multicast addresses. There is no direct coupling at the + RTP level between the audio and video sessions, except that a user + participating in both sessions should use the same distinguished + (canonical) name in the RTCP packets for both so that the sessions + can be associated. + + One motivation for this separation is to allow some participants in + the conference to receive only one medium if they choose. Further + explanation is given in Section 5.2. Despite the separation, + synchronized playback of a source's audio and video can be achieved + using timing information carried in the RTCP packets for both + sessions. + +2.3 Mixers and Translators + + So far, we have assumed that all sites want to receive media data in + the same format. However, this may not always be appropriate. + Consider the case where participants in one area are connected + through a low-speed link to the majority of the conference + participants who enjoy high-speed network access. Instead of forcing + everyone to use a lower-bandwidth, reduced-quality audio encoding, an + RTP-level relay called a mixer may be placed near the low-bandwidth + area. This mixer resynchronizes incoming audio packets to + reconstruct the constant 20 ms spacing generated by the sender, mixes + these reconstructed audio streams into a single stream, translates + the audio encoding to a lower-bandwidth one and forwards the lower- + bandwidth packet stream across the low-speed link. These packets + might be unicast to a single recipient or multicast on a different + address to multiple recipients. The RTP header includes a means for + mixers to identify the sources that contributed to a mixed packet so + that correct talker indication can be provided at the receivers. + + Some of the intended participants in the audio conference may be + connected with high bandwidth links but might not be directly + reachable via IP multicast. For example, they might be behind an + application-level firewall that will not let any IP packets pass. + For these sites, mixing may not be necessary, in which case another + type of RTP-level relay called a translator may be used. Two + translators are installed, one on either side of the firewall, with + the outside one funneling all multicast packets received through a + secure connection to the translator inside the firewall. The + translator inside the firewall sends them again as multicast packets + to a multicast group restricted to the site's internal network. + + + +Schulzrinne, et al. Standards Track [Page 7] + +RFC 3550 RTP July 2003 + + + Mixers and translators may be designed for a variety of purposes. An + example is a video mixer that scales the images of individual people + in separate video streams and composites them into one video stream + to simulate a group scene. Other examples of translation include the + connection of a group of hosts speaking only IP/UDP to a group of + hosts that understand only ST-II, or the packet-by-packet encoding + translation of video streams from individual sources without + resynchronization or mixing. Details of the operation of mixers and + translators are given in Section 7. + +2.4 Layered Encodings + + Multimedia applications should be able to adjust the transmission + rate to match the capacity of the receiver or to adapt to network + congestion. Many implementations place the responsibility of rate- + adaptivity at the source. This does not work well with multicast + transmission because of the conflicting bandwidth requirements of + heterogeneous receivers. The result is often a least-common + denominator scenario, where the smallest pipe in the network mesh + dictates the quality and fidelity of the overall live multimedia + "broadcast". + + Instead, responsibility for rate-adaptation can be placed at the + receivers by combining a layered encoding with a layered transmission + system. In the context of RTP over IP multicast, the source can + stripe the progressive layers of a hierarchically represented signal + across multiple RTP sessions each carried on its own multicast group. + Receivers can then adapt to network heterogeneity and control their + reception bandwidth by joining only the appropriate subset of the + multicast groups. + + Details of the use of RTP with layered encodings are given in + Sections 6.3.9, 8.3 and 11. + +3. Definitions + + RTP payload: The data transported by RTP in a packet, for + example audio samples or compressed video data. The payload + format and interpretation are beyond the scope of this document. + + RTP packet: A data packet consisting of the fixed RTP header, a + possibly empty list of contributing sources (see below), and the + payload data. Some underlying protocols may require an + encapsulation of the RTP packet to be defined. Typically one + packet of the underlying protocol contains a single RTP packet, + but several RTP packets MAY be contained if permitted by the + encapsulation method (see Section 11). + + + + +Schulzrinne, et al. Standards Track [Page 8] + +RFC 3550 RTP July 2003 + + + RTCP packet: A control packet consisting of a fixed header part + similar to that of RTP data packets, followed by structured + elements that vary depending upon the RTCP packet type. The + formats are defined in Section 6. Typically, multiple RTCP + packets are sent together as a compound RTCP packet in a single + packet of the underlying protocol; this is enabled by the length + field in the fixed header of each RTCP packet. + + Port: The "abstraction that transport protocols use to + distinguish among multiple destinations within a given host + computer. TCP/IP protocols identify ports using small positive + integers." [12] The transport selectors (TSEL) used by the OSI + transport layer are equivalent to ports. RTP depends upon the + lower-layer protocol to provide some mechanism such as ports to + multiplex the RTP and RTCP packets of a session. + + Transport address: The combination of a network address and port + that identifies a transport-level endpoint, for example an IP + address and a UDP port. Packets are transmitted from a source + transport address to a destination transport address. + + RTP media type: An RTP media type is the collection of payload + types which can be carried within a single RTP session. The RTP + Profile assigns RTP media types to RTP payload types. + + Multimedia session: A set of concurrent RTP sessions among a + common group of participants. For example, a videoconference + (which is a multimedia session) may contain an audio RTP session + and a video RTP session. + + RTP session: An association among a set of participants + communicating with RTP. A participant may be involved in multiple + RTP sessions at the same time. In a multimedia session, each + medium is typically carried in a separate RTP session with its own + RTCP packets unless the the encoding itself multiplexes multiple + media into a single data stream. A participant distinguishes + multiple RTP sessions by reception of different sessions using + different pairs of destination transport addresses, where a pair + of transport addresses comprises one network address plus a pair + of ports for RTP and RTCP. All participants in an RTP session may + share a common destination transport address pair, as in the case + of IP multicast, or the pairs may be different for each + participant, as in the case of individual unicast network + addresses and port pairs. In the unicast case, a participant may + receive from all other participants in the session using the same + pair of ports, or may use a distinct pair of ports for each. + + + + + +Schulzrinne, et al. Standards Track [Page 9] + +RFC 3550 RTP July 2003 + + + The distinguishing feature of an RTP session is that each + maintains a full, separate space of SSRC identifiers (defined + next). The set of participants included in one RTP session + consists of those that can receive an SSRC identifier transmitted + by any one of the participants either in RTP as the SSRC or a CSRC + (also defined below) or in RTCP. For example, consider a three- + party conference implemented using unicast UDP with each + participant receiving from the other two on separate port pairs. + If each participant sends RTCP feedback about data received from + one other participant only back to that participant, then the + conference is composed of three separate point-to-point RTP + sessions. If each participant provides RTCP feedback about its + reception of one other participant to both of the other + participants, then the conference is composed of one multi-party + RTP session. The latter case simulates the behavior that would + occur with IP multicast communication among the three + participants. + + The RTP framework allows the variations defined here, but a + particular control protocol or application design will usually + impose constraints on these variations. + + Synchronization source (SSRC): The source of a stream of RTP + packets, identified by a 32-bit numeric SSRC identifier carried in + the RTP header so as not to be dependent upon the network address. + All packets from a synchronization source form part of the same + timing and sequence number space, so a receiver groups packets by + synchronization source for playback. Examples of synchronization + sources include the sender of a stream of packets derived from a + signal source such as a microphone or a camera, or an RTP mixer + (see below). A synchronization source may change its data format, + e.g., audio encoding, over time. The SSRC identifier is a + randomly chosen value meant to be globally unique within a + particular RTP session (see Section 8). A participant need not + use the same SSRC identifier for all the RTP sessions in a + multimedia session; the binding of the SSRC identifiers is + provided through RTCP (see Section 6.5.1). If a participant + generates multiple streams in one RTP session, for example from + separate video cameras, each MUST be identified as a different + SSRC. + + Contributing source (CSRC): A source of a stream of RTP packets + that has contributed to the combined stream produced by an RTP + mixer (see below). The mixer inserts a list of the SSRC + identifiers of the sources that contributed to the generation of a + particular packet into the RTP header of that packet. This list + is called the CSRC list. An example application is audio + conferencing where a mixer indicates all the talkers whose speech + + + +Schulzrinne, et al. Standards Track [Page 10] + +RFC 3550 RTP July 2003 + + + was combined to produce the outgoing packet, allowing the receiver + to indicate the current talker, even though all the audio packets + contain the same SSRC identifier (that of the mixer). + + End system: An application that generates the content to be sent + in RTP packets and/or consumes the content of received RTP + packets. An end system can act as one or more synchronization + sources in a particular RTP session, but typically only one. + + Mixer: An intermediate system that receives RTP packets from one + or more sources, possibly changes the data format, combines the + packets in some manner and then forwards a new RTP packet. Since + the timing among multiple input sources will not generally be + synchronized, the mixer will make timing adjustments among the + streams and generate its own timing for the combined stream. + Thus, all data packets originating from a mixer will be identified + as having the mixer as their synchronization source. + + Translator: An intermediate system that forwards RTP packets + with their synchronization source identifier intact. Examples of + translators include devices that convert encodings without mixing, + replicators from multicast to unicast, and application-level + filters in firewalls. + + Monitor: An application that receives RTCP packets sent by + participants in an RTP session, in particular the reception + reports, and estimates the current quality of service for + distribution monitoring, fault diagnosis and long-term statistics. + The monitor function is likely to be built into the application(s) + participating in the session, but may also be a separate + application that does not otherwise participate and does not send + or receive the RTP data packets (since they are on a separate + port). These are called third-party monitors. It is also + acceptable for a third-party monitor to receive the RTP data + packets but not send RTCP packets or otherwise be counted in the + session. + + Non-RTP means: Protocols and mechanisms that may be needed in + addition to RTP to provide a usable service. In particular, for + multimedia conferences, a control protocol may distribute + multicast addresses and keys for encryption, negotiate the + encryption algorithm to be used, and define dynamic mappings + between RTP payload type values and the payload formats they + represent for formats that do not have a predefined payload type + value. Examples of such protocols include the Session Initiation + Protocol (SIP) (RFC 3261 [13]), ITU Recommendation H.323 [14] and + applications using SDP (RFC 2327 [15]), such as RTSP (RFC 2326 + [16]). For simple + + + +Schulzrinne, et al. Standards Track [Page 11] + +RFC 3550 RTP July 2003 + + + applications, electronic mail or a conference database may also be + used. The specification of such protocols and mechanisms is + outside the scope of this document. + +4. Byte Order, Alignment, and Time Format + + All integer fields are carried in network byte order, that is, most + significant byte (octet) first. This byte order is commonly known as + big-endian. The transmission order is described in detail in [3]. + Unless otherwise noted, numeric constants are in decimal (base 10). + + All header data is aligned to its natural length, i.e., 16-bit fields + are aligned on even offsets, 32-bit fields are aligned at offsets + divisible by four, etc. Octets designated as padding have the value + zero. + + Wallclock time (absolute date and time) is represented using the + timestamp format of the Network Time Protocol (NTP), which is in + seconds relative to 0h UTC on 1 January 1900 [4]. The full + resolution NTP timestamp is a 64-bit unsigned fixed-point number with + the integer part in the first 32 bits and the fractional part in the + last 32 bits. In some fields where a more compact representation is + appropriate, only the middle 32 bits are used; that is, the low 16 + bits of the integer part and the high 16 bits of the fractional part. + The high 16 bits of the integer part must be determined + independently. + + An implementation is not required to run the Network Time Protocol in + order to use RTP. Other time sources, or none at all, may be used + (see the description of the NTP timestamp field in Section 6.4.1). + However, running NTP may be useful for synchronizing streams + transmitted from separate hosts. + + The NTP timestamp will wrap around to zero some time in the year + 2036, but for RTP purposes, only differences between pairs of NTP + timestamps are used. So long as the pairs of timestamps can be + assumed to be within 68 years of each other, using modular arithmetic + for subtractions and comparisons makes the wraparound irrelevant. + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 12] + +RFC 3550 RTP July 2003 + + +5. RTP Data Transfer Protocol + +5.1 RTP Fixed Header Fields + + The RTP header has the following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |V=2|P|X| CC |M| PT | sequence number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | synchronization source (SSRC) identifier | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + | contributing source (CSRC) identifiers | + | .... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The first twelve octets are present in every RTP packet, while the + list of CSRC identifiers is present only when inserted by a mixer. + The fields have the following meaning: + + version (V): 2 bits + This field identifies the version of RTP. The version defined by + this specification is two (2). (The value 1 is used by the first + draft version of RTP and the value 0 is used by the protocol + initially implemented in the "vat" audio tool.) + + padding (P): 1 bit + If the padding bit is set, the packet contains one or more + additional padding octets at the end which are not part of the + payload. The last octet of the padding contains a count of how + many padding octets should be ignored, including itself. Padding + may be needed by some encryption algorithms with fixed block sizes + or for carrying several RTP packets in a lower-layer protocol data + unit. + + extension (X): 1 bit + If the extension bit is set, the fixed header MUST be followed by + exactly one header extension, with a format defined in Section + 5.3.1. + + CSRC count (CC): 4 bits + The CSRC count contains the number of CSRC identifiers that follow + the fixed header. + + + + + +Schulzrinne, et al. Standards Track [Page 13] + +RFC 3550 RTP July 2003 + + + marker (M): 1 bit + The interpretation of the marker is defined by a profile. It is + intended to allow significant events such as frame boundaries to + be marked in the packet stream. A profile MAY define additional + marker bits or specify that there is no marker bit by changing the + number of bits in the payload type field (see Section 5.3). + + payload type (PT): 7 bits + This field identifies the format of the RTP payload and determines + its interpretation by the application. A profile MAY specify a + default static mapping of payload type codes to payload formats. + Additional payload type codes MAY be defined dynamically through + non-RTP means (see Section 3). A set of default mappings for + audio and video is specified in the companion RFC 3551 [1]. An + RTP source MAY change the payload type during a session, but this + field SHOULD NOT be used for multiplexing separate media streams + (see Section 5.2). + + A receiver MUST ignore packets with payload types that it does not + understand. + + sequence number: 16 bits + The sequence number increments by one for each RTP data packet + sent, and may be used by the receiver to detect packet loss and to + restore packet sequence. The initial value of the sequence number + SHOULD be random (unpredictable) to make known-plaintext attacks + on encryption more difficult, even if the source itself does not + encrypt according to the method in Section 9.1, because the + packets may flow through a translator that does. Techniques for + choosing unpredictable numbers are discussed in [17]. + + timestamp: 32 bits + The timestamp reflects the sampling instant of the first octet in + the RTP data packet. The sampling instant MUST be derived from a + clock that increments monotonically and linearly in time to allow + synchronization and jitter calculations (see Section 6.4.1). The + resolution of the clock MUST be sufficient for the desired + synchronization accuracy and for measuring packet arrival jitter + (one tick per video frame is typically not sufficient). The clock + frequency is dependent on the format of data carried as payload + and is specified statically in the profile or payload format + specification that defines the format, or MAY be specified + dynamically for payload formats defined through non-RTP means. If + RTP packets are generated periodically, the nominal sampling + instant as determined from the sampling clock is to be used, not a + reading of the system clock. As an example, for fixed-rate audio + the timestamp clock would likely increment by one for each + sampling period. If an audio application reads blocks covering + + + +Schulzrinne, et al. Standards Track [Page 14] + +RFC 3550 RTP July 2003 + + + 160 sampling periods from the input device, the timestamp would be + increased by 160 for each such block, regardless of whether the + block is transmitted in a packet or dropped as silent. + + The initial value of the timestamp SHOULD be random, as for the + sequence number. Several consecutive RTP packets will have equal + timestamps if they are (logically) generated at once, e.g., belong + to the same video frame. Consecutive RTP packets MAY contain + timestamps that are not monotonic if the data is not transmitted + in the order it was sampled, as in the case of MPEG interpolated + video frames. (The sequence numbers of the packets as transmitted + will still be monotonic.) + + RTP timestamps from different media streams may advance at + different rates and usually have independent, random offsets. + Therefore, although these timestamps are sufficient to reconstruct + the timing of a single stream, directly comparing RTP timestamps + from different media is not effective for synchronization. + Instead, for each medium the RTP timestamp is related to the + sampling instant by pairing it with a timestamp from a reference + clock (wallclock) that represents the time when the data + corresponding to the RTP timestamp was sampled. The reference + clock is shared by all media to be synchronized. The timestamp + pairs are not transmitted in every data packet, but at a lower + rate in RTCP SR packets as described in Section 6.4. + + The sampling instant is chosen as the point of reference for the + RTP timestamp because it is known to the transmitting endpoint and + has a common definition for all media, independent of encoding + delays or other processing. The purpose is to allow synchronized + presentation of all media sampled at the same time. + + Applications transmitting stored data rather than data sampled in + real time typically use a virtual presentation timeline derived + from wallclock time to determine when the next frame or other unit + of each medium in the stored data should be presented. In this + case, the RTP timestamp would reflect the presentation time for + each unit. That is, the RTP timestamp for each unit would be + related to the wallclock time at which the unit becomes current on + the virtual presentation timeline. Actual presentation occurs + some time later as determined by the receiver. + + An example describing live audio narration of prerecorded video + illustrates the significance of choosing the sampling instant as + the reference point. In this scenario, the video would be + presented locally for the narrator to view and would be + simultaneously transmitted using RTP. The "sampling instant" of a + video frame transmitted in RTP would be established by referencing + + + +Schulzrinne, et al. Standards Track [Page 15] + +RFC 3550 RTP July 2003 + + + its timestamp to the wallclock time when that video frame was + presented to the narrator. The sampling instant for the audio RTP + packets containing the narrator's speech would be established by + referencing the same wallclock time when the audio was sampled. + The audio and video may even be transmitted by different hosts if + the reference clocks on the two hosts are synchronized by some + means such as NTP. A receiver can then synchronize presentation + of the audio and video packets by relating their RTP timestamps + using the timestamp pairs in RTCP SR packets. + + SSRC: 32 bits + The SSRC field identifies the synchronization source. This + identifier SHOULD be chosen randomly, with the intent that no two + synchronization sources within the same RTP session will have the + same SSRC identifier. An example algorithm for generating a + random identifier is presented in Appendix A.6. Although the + probability of multiple sources choosing the same identifier is + low, all RTP implementations must be prepared to detect and + resolve collisions. Section 8 describes the probability of + collision along with a mechanism for resolving collisions and + detecting RTP-level forwarding loops based on the uniqueness of + the SSRC identifier. If a source changes its source transport + address, it must also choose a new SSRC identifier to avoid being + interpreted as a looped source (see Section 8.2). + + CSRC list: 0 to 15 items, 32 bits each + The CSRC list identifies the contributing sources for the payload + contained in this packet. The number of identifiers is given by + the CC field. If there are more than 15 contributing sources, + only 15 can be identified. CSRC identifiers are inserted by + mixers (see Section 7.1), using the SSRC identifiers of + contributing sources. For example, for audio packets the SSRC + identifiers of all sources that were mixed together to create a + packet are listed, allowing correct talker indication at the + receiver. + +5.2 Multiplexing RTP Sessions + + For efficient protocol processing, the number of multiplexing points + should be minimized, as described in the integrated layer processing + design principle [10]. In RTP, multiplexing is provided by the + destination transport address (network address and port number) which + is different for each RTP session. For example, in a teleconference + composed of audio and video media encoded separately, each medium + SHOULD be carried in a separate RTP session with its own destination + transport address. + + + + + +Schulzrinne, et al. Standards Track [Page 16] + +RFC 3550 RTP July 2003 + + + Separate audio and video streams SHOULD NOT be carried in a single + RTP session and demultiplexed based on the payload type or SSRC + fields. Interleaving packets with different RTP media types but + using the same SSRC would introduce several problems: + + 1. If, say, two audio streams shared the same RTP session and the + same SSRC value, and one were to change encodings and thus acquire + a different RTP payload type, there would be no general way of + identifying which stream had changed encodings. + + 2. An SSRC is defined to identify a single timing and sequence number + space. Interleaving multiple payload types would require + different timing spaces if the media clock rates differ and would + require different sequence number spaces to tell which payload + type suffered packet loss. + + 3. The RTCP sender and receiver reports (see Section 6.4) can only + describe one timing and sequence number space per SSRC and do not + carry a payload type field. + + 4. An RTP mixer would not be able to combine interleaved streams of + incompatible media into one stream. + + 5. Carrying multiple media in one RTP session precludes: the use of + different network paths or network resource allocations if + appropriate; reception of a subset of the media if desired, for + example just audio if video would exceed the available bandwidth; + and receiver implementations that use separate processes for the + different media, whereas using separate RTP sessions permits + either single- or multiple-process implementations. + + Using a different SSRC for each medium but sending them in the same + RTP session would avoid the first three problems but not the last + two. + + On the other hand, multiplexing multiple related sources of the same + medium in one RTP session using different SSRC values is the norm for + multicast sessions. The problems listed above don't apply: an RTP + mixer can combine multiple audio sources, for example, and the same + treatment is applicable for all of them. It may also be appropriate + to multiplex streams of the same medium using different SSRC values + in other scenarios where the last two problems do not apply. + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 17] + +RFC 3550 RTP July 2003 + + +5.3 Profile-Specific Modifications to the RTP Header + + The existing RTP data packet header is believed to be complete for + the set of functions required in common across all the application + classes that RTP might support. However, in keeping with the ALF + design principle, the header MAY be tailored through modifications or + additions defined in a profile specification while still allowing + profile-independent monitoring and recording tools to function. + + o The marker bit and payload type field carry profile-specific + information, but they are allocated in the fixed header since many + applications are expected to need them and might otherwise have to + add another 32-bit word just to hold them. The octet containing + these fields MAY be redefined by a profile to suit different + requirements, for example with more or fewer marker bits. If + there are any marker bits, one SHOULD be located in the most + significant bit of the octet since profile-independent monitors + may be able to observe a correlation between packet loss patterns + and the marker bit. + + o Additional information that is required for a particular payload + format, such as a video encoding, SHOULD be carried in the payload + section of the packet. This might be in a header that is always + present at the start of the payload section, or might be indicated + by a reserved value in the data pattern. + + o If a particular class of applications needs additional + functionality independent of payload format, the profile under + which those applications operate SHOULD define additional fixed + fields to follow immediately after the SSRC field of the existing + fixed header. Those applications will be able to quickly and + directly access the additional fields while profile-independent + monitors or recorders can still process the RTP packets by + interpreting only the first twelve octets. + + If it turns out that additional functionality is needed in common + across all profiles, then a new version of RTP should be defined to + make a permanent change to the fixed header. + +5.3.1 RTP Header Extension + + An extension mechanism is provided to allow individual + implementations to experiment with new payload-format-independent + functions that require additional information to be carried in the + RTP data packet header. This mechanism is designed so that the + header extension may be ignored by other interoperating + implementations that have not been extended. + + + + +Schulzrinne, et al. Standards Track [Page 18] + +RFC 3550 RTP July 2003 + + + Note that this header extension is intended only for limited use. + Most potential uses of this mechanism would be better done another + way, using the methods described in the previous section. For + example, a profile-specific extension to the fixed header is less + expensive to process because it is not conditional nor in a variable + location. Additional information required for a particular payload + format SHOULD NOT use this header extension, but SHOULD be carried in + the payload section of the packet. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | defined by profile | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | header extension | + | .... | + + If the X bit in the RTP header is one, a variable-length header + extension MUST be appended to the RTP header, following the CSRC list + if present. The header extension contains a 16-bit length field that + counts the number of 32-bit words in the extension, excluding the + four-octet extension header (therefore zero is a valid length). Only + a single extension can be appended to the RTP data header. To allow + multiple interoperating implementations to each experiment + independently with different header extensions, or to allow a + particular implementation to experiment with more than one type of + header extension, the first 16 bits of the header extension are left + open for distinguishing identifiers or parameters. The format of + these 16 bits is to be defined by the profile specification under + which the implementations are operating. This RTP specification does + not define any header extensions itself. + +6. RTP Control Protocol -- RTCP + + The RTP control protocol (RTCP) is based on the periodic transmission + of control packets to all participants in the session, using the same + distribution mechanism as the data packets. The underlying protocol + MUST provide multiplexing of the data and control packets, for + example using separate port numbers with UDP. RTCP performs four + functions: + + 1. The primary function is to provide feedback on the quality of the + data distribution. This is an integral part of the RTP's role as + a transport protocol and is related to the flow and congestion + control functions of other transport protocols (see Section 10 on + the requirement for congestion control). The feedback may be + directly useful for control of adaptive encodings [18,19], but + experiments with IP multicasting have shown that it is also + + + +Schulzrinne, et al. Standards Track [Page 19] + +RFC 3550 RTP July 2003 + + + critical to get feedback from the receivers to diagnose faults in + the distribution. Sending reception feedback reports to all + participants allows one who is observing problems to evaluate + whether those problems are local or global. With a distribution + mechanism like IP multicast, it is also possible for an entity + such as a network service provider who is not otherwise involved + in the session to receive the feedback information and act as a + third-party monitor to diagnose network problems. This feedback + function is performed by the RTCP sender and receiver reports, + described below in Section 6.4. + + 2. RTCP carries a persistent transport-level identifier for an RTP + source called the canonical name or CNAME, Section 6.5.1. Since + the SSRC identifier may change if a conflict is discovered or a + program is restarted, receivers require the CNAME to keep track of + each participant. Receivers may also require the CNAME to + associate multiple data streams from a given participant in a set + of related RTP sessions, for example to synchronize audio and + video. Inter-media synchronization also requires the NTP and RTP + timestamps included in RTCP packets by data senders. + + 3. The first two functions require that all participants send RTCP + packets, therefore the rate must be controlled in order for RTP to + scale up to a large number of participants. By having each + participant send its control packets to all the others, each can + independently observe the number of participants. This number is + used to calculate the rate at which the packets are sent, as + explained in Section 6.2. + + 4. A fourth, OPTIONAL function is to convey minimal session control + information, for example participant identification to be + displayed in the user interface. This is most likely to be useful + in "loosely controlled" sessions where participants enter and + leave without membership control or parameter negotiation. RTCP + serves as a convenient channel to reach all the participants, but + it is not necessarily expected to support all the control + communication requirements of an application. A higher-level + session control protocol, which is beyond the scope of this + document, may be needed. + + Functions 1-3 SHOULD be used in all environments, but particularly in + the IP multicast environment. RTP application designers SHOULD avoid + mechanisms that can only work in unicast mode and will not scale to + larger numbers. Transmission of RTCP MAY be controlled separately + for senders and receivers, as described in Section 6.2, for cases + such as unidirectional links where feedback from receivers is not + possible. + + + + +Schulzrinne, et al. Standards Track [Page 20] + +RFC 3550 RTP July 2003 + + + Non-normative note: In the multicast routing approach + called Source-Specific Multicast (SSM), there is only one sender + per "channel" (a source address, group address pair), and + receivers (except for the channel source) cannot use multicast to + communicate directly with other channel members. The + recommendations here accommodate SSM only through Section 6.2's + option of turning off receivers' RTCP entirely. Future work will + specify adaptation of RTCP for SSM so that feedback from receivers + can be maintained. + +6.1 RTCP Packet Format + + This specification defines several RTCP packet types to carry a + variety of control information: + + SR: Sender report, for transmission and reception statistics from + participants that are active senders + + RR: Receiver report, for reception statistics from participants + that are not active senders and in combination with SR for + active senders reporting on more than 31 sources + + SDES: Source description items, including CNAME + + BYE: Indicates end of participation + + APP: Application-specific functions + + Each RTCP packet begins with a fixed part similar to that of RTP data + packets, followed by structured elements that MAY be of variable + length according to the packet type but MUST end on a 32-bit + boundary. The alignment requirement and a length field in the fixed + part of each packet are included to make RTCP packets "stackable". + Multiple RTCP packets can be concatenated without any intervening + separators to form a compound RTCP packet that is sent in a single + packet of the lower layer protocol, for example UDP. There is no + explicit count of individual RTCP packets in the compound packet + since the lower layer protocols are expected to provide an overall + length to determine the end of the compound packet. + + Each individual RTCP packet in the compound packet may be processed + independently with no requirements upon the order or combination of + packets. However, in order to perform the functions of the protocol, + the following constraints are imposed: + + + + + + + +Schulzrinne, et al. Standards Track [Page 21] + +RFC 3550 RTP July 2003 + + + o Reception statistics (in SR or RR) should be sent as often as + bandwidth constraints will allow to maximize the resolution of the + statistics, therefore each periodically transmitted compound RTCP + packet MUST include a report packet. + + o New receivers need to receive the CNAME for a source as soon as + possible to identify the source and to begin associating media for + purposes such as lip-sync, so each compound RTCP packet MUST also + include the SDES CNAME except when the compound RTCP packet is + split for partial encryption as described in Section 9.1. + + o The number of packet types that may appear first in the compound + packet needs to be limited to increase the number of constant bits + in the first word and the probability of successfully validating + RTCP packets against misaddressed RTP data packets or other + unrelated packets. + + Thus, all RTCP packets MUST be sent in a compound packet of at least + two individual packets, with the following format: + + Encryption prefix: If and only if the compound packet is to be + encrypted according to the method in Section 9.1, it MUST be + prefixed by a random 32-bit quantity redrawn for every compound + packet transmitted. If padding is required for the encryption, it + MUST be added to the last packet of the compound packet. + + SR or RR: The first RTCP packet in the compound packet MUST + always be a report packet to facilitate header validation as + described in Appendix A.2. This is true even if no data has been + sent or received, in which case an empty RR MUST be sent, and even + if the only other RTCP packet in the compound packet is a BYE. + + Additional RRs: If the number of sources for which reception + statistics are being reported exceeds 31, the number that will fit + into one SR or RR packet, then additional RR packets SHOULD follow + the initial report packet. + + SDES: An SDES packet containing a CNAME item MUST be included + in each compound RTCP packet, except as noted in Section 9.1. + Other source description items MAY optionally be included if + required by a particular application, subject to bandwidth + constraints (see Section 6.3.9). + + BYE or APP: Other RTCP packet types, including those yet to be + defined, MAY follow in any order, except that BYE SHOULD be the + last packet sent with a given SSRC/CSRC. Packet types MAY appear + more than once. + + + + +Schulzrinne, et al. Standards Track [Page 22] + +RFC 3550 RTP July 2003 + + + An individual RTP participant SHOULD send only one compound RTCP + packet per report interval in order for the RTCP bandwidth per + participant to be estimated correctly (see Section 6.2), except when + the compound RTCP packet is split for partial encryption as described + in Section 9.1. If there are too many sources to fit all the + necessary RR packets into one compound RTCP packet without exceeding + the maximum transmission unit (MTU) of the network path, then only + the subset that will fit into one MTU SHOULD be included in each + interval. The subsets SHOULD be selected round-robin across multiple + intervals so that all sources are reported. + + It is RECOMMENDED that translators and mixers combine individual RTCP + packets from the multiple sources they are forwarding into one + compound packet whenever feasible in order to amortize the packet + overhead (see Section 7). An example RTCP compound packet as might + be produced by a mixer is shown in Fig. 1. If the overall length of + a compound packet would exceed the MTU of the network path, it SHOULD + be segmented into multiple shorter compound packets to be transmitted + in separate packets of the underlying protocol. This does not impair + the RTCP bandwidth estimation because each compound packet represents + at least one distinct participant. Note that each of the compound + packets MUST begin with an SR or RR packet. + + An implementation SHOULD ignore incoming RTCP packets with types + unknown to it. Additional RTCP packet types may be registered with + the Internet Assigned Numbers Authority (IANA) as described in + Section 15. + + if encrypted: random 32-bit integer + | + |[--------- packet --------][---------- packet ----------][-packet-] + | + | receiver chunk chunk + V reports item item item item + -------------------------------------------------------------------- + R[SR #sendinfo #site1#site2][SDES #CNAME PHONE #CNAME LOC][BYE##why] + -------------------------------------------------------------------- + | | + |<----------------------- compound packet ----------------------->| + |<-------------------------- UDP packet ------------------------->| + + #: SSRC/CSRC identifier + + Figure 1: Example of an RTCP compound packet + + + + + + + +Schulzrinne, et al. Standards Track [Page 23] + +RFC 3550 RTP July 2003 + + +6.2 RTCP Transmission Interval + + RTP is designed to allow an application to scale automatically over + session sizes ranging from a few participants to thousands. For + example, in an audio conference the data traffic is inherently self- + limiting because only one or two people will speak at a time, so with + multicast distribution the data rate on any given link remains + relatively constant independent of the number of participants. + However, the control traffic is not self-limiting. If the reception + reports from each participant were sent at a constant rate, the + control traffic would grow linearly with the number of participants. + Therefore, the rate must be scaled down by dynamically calculating + the interval between RTCP packet transmissions. + + For each session, it is assumed that the data traffic is subject to + an aggregate limit called the "session bandwidth" to be divided among + the participants. This bandwidth might be reserved and the limit + enforced by the network. If there is no reservation, there may be + other constraints, depending on the environment, that establish the + "reasonable" maximum for the session to use, and that would be the + session bandwidth. The session bandwidth may be chosen based on some + cost or a priori knowledge of the available network bandwidth for the + session. It is somewhat independent of the media encoding, but the + encoding choice may be limited by the session bandwidth. Often, the + session bandwidth is the sum of the nominal bandwidths of the senders + expected to be concurrently active. For teleconference audio, this + number would typically be one sender's bandwidth. For layered + encodings, each layer is a separate RTP session with its own session + bandwidth parameter. + + The session bandwidth parameter is expected to be supplied by a + session management application when it invokes a media application, + but media applications MAY set a default based on the single-sender + data bandwidth for the encoding selected for the session. The + application MAY also enforce bandwidth limits based on multicast + scope rules or other criteria. All participants MUST use the same + value for the session bandwidth so that the same RTCP interval will + be calculated. + + Bandwidth calculations for control and data traffic include lower- + layer transport and network protocols (e.g., UDP and IP) since that + is what the resource reservation system would need to know. The + application can also be expected to know which of these protocols are + in use. Link level headers are not included in the calculation since + the packet will be encapsulated with different link level headers as + it travels. + + + + + +Schulzrinne, et al. Standards Track [Page 24] + +RFC 3550 RTP July 2003 + + + The control traffic should be limited to a small and known fraction + of the session bandwidth: small so that the primary function of the + transport protocol to carry data is not impaired; known so that the + control traffic can be included in the bandwidth specification given + to a resource reservation protocol, and so that each participant can + independently calculate its share. The control traffic bandwidth is + in addition to the session bandwidth for the data traffic. It is + RECOMMENDED that the fraction of the session bandwidth added for RTCP + be fixed at 5%. It is also RECOMMENDED that 1/4 of the RTCP + bandwidth be dedicated to participants that are sending data so that + in sessions with a large number of receivers but a small number of + senders, newly joining participants will more quickly receive the + CNAME for the sending sites. When the proportion of senders is + greater than 1/4 of the participants, the senders get their + proportion of the full RTCP bandwidth. While the values of these and + other constants in the interval calculation are not critical, all + participants in the session MUST use the same values so the same + interval will be calculated. Therefore, these constants SHOULD be + fixed for a particular profile. + + A profile MAY specify that the control traffic bandwidth may be a + separate parameter of the session rather than a strict percentage of + the session bandwidth. Using a separate parameter allows rate- + adaptive applications to set an RTCP bandwidth consistent with a + "typical" data bandwidth that is lower than the maximum bandwidth + specified by the session bandwidth parameter. + + The profile MAY further specify that the control traffic bandwidth + may be divided into two separate session parameters for those + participants which are active data senders and those which are not; + let us call the parameters S and R. Following the recommendation + that 1/4 of the RTCP bandwidth be dedicated to data senders, the + RECOMMENDED default values for these two parameters would be 1.25% + and 3.75%, respectively. When the proportion of senders is greater + than S/(S+R) of the participants, the senders get their proportion of + the sum of these parameters. Using two parameters allows RTCP + reception reports to be turned off entirely for a particular session + by setting the RTCP bandwidth for non-data-senders to zero while + keeping the RTCP bandwidth for data senders non-zero so that sender + reports can still be sent for inter-media synchronization. Turning + off RTCP reception reports is NOT RECOMMENDED because they are needed + for the functions listed at the beginning of Section 6, particularly + reception quality feedback and congestion control. However, doing so + may be appropriate for systems operating on unidirectional links or + for sessions that don't require feedback on the quality of reception + or liveness of receivers and that have other means to avoid + congestion. + + + + +Schulzrinne, et al. Standards Track [Page 25] + +RFC 3550 RTP July 2003 + + + The calculated interval between transmissions of compound RTCP + packets SHOULD also have a lower bound to avoid having bursts of + packets exceed the allowed bandwidth when the number of participants + is small and the traffic isn't smoothed according to the law of large + numbers. It also keeps the report interval from becoming too small + during transient outages like a network partition such that + adaptation is delayed when the partition heals. At application + startup, a delay SHOULD be imposed before the first compound RTCP + packet is sent to allow time for RTCP packets to be received from + other participants so the report interval will converge to the + correct value more quickly. This delay MAY be set to half the + minimum interval to allow quicker notification that the new + participant is present. The RECOMMENDED value for a fixed minimum + interval is 5 seconds. + + An implementation MAY scale the minimum RTCP interval to a smaller + value inversely proportional to the session bandwidth parameter with + the following limitations: + + o For multicast sessions, only active data senders MAY use the + reduced minimum value to calculate the interval for transmission + of compound RTCP packets. + + o For unicast sessions, the reduced value MAY be used by + participants that are not active data senders as well, and the + delay before sending the initial compound RTCP packet MAY be zero. + + o For all sessions, the fixed minimum SHOULD be used when + calculating the participant timeout interval (see Section 6.3.5) + so that implementations which do not use the reduced value for + transmitting RTCP packets are not timed out by other participants + prematurely. + + o The RECOMMENDED value for the reduced minimum in seconds is 360 + divided by the session bandwidth in kilobits/second. This minimum + is smaller than 5 seconds for bandwidths greater than 72 kb/s. + + The algorithm described in Section 6.3 and Appendix A.7 was designed + to meet the goals outlined in this section. It calculates the + interval between sending compound RTCP packets to divide the allowed + control traffic bandwidth among the participants. This allows an + application to provide fast response for small sessions where, for + example, identification of all participants is important, yet + automatically adapt to large sessions. The algorithm incorporates + the following characteristics: + + + + + + +Schulzrinne, et al. Standards Track [Page 26] + +RFC 3550 RTP July 2003 + + + o The calculated interval between RTCP packets scales linearly with + the number of members in the group. It is this linear factor + which allows for a constant amount of control traffic when summed + across all members. + + o The interval between RTCP packets is varied randomly over the + range [0.5,1.5] times the calculated interval to avoid unintended + synchronization of all participants [20]. The first RTCP packet + sent after joining a session is also delayed by a random variation + of half the minimum RTCP interval. + + o A dynamic estimate of the average compound RTCP packet size is + calculated, including all those packets received and sent, to + automatically adapt to changes in the amount of control + information carried. + + o Since the calculated interval is dependent on the number of + observed group members, there may be undesirable startup effects + when a new user joins an existing session, or many users + simultaneously join a new session. These new users will initially + have incorrect estimates of the group membership, and thus their + RTCP transmission interval will be too short. This problem can be + significant if many users join the session simultaneously. To + deal with this, an algorithm called "timer reconsideration" is + employed. This algorithm implements a simple back-off mechanism + which causes users to hold back RTCP packet transmission if the + group sizes are increasing. + + o When users leave a session, either with a BYE or by timeout, the + group membership decreases, and thus the calculated interval + should decrease. A "reverse reconsideration" algorithm is used to + allow members to more quickly reduce their intervals in response + to group membership decreases. + + o BYE packets are given different treatment than other RTCP packets. + When a user leaves a group, and wishes to send a BYE packet, it + may do so before its next scheduled RTCP packet. However, + transmission of BYEs follows a back-off algorithm which avoids + floods of BYE packets should a large number of members + simultaneously leave the session. + + This algorithm may be used for sessions in which all participants are + allowed to send. In that case, the session bandwidth parameter is + the product of the individual sender's bandwidth times the number of + participants, and the RTCP bandwidth is 5% of that. + + Details of the algorithm's operation are given in the sections that + follow. Appendix A.7 gives an example implementation. + + + +Schulzrinne, et al. Standards Track [Page 27] + +RFC 3550 RTP July 2003 + + +6.2.1 Maintaining the Number of Session Members + + Calculation of the RTCP packet interval depends upon an estimate of + the number of sites participating in the session. New sites are + added to the count when they are heard, and an entry for each SHOULD + be created in a table indexed by the SSRC or CSRC identifier (see + Section 8.2) to keep track of them. New entries MAY be considered + not valid until multiple packets carrying the new SSRC have been + received (see Appendix A.1), or until an SDES RTCP packet containing + a CNAME for that SSRC has been received. Entries MAY be deleted from + the table when an RTCP BYE packet with the corresponding SSRC + identifier is received, except that some straggler data packets might + arrive after the BYE and cause the entry to be recreated. Instead, + the entry SHOULD be marked as having received a BYE and then deleted + after an appropriate delay. + + A participant MAY mark another site inactive, or delete it if not yet + valid, if no RTP or RTCP packet has been received for a small number + of RTCP report intervals (5 is RECOMMENDED). This provides some + robustness against packet loss. All sites must have the same value + for this multiplier and must calculate roughly the same value for the + RTCP report interval in order for this timeout to work properly. + Therefore, this multiplier SHOULD be fixed for a particular profile. + + For sessions with a very large number of participants, it may be + impractical to maintain a table to store the SSRC identifier and + state information for all of them. An implementation MAY use SSRC + sampling, as described in [21], to reduce the storage requirements. + An implementation MAY use any other algorithm with similar + performance. A key requirement is that any algorithm considered + SHOULD NOT substantially underestimate the group size, although it + MAY overestimate. + +6.3 RTCP Packet Send and Receive Rules + + The rules for how to send, and what to do when receiving an RTCP + packet are outlined here. An implementation that allows operation in + a multicast environment or a multipoint unicast environment MUST meet + the requirements in Section 6.2. Such an implementation MAY use the + algorithm defined in this section to meet those requirements, or MAY + use some other algorithm so long as it provides equivalent or better + performance. An implementation which is constrained to two-party + unicast operation SHOULD still use randomization of the RTCP + transmission interval to avoid unintended synchronization of multiple + instances operating in the same environment, but MAY omit the "timer + reconsideration" and "reverse reconsideration" algorithms in Sections + 6.3.3, 6.3.6 and 6.3.7. + + + + +Schulzrinne, et al. Standards Track [Page 28] + +RFC 3550 RTP July 2003 + + + To execute these rules, a session participant must maintain several + pieces of state: + + tp: the last time an RTCP packet was transmitted; + + tc: the current time; + + tn: the next scheduled transmission time of an RTCP packet; + + pmembers: the estimated number of session members at the time tn + was last recomputed; + + members: the most current estimate for the number of session + members; + + senders: the most current estimate for the number of senders in + the session; + + rtcp_bw: The target RTCP bandwidth, i.e., the total bandwidth + that will be used for RTCP packets by all members of this session, + in octets per second. This will be a specified fraction of the + "session bandwidth" parameter supplied to the application at + startup. + + we_sent: Flag that is true if the application has sent data + since the 2nd previous RTCP report was transmitted. + + avg_rtcp_size: The average compound RTCP packet size, in octets, + over all RTCP packets sent and received by this participant. The + size includes lower-layer transport and network protocol headers + (e.g., UDP and IP) as explained in Section 6.2. + + initial: Flag that is true if the application has not yet sent + an RTCP packet. + + Many of these rules make use of the "calculated interval" between + packet transmissions. This interval is described in the following + section. + +6.3.1 Computing the RTCP Transmission Interval + + To maintain scalability, the average interval between packets from a + session participant should scale with the group size. This interval + is called the calculated interval. It is obtained by combining a + number of the pieces of state described above. The calculated + interval T is then determined as follows: + + + + + +Schulzrinne, et al. Standards Track [Page 29] + +RFC 3550 RTP July 2003 + + + 1. If the number of senders is less than or equal to 25% of the + membership (members), the interval depends on whether the + participant is a sender or not (based on the value of we_sent). + If the participant is a sender (we_sent true), the constant C is + set to the average RTCP packet size (avg_rtcp_size) divided by 25% + of the RTCP bandwidth (rtcp_bw), and the constant n is set to the + number of senders. If we_sent is not true, the constant C is set + to the average RTCP packet size divided by 75% of the RTCP + bandwidth. The constant n is set to the number of receivers + (members - senders). If the number of senders is greater than + 25%, senders and receivers are treated together. The constant C + is set to the average RTCP packet size divided by the total RTCP + bandwidth and n is set to the total number of members. As stated + in Section 6.2, an RTP profile MAY specify that the RTCP bandwidth + may be explicitly defined by two separate parameters (call them S + and R) for those participants which are senders and those which + are not. In that case, the 25% fraction becomes S/(S+R) and the + 75% fraction becomes R/(S+R). Note that if R is zero, the + percentage of senders is never greater than S/(S+R), and the + implementation must avoid division by zero. + + 2. If the participant has not yet sent an RTCP packet (the variable + initial is true), the constant Tmin is set to 2.5 seconds, else it + is set to 5 seconds. + + 3. The deterministic calculated interval Td is set to max(Tmin, n*C). + + 4. The calculated interval T is set to a number uniformly distributed + between 0.5 and 1.5 times the deterministic calculated interval. + + 5. The resulting value of T is divided by e-3/2=1.21828 to compensate + for the fact that the timer reconsideration algorithm converges to + a value of the RTCP bandwidth below the intended average. + + This procedure results in an interval which is random, but which, on + average, gives at least 25% of the RTCP bandwidth to senders and the + rest to receivers. If the senders constitute more than one quarter + of the membership, this procedure splits the bandwidth equally among + all participants, on average. + +6.3.2 Initialization + + Upon joining the session, the participant initializes tp to 0, tc to + 0, senders to 0, pmembers to 1, members to 1, we_sent to false, + rtcp_bw to the specified fraction of the session bandwidth, initial + to true, and avg_rtcp_size to the probable size of the first RTCP + packet that the application will later construct. The calculated + interval T is then computed, and the first packet is scheduled for + + + +Schulzrinne, et al. Standards Track [Page 30] + +RFC 3550 RTP July 2003 + + + time tn = T. This means that a transmission timer is set which + expires at time T. Note that an application MAY use any desired + approach for implementing this timer. + + The participant adds its own SSRC to the member table. + +6.3.3 Receiving an RTP or Non-BYE RTCP Packet + + When an RTP or RTCP packet is received from a participant whose SSRC + is not in the member table, the SSRC is added to the table, and the + value for members is updated once the participant has been validated + as described in Section 6.2.1. The same processing occurs for each + CSRC in a validated RTP packet. + + When an RTP packet is received from a participant whose SSRC is not + in the sender table, the SSRC is added to the table, and the value + for senders is updated. + + For each compound RTCP packet received, the value of avg_rtcp_size is + updated: + + avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size + + where packet_size is the size of the RTCP packet just received. + +6.3.4 Receiving an RTCP BYE Packet + + Except as described in Section 6.3.7 for the case when an RTCP BYE is + to be transmitted, if the received packet is an RTCP BYE packet, the + SSRC is checked against the member table. If present, the entry is + removed from the table, and the value for members is updated. The + SSRC is then checked against the sender table. If present, the entry + is removed from the table, and the value for senders is updated. + + Furthermore, to make the transmission rate of RTCP packets more + adaptive to changes in group membership, the following "reverse + reconsideration" algorithm SHOULD be executed when a BYE packet is + received that reduces members to a value less than pmembers: + + o The value for tn is updated according to the following formula: + + tn = tc + (members/pmembers) * (tn - tc) + + o The value for tp is updated according the following formula: + + tp = tc - (members/pmembers) * (tc - tp). + + + + + +Schulzrinne, et al. Standards Track [Page 31] + +RFC 3550 RTP July 2003 + + + o The next RTCP packet is rescheduled for transmission at time tn, + which is now earlier. + + o The value of pmembers is set equal to members. + + This algorithm does not prevent the group size estimate from + incorrectly dropping to zero for a short time due to premature + timeouts when most participants of a large session leave at once but + some remain. The algorithm does make the estimate return to the + correct value more rapidly. This situation is unusual enough and the + consequences are sufficiently harmless that this problem is deemed + only a secondary concern. + +6.3.5 Timing Out an SSRC + + At occasional intervals, the participant MUST check to see if any of + the other participants time out. To do this, the participant + computes the deterministic (without the randomization factor) + calculated interval Td for a receiver, that is, with we_sent false. + Any other session member who has not sent an RTP or RTCP packet since + time tc - MTd (M is the timeout multiplier, and defaults to 5) is + timed out. This means that its SSRC is removed from the member list, + and members is updated. A similar check is performed on the sender + list. Any member on the sender list who has not sent an RTP packet + since time tc - 2T (within the last two RTCP report intervals) is + removed from the sender list, and senders is updated. + + If any members time out, the reverse reconsideration algorithm + described in Section 6.3.4 SHOULD be performed. + + The participant MUST perform this check at least once per RTCP + transmission interval. + +6.3.6 Expiration of Transmission Timer + + When the packet transmission timer expires, the participant performs + the following operations: + + o The transmission interval T is computed as described in Section + 6.3.1, including the randomization factor. + + o If tp + T is less than or equal to tc, an RTCP packet is + transmitted. tp is set to tc, then another value for T is + calculated as in the previous step and tn is set to tc + T. The + transmission timer is set to expire again at time tn. If tp + T + is greater than tc, tn is set to tp + T. No RTCP packet is + transmitted. The transmission timer is set to expire at time tn. + + + + +Schulzrinne, et al. Standards Track [Page 32] + +RFC 3550 RTP July 2003 + + + o pmembers is set to members. + + If an RTCP packet is transmitted, the value of initial is set to + FALSE. Furthermore, the value of avg_rtcp_size is updated: + + avg_rtcp_size = (1/16) * packet_size + (15/16) * avg_rtcp_size + + where packet_size is the size of the RTCP packet just transmitted. + +6.3.7 Transmitting a BYE Packet + + When a participant wishes to leave a session, a BYE packet is + transmitted to inform the other participants of the event. In order + to avoid a flood of BYE packets when many participants leave the + system, a participant MUST execute the following algorithm if the + number of members is more than 50 when the participant chooses to + leave. This algorithm usurps the normal role of the members variable + to count BYE packets instead: + + o When the participant decides to leave the system, tp is reset to + tc, the current time, members and pmembers are initialized to 1, + initial is set to 1, we_sent is set to false, senders is set to 0, + and avg_rtcp_size is set to the size of the compound BYE packet. + The calculated interval T is computed. The BYE packet is then + scheduled for time tn = tc + T. + + o Every time a BYE packet from another participant is received, + members is incremented by 1 regardless of whether that participant + exists in the member table or not, and when SSRC sampling is in + use, regardless of whether or not the BYE SSRC would be included + in the sample. members is NOT incremented when other RTCP packets + or RTP packets are received, but only for BYE packets. Similarly, + avg_rtcp_size is updated only for received BYE packets. senders + is NOT updated when RTP packets arrive; it remains 0. + + o Transmission of the BYE packet then follows the rules for + transmitting a regular RTCP packet, as above. + + This allows BYE packets to be sent right away, yet controls their + total bandwidth usage. In the worst case, this could cause RTCP + control packets to use twice the bandwidth as normal (10%) -- 5% for + non-BYE RTCP packets and 5% for BYE. + + A participant that does not want to wait for the above mechanism to + allow transmission of a BYE packet MAY leave the group without + sending a BYE at all. That participant will eventually be timed out + by the other group members. + + + + +Schulzrinne, et al. Standards Track [Page 33] + +RFC 3550 RTP July 2003 + + + If the group size estimate members is less than 50 when the + participant decides to leave, the participant MAY send a BYE packet + immediately. Alternatively, the participant MAY choose to execute + the above BYE backoff algorithm. + + In either case, a participant which never sent an RTP or RTCP packet + MUST NOT send a BYE packet when they leave the group. + +6.3.8 Updating we_sent + + The variable we_sent contains true if the participant has sent an RTP + packet recently, false otherwise. This determination is made by + using the same mechanisms as for managing the set of other + participants listed in the senders table. If the participant sends + an RTP packet when we_sent is false, it adds itself to the sender + table and sets we_sent to true. The reverse reconsideration + algorithm described in Section 6.3.4 SHOULD be performed to possibly + reduce the delay before sending an SR packet. Every time another RTP + packet is sent, the time of transmission of that packet is maintained + in the table. The normal sender timeout algorithm is then applied to + the participant -- if an RTP packet has not been transmitted since + time tc - 2T, the participant removes itself from the sender table, + decrements the sender count, and sets we_sent to false. + +6.3.9 Allocation of Source Description Bandwidth + + This specification defines several source description (SDES) items in + addition to the mandatory CNAME item, such as NAME (personal name) + and EMAIL (email address). It also provides a means to define new + application-specific RTCP packet types. Applications should exercise + caution in allocating control bandwidth to this additional + information because it will slow down the rate at which reception + reports and CNAME are sent, thus impairing the performance of the + protocol. It is RECOMMENDED that no more than 20% of the RTCP + bandwidth allocated to a single participant be used to carry the + additional information. Furthermore, it is not intended that all + SDES items will be included in every application. Those that are + included SHOULD be assigned a fraction of the bandwidth according to + their utility. Rather than estimate these fractions dynamically, it + is recommended that the percentages be translated statically into + report interval counts based on the typical length of an item. + + For example, an application may be designed to send only CNAME, NAME + and EMAIL and not any others. NAME might be given much higher + priority than EMAIL because the NAME would be displayed continuously + in the application's user interface, whereas EMAIL would be displayed + only when requested. At every RTCP interval, an RR packet and an + SDES packet with the CNAME item would be sent. For a small session + + + +Schulzrinne, et al. Standards Track [Page 34] + +RFC 3550 RTP July 2003 + + + operating at the minimum interval, that would be every 5 seconds on + the average. Every third interval (15 seconds), one extra item would + be included in the SDES packet. Seven out of eight times this would + be the NAME item, and every eighth time (2 minutes) it would be the + EMAIL item. + + When multiple applications operate in concert using cross-application + binding through a common CNAME for each participant, for example in a + multimedia conference composed of an RTP session for each medium, the + additional SDES information MAY be sent in only one RTP session. The + other sessions would carry only the CNAME item. In particular, this + approach should be applied to the multiple sessions of a layered + encoding scheme (see Section 2.4). + +6.4 Sender and Receiver Reports + + RTP receivers provide reception quality feedback using RTCP report + packets which may take one of two forms depending upon whether or not + the receiver is also a sender. The only difference between the + sender report (SR) and receiver report (RR) forms, besides the packet + type code, is that the sender report includes a 20-byte sender + information section for use by active senders. The SR is issued if a + site has sent any data packets during the interval since issuing the + last report or the previous one, otherwise the RR is issued. + + Both the SR and RR forms include zero or more reception report + blocks, one for each of the synchronization sources from which this + receiver has received RTP data packets since the last report. + Reports are not issued for contributing sources listed in the CSRC + list. Each reception report block provides statistics about the data + received from the particular source indicated in that block. Since a + maximum of 31 reception report blocks will fit in an SR or RR packet, + additional RR packets SHOULD be stacked after the initial SR or RR + packet as needed to contain the reception reports for all sources + heard during the interval since the last report. If there are too + many sources to fit all the necessary RR packets into one compound + RTCP packet without exceeding the MTU of the network path, then only + the subset that will fit into one MTU SHOULD be included in each + interval. The subsets SHOULD be selected round-robin across multiple + intervals so that all sources are reported. + + The next sections define the formats of the two reports, how they may + be extended in a profile-specific manner if an application requires + additional feedback information, and how the reports may be used. + Details of reception reporting by translators and mixers is given in + Section 7. + + + + + +Schulzrinne, et al. Standards Track [Page 35] + +RFC 3550 RTP July 2003 + + +6.4.1 SR: Sender Report RTCP Packet + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P| RC | PT=SR=200 | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SSRC of sender | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +sender | NTP timestamp, most significant word | +info +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | NTP timestamp, least significant word | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | RTP timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | sender's packet count | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | sender's octet count | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report | SSRC_1 (SSRC of first source) | +block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 1 | fraction lost | cumulative number of packets lost | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | extended highest sequence number received | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | interarrival jitter | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | last SR (LSR) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | delay since last SR (DLSR) | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report | SSRC_2 (SSRC of second source) | +block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 2 : ... : + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + | profile-specific extensions | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The sender report packet consists of three sections, possibly + followed by a fourth profile-specific extension section if defined. + The first section, the header, is 8 octets long. The fields have the + following meaning: + + version (V): 2 bits + Identifies the version of RTP, which is the same in RTCP packets + as in RTP data packets. The version defined by this specification + is two (2). + + + + +Schulzrinne, et al. Standards Track [Page 36] + +RFC 3550 RTP July 2003 + + + padding (P): 1 bit + If the padding bit is set, this individual RTCP packet contains + some additional padding octets at the end which are not part of + the control information but are included in the length field. The + last octet of the padding is a count of how many padding octets + should be ignored, including itself (it will be a multiple of + four). Padding may be needed by some encryption algorithms with + fixed block sizes. In a compound RTCP packet, padding is only + required on one individual packet because the compound packet is + encrypted as a whole for the method in Section 9.1. Thus, padding + MUST only be added to the last individual packet, and if padding + is added to that packet, the padding bit MUST be set only on that + packet. This convention aids the header validity checks described + in Appendix A.2 and allows detection of packets from some early + implementations that incorrectly set the padding bit on the first + individual packet and add padding to the last individual packet. + + reception report count (RC): 5 bits + The number of reception report blocks contained in this packet. A + value of zero is valid. + + packet type (PT): 8 bits + Contains the constant 200 to identify this as an RTCP SR packet. + + length: 16 bits + The length of this RTCP packet in 32-bit words minus one, + including the header and any padding. (The offset of one makes + zero a valid length and avoids a possible infinite loop in + scanning a compound RTCP packet, while counting 32-bit words + avoids a validity check for a multiple of 4.) + + SSRC: 32 bits + The synchronization source identifier for the originator of this + SR packet. + + The second section, the sender information, is 20 octets long and is + present in every sender report packet. It summarizes the data + transmissions from this sender. The fields have the following + meaning: + + NTP timestamp: 64 bits + Indicates the wallclock time (see Section 4) when this report was + sent so that it may be used in combination with timestamps + returned in reception reports from other receivers to measure + round-trip propagation to those receivers. Receivers should + expect that the measurement accuracy of the timestamp may be + limited to far less than the resolution of the NTP timestamp. The + measurement uncertainty of the timestamp is not indicated as it + + + +Schulzrinne, et al. Standards Track [Page 37] + +RFC 3550 RTP July 2003 + + + may not be known. On a system that has no notion of wallclock + time but does have some system-specific clock such as "system + uptime", a sender MAY use that clock as a reference to calculate + relative NTP timestamps. It is important to choose a commonly + used clock so that if separate implementations are used to produce + the individual streams of a multimedia session, all + implementations will use the same clock. Until the year 2036, + relative and absolute timestamps will differ in the high bit so + (invalid) comparisons will show a large difference; by then one + hopes relative timestamps will no longer be needed. A sender that + has no notion of wallclock or elapsed time MAY set the NTP + timestamp to zero. + + RTP timestamp: 32 bits + Corresponds to the same time as the NTP timestamp (above), but in + the same units and with the same random offset as the RTP + timestamps in data packets. This correspondence may be used for + intra- and inter-media synchronization for sources whose NTP + timestamps are synchronized, and may be used by media-independent + receivers to estimate the nominal RTP clock frequency. Note that + in most cases this timestamp will not be equal to the RTP + timestamp in any adjacent data packet. Rather, it MUST be + calculated from the corresponding NTP timestamp using the + relationship between the RTP timestamp counter and real time as + maintained by periodically checking the wallclock time at a + sampling instant. + + sender's packet count: 32 bits + The total number of RTP data packets transmitted by the sender + since starting transmission up until the time this SR packet was + generated. The count SHOULD be reset if the sender changes its + SSRC identifier. + + sender's octet count: 32 bits + The total number of payload octets (i.e., not including header or + padding) transmitted in RTP data packets by the sender since + starting transmission up until the time this SR packet was + generated. The count SHOULD be reset if the sender changes its + SSRC identifier. This field can be used to estimate the average + payload data rate. + + The third section contains zero or more reception report blocks + depending on the number of other sources heard by this sender since + the last report. Each reception report block conveys statistics on + the reception of RTP packets from a single synchronization source. + Receivers SHOULD NOT carry over statistics when a source changes its + SSRC identifier due to a collision. These statistics are: + + + + +Schulzrinne, et al. Standards Track [Page 38] + +RFC 3550 RTP July 2003 + + + SSRC_n (source identifier): 32 bits + The SSRC identifier of the source to which the information in this + reception report block pertains. + + fraction lost: 8 bits + The fraction of RTP data packets from source SSRC_n lost since the + previous SR or RR packet was sent, expressed as a fixed point + number with the binary point at the left edge of the field. (That + is equivalent to taking the integer part after multiplying the + loss fraction by 256.) This fraction is defined to be the number + of packets lost divided by the number of packets expected, as + defined in the next paragraph. An implementation is shown in + Appendix A.3. If the loss is negative due to duplicates, the + fraction lost is set to zero. Note that a receiver cannot tell + whether any packets were lost after the last one received, and + that there will be no reception report block issued for a source + if all packets from that source sent during the last reporting + interval have been lost. + + cumulative number of packets lost: 24 bits + The total number of RTP data packets from source SSRC_n that have + been lost since the beginning of reception. This number is + defined to be the number of packets expected less the number of + packets actually received, where the number of packets received + includes any which are late or duplicates. Thus, packets that + arrive late are not counted as lost, and the loss may be negative + if there are duplicates. The number of packets expected is + defined to be the extended last sequence number received, as + defined next, less the initial sequence number received. This may + be calculated as shown in Appendix A.3. + + extended highest sequence number received: 32 bits + The low 16 bits contain the highest sequence number received in an + RTP data packet from source SSRC_n, and the most significant 16 + bits extend that sequence number with the corresponding count of + sequence number cycles, which may be maintained according to the + algorithm in Appendix A.1. Note that different receivers within + the same session will generate different extensions to the + sequence number if their start times differ significantly. + + interarrival jitter: 32 bits + An estimate of the statistical variance of the RTP data packet + interarrival time, measured in timestamp units and expressed as an + unsigned integer. The interarrival jitter J is defined to be the + mean deviation (smoothed absolute value) of the difference D in + packet spacing at the receiver compared to the sender for a pair + of packets. As shown in the equation below, this is equivalent to + the difference in the "relative transit time" for the two packets; + + + +Schulzrinne, et al. Standards Track [Page 39] + +RFC 3550 RTP July 2003 + + + the relative transit time is the difference between a packet's RTP + timestamp and the receiver's clock at the time of arrival, + measured in the same units. + + If Si is the RTP timestamp from packet i, and Ri is the time of + arrival in RTP timestamp units for packet i, then for two packets + i and j, D may be expressed as + + D(i,j) = (Rj - Ri) - (Sj - Si) = (Rj - Sj) - (Ri - Si) + + The interarrival jitter SHOULD be calculated continuously as each + data packet i is received from source SSRC_n, using this + difference D for that packet and the previous packet i-1 in order + of arrival (not necessarily in sequence), according to the formula + + J(i) = J(i-1) + (|D(i-1,i)| - J(i-1))/16 + + Whenever a reception report is issued, the current value of J is + sampled. + + The jitter calculation MUST conform to the formula specified here + in order to allow profile-independent monitors to make valid + interpretations of reports coming from different implementations. + This algorithm is the optimal first-order estimator and the gain + parameter 1/16 gives a good noise reduction ratio while + maintaining a reasonable rate of convergence [22]. A sample + implementation is shown in Appendix A.8. See Section 6.4.4 for a + discussion of the effects of varying packet duration and delay + before transmission. + + last SR timestamp (LSR): 32 bits + The middle 32 bits out of 64 in the NTP timestamp (as explained in + Section 4) received as part of the most recent RTCP sender report + (SR) packet from source SSRC_n. If no SR has been received yet, + the field is set to zero. + + delay since last SR (DLSR): 32 bits + The delay, expressed in units of 1/65536 seconds, between + receiving the last SR packet from source SSRC_n and sending this + reception report block. If no SR packet has been received yet + from SSRC_n, the DLSR field is set to zero. + + Let SSRC_r denote the receiver issuing this receiver report. + Source SSRC_n can compute the round-trip propagation delay to + SSRC_r by recording the time A when this reception report block is + received. It calculates the total round-trip time A-LSR using the + last SR timestamp (LSR) field, and then subtracting this field to + leave the round-trip propagation delay as (A - LSR - DLSR). This + + + +Schulzrinne, et al. Standards Track [Page 40] + +RFC 3550 RTP July 2003 + + + is illustrated in Fig. 2. Times are shown in both a hexadecimal + representation of the 32-bit fields and the equivalent floating- + point decimal representation. Colons indicate a 32-bit field + divided into a 16-bit integer part and 16-bit fraction part. + + This may be used as an approximate measure of distance to cluster + receivers, although some links have very asymmetric delays. + + [10 Nov 1995 11:33:25.125 UTC] [10 Nov 1995 11:33:36.5 UTC] + n SR(n) A=b710:8000 (46864.500 s) + ----------------------------------------------------------------> + v ^ + ntp_sec =0xb44db705 v ^ dlsr=0x0005:4000 ( 5.250s) + ntp_frac=0x20000000 v ^ lsr =0xb705:2000 (46853.125s) + (3024992005.125 s) v ^ + r v ^ RR(n) + ----------------------------------------------------------------> + |<-DLSR->| + (5.250 s) + + A 0xb710:8000 (46864.500 s) + DLSR -0x0005:4000 ( 5.250 s) + LSR -0xb705:2000 (46853.125 s) + ------------------------------- + delay 0x0006:2000 ( 6.125 s) + + Figure 2: Example for round-trip time computation + + + + + + + + + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 41] + +RFC 3550 RTP July 2003 + + +6.4.2 RR: Receiver Report RTCP Packet + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P| RC | PT=RR=201 | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SSRC of packet sender | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report | SSRC_1 (SSRC of first source) | +block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 1 | fraction lost | cumulative number of packets lost | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | extended highest sequence number received | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | interarrival jitter | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | last SR (LSR) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | delay since last SR (DLSR) | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +report | SSRC_2 (SSRC of second source) | +block +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 2 : ... : + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + | profile-specific extensions | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The format of the receiver report (RR) packet is the same as that of + the SR packet except that the packet type field contains the constant + 201 and the five words of sender information are omitted (these are + the NTP and RTP timestamps and sender's packet and octet counts). + The remaining fields have the same meaning as for the SR packet. + + An empty RR packet (RC = 0) MUST be put at the head of a compound + RTCP packet when there is no data transmission or reception to + report. + +6.4.3 Extending the Sender and Receiver Reports + + A profile SHOULD define profile-specific extensions to the sender + report and receiver report if there is additional information that + needs to be reported regularly about the sender or receivers. This + method SHOULD be used in preference to defining another RTCP packet + type because it requires less overhead: + + o fewer octets in the packet (no RTCP header or SSRC field); + + + + +Schulzrinne, et al. Standards Track [Page 42] + +RFC 3550 RTP July 2003 + + + o simpler and faster parsing because applications running under that + profile would be programmed to always expect the extension fields + in the directly accessible location after the reception reports. + + The extension is a fourth section in the sender- or receiver-report + packet which comes at the end after the reception report blocks, if + any. If additional sender information is required, then for sender + reports it would be included first in the extension section, but for + receiver reports it would not be present. If information about + receivers is to be included, that data SHOULD be structured as an + array of blocks parallel to the existing array of reception report + blocks; that is, the number of blocks would be indicated by the RC + field. + +6.4.4 Analyzing Sender and Receiver Reports + + It is expected that reception quality feedback will be useful not + only for the sender but also for other receivers and third-party + monitors. The sender may modify its transmissions based on the + feedback; receivers can determine whether problems are local, + regional or global; network managers may use profile-independent + monitors that receive only the RTCP packets and not the corresponding + RTP data packets to evaluate the performance of their networks for + multicast distribution. + + Cumulative counts are used in both the sender information and + receiver report blocks so that differences may be calculated between + any two reports to make measurements over both short and long time + periods, and to provide resilience against the loss of a report. The + difference between the last two reports received can be used to + estimate the recent quality of the distribution. The NTP timestamp + is included so that rates may be calculated from these differences + over the interval between two reports. Since that timestamp is + independent of the clock rate for the data encoding, it is possible + to implement encoding- and profile-independent quality monitors. + + An example calculation is the packet loss rate over the interval + between two reception reports. The difference in the cumulative + number of packets lost gives the number lost during that interval. + The difference in the extended last sequence numbers received gives + the number of packets expected during the interval. The ratio of + these two is the packet loss fraction over the interval. This ratio + should equal the fraction lost field if the two reports are + consecutive, but otherwise it may not. The loss rate per second can + be obtained by dividing the loss fraction by the difference in NTP + timestamps, expressed in seconds. The number of packets received is + the number of packets expected minus the number lost. The number of + + + + +Schulzrinne, et al. Standards Track [Page 43] + +RFC 3550 RTP July 2003 + + + packets expected may also be used to judge the statistical validity + of any loss estimates. For example, 1 out of 5 packets lost has a + lower significance than 200 out of 1000. + + From the sender information, a third-party monitor can calculate the + average payload data rate and the average packet rate over an + interval without receiving the data. Taking the ratio of the two + gives the average payload size. If it can be assumed that packet + loss is independent of packet size, then the number of packets + received by a particular receiver times the average payload size (or + the corresponding packet size) gives the apparent throughput + available to that receiver. + + In addition to the cumulative counts which allow long-term packet + loss measurements using differences between reports, the fraction + lost field provides a short-term measurement from a single report. + This becomes more important as the size of a session scales up enough + that reception state information might not be kept for all receivers + or the interval between reports becomes long enough that only one + report might have been received from a particular receiver. + + The interarrival jitter field provides a second short-term measure of + network congestion. Packet loss tracks persistent congestion while + the jitter measure tracks transient congestion. The jitter measure + may indicate congestion before it leads to packet loss. The + interarrival jitter field is only a snapshot of the jitter at the + time of a report and is not intended to be taken quantitatively. + Rather, it is intended for comparison across a number of reports from + one receiver over time or from multiple receivers, e.g., within a + single network, at the same time. To allow comparison across + receivers, it is important the the jitter be calculated according to + the same formula by all receivers. + + Because the jitter calculation is based on the RTP timestamp which + represents the instant when the first data in the packet was sampled, + any variation in the delay between that sampling instant and the time + the packet is transmitted will affect the resulting jitter that is + calculated. Such a variation in delay would occur for audio packets + of varying duration. It will also occur for video encodings because + the timestamp is the same for all the packets of one frame but those + packets are not all transmitted at the same time. The variation in + delay until transmission does reduce the accuracy of the jitter + calculation as a measure of the behavior of the network by itself, + but it is appropriate to include considering that the receiver buffer + must accommodate it. When the jitter calculation is used as a + comparative measure, the (constant) component due to variation in + delay until transmission subtracts out so that a change in the + + + + +Schulzrinne, et al. Standards Track [Page 44] + +RFC 3550 RTP July 2003 + + + network jitter component can then be observed unless it is relatively + small. If the change is small, then it is likely to be + inconsequential. + +6.5 SDES: Source Description RTCP Packet + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +header |V=2|P| SC | PT=SDES=202 | length | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +chunk | SSRC/CSRC_1 | + 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SDES items | + | ... | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +chunk | SSRC/CSRC_2 | + 2 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SDES items | + | ... | + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + + The SDES packet is a three-level structure composed of a header and + zero or more chunks, each of which is composed of items describing + the source identified in that chunk. The items are described + individually in subsequent sections. + + version (V), padding (P), length: + As described for the SR packet (see Section 6.4.1). + + packet type (PT): 8 bits + Contains the constant 202 to identify this as an RTCP SDES packet. + + source count (SC): 5 bits + The number of SSRC/CSRC chunks contained in this SDES packet. A + value of zero is valid but useless. + + Each chunk consists of an SSRC/CSRC identifier followed by a list of + zero or more items, which carry information about the SSRC/CSRC. + Each chunk starts on a 32-bit boundary. Each item consists of an 8- + bit type field, an 8-bit octet count describing the length of the + text (thus, not including this two-octet header), and the text + itself. Note that the text can be no longer than 255 octets, but + this is consistent with the need to limit RTCP bandwidth consumption. + + + + + + + +Schulzrinne, et al. Standards Track [Page 45] + +RFC 3550 RTP July 2003 + + + The text is encoded according to the UTF-8 encoding specified in RFC + 2279 [5]. US-ASCII is a subset of this encoding and requires no + additional encoding. The presence of multi-octet encodings is + indicated by setting the most significant bit of a character to a + value of one. + + Items are contiguous, i.e., items are not individually padded to a + 32-bit boundary. Text is not null terminated because some multi- + octet encodings include null octets. The list of items in each chunk + MUST be terminated by one or more null octets, the first of which is + interpreted as an item type of zero to denote the end of the list. + No length octet follows the null item type octet, but additional null + octets MUST be included if needed to pad until the next 32-bit + boundary. Note that this padding is separate from that indicated by + the P bit in the RTCP header. A chunk with zero items (four null + octets) is valid but useless. + + End systems send one SDES packet containing their own source + identifier (the same as the SSRC in the fixed RTP header). A mixer + sends one SDES packet containing a chunk for each contributing source + from which it is receiving SDES information, or multiple complete + SDES packets in the format above if there are more than 31 such + sources (see Section 7). + + The SDES items currently defined are described in the next sections. + Only the CNAME item is mandatory. Some items shown here may be + useful only for particular profiles, but the item types are all + assigned from one common space to promote shared use and to simplify + profile-independent applications. Additional items may be defined in + a profile by registering the type numbers with IANA as described in + Section 15. + +6.5.1 CNAME: Canonical End-Point Identifier SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | CNAME=1 | length | user and domain name ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The CNAME identifier has the following properties: + + o Because the randomly allocated SSRC identifier may change if a + conflict is discovered or if a program is restarted, the CNAME + item MUST be included to provide the binding from the SSRC + identifier to an identifier for the source (sender or receiver) + that remains constant. + + + + +Schulzrinne, et al. Standards Track [Page 46] + +RFC 3550 RTP July 2003 + + + o Like the SSRC identifier, the CNAME identifier SHOULD also be + unique among all participants within one RTP session. + + o To provide a binding across multiple media tools used by one + participant in a set of related RTP sessions, the CNAME SHOULD be + fixed for that participant. + + o To facilitate third-party monitoring, the CNAME SHOULD be suitable + for either a program or a person to locate the source. + + Therefore, the CNAME SHOULD be derived algorithmically and not + entered manually, when possible. To meet these requirements, the + following format SHOULD be used unless a profile specifies an + alternate syntax or semantics. The CNAME item SHOULD have the format + "user@host", or "host" if a user name is not available as on single- + user systems. For both formats, "host" is either the fully qualified + domain name of the host from which the real-time data originates, + formatted according to the rules specified in RFC 1034 [6], RFC 1035 + [7] and Section 2.1 of RFC 1123 [8]; or the standard ASCII + representation of the host's numeric address on the interface used + for the RTP communication. For example, the standard ASCII + representation of an IP Version 4 address is "dotted decimal", also + known as dotted quad, and for IP Version 6, addresses are textually + represented as groups of hexadecimal digits separated by colons (with + variations as detailed in RFC 3513 [23]). Other address types are + expected to have ASCII representations that are mutually unique. The + fully qualified domain name is more convenient for a human observer + and may avoid the need to send a NAME item in addition, but it may be + difficult or impossible to obtain reliably in some operating + environments. Applications that may be run in such environments + SHOULD use the ASCII representation of the address instead. + + Examples are "doe@sleepy.example.com", "doe@192.0.2.89" or + "doe@2201:056D::112E:144A:1E24" for a multi-user system. On a system + with no user name, examples would be "sleepy.example.com", + "192.0.2.89" or "2201:056D::112E:144A:1E24". + + The user name SHOULD be in a form that a program such as "finger" or + "talk" could use, i.e., it typically is the login name rather than + the personal name. The host name is not necessarily identical to the + one in the participant's electronic mail address. + + This syntax will not provide unique identifiers for each source if an + application permits a user to generate multiple sources from one + host. Such an application would have to rely on the SSRC to further + identify the source, or the profile for that application would have + to specify additional syntax for the CNAME identifier. + + + + +Schulzrinne, et al. Standards Track [Page 47] + +RFC 3550 RTP July 2003 + + + If each application creates its CNAME independently, the resulting + CNAMEs may not be identical as would be required to provide a binding + across multiple media tools belonging to one participant in a set of + related RTP sessions. If cross-media binding is required, it may be + necessary for the CNAME of each tool to be externally configured with + the same value by a coordination tool. + + Application writers should be aware that private network address + assignments such as the Net-10 assignment proposed in RFC 1918 [24] + may create network addresses that are not globally unique. This + would lead to non-unique CNAMEs if hosts with private addresses and + no direct IP connectivity to the public Internet have their RTP + packets forwarded to the public Internet through an RTP-level + translator. (See also RFC 1627 [25].) To handle this case, + applications MAY provide a means to configure a unique CNAME, but the + burden is on the translator to translate CNAMEs from private + addresses to public addresses if necessary to keep private addresses + from being exposed. + +6.5.2 NAME: User Name SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | NAME=2 | length | common name of source ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + This is the real name used to describe the source, e.g., "John Doe, + Bit Recycler". It may be in any form desired by the user. For + applications such as conferencing, this form of name may be the most + desirable for display in participant lists, and therefore might be + sent most frequently of those items other than CNAME. Profiles MAY + establish such priorities. The NAME value is expected to remain + constant at least for the duration of a session. It SHOULD NOT be + relied upon to be unique among all participants in the session. + +6.5.3 EMAIL: Electronic Mail Address SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | EMAIL=3 | length | email address of source ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The email address is formatted according to RFC 2822 [9], for + example, "John.Doe@example.com". The EMAIL value is expected to + remain constant for the duration of a session. + + + + +Schulzrinne, et al. Standards Track [Page 48] + +RFC 3550 RTP July 2003 + + +6.5.4 PHONE: Phone Number SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | PHONE=4 | length | phone number of source ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The phone number SHOULD be formatted with the plus sign replacing the + international access code. For example, "+1 908 555 1212" for a + number in the United States. + +6.5.5 LOC: Geographic User Location SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | LOC=5 | length | geographic location of site ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Depending on the application, different degrees of detail are + appropriate for this item. For conference applications, a string + like "Murray Hill, New Jersey" may be sufficient, while, for an + active badge system, strings like "Room 2A244, AT&T BL MH" might be + appropriate. The degree of detail is left to the implementation + and/or user, but format and content MAY be prescribed by a profile. + The LOC value is expected to remain constant for the duration of a + session, except for mobile hosts. + +6.5.6 TOOL: Application or Tool Name SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | TOOL=6 | length |name/version of source appl. ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + A string giving the name and possibly version of the application + generating the stream, e.g., "videotool 1.2". This information may + be useful for debugging purposes and is similar to the Mailer or + Mail-System-Version SMTP headers. The TOOL value is expected to + remain constant for the duration of the session. + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 49] + +RFC 3550 RTP July 2003 + + +6.5.7 NOTE: Notice/Status SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | NOTE=7 | length | note about the source ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The following semantics are suggested for this item, but these or + other semantics MAY be explicitly defined by a profile. The NOTE + item is intended for transient messages describing the current state + of the source, e.g., "on the phone, can't talk". Or, during a + seminar, this item might be used to convey the title of the talk. It + should be used only to carry exceptional information and SHOULD NOT + be included routinely by all participants because this would slow + down the rate at which reception reports and CNAME are sent, thus + impairing the performance of the protocol. In particular, it SHOULD + NOT be included as an item in a user's configuration file nor + automatically generated as in a quote-of-the-day. + + Since the NOTE item may be important to display while it is active, + the rate at which other non-CNAME items such as NAME are transmitted + might be reduced so that the NOTE item can take that part of the RTCP + bandwidth. When the transient message becomes inactive, the NOTE + item SHOULD continue to be transmitted a few times at the same + repetition rate but with a string of length zero to signal the + receivers. However, receivers SHOULD also consider the NOTE item + inactive if it is not received for a small multiple of the repetition + rate, or perhaps 20-30 RTCP intervals. + +6.5.8 PRIV: Private Extensions SDES Item + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | PRIV=8 | length | prefix length |prefix string... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + ... | value string ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + This item is used to define experimental or application-specific SDES + extensions. The item contains a prefix consisting of a length-string + pair, followed by the value string filling the remainder of the item + and carrying the desired information. The prefix length field is 8 + bits long. The prefix string is a name chosen by the person defining + the PRIV item to be unique with respect to other PRIV items this + application might receive. The application creator might choose to + use the application name plus an additional subtype identification if + + + +Schulzrinne, et al. Standards Track [Page 50] + +RFC 3550 RTP July 2003 + + + needed. Alternatively, it is RECOMMENDED that others choose a name + based on the entity they represent, then coordinate the use of the + name within that entity. + + Note that the prefix consumes some space within the item's total + length of 255 octets, so the prefix should be kept as short as + possible. This facility and the constrained RTCP bandwidth SHOULD + NOT be overloaded; it is not intended to satisfy all the control + communication requirements of all applications. + + SDES PRIV prefixes will not be registered by IANA. If some form of + the PRIV item proves to be of general utility, it SHOULD instead be + assigned a regular SDES item type registered with IANA so that no + prefix is required. This simplifies use and increases transmission + efficiency. + +6.6 BYE: Goodbye RTCP Packet + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |V=2|P| SC | PT=BYE=203 | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SSRC/CSRC | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + : ... : + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ +(opt) | length | reason for leaving ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The BYE packet indicates that one or more sources are no longer + active. + + version (V), padding (P), length: + As described for the SR packet (see Section 6.4.1). + + packet type (PT): 8 bits + Contains the constant 203 to identify this as an RTCP BYE packet. + + source count (SC): 5 bits + The number of SSRC/CSRC identifiers included in this BYE packet. + A count value of zero is valid, but useless. + + The rules for when a BYE packet should be sent are specified in + Sections 6.3.7 and 8.2. + + + + + + +Schulzrinne, et al. Standards Track [Page 51] + +RFC 3550 RTP July 2003 + + + If a BYE packet is received by a mixer, the mixer SHOULD forward the + BYE packet with the SSRC/CSRC identifier(s) unchanged. If a mixer + shuts down, it SHOULD send a BYE packet listing all contributing + sources it handles, as well as its own SSRC identifier. Optionally, + the BYE packet MAY include an 8-bit octet count followed by that many + octets of text indicating the reason for leaving, e.g., "camera + malfunction" or "RTP loop detected". The string has the same + encoding as that described for SDES. If the string fills the packet + to the next 32-bit boundary, the string is not null terminated. If + not, the BYE packet MUST be padded with null octets to the next 32- + bit boundary. This padding is separate from that indicated by the P + bit in the RTCP header. + +6.7 APP: Application-Defined RTCP Packet + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |V=2|P| subtype | PT=APP=204 | length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | SSRC/CSRC | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | name (ASCII) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | application-dependent data ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The APP packet is intended for experimental use as new applications + and new features are developed, without requiring packet type value + registration. APP packets with unrecognized names SHOULD be ignored. + After testing and if wider use is justified, it is RECOMMENDED that + each APP packet be redefined without the subtype and name fields and + registered with IANA using an RTCP packet type. + + version (V), padding (P), length: + As described for the SR packet (see Section 6.4.1). + + subtype: 5 bits + May be used as a subtype to allow a set of APP packets to be + defined under one unique name, or for any application-dependent + data. + + packet type (PT): 8 bits + Contains the constant 204 to identify this as an RTCP APP packet. + + + + + + + +Schulzrinne, et al. Standards Track [Page 52] + +RFC 3550 RTP July 2003 + + + name: 4 octets + A name chosen by the person defining the set of APP packets to be + unique with respect to other APP packets this application might + receive. The application creator might choose to use the + application name, and then coordinate the allocation of subtype + values to others who want to define new packet types for the + application. Alternatively, it is RECOMMENDED that others choose + a name based on the entity they represent, then coordinate the use + of the name within that entity. The name is interpreted as a + sequence of four ASCII characters, with uppercase and lowercase + characters treated as distinct. + + application-dependent data: variable length + Application-dependent data may or may not appear in an APP packet. + It is interpreted by the application and not RTP itself. It MUST + be a multiple of 32 bits long. + +7. RTP Translators and Mixers + + In addition to end systems, RTP supports the notion of "translators" + and "mixers", which could be considered as "intermediate systems" at + the RTP level. Although this support adds some complexity to the + protocol, the need for these functions has been clearly established + by experiments with multicast audio and video applications in the + Internet. Example uses of translators and mixers given in Section + 2.3 stem from the presence of firewalls and low bandwidth + connections, both of which are likely to remain. + +7.1 General Description + + An RTP translator/mixer connects two or more transport-level + "clouds". Typically, each cloud is defined by a common network and + transport protocol (e.g., IP/UDP) plus a multicast address and + transport level destination port or a pair of unicast addresses and + ports. (Network-level protocol translators, such as IP version 4 to + IP version 6, may be present within a cloud invisibly to RTP.) One + system may serve as a translator or mixer for a number of RTP + sessions, but each is considered a logically separate entity. + + In order to avoid creating a loop when a translator or mixer is + installed, the following rules MUST be observed: + + o Each of the clouds connected by translators and mixers + participating in one RTP session either MUST be distinct from all + the others in at least one of these parameters (protocol, address, + port), or MUST be isolated at the network level from the others. + + + + + +Schulzrinne, et al. Standards Track [Page 53] + +RFC 3550 RTP July 2003 + + + o A derivative of the first rule is that there MUST NOT be multiple + translators or mixers connected in parallel unless by some + arrangement they partition the set of sources to be forwarded. + + Similarly, all RTP end systems that can communicate through one or + more RTP translators or mixers share the same SSRC space, that is, + the SSRC identifiers MUST be unique among all these end systems. + Section 8.2 describes the collision resolution algorithm by which + SSRC identifiers are kept unique and loops are detected. + + There may be many varieties of translators and mixers designed for + different purposes and applications. Some examples are to add or + remove encryption, change the encoding of the data or the underlying + protocols, or replicate between a multicast address and one or more + unicast addresses. The distinction between translators and mixers is + that a translator passes through the data streams from different + sources separately, whereas a mixer combines them to form one new + stream: + + Translator: Forwards RTP packets with their SSRC identifier + intact; this makes it possible for receivers to identify + individual sources even though packets from all the sources pass + through the same translator and carry the translator's network + source address. Some kinds of translators will pass through the + data untouched, but others MAY change the encoding of the data and + thus the RTP data payload type and timestamp. If multiple data + packets are re-encoded into one, or vice versa, a translator MUST + assign new sequence numbers to the outgoing packets. Losses in + the incoming packet stream may induce corresponding gaps in the + outgoing sequence numbers. Receivers cannot detect the presence + of a translator unless they know by some other means what payload + type or transport address was used by the original source. + + Mixer: Receives streams of RTP data packets from one or more + sources, possibly changes the data format, combines the streams in + some manner and then forwards the combined stream. Since the + timing among multiple input sources will not generally be + synchronized, the mixer will make timing adjustments among the + streams and generate its own timing for the combined stream, so it + is the synchronization source. Thus, all data packets forwarded + by a mixer MUST be marked with the mixer's own SSRC identifier. + In order to preserve the identity of the original sources + contributing to the mixed packet, the mixer SHOULD insert their + SSRC identifiers into the CSRC identifier list following the fixed + RTP header of the packet. A mixer that is also itself a + contributing source for some packet SHOULD explicitly include its + own SSRC identifier in the CSRC list for that packet. + + + + +Schulzrinne, et al. Standards Track [Page 54] + +RFC 3550 RTP July 2003 + + + For some applications, it MAY be acceptable for a mixer not to + identify sources in the CSRC list. However, this introduces the + danger that loops involving those sources could not be detected. + + The advantage of a mixer over a translator for applications like + audio is that the output bandwidth is limited to that of one source + even when multiple sources are active on the input side. This may be + important for low-bandwidth links. The disadvantage is that + receivers on the output side don't have any control over which + sources are passed through or muted, unless some mechanism is + implemented for remote control of the mixer. The regeneration of + synchronization information by mixers also means that receivers can't + do inter-media synchronization of the original streams. A multi- + media mixer could do it. + + [E1] [E6] + | | + E1:17 | E6:15 | + | | E6:15 + V M1:48 (1,17) M1:48 (1,17) V M1:48 (1,17) + (M1)------------->----------------->-------------->[E7] + ^ ^ E4:47 ^ E4:47 + E2:1 | E4:47 | | M3:89 (64,45) + | | | + [E2] [E4] M3:89 (64,45) | + | legend: + [E3] --------->(M2)----------->(M3)------------| [End system] + E3:64 M2:12 (64) ^ (Mixer) + | E5:45 + | + [E5] source: SSRC (CSRCs) + -------------------> + + Figure 3: Sample RTP network with end systems, mixers and translators + + A collection of mixers and translators is shown in Fig. 3 to + illustrate their effect on SSRC and CSRC identifiers. In the figure, + end systems are shown as rectangles (named E), translators as + triangles (named T) and mixers as ovals (named M). The notation "M1: + 48(1,17)" designates a packet originating a mixer M1, identified by + M1's (random) SSRC value of 48 and two CSRC identifiers, 1 and 17, + copied from the SSRC identifiers of packets from E1 and E2. + +7.2 RTCP Processing in Translators + + In addition to forwarding data packets, perhaps modified, translators + and mixers MUST also process RTCP packets. In many cases, they will + take apart the compound RTCP packets received from end systems to + + + +Schulzrinne, et al. Standards Track [Page 55] + +RFC 3550 RTP July 2003 + + + aggregate SDES information and to modify the SR or RR packets. + Retransmission of this information may be triggered by the packet + arrival or by the RTCP interval timer of the translator or mixer + itself. + + A translator that does not modify the data packets, for example one + that just replicates between a multicast address and a unicast + address, MAY simply forward RTCP packets unmodified as well. A + translator that transforms the payload in some way MUST make + corresponding transformations in the SR and RR information so that it + still reflects the characteristics of the data and the reception + quality. These translators MUST NOT simply forward RTCP packets. In + general, a translator SHOULD NOT aggregate SR and RR packets from + different sources into one packet since that would reduce the + accuracy of the propagation delay measurements based on the LSR and + DLSR fields. + + SR sender information: A translator does not generate its own + sender information, but forwards the SR packets received from one + cloud to the others. The SSRC is left intact but the sender + information MUST be modified if required by the translation. If a + translator changes the data encoding, it MUST change the "sender's + byte count" field. If it also combines several data packets into + one output packet, it MUST change the "sender's packet count" + field. If it changes the timestamp frequency, it MUST change the + "RTP timestamp" field in the SR packet. + + SR/RR reception report blocks: A translator forwards reception + reports received from one cloud to the others. Note that these + flow in the direction opposite to the data. The SSRC is left + intact. If a translator combines several data packets into one + output packet, and therefore changes the sequence numbers, it MUST + make the inverse manipulation for the packet loss fields and the + "extended last sequence number" field. This may be complex. In + the extreme case, there may be no meaningful way to translate the + reception reports, so the translator MAY pass on no reception + report at all or a synthetic report based on its own reception. + The general rule is to do what makes sense for a particular + translation. + + A translator does not require an SSRC identifier of its own, but + MAY choose to allocate one for the purpose of sending reports + about what it has received. These would be sent to all the + connected clouds, each corresponding to the translation of the + data stream as sent to that cloud, since reception reports are + normally multicast to all participants. + + + + + +Schulzrinne, et al. Standards Track [Page 56] + +RFC 3550 RTP July 2003 + + + SDES: Translators typically forward without change the SDES + information they receive from one cloud to the others, but MAY, + for example, decide to filter non-CNAME SDES information if + bandwidth is limited. The CNAMEs MUST be forwarded to allow SSRC + identifier collision detection to work. A translator that + generates its own RR packets MUST send SDES CNAME information + about itself to the same clouds that it sends those RR packets. + + BYE: Translators forward BYE packets unchanged. A translator + that is about to cease forwarding packets SHOULD send a BYE packet + to each connected cloud containing all the SSRC identifiers that + were previously being forwarded to that cloud, including the + translator's own SSRC identifier if it sent reports of its own. + + APP: Translators forward APP packets unchanged. + +7.3 RTCP Processing in Mixers + + Since a mixer generates a new data stream of its own, it does not + pass through SR or RR packets at all and instead generates new + information for both sides. + + SR sender information: A mixer does not pass through sender + information from the sources it mixes because the characteristics + of the source streams are lost in the mix. As a synchronization + source, the mixer SHOULD generate its own SR packets with sender + information about the mixed data stream and send them in the same + direction as the mixed stream. + + SR/RR reception report blocks: A mixer generates its own + reception reports for sources in each cloud and sends them out + only to the same cloud. It MUST NOT send these reception reports + to the other clouds and MUST NOT forward reception reports from + one cloud to the others because the sources would not be SSRCs + there (only CSRCs). + + SDES: Mixers typically forward without change the SDES + information they receive from one cloud to the others, but MAY, + for example, decide to filter non-CNAME SDES information if + bandwidth is limited. The CNAMEs MUST be forwarded to allow SSRC + identifier collision detection to work. (An identifier in a CSRC + list generated by a mixer might collide with an SSRC identifier + generated by an end system.) A mixer MUST send SDES CNAME + information about itself to the same clouds that it sends SR or RR + packets. + + + + + + +Schulzrinne, et al. Standards Track [Page 57] + +RFC 3550 RTP July 2003 + + + Since mixers do not forward SR or RR packets, they will typically + be extracting SDES packets from a compound RTCP packet. To + minimize overhead, chunks from the SDES packets MAY be aggregated + into a single SDES packet which is then stacked on an SR or RR + packet originating from the mixer. A mixer which aggregates SDES + packets will use more RTCP bandwidth than an individual source + because the compound packets will be longer, but that is + appropriate since the mixer represents multiple sources. + Similarly, a mixer which passes through SDES packets as they are + received will be transmitting RTCP packets at higher than the + single source rate, but again that is correct since the packets + come from multiple sources. The RTCP packet rate may be different + on each side of the mixer. + + A mixer that does not insert CSRC identifiers MAY also refrain + from forwarding SDES CNAMEs. In this case, the SSRC identifier + spaces in the two clouds are independent. As mentioned earlier, + this mode of operation creates a danger that loops can't be + detected. + + BYE: Mixers MUST forward BYE packets. A mixer that is about to + cease forwarding packets SHOULD send a BYE packet to each + connected cloud containing all the SSRC identifiers that were + previously being forwarded to that cloud, including the mixer's + own SSRC identifier if it sent reports of its own. + + APP: The treatment of APP packets by mixers is application-specific. + +7.4 Cascaded Mixers + + An RTP session may involve a collection of mixers and translators as + shown in Fig. 3. If two mixers are cascaded, such as M2 and M3 in + the figure, packets received by a mixer may already have been mixed + and may include a CSRC list with multiple identifiers. The second + mixer SHOULD build the CSRC list for the outgoing packet using the + CSRC identifiers from already-mixed input packets and the SSRC + identifiers from unmixed input packets. This is shown in the output + arc from mixer M3 labeled M3:89(64,45) in the figure. As in the case + of mixers that are not cascaded, if the resulting CSRC list has more + than 15 identifiers, the remainder cannot be included. + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 58] + +RFC 3550 RTP July 2003 + + +8. SSRC Identifier Allocation and Use + + The SSRC identifier carried in the RTP header and in various fields + of RTCP packets is a random 32-bit number that is required to be + globally unique within an RTP session. It is crucial that the number + be chosen with care in order that participants on the same network or + starting at the same time are not likely to choose the same number. + + It is not sufficient to use the local network address (such as an + IPv4 address) for the identifier because the address may not be + unique. Since RTP translators and mixers enable interoperation among + multiple networks with different address spaces, the allocation + patterns for addresses within two spaces might result in a much + higher rate of collision than would occur with random allocation. + + Multiple sources running on one host would also conflict. + + It is also not sufficient to obtain an SSRC identifier simply by + calling random() without carefully initializing the state. An + example of how to generate a random identifier is presented in + Appendix A.6. + +8.1 Probability of Collision + + Since the identifiers are chosen randomly, it is possible that two or + more sources will choose the same number. Collision occurs with the + highest probability when all sources are started simultaneously, for + example when triggered automatically by some session management + event. If N is the number of sources and L the length of the + identifier (here, 32 bits), the probability that two sources + independently pick the same value can be approximated for large N + [26] as 1 - exp(-N**2 / 2**(L+1)). For N=1000, the probability is + roughly 10**-4. + + The typical collision probability is much lower than the worst-case + above. When one new source joins an RTP session in which all the + other sources already have unique identifiers, the probability of + collision is just the fraction of numbers used out of the space. + Again, if N is the number of sources and L the length of the + identifier, the probability of collision is N / 2**L. For N=1000, + the probability is roughly 2*10**-7. + + The probability of collision is further reduced by the opportunity + for a new source to receive packets from other participants before + sending its first packet (either data or control). If the new source + keeps track of the other participants (by SSRC identifier), then + + + + + +Schulzrinne, et al. Standards Track [Page 59] + +RFC 3550 RTP July 2003 + + + before transmitting its first packet the new source can verify that + its identifier does not conflict with any that have been received, or + else choose again. + +8.2 Collision Resolution and Loop Detection + + Although the probability of SSRC identifier collision is low, all RTP + implementations MUST be prepared to detect collisions and take the + appropriate actions to resolve them. If a source discovers at any + time that another source is using the same SSRC identifier as its + own, it MUST send an RTCP BYE packet for the old identifier and + choose another random one. (As explained below, this step is taken + only once in case of a loop.) If a receiver discovers that two other + sources are colliding, it MAY keep the packets from one and discard + the packets from the other when this can be detected by different + source transport addresses or CNAMEs. The two sources are expected + to resolve the collision so that the situation doesn't last. + + Because the random SSRC identifiers are kept globally unique for each + RTP session, they can also be used to detect loops that may be + introduced by mixers or translators. A loop causes duplication of + data and control information, either unmodified or possibly mixed, as + in the following examples: + + o A translator may incorrectly forward a packet to the same + multicast group from which it has received the packet, either + directly or through a chain of translators. In that case, the + same packet appears several times, originating from different + network sources. + + o Two translators incorrectly set up in parallel, i.e., with the + same multicast groups on both sides, would both forward packets + from one multicast group to the other. Unidirectional translators + would produce two copies; bidirectional translators would form a + loop. + + o A mixer can close a loop by sending to the same transport + destination upon which it receives packets, either directly or + through another mixer or translator. In this case a source might + show up both as an SSRC on a data packet and a CSRC in a mixed + data packet. + + A source may discover that its own packets are being looped, or that + packets from another source are being looped (a third-party loop). + Both loops and collisions in the random selection of a source + identifier result in packets arriving with the same SSRC identifier + but a different source transport address, which may be that of the + end system originating the packet or an intermediate system. + + + +Schulzrinne, et al. Standards Track [Page 60] + +RFC 3550 RTP July 2003 + + + Therefore, if a source changes its source transport address, it MAY + also choose a new SSRC identifier to avoid being interpreted as a + looped source. (This is not MUST because in some applications of RTP + sources may be expected to change addresses during a session.) Note + that if a translator restarts and consequently changes the source + transport address (e.g., changes the UDP source port number) on which + it forwards packets, then all those packets will appear to receivers + to be looped because the SSRC identifiers are applied by the original + source and will not change. This problem can be avoided by keeping + the source transport address fixed across restarts, but in any case + will be resolved after a timeout at the receivers. + + Loops or collisions occurring on the far side of a translator or + mixer cannot be detected using the source transport address if all + copies of the packets go through the translator or mixer, however, + collisions may still be detected when chunks from two RTCP SDES + packets contain the same SSRC identifier but different CNAMEs. + + To detect and resolve these conflicts, an RTP implementation MUST + include an algorithm similar to the one described below, though the + implementation MAY choose a different policy for which packets from + colliding third-party sources are kept. The algorithm described + below ignores packets from a new source or loop that collide with an + established source. It resolves collisions with the participant's + own SSRC identifier by sending an RTCP BYE for the old identifier and + choosing a new one. However, when the collision was induced by a + loop of the participant's own packets, the algorithm will choose a + new identifier only once and thereafter ignore packets from the + looping source transport address. This is required to avoid a flood + of BYE packets. + + This algorithm requires keeping a table indexed by the source + identifier and containing the source transport addresses from the + first RTP packet and first RTCP packet received with that identifier, + along with other state for that source. Two source transport + addresses are required since, for example, the UDP source port + numbers may be different on RTP and RTCP packets. However, it may be + assumed that the network address is the same in both source transport + addresses. + + Each SSRC or CSRC identifier received in an RTP or RTCP packet is + looked up in the source identifier table in order to process that + data or control information. The source transport address from the + packet is compared to the corresponding source transport address in + the table to detect a loop or collision if they don't match. For + control packets, each element with its own SSRC identifier, for + example an SDES chunk, requires a separate lookup. (The SSRC + identifier in a reception report block is an exception because it + + + +Schulzrinne, et al. Standards Track [Page 61] + +RFC 3550 RTP July 2003 + + + identifies a source heard by the reporter, and that SSRC identifier + is unrelated to the source transport address of the RTCP packet sent + by the reporter.) If the SSRC or CSRC is not found, a new entry is + created. These table entries are removed when an RTCP BYE packet is + received with the corresponding SSRC identifier and validated by a + matching source transport address, or after no packets have arrived + for a relatively long time (see Section 6.2.1). + + Note that if two sources on the same host are transmitting with the + same source identifier at the time a receiver begins operation, it + would be possible that the first RTP packet received came from one of + the sources while the first RTCP packet received came from the other. + This would cause the wrong RTCP information to be associated with the + RTP data, but this situation should be sufficiently rare and harmless + that it may be disregarded. + + In order to track loops of the participant's own data packets, the + implementation MUST also keep a separate list of source transport + addresses (not identifiers) that have been found to be conflicting. + As in the source identifier table, two source transport addresses + MUST be kept to separately track conflicting RTP and RTCP packets. + Note that the conflicting address list should be short, usually + empty. Each element in this list stores the source addresses plus + the time when the most recent conflicting packet was received. An + element MAY be removed from the list when no conflicting packet has + arrived from that source for a time on the order of 10 RTCP report + intervals (see Section 6.2). + + For the algorithm as shown, it is assumed that the participant's own + source identifier and state are included in the source identifier + table. The algorithm could be restructured to first make a separate + comparison against the participant's own source identifier. + + if (SSRC or CSRC identifier is not found in the source + identifier table) { + create a new entry storing the data or control source + transport address, the SSRC or CSRC and other state; + } + + /* Identifier is found in the table */ + + else if (table entry was created on receipt of a control packet + and this is the first data packet or vice versa) { + store the source transport address from this packet; + } + else if (source transport address from the packet does not match + the one saved in the table entry for this identifier) { + + + + +Schulzrinne, et al. Standards Track [Page 62] + +RFC 3550 RTP July 2003 + + + /* An identifier collision or a loop is indicated */ + + if (source identifier is not the participant's own) { + /* OPTIONAL error counter step */ + if (source identifier is from an RTCP SDES chunk + containing a CNAME item that differs from the CNAME + in the table entry) { + count a third-party collision; + } else { + count a third-party loop; + } + abort processing of data packet or control element; + /* MAY choose a different policy to keep new source */ + } + + /* A collision or loop of the participant's own packets */ + + else if (source transport address is found in the list of + conflicting data or control source transport + addresses) { + /* OPTIONAL error counter step */ + if (source identifier is not from an RTCP SDES chunk + containing a CNAME item or CNAME is the + participant's own) { + count occurrence of own traffic looped; + } + mark current time in conflicting address list entry; + abort processing of data packet or control element; + } + + /* New collision, change SSRC identifier */ + + else { + log occurrence of a collision; + create a new entry in the conflicting data or control + source transport address list and mark current time; + send an RTCP BYE packet with the old SSRC identifier; + choose a new SSRC identifier; + create a new entry in the source identifier table with + the old SSRC plus the source transport address from + the data or control packet being processed; + } + } + + In this algorithm, packets from a newly conflicting source address + will be ignored and packets from the original source address will be + kept. If no packets arrive from the original source for an extended + period, the table entry will be timed out and the new source will be + + + +Schulzrinne, et al. Standards Track [Page 63] + +RFC 3550 RTP July 2003 + + + able to take over. This might occur if the original source detects + the collision and moves to a new source identifier, but in the usual + case an RTCP BYE packet will be received from the original source to + delete the state without having to wait for a timeout. + + If the original source address was received through a mixer (i.e., + learned as a CSRC) and later the same source is received directly, + the receiver may be well advised to switch to the new source address + unless other sources in the mix would be lost. Furthermore, for + applications such as telephony in which some sources such as mobile + entities may change addresses during the course of an RTP session, + the RTP implementation SHOULD modify the collision detection + algorithm to accept packets from the new source transport address. + To guard against flip-flopping between addresses if a genuine + collision does occur, the algorithm SHOULD include some means to + detect this case and avoid switching. + + When a new SSRC identifier is chosen due to a collision, the + candidate identifier SHOULD first be looked up in the source + identifier table to see if it was already in use by some other + source. If so, another candidate MUST be generated and the process + repeated. + + A loop of data packets to a multicast destination can cause severe + network flooding. All mixers and translators MUST implement a loop + detection algorithm like the one here so that they can break loops. + This should limit the excess traffic to no more than one duplicate + copy of the original traffic, which may allow the session to continue + so that the cause of the loop can be found and fixed. However, in + extreme cases where a mixer or translator does not properly break the + loop and high traffic levels result, it may be necessary for end + systems to cease transmitting data or control packets entirely. This + decision may depend upon the application. An error condition SHOULD + be indicated as appropriate. Transmission MAY be attempted again + periodically after a long, random time (on the order of minutes). + +8.3 Use with Layered Encodings + + For layered encodings transmitted on separate RTP sessions (see + Section 2.4), a single SSRC identifier space SHOULD be used across + the sessions of all layers and the core (base) layer SHOULD be used + for SSRC identifier allocation and collision resolution. When a + source discovers that it has collided, it transmits an RTCP BYE + packet on only the base layer but changes the SSRC identifier to the + new value in all layers. + + + + + + +Schulzrinne, et al. Standards Track [Page 64] + +RFC 3550 RTP July 2003 + + +9. Security + + Lower layer protocols may eventually provide all the security + services that may be desired for applications of RTP, including + authentication, integrity, and confidentiality. These services have + been specified for IP in [27]. Since the initial audio and video + applications using RTP needed a confidentiality service before such + services were available for the IP layer, the confidentiality service + described in the next section was defined for use with RTP and RTCP. + That description is included here to codify existing practice. New + applications of RTP MAY implement this RTP-specific confidentiality + service for backward compatibility, and/or they MAY implement + alternative security services. The overhead on the RTP protocol for + this confidentiality service is low, so the penalty will be minimal + if this service is obsoleted by other services in the future. + + Alternatively, other services, other implementations of services and + other algorithms may be defined for RTP in the future. In + particular, an RTP profile called Secure Real-time Transport Protocol + (SRTP) [28] is being developed to provide confidentiality of the RTP + payload while leaving the RTP header in the clear so that link-level + header compression algorithms can still operate. It is expected that + SRTP will be the correct choice for many applications. SRTP is based + on the Advanced Encryption Standard (AES) and provides stronger + security than the service described here. No claim is made that the + methods presented here are appropriate for a particular security + need. A profile may specify which services and algorithms should be + offered by applications, and may provide guidance as to their + appropriate use. + + Key distribution and certificates are outside the scope of this + document. + +9.1 Confidentiality + + Confidentiality means that only the intended receiver(s) can decode + the received packets; for others, the packet contains no useful + information. Confidentiality of the content is achieved by + encryption. + + When it is desired to encrypt RTP or RTCP according to the method + specified in this section, all the octets that will be encapsulated + for transmission in a single lower-layer packet are encrypted as a + unit. For RTCP, a 32-bit random number redrawn for each unit MUST be + prepended to the unit before encryption. For RTP, no prefix is + prepended; instead, the sequence number and timestamp fields are + initialized with random offsets. This is considered to be a weak + + + + +Schulzrinne, et al. Standards Track [Page 65] + +RFC 3550 RTP July 2003 + + + initialization vector (IV) because of poor randomness properties. In + addition, if the subsequent field, the SSRC, can be manipulated by an + enemy, there is further weakness of the encryption method. + + For RTCP, an implementation MAY segregate the individual RTCP packets + in a compound RTCP packet into two separate compound RTCP packets, + one to be encrypted and one to be sent in the clear. For example, + SDES information might be encrypted while reception reports were sent + in the clear to accommodate third-party monitors that are not privy + to the encryption key. In this example, depicted in Fig. 4, the SDES + information MUST be appended to an RR packet with no reports (and the + random number) to satisfy the requirement that all compound RTCP + packets begin with an SR or RR packet. The SDES CNAME item is + required in either the encrypted or unencrypted packet, but not both. + The same SDES information SHOULD NOT be carried in both packets as + this may compromise the encryption. + + UDP packet UDP packet + ----------------------------- ------------------------------ + [random][RR][SDES #CNAME ...] [SR #senderinfo #site1 #site2] + ----------------------------- ------------------------------ + encrypted not encrypted + + #: SSRC identifier + + Figure 4: Encrypted and non-encrypted RTCP packets + + The presence of encryption and the use of the correct key are + confirmed by the receiver through header or payload validity checks. + Examples of such validity checks for RTP and RTCP headers are given + in Appendices A.1 and A.2. + + To be consistent with existing implementations of the initial + specification of RTP in RFC 1889, the default encryption algorithm is + the Data Encryption Standard (DES) algorithm in cipher block chaining + (CBC) mode, as described in Section 1.1 of RFC 1423 [29], except that + padding to a multiple of 8 octets is indicated as described for the P + bit in Section 5.1. The initialization vector is zero because random + values are supplied in the RTP header or by the random prefix for + compound RTCP packets. For details on the use of CBC initialization + vectors, see [30]. + + Implementations that support the encryption method specified here + SHOULD always support the DES algorithm in CBC mode as the default + cipher for this method to maximize interoperability. This method was + chosen because it has been demonstrated to be easy and practical to + use in experimental audio and video tools in operation on the + Internet. However, DES has since been found to be too easily broken. + + + +Schulzrinne, et al. Standards Track [Page 66] + +RFC 3550 RTP July 2003 + + + It is RECOMMENDED that stronger encryption algorithms such as + Triple-DES be used in place of the default algorithm. Furthermore, + secure CBC mode requires that the first block of each packet be XORed + with a random, independent IV of the same size as the cipher's block + size. For RTCP, this is (partially) achieved by prepending each + packet with a 32-bit random number, independently chosen for each + packet. For RTP, the timestamp and sequence number start from random + values, but consecutive packets will not be independently randomized. + It should be noted that the randomness in both cases (RTP and RTCP) + is limited. High-security applications SHOULD consider other, more + conventional, protection means. Other encryption algorithms MAY be + specified dynamically for a session by non-RTP means. In particular, + the SRTP profile [28] based on AES is being developed to take into + account known plaintext and CBC plaintext manipulation concerns, and + will be the correct choice in the future. + + As an alternative to encryption at the IP level or at the RTP level + as described above, profiles MAY define additional payload types for + encrypted encodings. Those encodings MUST specify how padding and + other aspects of the encryption are to be handled. This method + allows encrypting only the data while leaving the headers in the + clear for applications where that is desired. It may be particularly + useful for hardware devices that will handle both decryption and + decoding. It is also valuable for applications where link-level + compression of RTP and lower-layer headers is desired and + confidentiality of the payload (but not addresses) is sufficient + since encryption of the headers precludes compression. + +9.2 Authentication and Message Integrity + + Authentication and message integrity services are not defined at the + RTP level since these services would not be directly feasible without + a key management infrastructure. It is expected that authentication + and integrity services will be provided by lower layer protocols. + +10. Congestion Control + + All transport protocols used on the Internet need to address + congestion control in some way [31]. RTP is not an exception, but + because the data transported over RTP is often inelastic (generated + at a fixed or controlled rate), the means to control congestion in + RTP may be quite different from those for other transport protocols + such as TCP. In one sense, inelasticity reduces the risk of + congestion because the RTP stream will not expand to consume all + available bandwidth as a TCP stream can. However, inelasticity also + means that the RTP stream cannot arbitrarily reduce its load on the + network to eliminate congestion when it occurs. + + + + +Schulzrinne, et al. Standards Track [Page 67] + +RFC 3550 RTP July 2003 + + + Since RTP may be used for a wide variety of applications in many + different contexts, there is no single congestion control mechanism + that will work for all. Therefore, congestion control SHOULD be + defined in each RTP profile as appropriate. For some profiles, it + may be sufficient to include an applicability statement restricting + the use of that profile to environments where congestion is avoided + by engineering. For other profiles, specific methods such as data + rate adaptation based on RTCP feedback may be required. + +11. RTP over Network and Transport Protocols + + This section describes issues specific to carrying RTP packets within + particular network and transport protocols. The following rules + apply unless superseded by protocol-specific definitions outside this + specification. + + RTP relies on the underlying protocol(s) to provide demultiplexing of + RTP data and RTCP control streams. For UDP and similar protocols, + RTP SHOULD use an even destination port number and the corresponding + RTCP stream SHOULD use the next higher (odd) destination port number. + For applications that take a single port number as a parameter and + derive the RTP and RTCP port pair from that number, if an odd number + is supplied then the application SHOULD replace that number with the + next lower (even) number to use as the base of the port pair. For + applications in which the RTP and RTCP destination port numbers are + specified via explicit, separate parameters (using a signaling + protocol or other means), the application MAY disregard the + restrictions that the port numbers be even/odd and consecutive + although the use of an even/odd port pair is still encouraged. The + RTP and RTCP port numbers MUST NOT be the same since RTP relies on + the port numbers to demultiplex the RTP data and RTCP control + streams. + + In a unicast session, both participants need to identify a port pair + for receiving RTP and RTCP packets. Both participants MAY use the + same port pair. A participant MUST NOT assume that the source port + of the incoming RTP or RTCP packet can be used as the destination + port for outgoing RTP or RTCP packets. When RTP data packets are + being sent in both directions, each participant's RTCP SR packets + MUST be sent to the port that the other participant has specified for + reception of RTCP. The RTCP SR packets combine sender information + for the outgoing data plus reception report information for the + incoming data. If a side is not actively sending data (see Section + 6.4), an RTCP RR packet is sent instead. + + It is RECOMMENDED that layered encoding applications (see Section + 2.4) use a set of contiguous port numbers. The port numbers MUST be + distinct because of a widespread deficiency in existing operating + + + +Schulzrinne, et al. Standards Track [Page 68] + +RFC 3550 RTP July 2003 + + + systems that prevents use of the same port with multiple multicast + addresses, and for unicast, there is only one permissible address. + Thus for layer n, the data port is P + 2n, and the control port is P + + 2n + 1. When IP multicast is used, the addresses MUST also be + distinct because multicast routing and group membership are managed + on an address granularity. However, allocation of contiguous IP + multicast addresses cannot be assumed because some groups may require + different scopes and may therefore be allocated from different + address ranges. + + The previous paragraph conflicts with the SDP specification, RFC 2327 + [15], which says that it is illegal for both multiple addresses and + multiple ports to be specified in the same session description + because the association of addresses with ports could be ambiguous. + It is intended that this restriction will be relaxed in a revision of + RFC 2327 to allow an equal number of addresses and ports to be + specified with a one-to-one mapping implied. + + RTP data packets contain no length field or other delineation, + therefore RTP relies on the underlying protocol(s) to provide a + length indication. The maximum length of RTP packets is limited only + by the underlying protocols. + + If RTP packets are to be carried in an underlying protocol that + provides the abstraction of a continuous octet stream rather than + messages (packets), an encapsulation of the RTP packets MUST be + defined to provide a framing mechanism. Framing is also needed if + the underlying protocol may contain padding so that the extent of the + RTP payload cannot be determined. The framing mechanism is not + defined here. + + A profile MAY specify a framing method to be used even when RTP is + carried in protocols that do provide framing in order to allow + carrying several RTP packets in one lower-layer protocol data unit, + such as a UDP packet. Carrying several RTP packets in one network or + transport packet reduces header overhead and may simplify + synchronization between different streams. + +12. Summary of Protocol Constants + + This section contains a summary listing of the constants defined in + this specification. + + The RTP payload type (PT) constants are defined in profiles rather + than this document. However, the octet of the RTP header which + contains the marker bit(s) and payload type MUST avoid the reserved + values 200 and 201 (decimal) to distinguish RTP packets from the RTCP + SR and RR packet types for the header validation procedure described + + + +Schulzrinne, et al. Standards Track [Page 69] + +RFC 3550 RTP July 2003 + + + in Appendix A.1. For the standard definition of one marker bit and a + 7-bit payload type field as shown in this specification, this + restriction means that payload types 72 and 73 are reserved. + +12.1 RTCP Packet Types + + abbrev. name value + SR sender report 200 + RR receiver report 201 + SDES source description 202 + BYE goodbye 203 + APP application-defined 204 + + These type values were chosen in the range 200-204 for improved + header validity checking of RTCP packets compared to RTP packets or + other unrelated packets. When the RTCP packet type field is compared + to the corresponding octet of the RTP header, this range corresponds + to the marker bit being 1 (which it usually is not in data packets) + and to the high bit of the standard payload type field being 1 (since + the static payload types are typically defined in the low half). + This range was also chosen to be some distance numerically from 0 and + 255 since all-zeros and all-ones are common data patterns. + + Since all compound RTCP packets MUST begin with SR or RR, these codes + were chosen as an even/odd pair to allow the RTCP validity check to + test the maximum number of bits with mask and value. + + Additional RTCP packet types may be registered through IANA (see + Section 15). + +12.2 SDES Types + + abbrev. name value + END end of SDES list 0 + CNAME canonical name 1 + NAME user name 2 + EMAIL user's electronic mail address 3 + PHONE user's phone number 4 + LOC geographic user location 5 + TOOL name of application or tool 6 + NOTE notice about the source 7 + PRIV private extensions 8 + + Additional SDES types may be registered through IANA (see Section + 15). + + + + + + +Schulzrinne, et al. Standards Track [Page 70] + +RFC 3550 RTP July 2003 + + +13. RTP Profiles and Payload Format Specifications + + A complete specification of RTP for a particular application will + require one or more companion documents of two types described here: + profiles, and payload format specifications. + + RTP may be used for a variety of applications with somewhat differing + requirements. The flexibility to adapt to those requirements is + provided by allowing multiple choices in the main protocol + specification, then selecting the appropriate choices or defining + extensions for a particular environment and class of applications in + a separate profile document. Typically an application will operate + under only one profile in a particular RTP session, so there is no + explicit indication within the RTP protocol itself as to which + profile is in use. A profile for audio and video applications may be + found in the companion RFC 3551. Profiles are typically titled "RTP + Profile for ...". + + The second type of companion document is a payload format + specification, which defines how a particular kind of payload data, + such as H.261 encoded video, should be carried in RTP. These + documents are typically titled "RTP Payload Format for XYZ + Audio/Video Encoding". Payload formats may be useful under multiple + profiles and may therefore be defined independently of any particular + profile. The profile documents are then responsible for assigning a + default mapping of that format to a payload type value if needed. + + Within this specification, the following items have been identified + for possible definition within a profile, but this list is not meant + to be exhaustive: + + RTP data header: The octet in the RTP data header that contains + the marker bit and payload type field MAY be redefined by a + profile to suit different requirements, for example with more or + fewer marker bits (Section 5.3, p. 18). + + Payload types: Assuming that a payload type field is included, + the profile will usually define a set of payload formats (e.g., + media encodings) and a default static mapping of those formats to + payload type values. Some of the payload formats may be defined + by reference to separate payload format specifications. For each + payload type defined, the profile MUST specify the RTP timestamp + clock rate to be used (Section 5.1, p. 14). + + RTP data header additions: Additional fields MAY be appended to + the fixed RTP data header if some additional functionality is + required across the profile's class of applications independent of + payload type (Section 5.3, p. 18). + + + +Schulzrinne, et al. Standards Track [Page 71] + +RFC 3550 RTP July 2003 + + + RTP data header extensions: The contents of the first 16 bits of + the RTP data header extension structure MUST be defined if use of + that mechanism is to be allowed under the profile for + implementation-specific extensions (Section 5.3.1, p. 18). + + RTCP packet types: New application-class-specific RTCP packet + types MAY be defined and registered with IANA. + + RTCP report interval: A profile SHOULD specify that the values + suggested in Section 6.2 for the constants employed in the + calculation of the RTCP report interval will be used. Those are + the RTCP fraction of session bandwidth, the minimum report + interval, and the bandwidth split between senders and receivers. + A profile MAY specify alternate values if they have been + demonstrated to work in a scalable manner. + + SR/RR extension: An extension section MAY be defined for the + RTCP SR and RR packets if there is additional information that + should be reported regularly about the sender or receivers + (Section 6.4.3, p. 42 and 43). + + SDES use: The profile MAY specify the relative priorities for + RTCP SDES items to be transmitted or excluded entirely (Section + 6.3.9); an alternate syntax or semantics for the CNAME item + (Section 6.5.1); the format of the LOC item (Section 6.5.5); the + semantics and use of the NOTE item (Section 6.5.7); or new SDES + item types to be registered with IANA. + + Security: A profile MAY specify which security services and + algorithms should be offered by applications, and MAY provide + guidance as to their appropriate use (Section 9, p. 65). + + String-to-key mapping: A profile MAY specify how a user-provided + password or pass phrase is mapped into an encryption key. + + Congestion: A profile SHOULD specify the congestion control + behavior appropriate for that profile. + + Underlying protocol: Use of a particular underlying network or + transport layer protocol to carry RTP packets MAY be required. + + Transport mapping: A mapping of RTP and RTCP to transport-level + addresses, e.g., UDP ports, other than the standard mapping + defined in Section 11, p. 68 may be specified. + + + + + + + +Schulzrinne, et al. Standards Track [Page 72] + +RFC 3550 RTP July 2003 + + + Encapsulation: An encapsulation of RTP packets may be defined to + allow multiple RTP data packets to be carried in one lower-layer + packet or to provide framing over underlying protocols that do not + already do so (Section 11, p. 69). + + It is not expected that a new profile will be required for every + application. Within one application class, it would be better to + extend an existing profile rather than make a new one in order to + facilitate interoperation among the applications since each will + typically run under only one profile. Simple extensions such as the + definition of additional payload type values or RTCP packet types may + be accomplished by registering them through IANA and publishing their + descriptions in an addendum to the profile or in a payload format + specification. + +14. Security Considerations + + RTP suffers from the same security liabilities as the underlying + protocols. For example, an impostor can fake source or destination + network addresses, or change the header or payload. Within RTCP, the + CNAME and NAME information may be used to impersonate another + participant. In addition, RTP may be sent via IP multicast, which + provides no direct means for a sender to know all the receivers of + the data sent and therefore no measure of privacy. Rightly or not, + users may be more sensitive to privacy concerns with audio and video + communication than they have been with more traditional forms of + network communication [33]. Therefore, the use of security + mechanisms with RTP is important. These mechanisms are discussed in + Section 9. + + RTP-level translators or mixers may be used to allow RTP traffic to + reach hosts behind firewalls. Appropriate firewall security + principles and practices, which are beyond the scope of this + document, should be followed in the design and installation of these + devices and in the admission of RTP applications for use behind the + firewall. + +15. IANA Considerations + + Additional RTCP packet types and SDES item types may be registered + through the Internet Assigned Numbers Authority (IANA). Since these + number spaces are small, allowing unconstrained registration of new + values would not be prudent. To facilitate review of requests and to + promote shared use of new types among multiple applications, requests + for registration of new values must be documented in an RFC or other + permanent and readily available reference such as the product of + another cooperative standards body (e.g., ITU-T). Other requests may + also be accepted, under the advice of a "designated expert." + + + +Schulzrinne, et al. Standards Track [Page 73] + +RFC 3550 RTP July 2003 + + + (Contact the IANA for the contact information of the current expert.) + + RTP profile specifications SHOULD register with IANA a name for the + profile in the form "RTP/xxx", where xxx is a short abbreviation of + the profile title. These names are for use by higher-level control + protocols, such as the Session Description Protocol (SDP), RFC 2327 + [15], to refer to transport methods. + +16. Intellectual Property Rights Statement + + The IETF takes no position regarding the validity or scope of any + intellectual property or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; neither does it represent that it + has made any effort to identify any such rights. Information on the + IETF's procedures with respect to rights in standards-track and + standards-related documentation can be found in BCP-11. Copies of + claims of rights made available for publication and any assurances of + licenses to be made available, or the result of an attempt made to + obtain a general license or permission for the use of such + proprietary rights by implementors or users of this specification can + be obtained from the IETF Secretariat. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights which may cover technology that may be required to practice + this standard. Please address the information to the IETF Executive + Director. + +17. Acknowledgments + + This memorandum is based on discussions within the IETF Audio/Video + Transport working group chaired by Stephen Casner and Colin Perkins. + The current protocol has its origins in the Network Voice Protocol + and the Packet Video Protocol (Danny Cohen and Randy Cole) and the + protocol implemented by the vat application (Van Jacobson and Steve + McCanne). Christian Huitema provided ideas for the random identifier + generator. Extensive analysis and simulation of the timer + reconsideration algorithm was done by Jonathan Rosenberg. The + additions for layered encodings were specified by Michael Speer and + Steve McCanne. + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 74] + +RFC 3550 RTP July 2003 + + +Appendix A - Algorithms + + We provide examples of C code for aspects of RTP sender and receiver + algorithms. There may be other implementation methods that are + faster in particular operating environments or have other advantages. + These implementation notes are for informational purposes only and + are meant to clarify the RTP specification. + + The following definitions are used for all examples; for clarity and + brevity, the structure definitions are only valid for 32-bit big- + endian (most significant octet first) architectures. Bit fields are + assumed to be packed tightly in big-endian bit order, with no + additional padding. Modifications would be required to construct a + portable implementation. + + /* + * rtp.h -- RTP header file + */ + #include + + /* + * The type definitions below are valid for 32-bit architectures and + * may have to be adjusted for 16- or 64-bit architectures. + */ + typedef unsigned char u_int8; + typedef unsigned short u_int16; + typedef unsigned int u_int32; + typedef short int16; + + /* + * Current protocol version. + */ + #define RTP_VERSION 2 + + #define RTP_SEQ_MOD (1<<16) + #define RTP_MAX_SDES 255 /* maximum text length for SDES */ + + typedef enum { + RTCP_SR = 200, + RTCP_RR = 201, + RTCP_SDES = 202, + RTCP_BYE = 203, + RTCP_APP = 204 + } rtcp_type_t; + + typedef enum { + RTCP_SDES_END = 0, + RTCP_SDES_CNAME = 1, + + + +Schulzrinne, et al. Standards Track [Page 75] + +RFC 3550 RTP July 2003 + + + RTCP_SDES_NAME = 2, + RTCP_SDES_EMAIL = 3, + RTCP_SDES_PHONE = 4, + RTCP_SDES_LOC = 5, + RTCP_SDES_TOOL = 6, + RTCP_SDES_NOTE = 7, + RTCP_SDES_PRIV = 8 + } rtcp_sdes_type_t; + + /* + * RTP data header + */ + typedef struct { + unsigned int version:2; /* protocol version */ + unsigned int p:1; /* padding flag */ + unsigned int x:1; /* header extension flag */ + unsigned int cc:4; /* CSRC count */ + unsigned int m:1; /* marker bit */ + unsigned int pt:7; /* payload type */ + unsigned int seq:16; /* sequence number */ + u_int32 ts; /* timestamp */ + u_int32 ssrc; /* synchronization source */ + u_int32 csrc[1]; /* optional CSRC list */ + } rtp_hdr_t; + + /* + * RTCP common header word + */ + typedef struct { + unsigned int version:2; /* protocol version */ + unsigned int p:1; /* padding flag */ + unsigned int count:5; /* varies by packet type */ + unsigned int pt:8; /* RTCP packet type */ + u_int16 length; /* pkt len in words, w/o this word */ + } rtcp_common_t; + + /* + * Big-endian mask for version, padding bit and packet type pair + */ + #define RTCP_VALID_MASK (0xc000 | 0x2000 | 0xfe) + #define RTCP_VALID_VALUE ((RTP_VERSION << 14) | RTCP_SR) + + /* + * Reception report block + */ + typedef struct { + u_int32 ssrc; /* data source being reported */ + unsigned int fraction:8; /* fraction lost since last SR/RR */ + + + +Schulzrinne, et al. Standards Track [Page 76] + +RFC 3550 RTP July 2003 + + + int lost:24; /* cumul. no. pkts lost (signed!) */ + u_int32 last_seq; /* extended last seq. no. received */ + u_int32 jitter; /* interarrival jitter */ + u_int32 lsr; /* last SR packet from this source */ + u_int32 dlsr; /* delay since last SR packet */ + } rtcp_rr_t; + + /* + * SDES item + */ + typedef struct { + u_int8 type; /* type of item (rtcp_sdes_type_t) */ + u_int8 length; /* length of item (in octets) */ + char data[1]; /* text, not null-terminated */ + } rtcp_sdes_item_t; + + /* + * One RTCP packet + */ + typedef struct { + rtcp_common_t common; /* common header */ + union { + /* sender report (SR) */ + struct { + u_int32 ssrc; /* sender generating this report */ + u_int32 ntp_sec; /* NTP timestamp */ + u_int32 ntp_frac; + u_int32 rtp_ts; /* RTP timestamp */ + u_int32 psent; /* packets sent */ + u_int32 osent; /* octets sent */ + rtcp_rr_t rr[1]; /* variable-length list */ + } sr; + + /* reception report (RR) */ + struct { + u_int32 ssrc; /* receiver generating this report */ + rtcp_rr_t rr[1]; /* variable-length list */ + } rr; + + /* source description (SDES) */ + struct rtcp_sdes { + u_int32 src; /* first SSRC/CSRC */ + rtcp_sdes_item_t item[1]; /* list of SDES items */ + } sdes; + + /* BYE */ + struct { + u_int32 src[1]; /* list of sources */ + + + +Schulzrinne, et al. Standards Track [Page 77] + +RFC 3550 RTP July 2003 + + + /* can't express trailing text for reason */ + } bye; + } r; + } rtcp_t; + + typedef struct rtcp_sdes rtcp_sdes_t; + + /* + * Per-source state information + */ + typedef struct { + u_int16 max_seq; /* highest seq. number seen */ + u_int32 cycles; /* shifted count of seq. number cycles */ + u_int32 base_seq; /* base seq number */ + u_int32 bad_seq; /* last 'bad' seq number + 1 */ + u_int32 probation; /* sequ. packets till source is valid */ + u_int32 received; /* packets received */ + u_int32 expected_prior; /* packet expected at last interval */ + u_int32 received_prior; /* packet received at last interval */ + u_int32 transit; /* relative trans time for prev pkt */ + u_int32 jitter; /* estimated jitter */ + /* ... */ + } source; + +A.1 RTP Data Header Validity Checks + + An RTP receiver should check the validity of the RTP header on + incoming packets since they might be encrypted or might be from a + different application that happens to be misaddressed. Similarly, if + encryption according to the method described in Section 9 is enabled, + the header validity check is needed to verify that incoming packets + have been correctly decrypted, although a failure of the header + validity check (e.g., unknown payload type) may not necessarily + indicate decryption failure. + + Only weak validity checks are possible on an RTP data packet from a + source that has not been heard before: + + o RTP version field must equal 2. + + o The payload type must be known, and in particular it must not be + equal to SR or RR. + + o If the P bit is set, then the last octet of the packet must + contain a valid octet count, in particular, less than the total + packet length minus the header size. + + + + + +Schulzrinne, et al. Standards Track [Page 78] + +RFC 3550 RTP July 2003 + + + o The X bit must be zero if the profile does not specify that the + header extension mechanism may be used. Otherwise, the extension + length field must be less than the total packet size minus the + fixed header length and padding. + + o The length of the packet must be consistent with CC and payload + type (if payloads have a known length). + + The last three checks are somewhat complex and not always possible, + leaving only the first two which total just a few bits. If the SSRC + identifier in the packet is one that has been received before, then + the packet is probably valid and checking if the sequence number is + in the expected range provides further validation. If the SSRC + identifier has not been seen before, then data packets carrying that + identifier may be considered invalid until a small number of them + arrive with consecutive sequence numbers. Those invalid packets MAY + be discarded or they MAY be stored and delivered once validation has + been achieved if the resulting delay is acceptable. + + The routine update_seq shown below ensures that a source is declared + valid only after MIN_SEQUENTIAL packets have been received in + sequence. It also validates the sequence number seq of a newly + received packet and updates the sequence state for the packet's + source in the structure to which s points. + + When a new source is heard for the first time, that is, its SSRC + identifier is not in the table (see Section 8.2), and the per-source + state is allocated for it, s->probation is set to the number of + sequential packets required before declaring a source valid + (parameter MIN_SEQUENTIAL) and other variables are initialized: + + init_seq(s, seq); + s->max_seq = seq - 1; + s->probation = MIN_SEQUENTIAL; + + A non-zero s->probation marks the source as not yet valid so the + state may be discarded after a short timeout rather than a long one, + as discussed in Section 6.2.1. + + After a source is considered valid, the sequence number is considered + valid if it is no more than MAX_DROPOUT ahead of s->max_seq nor more + than MAX_MISORDER behind. If the new sequence number is ahead of + max_seq modulo the RTP sequence number range (16 bits), but is + smaller than max_seq, it has wrapped around and the (shifted) count + of sequence number cycles is incremented. A value of one is returned + to indicate a valid sequence number. + + + + + +Schulzrinne, et al. Standards Track [Page 79] + +RFC 3550 RTP July 2003 + + + Otherwise, the value zero is returned to indicate that the validation + failed, and the bad sequence number plus 1 is stored. If the next + packet received carries the next higher sequence number, it is + considered the valid start of a new packet sequence presumably caused + by an extended dropout or a source restart. Since multiple complete + sequence number cycles may have been missed, the packet loss + statistics are reset. + + Typical values for the parameters are shown, based on a maximum + misordering time of 2 seconds at 50 packets/second and a maximum + dropout of 1 minute. The dropout parameter MAX_DROPOUT should be a + small fraction of the 16-bit sequence number space to give a + reasonable probability that new sequence numbers after a restart will + not fall in the acceptable range for sequence numbers from before the + restart. + + void init_seq(source *s, u_int16 seq) + { + s->base_seq = seq; + s->max_seq = seq; + s->bad_seq = RTP_SEQ_MOD + 1; /* so seq == bad_seq is false */ + s->cycles = 0; + s->received = 0; + s->received_prior = 0; + s->expected_prior = 0; + /* other initialization */ + } + + int update_seq(source *s, u_int16 seq) + { + u_int16 udelta = seq - s->max_seq; + const int MAX_DROPOUT = 3000; + const int MAX_MISORDER = 100; + const int MIN_SEQUENTIAL = 2; + + /* + * Source is not valid until MIN_SEQUENTIAL packets with + * sequential sequence numbers have been received. + */ + if (s->probation) { + /* packet is in sequence */ + if (seq == s->max_seq + 1) { + s->probation--; + s->max_seq = seq; + if (s->probation == 0) { + init_seq(s, seq); + s->received++; + return 1; + + + +Schulzrinne, et al. Standards Track [Page 80] + +RFC 3550 RTP July 2003 + + + } + } else { + s->probation = MIN_SEQUENTIAL - 1; + s->max_seq = seq; + } + return 0; + } else if (udelta < MAX_DROPOUT) { + /* in order, with permissible gap */ + if (seq < s->max_seq) { + /* + * Sequence number wrapped - count another 64K cycle. + */ + s->cycles += RTP_SEQ_MOD; + } + s->max_seq = seq; + } else if (udelta <= RTP_SEQ_MOD - MAX_MISORDER) { + /* the sequence number made a very large jump */ + if (seq == s->bad_seq) { + /* + * Two sequential packets -- assume that the other side + * restarted without telling us so just re-sync + * (i.e., pretend this was the first packet). + */ + init_seq(s, seq); + } + else { + s->bad_seq = (seq + 1) & (RTP_SEQ_MOD-1); + return 0; + } + } else { + /* duplicate or reordered packet */ + } + s->received++; + return 1; + } + + The validity check can be made stronger requiring more than two + packets in sequence. The disadvantages are that a larger number of + initial packets will be discarded (or delayed in a queue) and that + high packet loss rates could prevent validation. However, because + the RTCP header validation is relatively strong, if an RTCP packet is + received from a source before the data packets, the count could be + adjusted so that only two packets are required in sequence. If + initial data loss for a few seconds can be tolerated, an application + MAY choose to discard all data packets from a source until a valid + RTCP packet has been received from that source. + + + + + +Schulzrinne, et al. Standards Track [Page 81] + +RFC 3550 RTP July 2003 + + + Depending on the application and encoding, algorithms may exploit + additional knowledge about the payload format for further validation. + For payload types where the timestamp increment is the same for all + packets, the timestamp values can be predicted from the previous + packet received from the same source using the sequence number + difference (assuming no change in payload type). + + A strong "fast-path" check is possible since with high probability + the first four octets in the header of a newly received RTP data + packet will be just the same as that of the previous packet from the + same SSRC except that the sequence number will have increased by one. + Similarly, a single-entry cache may be used for faster SSRC lookups + in applications where data is typically received from one source at a + time. + +A.2 RTCP Header Validity Checks + + The following checks should be applied to RTCP packets. + + o RTP version field must equal 2. + + o The payload type field of the first RTCP packet in a compound + packet must be equal to SR or RR. + + o The padding bit (P) should be zero for the first packet of a + compound RTCP packet because padding should only be applied, if it + is needed, to the last packet. + + o The length fields of the individual RTCP packets must add up to + the overall length of the compound RTCP packet as received. This + is a fairly strong check. + + The code fragment below performs all of these checks. The packet + type is not checked for subsequent packets since unknown packet types + may be present and should be ignored. + + u_int32 len; /* length of compound RTCP packet in words */ + rtcp_t *r; /* RTCP header */ + rtcp_t *end; /* end of compound RTCP packet */ + + if ((*(u_int16 *)r & RTCP_VALID_MASK) != RTCP_VALID_VALUE) { + /* something wrong with packet format */ + } + end = (rtcp_t *)((u_int32 *)r + len); + + do r = (rtcp_t *)((u_int32 *)r + r->common.length + 1); + while (r < end && r->common.version == 2); + + + + +Schulzrinne, et al. Standards Track [Page 82] + +RFC 3550 RTP July 2003 + + + if (r != end) { + /* something wrong with packet format */ + } + +A.3 Determining Number of Packets Expected and Lost + + In order to compute packet loss rates, the number of RTP packets + expected and actually received from each source needs to be known, + using per-source state information defined in struct source + referenced via pointer s in the code below. The number of packets + received is simply the count of packets as they arrive, including any + late or duplicate packets. The number of packets expected can be + computed by the receiver as the difference between the highest + sequence number received (s->max_seq) and the first sequence number + received (s->base_seq). Since the sequence number is only 16 bits + and will wrap around, it is necessary to extend the highest sequence + number with the (shifted) count of sequence number wraparounds + (s->cycles). Both the received packet count and the count of cycles + are maintained the RTP header validity check routine in Appendix A.1. + + extended_max = s->cycles + s->max_seq; + expected = extended_max - s->base_seq + 1; + + The number of packets lost is defined to be the number of packets + expected less the number of packets actually received: + + lost = expected - s->received; + + Since this signed number is carried in 24 bits, it should be clamped + at 0x7fffff for positive loss or 0x800000 for negative loss rather + than wrapping around. + + The fraction of packets lost during the last reporting interval + (since the previous SR or RR packet was sent) is calculated from + differences in the expected and received packet counts across the + interval, where expected_prior and received_prior are the values + saved when the previous reception report was generated: + + expected_interval = expected - s->expected_prior; + s->expected_prior = expected; + received_interval = s->received - s->received_prior; + s->received_prior = s->received; + lost_interval = expected_interval - received_interval; + if (expected_interval == 0 || lost_interval <= 0) fraction = 0; + else fraction = (lost_interval << 8) / expected_interval; + + The resulting fraction is an 8-bit fixed point number with the binary + point at the left edge. + + + +Schulzrinne, et al. Standards Track [Page 83] + +RFC 3550 RTP July 2003 + + +A.4 Generating RTCP SDES Packets + + This function builds one SDES chunk into buffer b composed of argc + items supplied in arrays type, value and length. It returns a + pointer to the next available location within b. + + char *rtp_write_sdes(char *b, u_int32 src, int argc, + rtcp_sdes_type_t type[], char *value[], + int length[]) + { + rtcp_sdes_t *s = (rtcp_sdes_t *)b; + rtcp_sdes_item_t *rsp; + int i; + int len; + int pad; + + /* SSRC header */ + s->src = src; + rsp = &s->item[0]; + + /* SDES items */ + for (i = 0; i < argc; i++) { + rsp->type = type[i]; + len = length[i]; + if (len > RTP_MAX_SDES) { + /* invalid length, may want to take other action */ + len = RTP_MAX_SDES; + } + rsp->length = len; + memcpy(rsp->data, value[i], len); + rsp = (rtcp_sdes_item_t *)&rsp->data[len]; + } + + /* terminate with end marker and pad to next 4-octet boundary */ + len = ((char *) rsp) - b; + pad = 4 - (len & 0x3); + b = (char *) rsp; + while (pad--) *b++ = RTCP_SDES_END; + + return b; + } + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 84] + +RFC 3550 RTP July 2003 + + +A.5 Parsing RTCP SDES Packets + + This function parses an SDES packet, calling functions find_member() + to find a pointer to the information for a session member given the + SSRC identifier and member_sdes() to store the new SDES information + for that member. This function expects a pointer to the header of + the RTCP packet. + + void rtp_read_sdes(rtcp_t *r) + { + int count = r->common.count; + rtcp_sdes_t *sd = &r->r.sdes; + rtcp_sdes_item_t *rsp, *rspn; + rtcp_sdes_item_t *end = (rtcp_sdes_item_t *) + ((u_int32 *)r + r->common.length + 1); + source *s; + + while (--count >= 0) { + rsp = &sd->item[0]; + if (rsp >= end) break; + s = find_member(sd->src); + + for (; rsp->type; rsp = rspn ) { + rspn = (rtcp_sdes_item_t *)((char*)rsp+rsp->length+2); + if (rspn >= end) { + rsp = rspn; + break; + } + member_sdes(s, rsp->type, rsp->data, rsp->length); + } + sd = (rtcp_sdes_t *) + ((u_int32 *)sd + (((char *)rsp - (char *)sd) >> 2)+1); + } + if (count >= 0) { + /* invalid packet format */ + } + } + +A.6 Generating a Random 32-bit Identifier + + The following subroutine generates a random 32-bit identifier using + the MD5 routines published in RFC 1321 [32]. The system routines may + not be present on all operating systems, but they should serve as + hints as to what kinds of information may be used. Other system + calls that may be appropriate include + + + + + + +Schulzrinne, et al. Standards Track [Page 85] + +RFC 3550 RTP July 2003 + + + o getdomainname(), + + o getwd(), or + + o getrusage(). + + "Live" video or audio samples are also a good source of random + numbers, but care must be taken to avoid using a turned-off + microphone or blinded camera as a source [17]. + + Use of this or a similar routine is recommended to generate the + initial seed for the random number generator producing the RTCP + period (as shown in Appendix A.7), to generate the initial values for + the sequence number and timestamp, and to generate SSRC values. + Since this routine is likely to be CPU-intensive, its direct use to + generate RTCP periods is inappropriate because predictability is not + an issue. Note that this routine produces the same result on + repeated calls until the value of the system clock changes unless + different values are supplied for the type argument. + + /* + * Generate a random 32-bit quantity. + */ + #include /* u_long */ + #include /* gettimeofday() */ + #include /* get..() */ + #include /* printf() */ + #include /* clock() */ + #include /* uname() */ + #include "global.h" /* from RFC 1321 */ + #include "md5.h" /* from RFC 1321 */ + + #define MD_CTX MD5_CTX + #define MDInit MD5Init + #define MDUpdate MD5Update + #define MDFinal MD5Final + + static u_long md_32(char *string, int length) + { + MD_CTX context; + union { + char c[16]; + u_long x[4]; + } digest; + u_long r; + int i; + + MDInit (&context); + + + +Schulzrinne, et al. Standards Track [Page 86] + +RFC 3550 RTP July 2003 + + + MDUpdate (&context, string, length); + MDFinal ((unsigned char *)&digest, &context); + r = 0; + for (i = 0; i < 3; i++) { + r ^= digest.x[i]; + } + return r; + } /* md_32 */ + + /* + * Return random unsigned 32-bit quantity. Use 'type' argument if + * you need to generate several different values in close succession. + */ + u_int32 random32(int type) + { + struct { + int type; + struct timeval tv; + clock_t cpu; + pid_t pid; + u_long hid; + uid_t uid; + gid_t gid; + struct utsname name; + } s; + + gettimeofday(&s.tv, 0); + uname(&s.name); + s.type = type; + s.cpu = clock(); + s.pid = getpid(); + s.hid = gethostid(); + s.uid = getuid(); + s.gid = getgid(); + /* also: system uptime */ + + return md_32((char *)&s, sizeof(s)); + } /* random32 */ + +A.7 Computing the RTCP Transmission Interval + + The following functions implement the RTCP transmission and reception + rules described in Section 6.2. These rules are coded in several + functions: + + o rtcp_interval() computes the deterministic calculated interval, + measured in seconds. The parameters are defined in Section 6.3. + + + + +Schulzrinne, et al. Standards Track [Page 87] + +RFC 3550 RTP July 2003 + + + o OnExpire() is called when the RTCP transmission timer expires. + + o OnReceive() is called whenever an RTCP packet is received. + + Both OnExpire() and OnReceive() have event e as an argument. This is + the next scheduled event for that participant, either an RTCP report + or a BYE packet. It is assumed that the following functions are + available: + + o Schedule(time t, event e) schedules an event e to occur at time t. + When time t arrives, the function OnExpire is called with e as an + argument. + + o Reschedule(time t, event e) reschedules a previously scheduled + event e for time t. + + o SendRTCPReport(event e) sends an RTCP report. + + o SendBYEPacket(event e) sends a BYE packet. + + o TypeOfEvent(event e) returns EVENT_BYE if the event being + processed is for a BYE packet to be sent, else it returns + EVENT_REPORT. + + o PacketType(p) returns PACKET_RTCP_REPORT if packet p is an RTCP + report (not BYE), PACKET_BYE if its a BYE RTCP packet, and + PACKET_RTP if its a regular RTP data packet. + + o ReceivedPacketSize() and SentPacketSize() return the size of the + referenced packet in octets. + + o NewMember(p) returns a 1 if the participant who sent packet p is + not currently in the member list, 0 otherwise. Note this function + is not sufficient for a complete implementation because each CSRC + identifier in an RTP packet and each SSRC in a BYE packet should + be processed. + + o NewSender(p) returns a 1 if the participant who sent packet p is + not currently in the sender sublist of the member list, 0 + otherwise. + + o AddMember() and RemoveMember() to add and remove participants from + the member list. + + o AddSender() and RemoveSender() to add and remove participants from + the sender sublist of the member list. + + + + + +Schulzrinne, et al. Standards Track [Page 88] + +RFC 3550 RTP July 2003 + + + These functions would have to be extended for an implementation that + allows the RTCP bandwidth fractions for senders and non-senders to be + specified as explicit parameters rather than fixed values of 25% and + 75%. The extended implementation of rtcp_interval() would need to + avoid division by zero if one of the parameters was zero. + + double rtcp_interval(int members, + int senders, + double rtcp_bw, + int we_sent, + double avg_rtcp_size, + int initial) + { + /* + * Minimum average time between RTCP packets from this site (in + * seconds). This time prevents the reports from `clumping' when + * sessions are small and the law of large numbers isn't helping + * to smooth out the traffic. It also keeps the report interval + * from becoming ridiculously small during transient outages like + * a network partition. + */ + double const RTCP_MIN_TIME = 5.; + /* + * Fraction of the RTCP bandwidth to be shared among active + * senders. (This fraction was chosen so that in a typical + * session with one or two active senders, the computed report + * time would be roughly equal to the minimum report time so that + * we don't unnecessarily slow down receiver reports.) The + * receiver fraction must be 1 - the sender fraction. + */ + double const RTCP_SENDER_BW_FRACTION = 0.25; + double const RTCP_RCVR_BW_FRACTION = (1-RTCP_SENDER_BW_FRACTION); + /* + /* To compensate for "timer reconsideration" converging to a + * value below the intended average. + */ + double const COMPENSATION = 2.71828 - 1.5; + + double t; /* interval */ + double rtcp_min_time = RTCP_MIN_TIME; + int n; /* no. of members for computation */ + + /* + * Very first call at application start-up uses half the min + * delay for quicker notification while still allowing some time + * before reporting for randomization and to learn about other + * sources so the report interval will converge to the correct + * interval more quickly. + + + +Schulzrinne, et al. Standards Track [Page 89] + +RFC 3550 RTP July 2003 + + + */ + if (initial) { + rtcp_min_time /= 2; + } + /* + * Dedicate a fraction of the RTCP bandwidth to senders unless + * the number of senders is large enough that their share is + * more than that fraction. + */ + n = members; + if (senders <= members * RTCP_SENDER_BW_FRACTION) { + if (we_sent) { + rtcp_bw *= RTCP_SENDER_BW_FRACTION; + n = senders; + } else { + rtcp_bw *= RTCP_RCVR_BW_FRACTION; + n -= senders; + } + } + + /* + * The effective number of sites times the average packet size is + * the total number of octets sent when each site sends a report. + * Dividing this by the effective bandwidth gives the time + * interval over which those packets must be sent in order to + * meet the bandwidth target, with a minimum enforced. In that + * time interval we send one report so this time is also our + * average time between reports. + */ + t = avg_rtcp_size * n / rtcp_bw; + if (t < rtcp_min_time) t = rtcp_min_time; + + /* + * To avoid traffic bursts from unintended synchronization with + * other sites, we then pick our actual next report interval as a + * random number uniformly distributed between 0.5*t and 1.5*t. + */ + t = t * (drand48() + 0.5); + t = t / COMPENSATION; + return t; + } + + void OnExpire(event e, + int members, + int senders, + double rtcp_bw, + int we_sent, + double *avg_rtcp_size, + + + +Schulzrinne, et al. Standards Track [Page 90] + +RFC 3550 RTP July 2003 + + + int *initial, + time_tp tc, + time_tp *tp, + int *pmembers) + { + /* This function is responsible for deciding whether to send an + * RTCP report or BYE packet now, or to reschedule transmission. + * It is also responsible for updating the pmembers, initial, tp, + * and avg_rtcp_size state variables. This function should be + * called upon expiration of the event timer used by Schedule(). + */ + + double t; /* Interval */ + double tn; /* Next transmit time */ + + /* In the case of a BYE, we use "timer reconsideration" to + * reschedule the transmission of the BYE if necessary */ + + if (TypeOfEvent(e) == EVENT_BYE) { + t = rtcp_interval(members, + senders, + rtcp_bw, + we_sent, + *avg_rtcp_size, + *initial); + tn = *tp + t; + if (tn <= tc) { + SendBYEPacket(e); + exit(1); + } else { + Schedule(tn, e); + } + + } else if (TypeOfEvent(e) == EVENT_REPORT) { + t = rtcp_interval(members, + senders, + rtcp_bw, + we_sent, + *avg_rtcp_size, + *initial); + tn = *tp + t; + if (tn <= tc) { + SendRTCPReport(e); + *avg_rtcp_size = (1./16.)*SentPacketSize(e) + + (15./16.)*(*avg_rtcp_size); + *tp = tc; + + /* We must redraw the interval. Don't reuse the + + + +Schulzrinne, et al. Standards Track [Page 91] + +RFC 3550 RTP July 2003 + + + one computed above, since its not actually + distributed the same, as we are conditioned + on it being small enough to cause a packet to + be sent */ + + t = rtcp_interval(members, + senders, + rtcp_bw, + we_sent, + *avg_rtcp_size, + *initial); + + Schedule(t+tc,e); + *initial = 0; + } else { + Schedule(tn, e); + } + *pmembers = members; + } + } + + void OnReceive(packet p, + event e, + int *members, + int *pmembers, + int *senders, + double *avg_rtcp_size, + double *tp, + double tc, + double tn) + { + /* What we do depends on whether we have left the group, and are + * waiting to send a BYE (TypeOfEvent(e) == EVENT_BYE) or an RTCP + * report. p represents the packet that was just received. */ + + if (PacketType(p) == PACKET_RTCP_REPORT) { + if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) { + AddMember(p); + *members += 1; + } + *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) + + (15./16.)*(*avg_rtcp_size); + } else if (PacketType(p) == PACKET_RTP) { + if (NewMember(p) && (TypeOfEvent(e) == EVENT_REPORT)) { + AddMember(p); + *members += 1; + } + if (NewSender(p) && (TypeOfEvent(e) == EVENT_REPORT)) { + + + +Schulzrinne, et al. Standards Track [Page 92] + +RFC 3550 RTP July 2003 + + + AddSender(p); + *senders += 1; + } + } else if (PacketType(p) == PACKET_BYE) { + *avg_rtcp_size = (1./16.)*ReceivedPacketSize(p) + + (15./16.)*(*avg_rtcp_size); + + if (TypeOfEvent(e) == EVENT_REPORT) { + if (NewSender(p) == FALSE) { + RemoveSender(p); + *senders -= 1; + } + + if (NewMember(p) == FALSE) { + RemoveMember(p); + *members -= 1; + } + + if (*members < *pmembers) { + tn = tc + + (((double) *members)/(*pmembers))*(tn - tc); + *tp = tc - + (((double) *members)/(*pmembers))*(tc - *tp); + + /* Reschedule the next report for time tn */ + + Reschedule(tn, e); + *pmembers = *members; + } + + } else if (TypeOfEvent(e) == EVENT_BYE) { + *members += 1; + } + } + } + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 93] + +RFC 3550 RTP July 2003 + + +A.8 Estimating the Interarrival Jitter + + The code fragments below implement the algorithm given in Section + 6.4.1 for calculating an estimate of the statistical variance of the + RTP data interarrival time to be inserted in the interarrival jitter + field of reception reports. The inputs are r->ts, the timestamp from + the incoming packet, and arrival, the current time in the same units. + Here s points to state for the source; s->transit holds the relative + transit time for the previous packet, and s->jitter holds the + estimated jitter. The jitter field of the reception report is + measured in timestamp units and expressed as an unsigned integer, but + the jitter estimate is kept in a floating point. As each data packet + arrives, the jitter estimate is updated: + + int transit = arrival - r->ts; + int d = transit - s->transit; + s->transit = transit; + if (d < 0) d = -d; + s->jitter += (1./16.) * ((double)d - s->jitter); + + When a reception report block (to which rr points) is generated for + this member, the current jitter estimate is returned: + + rr->jitter = (u_int32) s->jitter; + + Alternatively, the jitter estimate can be kept as an integer, but + scaled to reduce round-off error. The calculation is the same except + for the last line: + + s->jitter += d - ((s->jitter + 8) >> 4); + + In this case, the estimate is sampled for the reception report as: + + rr->jitter = s->jitter >> 4; + + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 94] + +RFC 3550 RTP July 2003 + + +Appendix B - Changes from RFC 1889 + + Most of this RFC is identical to RFC 1889. There are no changes in + the packet formats on the wire, only changes to the rules and + algorithms governing how the protocol is used. The biggest change is + an enhancement to the scalable timer algorithm for calculating when + to send RTCP packets: + + o The algorithm for calculating the RTCP transmission interval + specified in Sections 6.2 and 6.3 and illustrated in Appendix A.7 + is augmented to include "reconsideration" to minimize transmission + in excess of the intended rate when many participants join a + session simultaneously, and "reverse reconsideration" to reduce + the incidence and duration of false participant timeouts when the + number of participants drops rapidly. Reverse reconsideration is + also used to possibly shorten the delay before sending RTCP SR + when transitioning from passive receiver to active sender mode. + + o Section 6.3.7 specifies new rules controlling when an RTCP BYE + packet should be sent in order to avoid a flood of packets when + many participants leave a session simultaneously. + + o The requirement to retain state for inactive participants for a + period long enough to span typical network partitions was removed + from Section 6.2.1. In a session where many participants join for + a brief time and fail to send BYE, this requirement would cause a + significant overestimate of the number of participants. The + reconsideration algorithm added in this revision compensates for + the large number of new participants joining simultaneously when a + partition heals. + + It should be noted that these enhancements only have a significant + effect when the number of session participants is large (thousands) + and most of the participants join or leave at the same time. This + makes testing in a live network difficult. However, the algorithm + was subjected to a thorough analysis and simulation to verify its + performance. Furthermore, the enhanced algorithm was designed to + interoperate with the algorithm in RFC 1889 such that the degree of + reduction in excess RTCP bandwidth during a step join is proportional + to the fraction of participants that implement the enhanced + algorithm. Interoperation of the two algorithms has been verified + experimentally on live networks. + + Other functional changes were: + + o Section 6.2.1 specifies that implementations may store only a + sampling of the participants' SSRC identifiers to allow scaling to + very large sessions. Algorithms are specified in RFC 2762 [21]. + + + +Schulzrinne, et al. Standards Track [Page 95] + +RFC 3550 RTP July 2003 + + + o In Section 6.2 it is specified that RTCP sender and non-sender + bandwidths may be set as separate parameters of the session rather + than a strict percentage of the session bandwidth, and may be set + to zero. The requirement that RTCP was mandatory for RTP sessions + using IP multicast was relaxed. However, a clarification was also + added that turning off RTCP is NOT RECOMMENDED. + + o In Sections 6.2, 6.3.1 and Appendix A.7, it is specified that the + fraction of participants below which senders get dedicated RTCP + bandwidth changes from the fixed 1/4 to a ratio based on the RTCP + sender and non-sender bandwidth parameters when those are given. + The condition that no bandwidth is dedicated to senders when there + are no senders was removed since that is expected to be a + transitory state. It also keeps non-senders from using sender + RTCP bandwidth when that is not intended. + + o Also in Section 6.2 it is specified that the minimum RTCP interval + may be scaled to smaller values for high bandwidth sessions, and + that the initial RTCP delay may be set to zero for unicast + sessions. + + o Timing out a participant is to be based on inactivity for a number + of RTCP report intervals calculated using the receiver RTCP + bandwidth fraction even for active senders. + + o Sections 7.2 and 7.3 specify that translators and mixers should + send BYE packets for the sources they are no longer forwarding. + + o Rule changes for layered encodings are defined in Sections 2.4, + 6.3.9, 8.3 and 11. In the last of these, it is noted that the + address and port assignment rule conflicts with the SDP + specification, RFC 2327 [15], but it is intended that this + restriction will be relaxed in a revision of RFC 2327. + + o The convention for using even/odd port pairs for RTP and RTCP in + Section 11 was clarified to refer to destination ports. The + requirement to use an even/odd port pair was removed if the two + ports are specified explicitly. For unicast RTP sessions, + distinct port pairs may be used for the two ends (Sections 3, 7.1 + and 11). + + o A new Section 10 was added to explain the requirement for + congestion control in applications using RTP. + + o In Section 8.2, the requirement that a new SSRC identifier MUST be + chosen whenever the source transport address is changed has been + relaxed to say that a new SSRC identifier MAY be chosen. + Correspondingly, it was clarified that an implementation MAY + + + +Schulzrinne, et al. Standards Track [Page 96] + +RFC 3550 RTP July 2003 + + + choose to keep packets from the new source address rather than the + existing source address when an SSRC collision occurs between two + other participants, and SHOULD do so for applications such as + telephony in which some sources such as mobile entities may change + addresses during the course of an RTP session. + + o An indentation bug in the RFC 1889 printing of the pseudo-code for + the collision detection and resolution algorithm in Section 8.2 + has been corrected by translating the syntax to pseudo C language, + and the algorithm has been modified to remove the restriction that + both RTP and RTCP must be sent from the same source port number. + + o The description of the padding mechanism for RTCP packets was + clarified and it is specified that padding MUST only be applied to + the last packet of a compound RTCP packet. + + o In Section A.1, initialization of base_seq was corrected to be seq + rather than seq - 1, and the text was corrected to say the bad + sequence number plus 1 is stored. The initialization of max_seq + and other variables for the algorithm was separated from the text + to make clear that this initialization must be done in addition to + calling the init_seq() function (and a few words lost in RFC 1889 + when processing the document from source to output form were + restored). + + o Clamping of number of packets lost in Section A.3 was corrected to + use both positive and negative limits. + + o The specification of "relative" NTP timestamp in the RTCP SR + section now defines these timestamps to be based on the most + common system-specific clock, such as system uptime, rather than + on session elapsed time which would not be the same for multiple + applications started on the same machine at different times. + + Non-functional changes: + + o It is specified that a receiver MUST ignore packets with payload + types it does not understand. + + o In Fig. 2, the floating point NTP timestamp value was corrected, + some missing leading zeros were added in a hex number, and the UTC + timezone was specified. + + o The inconsequence of NTP timestamps wrapping around in the year + 2036 is explained. + + + + + + +Schulzrinne, et al. Standards Track [Page 97] + +RFC 3550 RTP July 2003 + + + o The policy for registration of RTCP packet types and SDES types + was clarified in a new Section 15, IANA Considerations. The + suggestion that experimenters register the numbers they need and + then unregister those which prove to be unneeded has been removed + in favor of using APP and PRIV. Registration of profile names was + also specified. + + o The reference for the UTF-8 character set was changed from an + X/Open Preliminary Specification to be RFC 2279. + + o The reference for RFC 1597 was updated to RFC 1918 and the + reference for RFC 2543 was updated to RFC 3261. + + o The last paragraph of the introduction in RFC 1889, which + cautioned implementors to limit deployment in the Internet, was + removed because it was deemed no longer relevant. + + o A non-normative note regarding the use of RTP with Source-Specific + Multicast (SSM) was added in Section 6. + + o The definition of "RTP session" in Section 3 was expanded to + acknowledge that a single session may use multiple destination + transport addresses (as was always the case for a translator or + mixer) and to explain that the distinguishing feature of an RTP + session is that each corresponds to a separate SSRC identifier + space. A new definition of "multimedia session" was added to + reduce confusion about the word "session". + + o The meaning of "sampling instant" was explained in more detail as + part of the definition of the timestamp field of the RTP header in + Section 5.1. + + o Small clarifications of the text have been made in several places, + some in response to questions from readers. In particular: + + - In RFC 1889, the first five words of the second sentence of + Section 2.2 were lost in processing the document from source to + output form, but are now restored. + + - A definition for "RTP media type" was added in Section 3 to + allow the explanation of multiplexing RTP sessions in Section + 5.2 to be more clear regarding the multiplexing of multiple + media. That section also now explains that multiplexing + multiple sources of the same medium based on SSRC identifiers + may be appropriate and is the norm for multicast sessions. + + - The definition for "non-RTP means" was expanded to include + examples of other protocols constituting non-RTP means. + + + +Schulzrinne, et al. Standards Track [Page 98] + +RFC 3550 RTP July 2003 + + + - The description of the session bandwidth parameter is expanded + in Section 6.2, including a clarification that the control + traffic bandwidth is in addition to the session bandwidth for + the data traffic. + + - The effect of varying packet duration on the jitter calculation + was explained in Section 6.4.4. + + - The method for terminating and padding a sequence of SDES items + was clarified in Section 6.5. + + - IPv6 address examples were added in the description of SDES + CNAME in Section 6.5.1, and "example.com" was used in place of + other example domain names. + + - The Security section added a formal reference to IPSEC now that + it is available, and says that the confidentiality method + defined in this specification is primarily to codify existing + practice. It is RECOMMENDED that stronger encryption + algorithms such as Triple-DES be used in place of the default + algorithm, and noted that the SRTP profile based on AES will be + the correct choice in the future. A caution about the weakness + of the RTP header as an initialization vector was added. It + was also noted that payload-only encryption is necessary to + allow for header compression. + + - The method for partial encryption of RTCP was clarified; in + particular, SDES CNAME is carried in only one part when the + compound RTCP packet is split. + + - It is clarified that only one compound RTCP packet should be + sent per reporting interval and that if there are too many + active sources for the reports to fit in the MTU, then a subset + of the sources should be selected round-robin over multiple + intervals. + + - A note was added in Appendix A.1 that packets may be saved + during RTP header validation and delivered upon success. + + - Section 7.3 now explains that a mixer aggregating SDES packets + uses more RTCP bandwidth due to longer packets, and a mixer + passing through RTCP naturally sends packets at higher than the + single source rate, but both behaviors are valid. + + - Section 13 clarifies that an RTP application may use multiple + profiles but typically only one in a given session. + + + + + +Schulzrinne, et al. Standards Track [Page 99] + +RFC 3550 RTP July 2003 + + + - The terms MUST, SHOULD, MAY, etc. are used as defined in RFC + 2119. + + - The bibliography was divided into normative and informative + references. + +References + +Normative References + + [1] Schulzrinne, H. and S. Casner, "RTP Profile for Audio and Video + Conferences with Minimal Control", RFC 3551, July 2003. + + [2] Bradner, S., "Key Words for Use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + [3] Postel, J., "Internet Protocol", STD 5, RFC 791, September 1981. + + [4] Mills, D., "Network Time Protocol (Version 3) Specification, + Implementation and Analysis", RFC 1305, March 1992. + + [5] Yergeau, F., "UTF-8, a Transformation Format of ISO 10646", RFC + 2279, January 1998. + + [6] Mockapetris, P., "Domain Names - Concepts and Facilities", STD + 13, RFC 1034, November 1987. + + [7] Mockapetris, P., "Domain Names - Implementation and + Specification", STD 13, RFC 1035, November 1987. + + [8] Braden, R., "Requirements for Internet Hosts - Application and + Support", STD 3, RFC 1123, October 1989. + + [9] Resnick, P., "Internet Message Format", RFC 2822, April 2001. + +Informative References + + [10] Clark, D. and D. Tennenhouse, "Architectural Considerations for + a New Generation of Protocols," in SIGCOMM Symposium on + Communications Architectures and Protocols , (Philadelphia, + Pennsylvania), pp. 200--208, IEEE Computer Communications + Review, Vol. 20(4), September 1990. + + [11] Schulzrinne, H., "Issues in designing a transport protocol for + audio and video conferences and other multiparticipant real-time + applications." expired Internet Draft, October 1993. + + + + + +Schulzrinne, et al. Standards Track [Page 100] + +RFC 3550 RTP July 2003 + + + [12] Comer, D., Internetworking with TCP/IP , vol. 1. Englewood + Cliffs, New Jersey: Prentice Hall, 1991. + + [13] Rosenberg, J., Schulzrinne, H., Camarillo, G., Johnston, A., + Peterson, J., Sparks, R., Handley, M. and E. Schooler, "SIP: + Session Initiation Protocol", RFC 3261, June 2002. + + [14] International Telecommunication Union, "Visual telephone systems + and equipment for local area networks which provide a non- + guaranteed quality of service", Recommendation H.323, + Telecommunication Standardization Sector of ITU, Geneva, + Switzerland, July 2003. + + [15] Handley, M. and V. Jacobson, "SDP: Session Description + Protocol", RFC 2327, April 1998. + + [16] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming + Protocol (RTSP)", RFC 2326, April 1998. + + [17] Eastlake 3rd, D., Crocker, S. and J. Schiller, "Randomness + Recommendations for Security", RFC 1750, December 1994. + + [18] Bolot, J.-C., Turletti, T. and I. Wakeman, "Scalable Feedback + Control for Multicast Video Distribution in the Internet", in + SIGCOMM Symposium on Communications Architectures and Protocols, + (London, England), pp. 58--67, ACM, August 1994. + + [19] Busse, I., Deffner, B. and H. Schulzrinne, "Dynamic QoS Control + of Multimedia Applications Based on RTP", Computer + Communications , vol. 19, pp. 49--58, January 1996. + + [20] Floyd, S. and V. Jacobson, "The Synchronization of Periodic + Routing Messages", in SIGCOMM Symposium on Communications + Architectures and Protocols (D. P. Sidhu, ed.), (San Francisco, + California), pp. 33--44, ACM, September 1993. Also in [34]. + + [21] Rosenberg, J. and H. Schulzrinne, "Sampling of the Group + Membership in RTP", RFC 2762, February 2000. + + [22] Cadzow, J., Foundations of Digital Signal Processing and Data + Analysis New York, New York: Macmillan, 1987. + + [23] Hinden, R. and S. Deering, "Internet Protocol Version 6 (IPv6) + Addressing Architecture", RFC 3513, April 2003. + + [24] Rekhter, Y., Moskowitz, B., Karrenberg, D., de Groot, G. and E. + Lear, "Address Allocation for Private Internets", RFC 1918, + February 1996. + + + +Schulzrinne, et al. Standards Track [Page 101] + +RFC 3550 RTP July 2003 + + + [25] Lear, E., Fair, E., Crocker, D. and T. Kessler, "Network 10 + Considered Harmful (Some Practices Shouldn't be Codified)", RFC + 1627, July 1994. + + [26] Feller, W., An Introduction to Probability Theory and its + Applications, vol. 1. New York, New York: John Wiley and Sons, + third ed., 1968. + + [27] Kent, S. and R. Atkinson, "Security Architecture for the + Internet Protocol", RFC 2401, November 1998. + + [28] Baugher, M., Blom, R., Carrara, E., McGrew, D., Naslund, M., + Norrman, K. and D. Oran, "Secure Real-time Transport Protocol", + Work in Progress, April 2003. + + [29] Balenson, D., "Privacy Enhancement for Internet Electronic Mail: + Part III", RFC 1423, February 1993. + + [30] Voydock, V. and S. Kent, "Security Mechanisms in High-Level + Network Protocols", ACM Computing Surveys, vol. 15, pp. 135-171, + June 1983. + + [31] Floyd, S., "Congestion Control Principles", BCP 41, RFC 2914, + September 2000. + + [32] Rivest, R., "The MD5 Message-Digest Algorithm", RFC 1321, April + 1992. + + [33] Stubblebine, S., "Security Services for Multimedia + Conferencing", in 16th National Computer Security Conference, + (Baltimore, Maryland), pp. 391--395, September 1993. + + [34] Floyd, S. and V. Jacobson, "The Synchronization of Periodic + Routing Messages", IEEE/ACM Transactions on Networking, vol. 2, + pp. 122--136, April 1994. + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 102] + +RFC 3550 RTP July 2003 + + +Authors' Addresses + + Henning Schulzrinne + Department of Computer Science + Columbia University + 1214 Amsterdam Avenue + New York, NY 10027 + United States + + EMail: schulzrinne@cs.columbia.edu + + + Stephen L. Casner + Packet Design + 3400 Hillview Avenue, Building 3 + Palo Alto, CA 94304 + United States + + EMail: casner@acm.org + + + Ron Frederick + Blue Coat Systems Inc. + 650 Almanor Avenue + Sunnyvale, CA 94085 + United States + + EMail: ronf@bluecoat.com + + + Van Jacobson + Packet Design + 3400 Hillview Avenue, Building 3 + Palo Alto, CA 94304 + United States + + EMail: van@packetdesign.com + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 103] + +RFC 3550 RTP July 2003 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2003). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Schulzrinne, et al. Standards Track [Page 104] + diff --git a/src/modules/rtp/rfc3551.txt b/src/modules/rtp/rfc3551.txt new file mode 100644 index 00000000..c43ff34d --- /dev/null +++ b/src/modules/rtp/rfc3551.txt @@ -0,0 +1,2467 @@ + + + + + + +Network Working Group H. Schulzrinne +Request for Comments: 3551 Columbia University +Obsoletes: 1890 S. Casner +Category: Standards Track Packet Design + July 2003 + + + RTP Profile for Audio and Video Conferences + with Minimal Control + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2003). All Rights Reserved. + +Abstract + + This document describes a profile called "RTP/AVP" for the use of the + real-time transport protocol (RTP), version 2, and the associated + control protocol, RTCP, within audio and video multiparticipant + conferences with minimal control. It provides interpretations of + generic fields within the RTP specification suitable for audio and + video conferences. In particular, this document defines a set of + default mappings from payload type numbers to encodings. + + This document also describes how audio and video data may be carried + within RTP. It defines a set of standard encodings and their names + when used within RTP. The descriptions provide pointers to reference + implementations and the detailed standards. This document is meant + as an aid for implementors of audio, video and other real-time + multimedia applications. + + This memorandum obsoletes RFC 1890. It is mostly backwards- + compatible except for functions removed because two interoperable + implementations were not found. The additions to RFC 1890 codify + existing practice in the use of payload formats under this profile + and include new payload formats defined since RFC 1890 was published. + + + + + + + +Schulzrinne & Casner Standards Track [Page 1] + +RFC 3551 RTP A/V Profile July 2003 + + +Table of Contents + + 1. Introduction ................................................. 3 + 1.1 Terminology ............................................. 3 + 2. RTP and RTCP Packet Forms and Protocol Behavior .............. 4 + 3. Registering Additional Encodings ............................. 6 + 4. Audio ........................................................ 8 + 4.1 Encoding-Independent Rules .............................. 8 + 4.2 Operating Recommendations ............................... 9 + 4.3 Guidelines for Sample-Based Audio Encodings ............. 10 + 4.4 Guidelines for Frame-Based Audio Encodings .............. 11 + 4.5 Audio Encodings ......................................... 12 + 4.5.1 DVI4 ............................................ 13 + 4.5.2 G722 ............................................ 14 + 4.5.3 G723 ............................................ 14 + 4.5.4 G726-40, G726-32, G726-24, and G726-16 .......... 18 + 4.5.5 G728 ............................................ 19 + 4.5.6 G729 ............................................ 20 + 4.5.7 G729D and G729E ................................. 22 + 4.5.8 GSM ............................................. 24 + 4.5.9 GSM-EFR ......................................... 27 + 4.5.10 L8 .............................................. 27 + 4.5.11 L16 ............................................. 27 + 4.5.12 LPC ............................................. 27 + 4.5.13 MPA ............................................. 28 + 4.5.14 PCMA and PCMU ................................... 28 + 4.5.15 QCELP ........................................... 28 + 4.5.16 RED ............................................. 29 + 4.5.17 VDVI ............................................ 29 + 5. Video ........................................................ 30 + 5.1 CelB .................................................... 30 + 5.2 JPEG .................................................... 30 + 5.3 H261 .................................................... 30 + 5.4 H263 .................................................... 31 + 5.5 H263-1998 ............................................... 31 + 5.6 MPV ..................................................... 31 + 5.7 MP2T .................................................... 31 + 5.8 nv ...................................................... 32 + 6. Payload Type Definitions ..................................... 32 + 7. RTP over TCP and Similar Byte Stream Protocols ............... 34 + 8. Port Assignment .............................................. 34 + 9. Changes from RFC 1890 ........................................ 35 + 10. Security Considerations ...................................... 38 + 11. IANA Considerations .......................................... 39 + 12. References ................................................... 39 + 12.1 Normative References .................................... 39 + 12.2 Informative References .................................. 39 + 13. Current Locations of Related Resources ....................... 41 + + + +Schulzrinne & Casner Standards Track [Page 2] + +RFC 3551 RTP A/V Profile July 2003 + + + 14. Acknowledgments .............................................. 42 + 15. Intellectual Property Rights Statement ....................... 43 + 16. Authors' Addresses ........................................... 43 + 17. Full Copyright Statement ..................................... 44 + +1. Introduction + + This profile defines aspects of RTP left unspecified in the RTP + Version 2 protocol definition (RFC 3550) [1]. This profile is + intended for the use within audio and video conferences with minimal + session control. In particular, no support for the negotiation of + parameters or membership control is provided. The profile is + expected to be useful in sessions where no negotiation or membership + control are used (e.g., using the static payload types and the + membership indications provided by RTCP), but this profile may also + be useful in conjunction with a higher-level control protocol. + + Use of this profile may be implicit in the use of the appropriate + applications; there may be no explicit indication by port number, + protocol identifier or the like. Applications such as session + directories may use the name for this profile specified in Section + 11. + + Other profiles may make different choices for the items specified + here. + + This document also defines a set of encodings and payload formats for + audio and video. These payload format descriptions are included here + only as a matter of convenience since they are too small to warrant + separate documents. Use of these payload formats is NOT REQUIRED to + use this profile. Only the binding of some of the payload formats to + static payload type numbers in Tables 4 and 5 is normative. + +1.1 Terminology + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [2] and + indicate requirement levels for implementations compliant with this + RTP profile. + + This document defines the term media type as dividing encodings of + audio and video content into three classes: audio, video and + audio/video (interleaved). + + + + + + + +Schulzrinne & Casner Standards Track [Page 3] + +RFC 3551 RTP A/V Profile July 2003 + + +2. RTP and RTCP Packet Forms and Protocol Behavior + + The section "RTP Profiles and Payload Format Specifications" of RFC + 3550 enumerates a number of items that can be specified or modified + in a profile. This section addresses these items. Generally, this + profile follows the default and/or recommended aspects of the RTP + specification. + + RTP data header: The standard format of the fixed RTP data + header is used (one marker bit). + + Payload types: Static payload types are defined in Section 6. + + RTP data header additions: No additional fixed fields are + appended to the RTP data header. + + RTP data header extensions: No RTP header extensions are + defined, but applications operating under this profile MAY use + such extensions. Thus, applications SHOULD NOT assume that the + RTP header X bit is always zero and SHOULD be prepared to ignore + the header extension. If a header extension is defined in the + future, that definition MUST specify the contents of the first 16 + bits in such a way that multiple different extensions can be + identified. + + RTCP packet types: No additional RTCP packet types are defined + by this profile specification. + + RTCP report interval: The suggested constants are to be used for + the RTCP report interval calculation. Sessions operating under + this profile MAY specify a separate parameter for the RTCP traffic + bandwidth rather than using the default fraction of the session + bandwidth. The RTCP traffic bandwidth MAY be divided into two + separate session parameters for those participants which are + active data senders and those which are not. Following the + recommendation in the RTP specification [1] that 1/4 of the RTCP + bandwidth be dedicated to data senders, the RECOMMENDED default + values for these two parameters would be 1.25% and 3.75%, + respectively. For a particular session, the RTCP bandwidth for + non-data-senders MAY be set to zero when operating on + unidirectional links or for sessions that don't require feedback + on the quality of reception. The RTCP bandwidth for data senders + SHOULD be kept non-zero so that sender reports can still be sent + for inter-media synchronization and to identify the source by + CNAME. The means by which the one or two session parameters for + RTCP bandwidth are specified is beyond the scope of this memo. + + + + + +Schulzrinne & Casner Standards Track [Page 4] + +RFC 3551 RTP A/V Profile July 2003 + + + SR/RR extension: No extension section is defined for the RTCP SR + or RR packet. + + SDES use: Applications MAY use any of the SDES items described + in the RTP specification. While CNAME information MUST be sent + every reporting interval, other items SHOULD only be sent every + third reporting interval, with NAME sent seven out of eight times + within that slot and the remaining SDES items cyclically taking up + the eighth slot, as defined in Section 6.2.2 of the RTP + specification. In other words, NAME is sent in RTCP packets 1, 4, + 7, 10, 13, 16, 19, while, say, EMAIL is used in RTCP packet 22. + + Security: The RTP default security services are also the default + under this profile. + + String-to-key mapping: No mapping is specified by this profile. + + Congestion: RTP and this profile may be used in the context of + enhanced network service, for example, through Integrated Services + (RFC 1633) [4] or Differentiated Services (RFC 2475) [5], or they + may be used with best effort service. + + If enhanced service is being used, RTP receivers SHOULD monitor + packet loss to ensure that the service that was requested is + actually being delivered. If it is not, then they SHOULD assume + that they are receiving best-effort service and behave + accordingly. + + If best-effort service is being used, RTP receivers SHOULD monitor + packet loss to ensure that the packet loss rate is within + acceptable parameters. Packet loss is considered acceptable if a + TCP flow across the same network path and experiencing the same + network conditions would achieve an average throughput, measured + on a reasonable timescale, that is not less than the RTP flow is + achieving. This condition can be satisfied by implementing + congestion control mechanisms to adapt the transmission rate (or + the number of layers subscribed for a layered multicast session), + or by arranging for a receiver to leave the session if the loss + rate is unacceptably high. + + The comparison to TCP cannot be specified exactly, but is intended + as an "order-of-magnitude" comparison in timescale and throughput. + The timescale on which TCP throughput is measured is the round- + trip time of the connection. In essence, this requirement states + that it is not acceptable to deploy an application (using RTP or + any other transport protocol) on the best-effort Internet which + consumes bandwidth arbitrarily and does not compete fairly with + TCP within an order of magnitude. + + + +Schulzrinne & Casner Standards Track [Page 5] + +RFC 3551 RTP A/V Profile July 2003 + + + Underlying protocol: The profile specifies the use of RTP over + unicast and multicast UDP as well as TCP. (This does not preclude + the use of these definitions when RTP is carried by other lower- + layer protocols.) + + Transport mapping: The standard mapping of RTP and RTCP to + transport-level addresses is used. + + Encapsulation: This profile leaves to applications the + specification of RTP encapsulation in protocols other than UDP. + +3. Registering Additional Encodings + + This profile lists a set of encodings, each of which is comprised of + a particular media data compression or representation plus a payload + format for encapsulation within RTP. Some of those payload formats + are specified here, while others are specified in separate RFCs. It + is expected that additional encodings beyond the set listed here will + be created in the future and specified in additional payload format + RFCs. + + This profile also assigns to each encoding a short name which MAY be + used by higher-level control protocols, such as the Session + Description Protocol (SDP), RFC 2327 [6], to identify encodings + selected for a particular RTP session. + + In some contexts it may be useful to refer to these encodings in the + form of a MIME content-type. To facilitate this, RFC 3555 [7] + provides registrations for all of the encodings names listed here as + MIME subtype names under the "audio" and "video" MIME types through + the MIME registration procedure as specified in RFC 2048 [8]. + + Any additional encodings specified for use under this profile (or + others) may also be assigned names registered as MIME subtypes with + the Internet Assigned Numbers Authority (IANA). This registry + provides a means to insure that the names assigned to the additional + encodings are kept unique. RFC 3555 specifies the information that + is required for the registration of RTP encodings. + + In addition to assigning names to encodings, this profile also + assigns static RTP payload type numbers to some of them. However, + the payload type number space is relatively small and cannot + accommodate assignments for all existing and future encodings. + During the early stages of RTP development, it was necessary to use + statically assigned payload types because no other mechanism had been + specified to bind encodings to payload types. It was anticipated + that non-RTP means beyond the scope of this memo (such as directory + services or invitation protocols) would be specified to establish a + + + +Schulzrinne & Casner Standards Track [Page 6] + +RFC 3551 RTP A/V Profile July 2003 + + + dynamic mapping between a payload type and an encoding. Now, + mechanisms for defining dynamic payload type bindings have been + specified in the Session Description Protocol (SDP) and in other + protocols such as ITU-T Recommendation H.323/H.245. These mechanisms + associate the registered name of the encoding/payload format, along + with any additional required parameters, such as the RTP timestamp + clock rate and number of channels, with a payload type number. This + association is effective only for the duration of the RTP session in + which the dynamic payload type binding is made. This association + applies only to the RTP session for which it is made, thus the + numbers can be re-used for different encodings in different sessions + so the number space limitation is avoided. + + This profile reserves payload type numbers in the range 96-127 + exclusively for dynamic assignment. Applications SHOULD first use + values in this range for dynamic payload types. Those applications + which need to define more than 32 dynamic payload types MAY bind + codes below 96, in which case it is RECOMMENDED that unassigned + payload type numbers be used first. However, the statically assigned + payload types are default bindings and MAY be dynamically bound to + new encodings if needed. Redefining payload types below 96 may cause + incorrect operation if an attempt is made to join a session without + obtaining session description information that defines the dynamic + payload types. + + Dynamic payload types SHOULD NOT be used without a well-defined + mechanism to indicate the mapping. Systems that expect to + interoperate with others operating under this profile SHOULD NOT make + their own assignments of proprietary encodings to particular, fixed + payload types. + + This specification establishes the policy that no additional static + payload types will be assigned beyond the ones defined in this + document. Establishing this policy avoids the problem of trying to + create a set of criteria for accepting static assignments and + encourages the implementation and deployment of the dynamic payload + type mechanisms. + + The final set of static payload type assignments is provided in + Tables 4 and 5. + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 7] + +RFC 3551 RTP A/V Profile July 2003 + + +4. Audio + +4.1 Encoding-Independent Rules + + Since the ability to suppress silence is one of the primary + motivations for using packets to transmit voice, the RTP header + carries both a sequence number and a timestamp to allow a receiver to + distinguish between lost packets and periods of time when no data was + transmitted. Discontiguous transmission (silence suppression) MAY be + used with any audio payload format. Receivers MUST assume that + senders may suppress silence unless this is restricted by signaling + specified elsewhere. (Even if the transmitter does not suppress + silence, the receiver should be prepared to handle periods when no + data is present since packets may be lost.) + + Some payload formats (see Sections 4.5.3 and 4.5.6) define a "silence + insertion descriptor" or "comfort noise" frame to specify parameters + for artificial noise that may be generated during a period of silence + to approximate the background noise at the source. For other payload + formats, a generic Comfort Noise (CN) payload format is specified in + RFC 3389 [9]. When the CN payload format is used with another + payload format, different values in the RTP payload type field + distinguish comfort-noise packets from those of the selected payload + format. + + For applications which send either no packets or occasional comfort- + noise packets during silence, the first packet of a talkspurt, that + is, the first packet after a silence period during which packets have + not been transmitted contiguously, SHOULD be distinguished by setting + the marker bit in the RTP data header to one. The marker bit in all + other packets is zero. The beginning of a talkspurt MAY be used to + adjust the playout delay to reflect changing network delays. + Applications without silence suppression MUST set the marker bit to + zero. + + The RTP clock rate used for generating the RTP timestamp is + independent of the number of channels and the encoding; it usually + equals the number of sampling periods per second. For N-channel + encodings, each sampling period (say, 1/8,000 of a second) generates + N samples. (This terminology is standard, but somewhat confusing, as + the total number of samples generated per second is then the sampling + rate times the channel count.) + + If multiple audio channels are used, channels are numbered left-to- + right, starting at one. In RTP audio packets, information from + lower-numbered channels precedes that from higher-numbered channels. + + + + + +Schulzrinne & Casner Standards Track [Page 8] + +RFC 3551 RTP A/V Profile July 2003 + + + For more than two channels, the convention followed by the AIFF-C + audio interchange format SHOULD be followed [3], using the following + notation, unless some other convention is specified for a particular + encoding or payload format: + + l left + r right + c center + S surround + F front + R rear + + channels description channel + 1 2 3 4 5 6 + _________________________________________________ + 2 stereo l r + 3 l r c + 4 l c r S + 5 Fl Fr Fc Sl Sr + 6 l lc c r rc S + + Note: RFC 1890 defined two conventions for the ordering of four + audio channels. Since the ordering is indicated implicitly by + the number of channels, this was ambiguous. In this revision, + the order described as "quadrophonic" has been eliminated to + remove the ambiguity. This choice was based on the observation + that quadrophonic consumer audio format did not become popular + whereas surround-sound subsequently has. + + Samples for all channels belonging to a single sampling instant MUST + be within the same packet. The interleaving of samples from + different channels depends on the encoding. General guidelines are + given in Section 4.3 and 4.4. + + The sampling frequency SHOULD be drawn from the set: 8,000, 11,025, + 16,000, 22,050, 24,000, 32,000, 44,100 and 48,000 Hz. (Older Apple + Macintosh computers had a native sample rate of 22,254.54 Hz, which + can be converted to 22,050 with acceptable quality by dropping 4 + samples in a 20 ms frame.) However, most audio encodings are defined + for a more restricted set of sampling frequencies. Receivers SHOULD + be prepared to accept multi-channel audio, but MAY choose to only + play a single channel. + +4.2 Operating Recommendations + + The following recommendations are default operating parameters. + Applications SHOULD be prepared to handle other values. The ranges + given are meant to give guidance to application writers, allowing a + + + +Schulzrinne & Casner Standards Track [Page 9] + +RFC 3551 RTP A/V Profile July 2003 + + + set of applications conforming to these guidelines to interoperate + without additional negotiation. These guidelines are not intended to + restrict operating parameters for applications that can negotiate a + set of interoperable parameters, e.g., through a conference control + protocol. + + For packetized audio, the default packetization interval SHOULD have + a duration of 20 ms or one frame, whichever is longer, unless + otherwise noted in Table 1 (column "ms/packet"). The packetization + interval determines the minimum end-to-end delay; longer packets + introduce less header overhead but higher delay and make packet loss + more noticeable. For non-interactive applications such as lectures + or for links with severe bandwidth constraints, a higher + packetization delay MAY be used. A receiver SHOULD accept packets + representing between 0 and 200 ms of audio data. (For framed audio + encodings, a receiver SHOULD accept packets with a number of frames + equal to 200 ms divided by the frame duration, rounded up.) This + restriction allows reasonable buffer sizing for the receiver. + +4.3 Guidelines for Sample-Based Audio Encodings + + In sample-based encodings, each audio sample is represented by a + fixed number of bits. Within the compressed audio data, codes for + individual samples may span octet boundaries. An RTP audio packet + may contain any number of audio samples, subject to the constraint + that the number of bits per sample times the number of samples per + packet yields an integral octet count. Fractional encodings produce + less than one octet per sample. + + The duration of an audio packet is determined by the number of + samples in the packet. + + For sample-based encodings producing one or more octets per sample, + samples from different channels sampled at the same sampling instant + SHOULD be packed in consecutive octets. For example, for a two- + channel encoding, the octet sequence is (left channel, first sample), + (right channel, first sample), (left channel, second sample), (right + channel, second sample), .... For multi-octet encodings, octets + SHOULD be transmitted in network byte order (i.e., most significant + octet first). + + The packing of sample-based encodings producing less than one octet + per sample is encoding-specific. + + The RTP timestamp reflects the instant at which the first sample in + the packet was sampled, that is, the oldest information in the + packet. + + + + +Schulzrinne & Casner Standards Track [Page 10] + +RFC 3551 RTP A/V Profile July 2003 + + +4.4 Guidelines for Frame-Based Audio Encodings + + Frame-based encodings encode a fixed-length block of audio into + another block of compressed data, typically also of fixed length. + For frame-based encodings, the sender MAY choose to combine several + such frames into a single RTP packet. The receiver can tell the + number of frames contained in an RTP packet, if all the frames have + the same length, by dividing the RTP payload length by the audio + frame size which is defined as part of the encoding. This does not + work when carrying frames of different sizes unless the frame sizes + are relatively prime. If not, the frames MUST indicate their size. + + For frame-based codecs, the channel order is defined for the whole + block. That is, for two-channel audio, right and left samples SHOULD + be coded independently, with the encoded frame for the left channel + preceding that for the right channel. + + All frame-oriented audio codecs SHOULD be able to encode and decode + several consecutive frames within a single packet. Since the frame + size for the frame-oriented codecs is given, there is no need to use + a separate designation for the same encoding, but with different + number of frames per packet. + + RTP packets SHALL contain a whole number of frames, with frames + inserted according to age within a packet, so that the oldest frame + (to be played first) occurs immediately after the RTP packet header. + The RTP timestamp reflects the instant at which the first sample in + the first frame was sampled, that is, the oldest information in the + packet. + + + + + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 11] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5 Audio Encodings + + name of sampling default + encoding sample/frame bits/sample rate ms/frame ms/packet + __________________________________________________________________ + DVI4 sample 4 var. 20 + G722 sample 8 16,000 20 + G723 frame N/A 8,000 30 30 + G726-40 sample 5 8,000 20 + G726-32 sample 4 8,000 20 + G726-24 sample 3 8,000 20 + G726-16 sample 2 8,000 20 + G728 frame N/A 8,000 2.5 20 + G729 frame N/A 8,000 10 20 + G729D frame N/A 8,000 10 20 + G729E frame N/A 8,000 10 20 + GSM frame N/A 8,000 20 20 + GSM-EFR frame N/A 8,000 20 20 + L8 sample 8 var. 20 + L16 sample 16 var. 20 + LPC frame N/A 8,000 20 20 + MPA frame N/A var. var. + PCMA sample 8 var. 20 + PCMU sample 8 var. 20 + QCELP frame N/A 8,000 20 20 + VDVI sample var. var. 20 + + Table 1: Properties of Audio Encodings (N/A: not applicable; var.: + variable) + + The characteristics of the audio encodings described in this document + are shown in Table 1; they are listed in order of their payload type + in Table 4. While most audio codecs are only specified for a fixed + sampling rate, some sample-based algorithms (indicated by an entry of + "var." in the sampling rate column of Table 1) may be used with + different sampling rates, resulting in different coded bit rates. + When used with a sampling rate other than that for which a static + payload type is defined, non-RTP means beyond the scope of this memo + MUST be used to define a dynamic payload type and MUST indicate the + selected RTP timestamp clock rate, which is usually the same as the + sampling rate for audio. + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 12] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.1 DVI4 + + DVI4 uses an adaptive delta pulse code modulation (ADPCM) encoding + scheme that was specified by the Interactive Multimedia Association + (IMA) as the "IMA ADPCM wave type". However, the encoding defined + here as DVI4 differs in three respects from the IMA specification: + + o The RTP DVI4 header contains the predicted value rather than the + first sample value contained the IMA ADPCM block header. + + o IMA ADPCM blocks contain an odd number of samples, since the first + sample of a block is contained just in the header (uncompressed), + followed by an even number of compressed samples. DVI4 has an + even number of compressed samples only, using the `predict' word + from the header to decode the first sample. + + o For DVI4, the 4-bit samples are packed with the first sample in + the four most significant bits and the second sample in the four + least significant bits. In the IMA ADPCM codec, the samples are + packed in the opposite order. + + Each packet contains a single DVI block. This profile only defines + the 4-bit-per-sample version, while IMA also specified a 3-bit-per- + sample encoding. + + The "header" word for each channel has the following structure: + + int16 predict; /* predicted value of first sample + from the previous block (L16 format) */ + u_int8 index; /* current index into stepsize table */ + u_int8 reserved; /* set to zero by sender, ignored by receiver */ + + Each octet following the header contains two 4-bit samples, thus the + number of samples per packet MUST be even because there is no means + to indicate a partially filled last octet. + + Packing of samples for multiple channels is for further study. + + The IMA ADPCM algorithm was described in the document IMA Recommended + Practices for Enhancing Digital Audio Compatibility in Multimedia + Systems (version 3.0). However, the Interactive Multimedia + Association ceased operations in 1997. Resources for an archived + copy of that document and a software implementation of the RTP DVI4 + encoding are listed in Section 13. + + + + + + + +Schulzrinne & Casner Standards Track [Page 13] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.2 G722 + + G722 is specified in ITU-T Recommendation G.722, "7 kHz audio-coding + within 64 kbit/s". The G.722 encoder produces a stream of octets, + each of which SHALL be octet-aligned in an RTP packet. The first bit + transmitted in the G.722 octet, which is the most significant bit of + the higher sub-band sample, SHALL correspond to the most significant + bit of the octet in the RTP packet. + + Even though the actual sampling rate for G.722 audio is 16,000 Hz, + the RTP clock rate for the G722 payload format is 8,000 Hz because + that value was erroneously assigned in RFC 1890 and must remain + unchanged for backward compatibility. The octet rate or sample-pair + rate is 8,000 Hz. + +4.5.3 G723 + + G723 is specified in ITU Recommendation G.723.1, "Dual-rate speech + coder for multimedia communications transmitting at 5.3 and 6.3 + kbit/s". The G.723.1 5.3/6.3 kbit/s codec was defined by the ITU-T + as a mandatory codec for ITU-T H.324 GSTN videophone terminal + applications. The algorithm has a floating point specification in + Annex B to G.723.1, a silence compression algorithm in Annex A to + G.723.1 and a scalable channel coding scheme for wireless + applications in G.723.1 Annex C. + + This Recommendation specifies a coded representation that can be used + for compressing the speech signal component of multi-media services + at a very low bit rate. Audio is encoded in 30 ms frames, with an + additional delay of 7.5 ms due to look-ahead. A G.723.1 frame can be + one of three sizes: 24 octets (6.3 kb/s frame), 20 octets (5.3 kb/s + frame), or 4 octets. These 4-octet frames are called SID frames + (Silence Insertion Descriptor) and are used to specify comfort noise + parameters. There is no restriction on how 4, 20, and 24 octet + frames are intermixed. The least significant two bits of the first + octet in the frame determine the frame size and codec type: + + bits content octets/frame + 00 high-rate speech (6.3 kb/s) 24 + 01 low-rate speech (5.3 kb/s) 20 + 10 SID frame 4 + 11 reserved + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 14] + +RFC 3551 RTP A/V Profile July 2003 + + + It is possible to switch between the two rates at any 30 ms frame + boundary. Both (5.3 kb/s and 6.3 kb/s) rates are a mandatory part of + the encoder and decoder. Receivers MUST accept both data rates and + MUST accept SID frames unless restriction of these capabilities has + been signaled. The MIME registration for G723 in RFC 3555 [7] + specifies parameters that MAY be used with MIME or SDP to restrict to + a single data rate or to restrict the use of SID frames. This coder + was optimized to represent speech with near-toll quality at the above + rates using a limited amount of complexity. + + The packing of the encoded bit stream into octets and the + transmission order of the octets is specified in Rec. G.723.1 and is + the same as that produced by the G.723 C code reference + implementation. For the 6.3 kb/s data rate, this packing is + illustrated as follows, where the header (HDR) bits are always "0 0" + as shown in Fig. 1 to indicate operation at 6.3 kb/s, and the Z bit + is always set to zero. The diagrams show the bit packing in "network + byte order", also known as big-endian order. The bits of each 32-bit + word are numbered 0 to 31, with the most significant bit on the left + and numbered 0. The octets (bytes) of each word are transmitted most + significant octet first. The bits of each data field are numbered in + the order of the bit stream representation of the encoding (least + significant bit first). The vertical bars indicate the boundaries + between field fragments. + + + + + + + + + + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 15] + +RFC 3551 RTP A/V Profile July 2003 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | LPC |HDR| LPC | LPC | ACL0 |LPC| + | | | | | | | + |0 0 0 0 0 0|0 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| + |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ACL2 |ACL|A| GAIN0 |ACL|ACL| GAIN0 | GAIN1 | + | | 1 |C| | 3 | 2 | | | + |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| + |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | GAIN2 | GAIN1 | GAIN2 | GAIN3 | GRID | GAIN3 | + | | | | | | | + |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0| + |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | MSBPOS |Z|POS| MSBPOS | POS0 |POS| POS0 | + | | | 0 | | | 1 | | + |0 0 0 0 0 0 0|0|0 0|1 1 1 0 0 0|0 0 0 0 0 0 0 0|0 0|1 1 1 1 1 1| + |6 5 4 3 2 1 0| |1 0|2 1 0 9 8 7|9 8 7 6 5 4 3 2|1 0|5 4 3 2 1 0| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | POS1 | POS2 | POS1 | POS2 | POS3 | POS2 | + | | | | | | | + |0 0 0 0 0 0 0 0|0 0 0 0|1 1 1 1|1 1 0 0 0 0 0 0|0 0 0 0|1 1 1 1| + |9 8 7 6 5 4 3 2|3 2 1 0|3 2 1 0|1 0 9 8 7 6 5 4|3 2 1 0|5 4 3 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | POS3 | PSIG0 |POS|PSIG2| PSIG1 | PSIG3 |PSIG2| + | | | 3 | | | | | + |1 1 0 0 0 0 0 0|0 0 0 0 0 0|1 1|0 0 0|0 0 0 0 0|0 0 0 0 0|0 0 0| + |1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2|2 1 0|4 3 2 1 0|4 3 2 1 0|5 4 3| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 1: G.723 (6.3 kb/s) bit packing + + For the 5.3 kb/s data rate, the header (HDR) bits are always "0 1", + as shown in Fig. 2, to indicate operation at 5.3 kb/s. + + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 16] + +RFC 3551 RTP A/V Profile July 2003 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | LPC |HDR| LPC | LPC | ACL0 |LPC| + | | | | | | | + |0 0 0 0 0 0|0 1|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| + |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ACL2 |ACL|A| GAIN0 |ACL|ACL| GAIN0 | GAIN1 | + | | 1 |C| | 3 | 2 | | | + |0 0 0 0 0|0 0|0|0 0 0 0|0 0|0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| + |4 3 2 1 0|1 0|6|3 2 1 0|1 0|6 5|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | GAIN2 | GAIN1 | GAIN2 | GAIN3 | GRID | GAIN3 | + | | | | | | | + |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0| + |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0|4 3 2 1|1 0 9 8| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | POS0 | POS1 | POS0 | POS1 | POS2 | + | | | | | | + |0 0 0 0 0 0 0 0|0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0 0 0 0 0| + |7 6 5 4 3 2 1 0|3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|7 6 5 4 3 2 1 0| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | POS3 | POS2 | POS3 | PSIG1 | PSIG0 | PSIG3 | PSIG2 | + | | | | | | | | + |0 0 0 0|1 1 0 0|1 1 0 0 0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0|0 0 0 0| + |3 2 1 0|1 0 9 8|1 0 9 8 7 6 5 4|3 2 1 0|3 2 1 0|3 2 1 0|3 2 1 0| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 2: G.723 (5.3 kb/s) bit packing + + The packing of G.723.1 SID (silence) frames, which are indicated by + the header (HDR) bits having the pattern "1 0", is depicted in Fig. + 3. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | LPC |HDR| LPC | LPC | GAIN |LPC| + | | | | | | | + |0 0 0 0 0 0|1 0|1 1 1 1 0 0 0 0|2 2 1 1 1 1 1 1|0 0 0 0 0 0|2 2| + |5 4 3 2 1 0| |3 2 1 0 9 8 7 6|1 0 9 8 7 6 5 4|5 4 3 2 1 0|3 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 3: G.723 SID mode bit packing + + + + + + +Schulzrinne & Casner Standards Track [Page 17] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.4 G726-40, G726-32, G726-24, and G726-16 + + ITU-T Recommendation G.726 describes, among others, the algorithm + recommended for conversion of a single 64 kbit/s A-law or mu-law PCM + channel encoded at 8,000 samples/sec to and from a 40, 32, 24, or 16 + kbit/s channel. The conversion is applied to the PCM stream using an + Adaptive Differential Pulse Code Modulation (ADPCM) transcoding + technique. The ADPCM representation consists of a series of + codewords with a one-to-one correspondence to the samples in the PCM + stream. The G726 data rates of 40, 32, 24, and 16 kbit/s have + codewords of 5, 4, 3, and 2 bits, respectively. + + The 16 and 24 kbit/s encodings do not provide toll quality speech. + They are designed for used in overloaded Digital Circuit + Multiplication Equipment (DCME). ITU-T G.726 recommends that the 16 + and 24 kbit/s encodings should be alternated with higher data rate + encodings to provide an average sample size of between 3.5 and 3.7 + bits per sample. + + The encodings of G.726 are here denoted as G726-40, G726-32, G726-24, + and G726-16. Prior to 1990, G721 described the 32 kbit/s ADPCM + encoding, and G723 described the 40, 32, and 16 kbit/s encodings. + Thus, G726-32 designates the same algorithm as G721 in RFC 1890. + + A stream of G726 codewords contains no information on the encoding + being used, therefore transitions between G726 encoding types are not + permitted within a sequence of packed codewords. Applications MUST + determine the encoding type of packed codewords from the RTP payload + identifier. + + No payload-specific header information SHALL be included as part of + the audio data. A stream of G726 codewords MUST be packed into + octets as follows: the first codeword is placed into the first octet + such that the least significant bit of the codeword aligns with the + least significant bit in the octet, the second codeword is then + packed so that its least significant bit coincides with the least + significant unoccupied bit in the octet. When a complete codeword + cannot be placed into an octet, the bits overlapping the octet + boundary are placed into the least significant bits of the next + octet. Packing MUST end with a completely packed final octet. The + number of codewords packed will therefore be a multiple of 8, 2, 8, + and 4 for G726-40, G726-32, G726-24, and G726-16, respectively. An + example of the packing scheme for G726-32 codewords is as shown, + where bit 7 is the least significant bit of the first octet, and bit + A3 is the least significant bit of the first codeword: + + + + + + +Schulzrinne & Casner Standards Track [Page 18] + +RFC 3551 RTP A/V Profile July 2003 + + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + |B B B B|A A A A|D D D D|C C C C| ... + |0 1 2 3|0 1 2 3|0 1 2 3|0 1 2 3| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + + An example of the packing scheme for G726-24 codewords follows, where + again bit 7 is the least significant bit of the first octet, and bit + A2 is the least significant bit of the first codeword: + + 0 1 2 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + |C C|B B B|A A A|F|E E E|D D D|C|H H H|G G G|F F| ... + |1 2|0 1 2|0 1 2|2|0 1 2|0 1 2|0|0 1 2|0 1 2|0 1| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- + + Note that the "little-endian" direction in which samples are packed + into octets in the G726-16, -24, -32 and -40 payload formats + specified here is consistent with ITU-T Recommendation X.420, but is + the opposite of what is specified in ITU-T Recommendation I.366.2 + Annex E for ATM AAL2 transport. A second set of RTP payload formats + matching the packetization of I.366.2 Annex E and identified by MIME + subtypes AAL2-G726-16, -24, -32 and -40 will be specified in a + separate document. + +4.5.5 G728 + + G728 is specified in ITU-T Recommendation G.728, "Coding of speech at + 16 kbit/s using low-delay code excited linear prediction". + + A G.278 encoder translates 5 consecutive audio samples into a 10-bit + codebook index, resulting in a bit rate of 16 kb/s for audio sampled + at 8,000 samples per second. The group of five consecutive samples + is called a vector. Four consecutive vectors, labeled V1 to V4 + (where V1 is to be played first by the receiver), build one G.728 + frame. The four vectors of 40 bits are packed into 5 octets, labeled + B1 through B5. B1 SHALL be placed first in the RTP packet. + + Referring to the figure below, the principle for bit order is + "maintenance of bit significance". Bits from an older vector are + more significant than bits from newer vectors. The MSB of the frame + goes to the MSB of B1 and the LSB of the frame goes to LSB of B5. + + + + + + + +Schulzrinne & Casner Standards Track [Page 19] + +RFC 3551 RTP A/V Profile July 2003 + + + 1 2 3 3 + 0 0 0 0 9 + ++++++++++++++++++++++++++++++++++++++++ + <---V1---><---V2---><---V3---><---V4---> vectors + <--B1--><--B2--><--B3--><--B4--><--B5--> octets + <------------- frame 1 ----------------> + + In particular, B1 contains the eight most significant bits of V1, + with the MSB of V1 being the MSB of B1. B2 contains the two least + significant bits of V1, the more significant of the two in its MSB, + and the six most significant bits of V2. B1 SHALL be placed first in + the RTP packet and B5 last. + +4.5.6 G729 + + G729 is specified in ITU-T Recommendation G.729, "Coding of speech at + 8 kbit/s using conjugate structure-algebraic code excited linear + prediction (CS-ACELP)". A reduced-complexity version of the G.729 + algorithm is specified in Annex A to Rec. G.729. The speech coding + algorithms in the main body of G.729 and in G.729 Annex A are fully + interoperable with each other, so there is no need to further + distinguish between them. An implementation that signals or accepts + use of G729 payload format may implement either G.729 or G.729A + unless restricted by additional signaling specified elsewhere related + specifically to the encoding rather than the payload format. The + G.729 and G.729 Annex A codecs were optimized to represent speech + with high quality, where G.729 Annex A trades some speech quality for + an approximate 50% complexity reduction [10]. See the next Section + (4.5.7) for other data rates added in later G.729 Annexes. For all + data rates, the sampling frequency (and RTP timestamp clock rate) is + 8,000 Hz. + + A voice activity detector (VAD) and comfort noise generator (CNG) + algorithm in Annex B of G.729 is RECOMMENDED for digital simultaneous + voice and data applications and can be used in conjunction with G.729 + or G.729 Annex A. A G.729 or G.729 Annex A frame contains 10 octets, + while the G.729 Annex B comfort noise frame occupies 2 octets. + Receivers MUST accept comfort noise frames if restriction of their + use has not been signaled. The MIME registration for G729 in RFC + 3555 [7] specifies a parameter that MAY be used with MIME or SDP to + restrict the use of comfort noise frames. + + A G729 RTP packet may consist of zero or more G.729 or G.729 Annex A + frames, followed by zero or one G.729 Annex B frames. The presence + of a comfort noise frame can be deduced from the length of the RTP + payload. The default packetization interval is 20 ms (two frames), + but in some situations it may be desirable to send 10 ms packets. An + + + + +Schulzrinne & Casner Standards Track [Page 20] + +RFC 3551 RTP A/V Profile July 2003 + + + example would be a transition from speech to comfort noise in the + first 10 ms of the packet. For some applications, a longer + packetization interval may be required to reduce the packet rate. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |L| L1 | L2 | L3 | P1 |P| C1 | + |0| | | | |0| | + | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2 3 4| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | C1 | S1 | GA1 | GB1 | P2 | C2 | + | 1 1 1| | | | | | + |5 6 7 8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6 7| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | C2 | S2 | GA2 | GB2 | + | 1 1 1| | | | + |8 9 0 1 2|0 1 2 3|0 1 2|0 1 2 3| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 4: G.729 and G.729A bit packing + + The transmitted parameters of a G.729/G.729A 10-ms frame, consisting + of 80 bits, are defined in Recommendation G.729, Table 8/G.729. The + mapping of the these parameters is given below in Fig. 4. The + diagrams show the bit packing in "network byte order", also known as + big-endian order. The bits of each 32-bit word are numbered 0 to 31, + with the most significant bit on the left and numbered 0. The octets + (bytes) of each word are transmitted most significant octet first. + The bits of each data field are numbered in the order as produced by + the G.729 C code reference implementation. + + The packing of the G.729 Annex B comfort noise frame is shown in Fig. + 5. + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |L| LSF1 | LSF2 | GAIN |R| + |S| | | |E| + |F| | | |S| + |0|0 1 2 3 4|0 1 2 3|0 1 2 3 4|V| RESV = Reserved (zero) + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 5: G.729 Annex B bit packing + + + + + + +Schulzrinne & Casner Standards Track [Page 21] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.7 G729D and G729E + + Annexes D and E to ITU-T Recommendation G.729 provide additional data + rates. Because the data rate is not signaled in the bitstream, the + different data rates are given distinct RTP encoding names which are + mapped to distinct payload type numbers. G729D indicates a 6.4 + kbit/s coding mode (G.729 Annex D, for momentary reduction in channel + capacity), while G729E indicates an 11.8 kbit/s mode (G.729 Annex E, + for improved performance with a wide range of narrow-band input + signals, e.g., music and background noise). Annex E has two + operating modes, backward adaptive and forward adaptive, which are + signaled by the first two bits in each frame (the most significant + two bits of the first octet). + + The voice activity detector (VAD) and comfort noise generator (CNG) + algorithm specified in Annex B of G.729 may be used with Annex D and + Annex E frames in addition to G.729 and G.729 Annex A frames. The + algorithm details for the operation of Annexes D and E with the Annex + B CNG are specified in G.729 Annexes F and G. Note that Annexes F + and G do not introduce any new encodings. Receivers MUST accept + comfort noise frames if restriction of their use has not been + signaled. The MIME registrations for G729D and G729E in RFC 3555 [7] + specify a parameter that MAY be used with MIME or SDP to restrict the + use of comfort noise frames. + + For G729D, an RTP packet may consist of zero or more G.729 Annex D + frames, followed by zero or one G.729 Annex B frame. Similarly, for + G729E, an RTP packet may consist of zero or more G.729 Annex E + frames, followed by zero or one G.729 Annex B frame. The presence of + a comfort noise frame can be deduced from the length of the RTP + payload. + + A single RTP packet must contain frames of only one data rate, + optionally followed by one comfort noise frame. The data rate may be + changed from packet to packet by changing the payload type number. + G.729 Annexes D, E and H describe what the encoding and decoding + algorithms must do to accommodate a change in data rate. + + For G729D, the bits of a G.729 Annex D frame are formatted as shown + below in Fig. 6 (cf. Table D.1/G.729). The frame length is 64 bits. + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 22] + +RFC 3551 RTP A/V Profile July 2003 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |L| L1 | L2 | L3 | P1 | C1 | + |0| | | | | | + | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7|0 1 2 3 4 5| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | C1 |S1 | GA1 | GB1 | P2 | C2 |S2 | GA2 | GB2 | + | | | | | | | | | | + |6 7 8|0 1|0 1 2|0 1 2|0 1 2 3|0 1 2 3 4 5 6 7 8|0 1|0 1 2|0 1 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 6: G.729 Annex D bit packing + + The net bit rate for the G.729 Annex E algorithm is 11.8 kbit/s and a + total of 118 bits are used. Two bits are appended as "don't care" + bits to complete an integer number of octets for the frame. For + G729E, the bits of a data frame are formatted as shown in the next + two diagrams (cf. Table E.1/G.729). The fields for the G729E forward + adaptive mode are packed as shown in Fig. 7. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0 0|L| L1 | L2 | L3 | P1 |P| C0_1| + | |0| | | | |0| | + | | |0 1 2 3 4 5 6|0 1 2 3 4|0 1 2 3 4|0 1 2 3 4 5 6 7| |0 1 2| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | C1_1 | C2_1 | C3_1 | C4_1 | + | | | | | | + |3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | GA1 | GB1 | P2 | C0_2 | C1_2 | C2_2 | + | | | | | | | + |0 1 2|0 1 2 3|0 1 2 3 4|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | C3_2 | C4_2 | GA2 | GB2 |DC | + | | | | | | | + |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 7: G.729 Annex E (forward adaptive mode) bit packing + + The fields for the G729E backward adaptive mode are packed as shown + in Fig. 8. + + + + + + +Schulzrinne & Casner Standards Track [Page 23] + +RFC 3551 RTP A/V Profile July 2003 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |1 1| P1 |P| C0_1 | C1_1 | + | | |0| 1 1 1| | + | |0 1 2 3 4 5 6 7|0|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | C2_1 | C3_1 | C4_1 |GA1 | GB1 |P2 | + | | | | | | | | + |8 9|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | C0_2 | C1_2 | C2_2 | + | | 1 1 1| | | + |2 3 4|0 1 2 3 4 5 6 7 8 9 0 1 2|0 1 2 3 4 5 6 7 8 9|0 1 2 3 4 5| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | C3_2 | C4_2 | GA2 | GB2 |DC | + | | | | | | | + |6|0 1 2 3 4 5 6|0 1 2 3 4 5 6|0 1 2|0 1 2 3|0 1| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 8: G.729 Annex E (backward adaptive mode) bit packing + +4.5.8 GSM + + GSM (Group Speciale Mobile) denotes the European GSM 06.10 standard + for full-rate speech transcoding, ETS 300 961, which is based on + RPE/LTP (residual pulse excitation/long term prediction) coding at a + rate of 13 kb/s [11,12,13]. The text of the standard can be obtained + from: + + ETSI (European Telecommunications Standards Institute) + ETSI Secretariat: B.P.152 + F-06561 Valbonne Cedex + France + Phone: +33 92 94 42 00 + Fax: +33 93 65 47 16 + + Blocks of 160 audio samples are compressed into 33 octets, for an + effective data rate of 13,200 b/s. + +4.5.8.1 General Packaging Issues + + The GSM standard (ETS 300 961) specifies the bit stream produced by + the codec, but does not specify how these bits should be packed for + transmission. The packetization specified here has subsequently been + adopted in ETSI Technical Specification TS 101 318. Some software + implementations of the GSM codec use a different packing than that + specified here. + + + +Schulzrinne & Casner Standards Track [Page 24] + +RFC 3551 RTP A/V Profile July 2003 + + + field field name bits field field name bits + ________________________________________________ + 1 LARc[0] 6 39 xmc[22] 3 + 2 LARc[1] 6 40 xmc[23] 3 + 3 LARc[2] 5 41 xmc[24] 3 + 4 LARc[3] 5 42 xmc[25] 3 + 5 LARc[4] 4 43 Nc[2] 7 + 6 LARc[5] 4 44 bc[2] 2 + 7 LARc[6] 3 45 Mc[2] 2 + 8 LARc[7] 3 46 xmaxc[2] 6 + 9 Nc[0] 7 47 xmc[26] 3 + 10 bc[0] 2 48 xmc[27] 3 + 11 Mc[0] 2 49 xmc[28] 3 + 12 xmaxc[0] 6 50 xmc[29] 3 + 13 xmc[0] 3 51 xmc[30] 3 + 14 xmc[1] 3 52 xmc[31] 3 + 15 xmc[2] 3 53 xmc[32] 3 + 16 xmc[3] 3 54 xmc[33] 3 + 17 xmc[4] 3 55 xmc[34] 3 + 18 xmc[5] 3 56 xmc[35] 3 + 19 xmc[6] 3 57 xmc[36] 3 + 20 xmc[7] 3 58 xmc[37] 3 + 21 xmc[8] 3 59 xmc[38] 3 + 22 xmc[9] 3 60 Nc[3] 7 + 23 xmc[10] 3 61 bc[3] 2 + 24 xmc[11] 3 62 Mc[3] 2 + 25 xmc[12] 3 63 xmaxc[3] 6 + 26 Nc[1] 7 64 xmc[39] 3 + 27 bc[1] 2 65 xmc[40] 3 + 28 Mc[1] 2 66 xmc[41] 3 + 29 xmaxc[1] 6 67 xmc[42] 3 + 30 xmc[13] 3 68 xmc[43] 3 + 31 xmc[14] 3 69 xmc[44] 3 + 32 xmc[15] 3 70 xmc[45] 3 + 33 xmc[16] 3 71 xmc[46] 3 + 34 xmc[17] 3 72 xmc[47] 3 + 35 xmc[18] 3 73 xmc[48] 3 + 36 xmc[19] 3 74 xmc[49] 3 + 37 xmc[20] 3 75 xmc[50] 3 + 38 xmc[21] 3 76 xmc[51] 3 + + Table 2: Ordering of GSM variables + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 25] + +RFC 3551 RTP A/V Profile July 2003 + + + Octet Bit 0 Bit 1 Bit 2 Bit 3 Bit 4 Bit 5 Bit 6 Bit 7 + _____________________________________________________________________ + 0 1 1 0 1 LARc0.0 LARc0.1 LARc0.2 LARc0.3 + 1 LARc0.4 LARc0.5 LARc1.0 LARc1.1 LARc1.2 LARc1.3 LARc1.4 LARc1.5 + 2 LARc2.0 LARc2.1 LARc2.2 LARc2.3 LARc2.4 LARc3.0 LARc3.1 LARc3.2 + 3 LARc3.3 LARc3.4 LARc4.0 LARc4.1 LARc4.2 LARc4.3 LARc5.0 LARc5.1 + 4 LARc5.2 LARc5.3 LARc6.0 LARc6.1 LARc6.2 LARc7.0 LARc7.1 LARc7.2 + 5 Nc0.0 Nc0.1 Nc0.2 Nc0.3 Nc0.4 Nc0.5 Nc0.6 bc0.0 + 6 bc0.1 Mc0.0 Mc0.1 xmaxc00 xmaxc01 xmaxc02 xmaxc03 xmaxc04 + 7 xmaxc05 xmc0.0 xmc0.1 xmc0.2 xmc1.0 xmc1.1 xmc1.2 xmc2.0 + 8 xmc2.1 xmc2.2 xmc3.0 xmc3.1 xmc3.2 xmc4.0 xmc4.1 xmc4.2 + 9 xmc5.0 xmc5.1 xmc5.2 xmc6.0 xmc6.1 xmc6.2 xmc7.0 xmc7.1 + 10 xmc7.2 xmc8.0 xmc8.1 xmc8.2 xmc9.0 xmc9.1 xmc9.2 xmc10.0 + 11 xmc10.1 xmc10.2 xmc11.0 xmc11.1 xmc11.2 xmc12.0 xmc12.1 xcm12.2 + 12 Nc1.0 Nc1.1 Nc1.2 Nc1.3 Nc1.4 Nc1.5 Nc1.6 bc1.0 + 13 bc1.1 Mc1.0 Mc1.1 xmaxc10 xmaxc11 xmaxc12 xmaxc13 xmaxc14 + 14 xmax15 xmc13.0 xmc13.1 xmc13.2 xmc14.0 xmc14.1 xmc14.2 xmc15.0 + 15 xmc15.1 xmc15.2 xmc16.0 xmc16.1 xmc16.2 xmc17.0 xmc17.1 xmc17.2 + 16 xmc18.0 xmc18.1 xmc18.2 xmc19.0 xmc19.1 xmc19.2 xmc20.0 xmc20.1 + 17 xmc20.2 xmc21.0 xmc21.1 xmc21.2 xmc22.0 xmc22.1 xmc22.2 xmc23.0 + 18 xmc23.1 xmc23.2 xmc24.0 xmc24.1 xmc24.2 xmc25.0 xmc25.1 xmc25.2 + 19 Nc2.0 Nc2.1 Nc2.2 Nc2.3 Nc2.4 Nc2.5 Nc2.6 bc2.0 + 20 bc2.1 Mc2.0 Mc2.1 xmaxc20 xmaxc21 xmaxc22 xmaxc23 xmaxc24 + 21 xmaxc25 xmc26.0 xmc26.1 xmc26.2 xmc27.0 xmc27.1 xmc27.2 xmc28.0 + 22 xmc28.1 xmc28.2 xmc29.0 xmc29.1 xmc29.2 xmc30.0 xmc30.1 xmc30.2 + 23 xmc31.0 xmc31.1 xmc31.2 xmc32.0 xmc32.1 xmc32.2 xmc33.0 xmc33.1 + 24 xmc33.2 xmc34.0 xmc34.1 xmc34.2 xmc35.0 xmc35.1 xmc35.2 xmc36.0 + 25 Xmc36.1 xmc36.2 xmc37.0 xmc37.1 xmc37.2 xmc38.0 xmc38.1 xmc38.2 + 26 Nc3.0 Nc3.1 Nc3.2 Nc3.3 Nc3.4 Nc3.5 Nc3.6 bc3.0 + 27 bc3.1 Mc3.0 Mc3.1 xmaxc30 xmaxc31 xmaxc32 xmaxc33 xmaxc34 + 28 xmaxc35 xmc39.0 xmc39.1 xmc39.2 xmc40.0 xmc40.1 xmc40.2 xmc41.0 + 29 xmc41.1 xmc41.2 xmc42.0 xmc42.1 xmc42.2 xmc43.0 xmc43.1 xmc43.2 + 30 xmc44.0 xmc44.1 xmc44.2 xmc45.0 xmc45.1 xmc45.2 xmc46.0 xmc46.1 + 31 xmc46.2 xmc47.0 xmc47.1 xmc47.2 xmc48.0 xmc48.1 xmc48.2 xmc49.0 + 32 xmc49.1 xmc49.2 xmc50.0 xmc50.1 xmc50.2 xmc51.0 xmc51.1 xmc51.2 + + Table 3: GSM payload format + + In the GSM packing used by RTP, the bits SHALL be packed beginning + from the most significant bit. Every 160 sample GSM frame is coded + into one 33 octet (264 bit) buffer. Every such buffer begins with a + 4 bit signature (0xD), followed by the MSB encoding of the fields of + the frame. The first octet thus contains 1101 in the 4 most + significant bits (0-3) and the 4 most significant bits of F1 (0-3) in + the 4 least significant bits (4-7). The second octet contains the 2 + least significant bits of F1 in bits 0-1, and F2 in bits 2-7, and so + on. The order of the fields in the frame is described in Table 2. + + + + +Schulzrinne & Casner Standards Track [Page 26] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.8.2 GSM Variable Names and Numbers + + In the RTP encoding we have the bit pattern described in Table 3, + where F.i signifies the ith bit of the field F, bit 0 is the most + significant bit, and the bits of every octet are numbered from 0 to 7 + from most to least significant. + +4.5.9 GSM-EFR + + GSM-EFR denotes GSM 06.60 enhanced full rate speech transcoding, + specified in ETS 300 726 which is available from ETSI at the address + given in Section 4.5.8. This codec has a frame length of 244 bits. + For transmission in RTP, each codec frame is packed into a 31 octet + (248 bit) buffer beginning with a 4-bit signature 0xC in a manner + similar to that specified here for the original GSM 06.10 codec. The + packing is specified in ETSI Technical Specification TS 101 318. + +4.5.10 L8 + + L8 denotes linear audio data samples, using 8-bits of precision with + an offset of 128, that is, the most negative signal is encoded as + zero. + +4.5.11 L16 + + L16 denotes uncompressed audio data samples, using 16-bit signed + representation with 65,535 equally divided steps between minimum and + maximum signal level, ranging from -32,768 to 32,767. The value is + represented in two's complement notation and transmitted in network + byte order (most significant byte first). + + The MIME registration for L16 in RFC 3555 [7] specifies parameters + that MAY be used with MIME or SDP to indicate that analog pre- + emphasis was applied to the signal before quantization or to indicate + that a multiple-channel audio stream follows a different channel + ordering convention than is specified in Section 4.1. + +4.5.12 LPC + + LPC designates an experimental linear predictive encoding contributed + by Ron Frederick, which is based on an implementation written by Ron + Zuckerman posted to the Usenet group comp.dsp on June 26, 1992. The + codec generates 14 octets for every frame. The framesize is set to + 20 ms, resulting in a bit rate of 5,600 b/s. + + + + + + + +Schulzrinne & Casner Standards Track [Page 27] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.13 MPA + + MPA denotes MPEG-1 or MPEG-2 audio encapsulated as elementary + streams. The encoding is defined in ISO standards ISO/IEC 11172-3 + and 13818-3. The encapsulation is specified in RFC 2250 [14]. + + The encoding may be at any of three levels of complexity, called + Layer I, II and III. The selected layer as well as the sampling rate + and channel count are indicated in the payload. The RTP timestamp + clock rate is always 90,000, independent of the sampling rate. + MPEG-1 audio supports sampling rates of 32, 44.1, and 48 kHz (ISO/IEC + 11172-3, section 1.1; "Scope"). MPEG-2 supports sampling rates of + 16, 22.05 and 24 kHz. The number of samples per frame is fixed, but + the frame size will vary with the sampling rate and bit rate. + + The MIME registration for MPA in RFC 3555 [7] specifies parameters + that MAY be used with MIME or SDP to restrict the selection of layer, + channel count, sampling rate, and bit rate. + +4.5.14 PCMA and PCMU + + PCMA and PCMU are specified in ITU-T Recommendation G.711. Audio + data is encoded as eight bits per sample, after logarithmic scaling. + PCMU denotes mu-law scaling, PCMA A-law scaling. A detailed + description is given by Jayant and Noll [15]. Each G.711 octet SHALL + be octet-aligned in an RTP packet. The sign bit of each G.711 octet + SHALL correspond to the most significant bit of the octet in the RTP + packet (i.e., assuming the G.711 samples are handled as octets on the + host machine, the sign bit SHALL be the most significant bit of the + octet as defined by the host machine format). The 56 kb/s and 48 + kb/s modes of G.711 are not applicable to RTP, since PCMA and PCMU + MUST always be transmitted as 8-bit samples. + + See Section 4.1 regarding silence suppression. + +4.5.15 QCELP + + The Electronic Industries Association (EIA) & Telecommunications + Industry Association (TIA) standard IS-733, "TR45: High Rate Speech + Service Option for Wideband Spread Spectrum Communications Systems", + defines the QCELP audio compression algorithm for use in wireless + CDMA applications. The QCELP CODEC compresses each 20 milliseconds + of 8,000 Hz, 16-bit sampled input speech into one of four different + size output frames: Rate 1 (266 bits), Rate 1/2 (124 bits), Rate 1/4 + (54 bits) or Rate 1/8 (20 bits). For typical speech patterns, this + results in an average output of 6.8 kb/s for normal mode and 4.7 kb/s + for reduced rate mode. The packetization of the QCELP audio codec is + described in [16]. + + + +Schulzrinne & Casner Standards Track [Page 28] + +RFC 3551 RTP A/V Profile July 2003 + + +4.5.16 RED + + The redundant audio payload format "RED" is specified by RFC 2198 + [17]. It defines a means by which multiple redundant copies of an + audio packet may be transmitted in a single RTP stream. Each packet + in such a stream contains, in addition to the audio data for that + packetization interval, a (more heavily compressed) copy of the data + from a previous packetization interval. This allows an approximation + of the data from lost packets to be recovered upon decoding of a + subsequent packet, giving much improved sound quality when compared + with silence substitution for lost packets. + +4.5.17 VDVI + + VDVI is a variable-rate version of DVI4, yielding speech bit rates of + between 10 and 25 kb/s. It is specified for single-channel operation + only. Samples are packed into octets starting at the most- + significant bit. The last octet is padded with 1 bits if the last + sample does not fill the last octet. This padding is distinct from + the valid codewords. The receiver needs to detect the padding + because there is no explicit count of samples in the packet. + + It uses the following encoding: + + DVI4 codeword VDVI bit pattern + _______________________________ + 0 00 + 1 010 + 2 1100 + 3 11100 + 4 111100 + 5 1111100 + 6 11111100 + 7 11111110 + 8 10 + 9 011 + 10 1101 + 11 11101 + 12 111101 + 13 1111101 + 14 11111101 + 15 11111111 + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 29] + +RFC 3551 RTP A/V Profile July 2003 + + +5. Video + + The following sections describe the video encodings that are defined + in this memo and give their abbreviated names used for + identification. These video encodings and their payload types are + listed in Table 5. + + All of these video encodings use an RTP timestamp frequency of 90,000 + Hz, the same as the MPEG presentation time stamp frequency. This + frequency yields exact integer timestamp increments for the typical + 24 (HDTV), 25 (PAL), and 29.97 (NTSC) and 30 Hz (HDTV) frame rates + and 50, 59.94 and 60 Hz field rates. While 90 kHz is the RECOMMENDED + rate for future video encodings used within this profile, other rates + MAY be used. However, it is not sufficient to use the video frame + rate (typically between 15 and 30 Hz) because that does not provide + adequate resolution for typical synchronization requirements when + calculating the RTP timestamp corresponding to the NTP timestamp in + an RTCP SR packet. The timestamp resolution MUST also be sufficient + for the jitter estimate contained in the receiver reports. + + For most of these video encodings, the RTP timestamp encodes the + sampling instant of the video image contained in the RTP data packet. + If a video image occupies more than one packet, the timestamp is the + same on all of those packets. Packets from different video images + are distinguished by their different timestamps. + + Most of these video encodings also specify that the marker bit of the + RTP header SHOULD be set to one in the last packet of a video frame + and otherwise set to zero. Thus, it is not necessary to wait for a + following packet with a different timestamp to detect that a new + frame should be displayed. + +5.1 CelB + + The CELL-B encoding is a proprietary encoding proposed by Sun + Microsystems. The byte stream format is described in RFC 2029 [18]. + +5.2 JPEG + + The encoding is specified in ISO Standards 10918-1 and 10918-2. The + RTP payload format is as specified in RFC 2435 [19]. + +5.3 H261 + + The encoding is specified in ITU-T Recommendation H.261, "Video codec + for audiovisual services at p x 64 kbit/s". The packetization and + RTP-specific properties are described in RFC 2032 [20]. + + + + +Schulzrinne & Casner Standards Track [Page 30] + +RFC 3551 RTP A/V Profile July 2003 + + +5.4 H263 + + The encoding is specified in the 1996 version of ITU-T Recommendation + H.263, "Video coding for low bit rate communication". The + packetization and RTP-specific properties are described in RFC 2190 + [21]. The H263-1998 payload format is RECOMMENDED over this one for + use by new implementations. + +5.5 H263-1998 + + The encoding is specified in the 1998 version of ITU-T Recommendation + H.263, "Video coding for low bit rate communication". The + packetization and RTP-specific properties are described in RFC 2429 + [22]. Because the 1998 version of H.263 is a superset of the 1996 + syntax, this payload format can also be used with the 1996 version of + H.263, and is RECOMMENDED for this use by new implementations. This + payload format does not replace RFC 2190, which continues to be used + by existing implementations, and may be required for backward + compatibility in new implementations. Implementations using the new + features of the 1998 version of H.263 MUST use the payload format + described in RFC 2429. + +5.6 MPV + + MPV designates the use of MPEG-1 and MPEG-2 video encoding elementary + streams as specified in ISO Standards ISO/IEC 11172 and 13818-2, + respectively. The RTP payload format is as specified in RFC 2250 + [14], Section 3. + + The MIME registration for MPV in RFC 3555 [7] specifies a parameter + that MAY be used with MIME or SDP to restrict the selection of the + type of MPEG video. + +5.7 MP2T + + MP2T designates the use of MPEG-2 transport streams, for either audio + or video. The RTP payload format is described in RFC 2250 [14], + Section 2. + + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 31] + +RFC 3551 RTP A/V Profile July 2003 + + +5.8 nv + + The encoding is implemented in the program `nv', version 4, developed + at Xerox PARC by Ron Frederick. Further information is available + from the author: + + Ron Frederick + Blue Coat Systems Inc. + 650 Almanor Avenue + Sunnyvale, CA 94085 + United States + EMail: ronf@bluecoat.com + +6. Payload Type Definitions + + Tables 4 and 5 define this profile's static payload type values for + the PT field of the RTP data header. In addition, payload type + values in the range 96-127 MAY be defined dynamically through a + conference control protocol, which is beyond the scope of this + document. For example, a session directory could specify that for a + given session, payload type 96 indicates PCMU encoding, 8,000 Hz + sampling rate, 2 channels. Entries in Tables 4 and 5 with payload + type "dyn" have no static payload type assigned and are only used + with a dynamic payload type. Payload type 2 was assigned to G721 in + RFC 1890 and to its equivalent successor G726-32 in draft versions of + this specification, but its use is now deprecated and that static + payload type is marked reserved due to conflicting use for the + payload formats G726-32 and AAL2-G726-32 (see Section 4.5.4). + Payload type 13 indicates the Comfort Noise (CN) payload format + specified in RFC 3389 [9]. Payload type 19 is marked "reserved" + because some draft versions of this specification assigned that + number to an earlier version of the comfort noise payload format. + The payload type range 72-76 is marked "reserved" so that RTCP and + RTP packets can be reliably distinguished (see Section "Summary of + Protocol Constants" of the RTP protocol specification). + + The payload types currently defined in this profile are assigned to + exactly one of three categories or media types: audio only, video + only and those combining audio and video. The media types are marked + in Tables 4 and 5 as "A", "V" and "AV", respectively. Payload types + of different media types SHALL NOT be interleaved or multiplexed + within a single RTP session, but multiple RTP sessions MAY be used in + parallel to send multiple media types. An RTP source MAY change + payload types within the same media type during a session. See the + section "Multiplexing RTP Sessions" of RFC 3550 for additional + explanation. + + + + + +Schulzrinne & Casner Standards Track [Page 32] + +RFC 3551 RTP A/V Profile July 2003 + + + PT encoding media type clock rate channels + name (Hz) + ___________________________________________________ + 0 PCMU A 8,000 1 + 1 reserved A + 2 reserved A + 3 GSM A 8,000 1 + 4 G723 A 8,000 1 + 5 DVI4 A 8,000 1 + 6 DVI4 A 16,000 1 + 7 LPC A 8,000 1 + 8 PCMA A 8,000 1 + 9 G722 A 8,000 1 + 10 L16 A 44,100 2 + 11 L16 A 44,100 1 + 12 QCELP A 8,000 1 + 13 CN A 8,000 1 + 14 MPA A 90,000 (see text) + 15 G728 A 8,000 1 + 16 DVI4 A 11,025 1 + 17 DVI4 A 22,050 1 + 18 G729 A 8,000 1 + 19 reserved A + 20 unassigned A + 21 unassigned A + 22 unassigned A + 23 unassigned A + dyn G726-40 A 8,000 1 + dyn G726-32 A 8,000 1 + dyn G726-24 A 8,000 1 + dyn G726-16 A 8,000 1 + dyn G729D A 8,000 1 + dyn G729E A 8,000 1 + dyn GSM-EFR A 8,000 1 + dyn L8 A var. var. + dyn RED A (see text) + dyn VDVI A var. 1 + + Table 4: Payload types (PT) for audio encodings + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 33] + +RFC 3551 RTP A/V Profile July 2003 + + + PT encoding media type clock rate + name (Hz) + _____________________________________________ + 24 unassigned V + 25 CelB V 90,000 + 26 JPEG V 90,000 + 27 unassigned V + 28 nv V 90,000 + 29 unassigned V + 30 unassigned V + 31 H261 V 90,000 + 32 MPV V 90,000 + 33 MP2T AV 90,000 + 34 H263 V 90,000 + 35-71 unassigned ? + 72-76 reserved N/A N/A + 77-95 unassigned ? + 96-127 dynamic ? + dyn H263-1998 V 90,000 + + Table 5: Payload types (PT) for video and combined + encodings + + Session participants agree through mechanisms beyond the scope of + this specification on the set of payload types allowed in a given + session. This set MAY, for example, be defined by the capabilities + of the applications used, negotiated by a conference control protocol + or established by agreement between the human participants. + + Audio applications operating under this profile SHOULD, at a minimum, + be able to send and/or receive payload types 0 (PCMU) and 5 (DVI4). + This allows interoperability without format negotiation and ensures + successful negotiation with a conference control protocol. + +7. RTP over TCP and Similar Byte Stream Protocols + + Under special circumstances, it may be necessary to carry RTP in + protocols offering a byte stream abstraction, such as TCP, possibly + multiplexed with other data. The application MUST define its own + method of delineating RTP and RTCP packets (RTSP [23] provides an + example of such an encapsulation specification). + +8. Port Assignment + + As specified in the RTP protocol definition, RTP data SHOULD be + carried on an even UDP port number and the corresponding RTCP packets + SHOULD be carried on the next higher (odd) port number. + + + + +Schulzrinne & Casner Standards Track [Page 34] + +RFC 3551 RTP A/V Profile July 2003 + + + Applications operating under this profile MAY use any such UDP port + pair. For example, the port pair MAY be allocated randomly by a + session management program. A single fixed port number pair cannot + be required because multiple applications using this profile are + likely to run on the same host, and there are some operating systems + that do not allow multiple processes to use the same UDP port with + different multicast addresses. + + However, port numbers 5004 and 5005 have been registered for use with + this profile for those applications that choose to use them as the + default pair. Applications that operate under multiple profiles MAY + use this port pair as an indication to select this profile if they + are not subject to the constraint of the previous paragraph. + Applications need not have a default and MAY require that the port + pair be explicitly specified. The particular port numbers were + chosen to lie in the range above 5000 to accommodate port number + allocation practice within some versions of the Unix operating + system, where port numbers below 1024 can only be used by privileged + processes and port numbers between 1024 and 5000 are automatically + assigned by the operating system. + +9. Changes from RFC 1890 + + This RFC revises RFC 1890. It is mostly backwards-compatible with + RFC 1890 except for functions removed because two interoperable + implementations were not found. The additions to RFC 1890 codify + existing practice in the use of payload formats under this profile. + Since this profile may be used without using any of the payload + formats listed here, the addition of new payload formats in this + revision does not affect backwards compatibility. The changes are + listed below, categorized into functional and non-functional changes. + + Functional changes: + + o Section 11, "IANA Considerations" was added to specify the + registration of the name for this profile. That appendix also + references a new Section 3 "Registering Additional Encodings" + which establishes a policy that no additional registration of + static payload types for this profile will be made beyond those + added in this revision and included in Tables 4 and 5. Instead, + additional encoding names may be registered as MIME subtypes for + binding to dynamic payload types. Non-normative references were + added to RFC 3555 [7] where MIME subtypes for all the listed + payload formats are registered, some with optional parameters for + use of the payload formats. + + + + + + +Schulzrinne & Casner Standards Track [Page 35] + +RFC 3551 RTP A/V Profile July 2003 + + + o Static payload types 4, 16, 17 and 34 were added to incorporate + IANA registrations made since the publication of RFC 1890, along + with the corresponding payload format descriptions for G723 and + H263. + + o Following working group discussion, static payload types 12 and 18 + were added along with the corresponding payload format + descriptions for QCELP and G729. Static payload type 13 was + assigned to the Comfort Noise (CN) payload format defined in RFC + 3389. Payload type 19 was marked reserved because it had been + temporarily allocated to an earlier version of Comfort Noise + present in some draft revisions of this document. + + o The payload format for G721 was renamed to G726-32 following the + ITU-T renumbering, and the payload format description for G726 was + expanded to include the -16, -24 and -40 data rates. Because of + confusion regarding draft revisions of this document, some + implementations of these G726 payload formats packed samples into + octets starting with the most significant bit rather than the + least significant bit as specified here. To partially resolve + this incompatibility, new payload formats named AAL2-G726-16, -24, + -32 and -40 will be specified in a separate document (see note in + Section 4.5.4), and use of static payload type 2 is deprecated as + explained in Section 6. + + o Payload formats G729D and G729E were added following the ITU-T + addition of Annexes D and E to Recommendation G.729. Listings + were added for payload formats GSM-EFR, RED, and H263-1998 + published in other documents subsequent to RFC 1890. These + additional payload formats are referenced only by dynamic payload + type numbers. + + o The descriptions of the payload formats for G722, G728, GSM, VDVI + were expanded. + + o The payload format for 1016 audio was removed and its static + payload type assignment 1 was marked "reserved" because two + interoperable implementations were not found. + + o Requirements for congestion control were added in Section 2. + + o This profile follows the suggestion in the revised RTP spec that + RTCP bandwidth may be specified separately from the session + bandwidth and separately for active senders and passive receivers. + + o The mapping of a user pass-phrase string into an encryption key + was deleted from Section 2 because two interoperable + implementations were not found. + + + +Schulzrinne & Casner Standards Track [Page 36] + +RFC 3551 RTP A/V Profile July 2003 + + + o The "quadrophonic" sample ordering convention for four-channel + audio was removed to eliminate an ambiguity as noted in Section + 4.1. + + Non-functional changes: + + o In Section 4.1, it is now explicitly stated that silence + suppression is allowed for all audio payload formats. (This has + always been the case and derives from a fundamental aspect of + RTP's design and the motivations for packet audio, but was not + explicit stated before.) The use of comfort noise is also + explained. + + o In Section 4.1, the requirement level for setting of the marker + bit on the first packet after silence for audio was changed from + "is" to "SHOULD be", and clarified that the marker bit is set only + when packets are intentionally not sent. + + o Similarly, text was added to specify that the marker bit SHOULD be + set to one on the last packet of a video frame, and that video + frames are distinguished by their timestamps. + + o RFC references are added for payload formats published after RFC + 1890. + + o The security considerations and full copyright sections were + added. + + o According to Peter Hoddie of Apple, only pre-1994 Macintosh used + the 22254.54 rate and none the 11127.27 rate, so the latter was + dropped from the discussion of suggested sampling frequencies. + + o Table 1 was corrected to move some values from the "ms/packet" + column to the "default ms/packet" column where they belonged. + + o Since the Interactive Multimedia Association ceased operations, an + alternate resource was provided for a referenced IMA document. + + o A note has been added for G722 to clarify a discrepancy between + the actual sampling rate and the RTP timestamp clock rate. + + o Small clarifications of the text have been made in several places, + some in response to questions from readers. In particular: + + - A definition for "media type" is given in Section 1.1 to allow + the explanation of multiplexing RTP sessions in Section 6 to be + more clear regarding the multiplexing of multiple media. + + + + +Schulzrinne & Casner Standards Track [Page 37] + +RFC 3551 RTP A/V Profile July 2003 + + + - The explanation of how to determine the number of audio frames + in a packet from the length was expanded. + + - More description of the allocation of bandwidth to SDES items + is given. + + - A note was added that the convention for the order of channels + specified in Section 4.1 may be overridden by a particular + encoding or payload format specification. + + - The terms MUST, SHOULD, MAY, etc. are used as defined in RFC + 2119. + + o A second author for this document was added. + +10. Security Considerations + + Implementations using the profile defined in this specification are + subject to the security considerations discussed in the RTP + specification [1]. This profile does not specify any different + security services. The primary function of this profile is to list a + set of data compression encodings for audio and video media. + + Confidentiality of the media streams is achieved by encryption. + Because the data compression used with the payload formats described + in this profile is applied end-to-end, encryption may be performed + after compression so there is no conflict between the two operations. + + A potential denial-of-service threat exists for data encodings using + compression techniques that have non-uniform receiver-end + computational load. The attacker can inject pathological datagrams + into the stream which are complex to decode and cause the receiver to + be overloaded. + + As with any IP-based protocol, in some circumstances a receiver may + be overloaded simply by the receipt of too many packets, either + desired or undesired. Network-layer authentication MAY be used to + discard packets from undesired sources, but the processing cost of + the authentication itself may be too high. In a multicast + environment, source pruning is implemented in IGMPv3 (RFC 3376) [24] + and in multicast routing protocols to allow a receiver to select + which sources are allowed to reach it. + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 38] + +RFC 3551 RTP A/V Profile July 2003 + + +11. IANA Considerations + + The RTP specification establishes a registry of profile names for use + by higher-level control protocols, such as the Session Description + Protocol (SDP), RFC 2327 [6], to refer to transport methods. This + profile registers the name "RTP/AVP". + + Section 3 establishes the policy that no additional registration of + static RTP payload types for this profile will be made beyond those + added in this document revision and included in Tables 4 and 5. IANA + may reference that section in declining to accept any additional + registration requests. In Tables 4 and 5, note that types 1 and 2 + have been marked reserved and the set of "dyn" payload types included + has been updated. These changes are explained in Sections 6 and 9. + +12. References + +12.1 Normative References + + [1] Schulzrinne, H., Casner, S., Frederick, R. and V. Jacobson, + "RTP: A Transport Protocol for Real-Time Applications", RFC + 3550, July 2003. + + [2] Bradner, S., "Key Words for Use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + [3] Apple Computer, "Audio Interchange File Format AIFF-C", August + 1991. (also ftp://ftp.sgi.com/sgi/aiff-c.9.26.91.ps.Z). + +12.2 Informative References + + [4] Braden, R., Clark, D. and S. Shenker, "Integrated Services in + the Internet Architecture: an Overview", RFC 1633, June 1994. + + [5] Blake, S., Black, D., Carlson, M., Davies, E., Wang, Z. and W. + Weiss, "An Architecture for Differentiated Service", RFC 2475, + December 1998. + + [6] Handley, M. and V. Jacobson, "SDP: Session Description + Protocol", RFC 2327, April 1998. + + [7] Casner, S. and P. Hoschka, "MIME Type Registration of RTP + Payload Types", RFC 3555, July 2003. + + [8] Freed, N., Klensin, J. and J. Postel, "Multipurpose Internet + Mail Extensions (MIME) Part Four: Registration Procedures", BCP + 13, RFC 2048, November 1996. + + + + +Schulzrinne & Casner Standards Track [Page 39] + +RFC 3551 RTP A/V Profile July 2003 + + + [9] Zopf, R., "Real-time Transport Protocol (RTP) Payload for + Comfort Noise (CN)", RFC 3389, September 2002. + + [10] Deleam, D. and J.-P. Petit, "Real-time implementations of the + recent ITU-T low bit rate speech coders on the TI TMS320C54X + DSP: results, methodology, and applications", in Proc. of + International Conference on Signal Processing, Technology, and + Applications (ICSPAT) , (Boston, Massachusetts), pp. 1656--1660, + October 1996. + + [11] Mouly, M. and M.-B. Pautet, The GSM system for mobile + communications Lassay-les-Chateaux, France: Europe Media + Duplication, 1993. + + [12] Degener, J., "Digital Speech Compression", Dr. Dobb's Journal, + December 1994. + + [13] Redl, S., Weber, M. and M. Oliphant, An Introduction to GSM + Boston: Artech House, 1995. + + [14] Hoffman, D., Fernando, G., Goyal, V. and M. Civanlar, "RTP + Payload Format for MPEG1/MPEG2 Video", RFC 2250, January 1998. + + [15] Jayant, N. and P. Noll, Digital Coding of Waveforms--Principles + and Applications to Speech and Video Englewood Cliffs, New + Jersey: Prentice-Hall, 1984. + + [16] McKay, K., "RTP Payload Format for PureVoice(tm) Audio", RFC + 2658, August 1999. + + [17] Perkins, C., Kouvelas, I., Hodson, O., Hardman, V., Handley, M., + Bolot, J.-C., Vega-Garcia, A. and S. Fosse-Parisis, "RTP Payload + for Redundant Audio Data", RFC 2198, September 1997. + + [18] Speer, M. and D. Hoffman, "RTP Payload Format of Sun's CellB + Video Encoding", RFC 2029, October 1996. + + [19] Berc, L., Fenner, W., Frederick, R., McCanne, S. and P. Stewart, + "RTP Payload Format for JPEG-Compressed Video", RFC 2435, + October 1998. + + [20] Turletti, T. and C. Huitema, "RTP Payload Format for H.261 Video + Streams", RFC 2032, October 1996. + + [21] Zhu, C., "RTP Payload Format for H.263 Video Streams", RFC 2190, + September 1997. + + + + + +Schulzrinne & Casner Standards Track [Page 40] + +RFC 3551 RTP A/V Profile July 2003 + + + [22] Bormann, C., Cline, L., Deisher, G., Gardos, T., Maciocco, C., + Newell, D., Ott, J., Sullivan, G., Wenger, S. and C. Zhu, "RTP + Payload Format for the 1998 Version of ITU-T Rec. H.263 Video + (H.263+)", RFC 2429, October 1998. + + [23] Schulzrinne, H., Rao, A. and R. Lanphier, "Real Time Streaming + Protocol (RTSP)", RFC 2326, April 1998. + + [24] Cain, B., Deering, S., Kouvelas, I., Fenner, B. and A. + Thyagarajan, "Internet Group Management Protocol, Version 3", + RFC 3376, October 2002. + +13. Current Locations of Related Resources + + Note: Several sections below refer to the ITU-T Software Tool + Library (STL). It is available from the ITU Sales Service, Place des + Nations, CH-1211 Geneve 20, Switzerland (also check + http://www.itu.int). The ITU-T STL is covered by a license defined + in ITU-T Recommendation G.191, "Software tools for speech and audio + coding standardization". + + DVI4 + + An archived copy of the document IMA Recommended Practices for + Enhancing Digital Audio Compatibility in Multimedia Systems (version + 3.0), which describes the IMA ADPCM algorithm, is available at: + + http://www.cs.columbia.edu/~hgs/audio/dvi/ + + An implementation is available from Jack Jansen at + + ftp://ftp.cwi.nl/local/pub/audio/adpcm.shar + + G722 + + An implementation of the G.722 algorithm is available as part of the + ITU-T STL, described above. + + G723 + + The reference C code implementation defining the G.723.1 algorithm + and its Annexes A, B, and C are available as an integral part of + Recommendation G.723.1 from the ITU Sales Service, address listed + above. Both the algorithm and C code are covered by a specific + license. The ITU-T Secretariat should be contacted to obtain such + licensing information. + + + + + +Schulzrinne & Casner Standards Track [Page 41] + +RFC 3551 RTP A/V Profile July 2003 + + + G726 + + G726 is specified in the ITU-T Recommendation G.726, "40, 32, 24, and + 16 kb/s Adaptive Differential Pulse Code Modulation (ADPCM)". An + implementation of the G.726 algorithm is available as part of the + ITU-T STL, described above. + + G729 + + The reference C code implementation defining the G.729 algorithm and + its Annexes A through I are available as an integral part of + Recommendation G.729 from the ITU Sales Service, listed above. Annex + I contains the integrated C source code for all G.729 operating + modes. The G.729 algorithm and associated C code are covered by a + specific license. The contact information for obtaining the license + is available from the ITU-T Secretariat. + + GSM + + A reference implementation was written by Carsten Bormann and Jutta + Degener (then at TU Berlin, Germany). It is available at + + http://www.dmn.tzi.org/software/gsm/ + + Although the RPE-LTP algorithm is not an ITU-T standard, there is a C + code implementation of the RPE-LTP algorithm available as part of the + ITU-T STL. The STL implementation is an adaptation of the TU Berlin + version. + + LPC + + An implementation is available at + + ftp://parcftp.xerox.com/pub/net-research/lpc.tar.Z + + PCMU, PCMA + + An implementation of these algorithms is available as part of the + ITU-T STL, described above. + +14. Acknowledgments + + The comments and careful review of Simao Campos, Richard Cox and AVT + Working Group participants are gratefully acknowledged. The GSM + description was adopted from the IMTC Voice over IP Forum Service + Interoperability Implementation Agreement (January 1997). Fred Burg + and Terry Lyons helped with the G.729 description. + + + + +Schulzrinne & Casner Standards Track [Page 42] + +RFC 3551 RTP A/V Profile July 2003 + + +15. Intellectual Property Rights Statement + + The IETF takes no position regarding the validity or scope of any + intellectual property or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; neither does it represent that it + has made any effort to identify any such rights. Information on the + IETF's procedures with respect to rights in standards-track and + standards-related documentation can be found in BCP-11. Copies of + claims of rights made available for publication and any assurances of + licenses to be made available, or the result of an attempt made to + obtain a general license or permission for the use of such + proprietary rights by implementors or users of this specification can + be obtained from the IETF Secretariat. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights which may cover technology that may be required to practice + this standard. Please address the information to the IETF Executive + Director. + +16. Authors' Addresses + + Henning Schulzrinne + Department of Computer Science + Columbia University + 1214 Amsterdam Avenue + New York, NY 10027 + United States + + EMail: schulzrinne@cs.columbia.edu + + + Stephen L. Casner + Packet Design + 3400 Hillview Avenue, Building 3 + Palo Alto, CA 94304 + United States + + EMail: casner@acm.org + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 43] + +RFC 3551 RTP A/V Profile July 2003 + + +17. Full Copyright Statement + + Copyright (C) The Internet Society (2003). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Schulzrinne & Casner Standards Track [Page 44] + diff --git a/src/modules/rtp/rtp.c b/src/modules/rtp/rtp.c new file mode 100644 index 00000000..a3e78d84 --- /dev/null +++ b/src/modules/rtp/rtp.c @@ -0,0 +1,193 @@ +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "rtp.h" + +pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload) { + assert(c); + assert(fd >= 0); + + c->fd = fd; + c->sequence = (uint16_t) (rand()*rand()); + c->timestamp = 0; + c->ssrc = ssrc ? ssrc : (uint32_t) (rand()*rand()); + c->payload = payload & 127; + + return c; +} + +#define MAX_IOVECS 16 + +int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q) { + struct iovec iov[MAX_IOVECS]; + pa_memblock* mb[MAX_IOVECS]; + int iov_idx = 1; + size_t n = 0, skip = 0; + + assert(c); + assert(size > 0); + assert(q); + + if (pa_memblockq_get_length(q) < size) + return 0; + + for (;;) { + int r; + pa_memchunk chunk; + + if ((r = pa_memblockq_peek(q, &chunk)) >= 0) { + + size_t k = n + chunk.length > size ? size - n : chunk.length; + + if (chunk.memblock) { + iov[iov_idx].iov_base = (uint8_t*) chunk.memblock->data + chunk.index; + iov[iov_idx].iov_len = k; + mb[iov_idx] = chunk.memblock; + iov_idx ++; + + n += k; + } + + skip += k; + pa_memblockq_drop(q, &chunk, k); + } + + if (r < 0 || !chunk.memblock || n >= size || iov_idx >= MAX_IOVECS) { + uint32_t header[3]; + struct msghdr m; + int k, i; + + if (n > 0) { + header[0] = htonl(((uint32_t) 2 << 30) | ((uint32_t) c->payload << 16) | ((uint32_t) c->sequence)); + header[1] = htonl(c->timestamp); + header[2] = htonl(c->ssrc); + + iov[0].iov_base = header; + iov[0].iov_len = sizeof(header); + + m.msg_name = NULL; + m.msg_namelen = 0; + m.msg_iov = iov; + m.msg_iovlen = iov_idx; + m.msg_control = NULL; + m.msg_controllen = 0; + m.msg_flags = 0; + + k = sendmsg(c->fd, &m, MSG_DONTWAIT); + + for (i = 1; i < iov_idx; i++) + pa_memblock_unref(mb[i]); + + c->sequence++; + } else + k = 0; + + c->timestamp += skip; + + if (k < 0) { + if (errno != EAGAIN) /* If the queue is full, just ignore it */ + pa_log(__FILE__": sendmsg() failed: %s", strerror(errno)); + return -1; + } + + if (r < 0 || pa_memblockq_get_length(q) < size) + break; + + n = 0; + skip = 0; + iov_idx = 1; + } + } + + return 0; +} + +pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd) { + assert(c); + + c->fd = fd; + return c; +} + +int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk) { + assert(c); + assert(chunk); + + return 0; +} + +uint8_t pa_rtp_payload_type(const pa_sample_spec *ss) { + assert(ss); + + if (ss->format == PA_SAMPLE_ULAW && ss->rate == 8000 && ss->channels == 1) + return 0; + if (ss->format == PA_SAMPLE_ALAW && ss->rate == 8000 && ss->channels == 1) + return 0; + if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 2) + return 10; + if (ss->format == PA_SAMPLE_S16BE && ss->rate == 44100 && ss->channels == 1) + return 11; + + return 127; +} + +pa_sample_spec *pa_rtp_sample_spec_fixup(pa_sample_spec * ss) { + assert(ss); + + if (!pa_rtp_sample_spec_valid(ss)) + ss->format = PA_SAMPLE_S16BE; + + assert(pa_rtp_sample_spec_valid(ss)); + return ss; +} + +int pa_rtp_sample_spec_valid(const pa_sample_spec *ss) { + assert(ss); + + if (!pa_sample_spec_valid(ss)) + return 0; + + return + ss->format == PA_SAMPLE_U8 || + ss->format == PA_SAMPLE_ALAW || + ss->format == PA_SAMPLE_ULAW || + ss->format == PA_SAMPLE_S16BE; +} + +void pa_rtp_context_destroy(pa_rtp_context *c) { + assert(c); + + close(c->fd); +} diff --git a/src/modules/rtp/rtp.h b/src/modules/rtp/rtp.h new file mode 100644 index 00000000..e925cc0e --- /dev/null +++ b/src/modules/rtp/rtp.h @@ -0,0 +1,51 @@ +#ifndef foortphfoo +#define foortphfoo + +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include +#include +#include +#include +#include + +typedef struct pa_rtp_context { + int fd; + uint16_t sequence; + uint32_t timestamp; + uint32_t ssrc; + uint8_t payload; +} pa_rtp_context; + +pa_rtp_context* pa_rtp_context_init_send(pa_rtp_context *c, int fd, uint32_t ssrc, uint8_t payload); +int pa_rtp_send(pa_rtp_context *c, size_t size, pa_memblockq *q); + +pa_rtp_context* pa_rtp_context_init_recv(pa_rtp_context *c, int fd); +int pa_rtp_recv(pa_rtp_context *c, pa_memchunk *chunk); + +uint8_t pa_rtp_payload_type(const pa_sample_spec *ss); +pa_sample_spec* pa_rtp_sample_spec_fixup(pa_sample_spec *ss); +int pa_rtp_sample_spec_valid(const pa_sample_spec *ss); + +void pa_rtp_context_destroy(pa_rtp_context *c); + +#endif diff --git a/src/modules/rtp/sap.c b/src/modules/rtp/sap.c new file mode 100644 index 00000000..ebf20bc4 --- /dev/null +++ b/src/modules/rtp/sap.c @@ -0,0 +1,107 @@ +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "sap.h" + +pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data) { + assert(c); + assert(fd >= 0); + assert(sdp_data); + + c->fd = fd; + c->sdp_data = sdp_data; + c->msg_id_hash = (uint16_t) (rand()*rand()); + + return c; +} + +void pa_sap_context_destroy(pa_sap_context *c) { + assert(c); + + close(c->fd); + pa_xfree(c->sdp_data); +} + +int pa_sap_send(pa_sap_context *c, int goodbye) { + uint32_t header; + const char mime[] = "application/sdp"; + struct sockaddr_storage sa_buf; + struct sockaddr *sa = (struct sockaddr*) &sa_buf; + socklen_t salen = sizeof(sa_buf); + struct iovec iov[4]; + struct msghdr m; + int k; + + if (getsockname(c->fd, sa, &salen) < 0) { + pa_log("getsockname() failed: %s\n", strerror(errno)); + return -1; + } + + assert(sa->sa_family == AF_INET || sa->sa_family == AF_INET6); + + header = htonl(((uint32_t) 1 << 29) | + (sa->sa_family == AF_INET6 ? (uint32_t) 1 << 28 : 0) | + (goodbye ? (uint32_t) 1 << 26 : 0) | + (c->msg_id_hash)); + + iov[0].iov_base = &header; + iov[0].iov_len = sizeof(header); + + iov[1].iov_base = sa->sa_family == AF_INET ? (void*) &((struct sockaddr_in*) sa)->sin_addr : (void*) &((struct sockaddr_in6*) sa)->sin6_addr; + iov[1].iov_len = sa->sa_family == AF_INET ? 4 : 16; + + iov[2].iov_base = (char*) mime; + iov[2].iov_len = sizeof(mime); + + iov[3].iov_base = c->sdp_data; + iov[3].iov_len = strlen(c->sdp_data); + + m.msg_name = NULL; + m.msg_namelen = 0; + m.msg_iov = iov; + m.msg_iovlen = 4; + m.msg_control = NULL; + m.msg_controllen = 0; + m.msg_flags = 0; + + if ((k = sendmsg(c->fd, &m, MSG_DONTWAIT)) < 0) + pa_log("sendmsg() failed: %s\n", strerror(errno)); + + return k; +} diff --git a/src/modules/rtp/sap.h b/src/modules/rtp/sap.h new file mode 100644 index 00000000..787b39f7 --- /dev/null +++ b/src/modules/rtp/sap.h @@ -0,0 +1,43 @@ +#ifndef foosaphfoo +#define foosaphfoo + +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include +#include +#include +#include +#include + +typedef struct pa_sap_context { + int fd; + char *sdp_data; + + uint16_t msg_id_hash; +} pa_sap_context; + +pa_sap_context* pa_sap_context_init_send(pa_sap_context *c, int fd, char *sdp_data); +void pa_sap_context_destroy(pa_sap_context *c); + +int pa_sap_send(pa_sap_context *c, int goodbye); + +#endif diff --git a/src/modules/rtp/sdp.c b/src/modules/rtp/sdp.c new file mode 100644 index 00000000..99e8c12b --- /dev/null +++ b/src/modules/rtp/sdp.c @@ -0,0 +1,87 @@ +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include + +#include "sdp.h" + +static const char* map_format(pa_sample_format_t f) { + switch (f) { + case PA_SAMPLE_S16BE: return "L16"; + case PA_SAMPLE_U8: return "L8"; + case PA_SAMPLE_ALAW: return "PCMA"; + case PA_SAMPLE_ULAW: return "PCMU"; + default: + return NULL; + } +} + +char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss) { + uint32_t ntp; + char buf_src[64], buf_dst[64]; + const char *u, *f, *a; + + assert(src); + assert(dst); + assert(af == AF_INET || af == AF_INET6); + + f = map_format(ss->format); + assert(f); + + if (!(u = getenv("USER"))) + if (!(u = getenv("USERNAME"))) + u = "-"; + + ntp = time(NULL) + 2208988800; + + a = inet_ntop(af, src, buf_src, sizeof(buf_src)); + assert(a); + a = inet_ntop(af, dst, buf_dst, sizeof(buf_dst)); + assert(a); + + return pa_sprintf_malloc( + "v=0\n" + "o=%s %lu 0 IN %s %s\n" + "s=%s\n" + "c=IN %s %s\n" + "t=%lu 0\n" + "a=recvonly\n" + "m=audio %u RTP/AVP %i\n" + "a=rtpmap:%i %s/%u/%u\n" + "a=type:broadcast\n", + u, (unsigned long) ntp, af == AF_INET ? "IP4" : "IP6", buf_src, + name, + af == AF_INET ? "IP4" : "IP6", buf_dst, + (unsigned long) ntp, + port, payload, + payload, f, ss->rate, ss->channels); +} diff --git a/src/modules/rtp/sdp.h b/src/modules/rtp/sdp.h new file mode 100644 index 00000000..10820067 --- /dev/null +++ b/src/modules/rtp/sdp.h @@ -0,0 +1,33 @@ +#ifndef foosdphfoo +#define foosdphfoo + +/* $Id$ */ + +/*** + This file is part of polypaudio. + + polypaudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + polypaudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with polypaudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include +#include +#include + +#include + +char *pa_sdp_build(int af, const void *src, const void *dst, const char *name, uint16_t port, uint8_t payload, const pa_sample_spec *ss); + +#endif -- cgit