diff --git a/roles/dotfiles/files/.XCompose b/roles/dotfiles/files/.XCompose new file mode 100644 index 0000000..4939613 --- /dev/null +++ b/roles/dotfiles/files/.XCompose @@ -0,0 +1,118 @@ +include "/usr/X11R6/share/X11/locale/en_US.UTF-8/Compose" + +# Used for pollen + : "◊" U25ca # ◊ LOZENGE + +# LAMBDA LAMBDA LAMBDA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + +# Greek letters +# Source: https://gist.githubusercontent.com/carlobaldassi/8951743/raw/2b587c8147603d395bf2ec221eee348f27dabaa8/XCompose_greek + : "α" U03B1 # GREEK SMALL LETTER ALPHA + : "α" U03B1 # GREEK SMALL LETTER ALPHA + : "β" U03B2 # GREEK SMALL LETTER BETA + : "β" U03B2 # GREEK SMALL LETTER BETA + : "ξ" U03BE # GREEK SMALL LETTER XI + : "ξ" U03BE # GREEK SMALL LETTER XI + : "δ" U03B4 # GREEK SMALL LETTER DELTA + : "δ" U03B4 # GREEK SMALL LETTER DELTA + : "ε" U03B5 # GREEK SMALL LETTER EPSILON + : "ε" U03B5 # GREEK SMALL LETTER EPSILON + : "φ" U03C6 # GREEK SMALL LETTER PHI + : "φ" U03C6 # GREEK SMALL LETTER PHI + : "γ" U03B3 # GREEK SMALL LETTER GAMMA + : "γ" U03B3 # GREEK SMALL LETTER GAMMA + : "θ" U03B8 # GREEK SMALL LETTER THETA + : "θ" U03B8 # GREEK SMALL LETTER THETA + : "ι" U03B9 # GREEK SMALL LETTER IOTA + : "ι" U03B9 # GREEK SMALL LETTER IOTA + : "κ" U03BA # GREEK SMALL LETTER KAPPA + : "κ" U03BA # GREEK SMALL LETTER KAPPA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + : "μ" U03BC # GREEK SMALL LETTER MU + : "μ" U03BC # GREEK SMALL LETTER MU + : "ν" U03BD # GREEK SMALL LETTER NU + : "ν" U03BD # GREEK SMALL LETTER NU + : "ο" U03BF # GREEK SMALL LETTER OMICRON + : "ο" U03BF # GREEK SMALL LETTER OMICRON +

: "π" U03C0 # GREEK SMALL LETTER PI +

: "Π" U03A0 # GREEK CAPITAL LETTER PI +

: "Π" U03A0 # GREEK CAPITAL LETTER PI + : "Ψ" U03A8 # GREEK CAPITAL LETTER PSI + : "Ψ" U03A8 # GREEK CAPITAL LETTER PSI + : "Ρ" U03A1 # GREEK CAPITAL LETTER RHO + : "Ρ" U03A1 # GREEK CAPITAL LETTER RHO + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Τ" U03A4 # GREEK CAPITAL LETTER TAU + : "Τ" U03A4 # GREEK CAPITAL LETTER TAU + : "Υ" U03A5 # GREEK CAPITAL LETTER UPSILON + : "Υ" U03A5 # GREEK CAPITAL LETTER UPSILON + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Ω" U03A9 # GREEK CAPITAL LETTER OMEGA + : "Ω" U03A9 # GREEK CAPITAL LETTER OMEGA + : "Χ" U03A7 # GREEK CAPITAL LETTER CHI + : "Χ" U03A7 # GREEK CAPITAL LETTER CHI + : "Η" U0397 # GREEK CAPITAL LETTER ETA + : "Η" U0397 # GREEK CAPITAL LETTER ETA + : "Ζ" U0396 # GREEK CAPITAL LETTER ZETA + : "Ζ" U0396 # GREEK CAPITAL LETTER ZETA + + : "∀" U2200 # FOR ALL + : "∃" U2203 # THERE EXISTS + + + diff --git a/roles/dotfiles/files/.config/htop/htoprc b/roles/dotfiles/files/.config/htop/htoprc new file mode 100644 index 0000000..b25c04d --- /dev/null +++ b/roles/dotfiles/files/.config/htop/htoprc @@ -0,0 +1,26 @@ +# Beware! This file is rewritten by htop when settings are changed in the interface. +# The parser is also very primitive, and not human-friendly. 
+fields=0 48 17 18 38 39 40 2 46 47 49 1 +sort_key=46 +sort_direction=1 +hide_threads=0 +hide_kernel_threads=1 +hide_userland_threads=0 +shadow_other_users=0 +show_thread_names=0 +show_program_path=1 +highlight_base_name=0 +highlight_megabytes=1 +highlight_threads=1 +tree_view=0 +header_margin=1 +detailed_cpu_time=0 +cpu_count_from_zero=0 +update_process_names=0 +account_guest_in_cpu_meter=0 +color_scheme=0 +delay=15 +left_meters=AllCPUs Memory Swap +left_meter_modes=1 1 1 +right_meters=Tasks LoadAverage Uptime Memory Swap +right_meter_modes=2 2 2 2 2 diff --git a/roles/dotfiles/files/.config/i3/config b/roles/dotfiles/files/.config/i3/config new file mode 100644 index 0000000..5088c39 --- /dev/null +++ b/roles/dotfiles/files/.config/i3/config @@ -0,0 +1,184 @@ +# This file has been auto-generated by i3-config-wizard(1). +# It will not be overwritten, so edit it as you like. +# +# Should you change your keyboard layout some time, delete +# this file and re-run i3-config-wizard(1). +# + +# i3 config file (v4) +# +# Please see https://i3wm.org/docs/userguide.html for a complete reference! + +set $mod Mod4 + +# Font for window titles. Will also be used by the bar unless a different font +# is used in the bar {} block below. +font pango:monospace 8 + +# This font is widely installed, provides lots of unicode glyphs, right-to-left +# text rendering and scalability on retina/hidpi displays (thanks to pango). +#font pango:DejaVu Sans Mono 8 + +# The combination of xss-lock, nm-applet and pactl is a popular choice, so +# they are included here as an example. Modify as you see fit. + +# xss-lock grabs a logind suspend inhibit lock and will use i3lock to lock the +# screen before suspend. Use loginctl lock-session to lock your screen. +exec --no-startup-id xss-lock --transfer-sleep-lock -- i3lock --nofork + +# NetworkManager is the most popular way to manage wireless networks on Linux, +# and nm-applet is a desktop environment-independent system tray GUI for it. 
+exec --no-startup-id nm-applet + +# Use pactl to adjust volume in PulseAudio. +set $refresh_i3status killall -SIGUSR1 i3status +bindsym XF86AudioRaiseVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ +10% && $refresh_i3status +bindsym XF86AudioLowerVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ -10% && $refresh_i3status +bindsym XF86AudioMute exec --no-startup-id pactl set-sink-mute @DEFAULT_SINK@ toggle && $refresh_i3status +bindsym XF86AudioMicMute exec --no-startup-id pactl set-source-mute @DEFAULT_SOURCE@ toggle && $refresh_i3status + +# Use Mouse+$mod to drag floating windows to their wanted position +floating_modifier $mod + +# start a terminal +bindsym $mod+Return exec i3-sensible-terminal + +# kill focused window +bindsym $mod+Shift+q kill + +# start dmenu (a program launcher) +bindsym $mod+d exec dmenu_run +# There also is the (new) i3-dmenu-desktop which only displays applications +# shipping a .desktop file. It is a wrapper around dmenu, so you need that +# installed. 
+# bindsym $mod+d exec --no-startup-id i3-dmenu-desktop + +# change focus +bindsym $mod+j focus left +bindsym $mod+k focus down +bindsym $mod+l focus up +bindsym $mod+semicolon focus right + +# alternatively, you can use the cursor keys: +bindsym $mod+Left focus left +bindsym $mod+Down focus down +bindsym $mod+Up focus up +bindsym $mod+Right focus right + +# move focused window +bindsym $mod+Shift+j move left +bindsym $mod+Shift+k move down +bindsym $mod+Shift+l move up +bindsym $mod+Shift+semicolon move right + +# alternatively, you can use the cursor keys: +bindsym $mod+Shift+Left move left +bindsym $mod+Shift+Down move down +bindsym $mod+Shift+Up move up +bindsym $mod+Shift+Right move right + +# split in horizontal orientation +bindsym $mod+h split h + +# split in vertical orientation +bindsym $mod+v split v + +# enter fullscreen mode for the focused container +bindsym $mod+f fullscreen toggle + +# change container layout (stacked, tabbed, toggle split) +bindsym $mod+s layout stacking +bindsym $mod+w layout tabbed +bindsym $mod+e layout toggle split + +# toggle tiling / floating +bindsym $mod+Shift+space floating toggle + +# change focus between tiling / floating windows +bindsym $mod+space focus mode_toggle + +# focus the parent container +bindsym $mod+a focus parent + +# focus the child container +#bindsym $mod+d focus child + +# Define names for default workspaces for which we configure key bindings later on. +# We use variables to avoid repeating the names in multiple places. 
+set $ws1 "1" +set $ws2 "2" +set $ws3 "3" +set $ws4 "4" +set $ws5 "5" +set $ws6 "6" +set $ws7 "7" +set $ws8 "8" +set $ws9 "9" +set $ws10 "10" + +# switch to workspace +bindsym $mod+1 workspace number $ws1 +bindsym $mod+2 workspace number $ws2 +bindsym $mod+3 workspace number $ws3 +bindsym $mod+4 workspace number $ws4 +bindsym $mod+5 workspace number $ws5 +bindsym $mod+6 workspace number $ws6 +bindsym $mod+7 workspace number $ws7 +bindsym $mod+8 workspace number $ws8 +bindsym $mod+9 workspace number $ws9 +bindsym $mod+0 workspace number $ws10 + +# move focused container to workspace +bindsym $mod+Shift+1 move container to workspace number $ws1 +bindsym $mod+Shift+2 move container to workspace number $ws2 +bindsym $mod+Shift+3 move container to workspace number $ws3 +bindsym $mod+Shift+4 move container to workspace number $ws4 +bindsym $mod+Shift+5 move container to workspace number $ws5 +bindsym $mod+Shift+6 move container to workspace number $ws6 +bindsym $mod+Shift+7 move container to workspace number $ws7 +bindsym $mod+Shift+8 move container to workspace number $ws8 +bindsym $mod+Shift+9 move container to workspace number $ws9 +bindsym $mod+Shift+0 move container to workspace number $ws10 + +# reload the configuration file +bindsym $mod+Shift+c reload +# restart i3 inplace (preserves your layout/session, can be used to upgrade i3) +bindsym $mod+Shift+r restart +# exit i3 (logs you out of your X session) +bindsym $mod+Shift+e exec "i3-nagbar -t warning -m 'You pressed the exit shortcut. Do you really want to exit i3? This will end your X session.' -B 'Yes, exit i3' 'i3-msg exit'" + +# resize window (you can also use the mouse for that) +mode "resize" { + # These bindings trigger as soon as you enter the resize mode + + # Pressing left will shrink the window’s width. + # Pressing right will grow the window’s width. + # Pressing up will shrink the window’s height. + # Pressing down will grow the window’s height. 
+ bindsym j resize shrink width 10 px or 10 ppt + bindsym k resize grow height 10 px or 10 ppt + bindsym l resize shrink height 10 px or 10 ppt + bindsym semicolon resize grow width 10 px or 10 ppt + + # same bindings, but for the arrow keys + bindsym Left resize shrink width 10 px or 10 ppt + bindsym Down resize grow height 10 px or 10 ppt + bindsym Up resize shrink height 10 px or 10 ppt + bindsym Right resize grow width 10 px or 10 ppt + + # back to normal: Enter or Escape or $mod+r + bindsym Return mode "default" + bindsym Escape mode "default" + bindsym $mod+r mode "default" +} + +bindsym $mod+r mode "resize" + +# Start i3bar to display a workspace bar (plus the system information i3status +# finds out, if available) +bar { + status_command i3status +} + +# make sure qemu is floating +for_window [title="^QEMU"] floating enable diff --git a/roles/dotfiles/files/.config/i3status/config b/roles/dotfiles/files/.config/i3status/config new file mode 100644 index 0000000..c4c24ca --- /dev/null +++ b/roles/dotfiles/files/.config/i3status/config @@ -0,0 +1,65 @@ +general { + output_format = "i3bar" + colors = true + interval = 5 +} + +order += "disk /home" +order += "disk /usr/local" +order += "wireless iwm0" +order += "battery 0" +order += "cpu_temperature 0" +order += "load" +order += "tztime local" + +wireless iwm0 { + format_up = "W: [%essid] %ip" + format_down = "W: down" +} + +ethernet eth0 { + format_up = "E: %ip (%speed)" + format_down = "E: down" +} + +battery 0 { + format = "%status %percentage %remaining %emptytime" + format_down = "No battery" + status_chr = "CHR" + status_bat = "BAT" + status_unk = "UNK" + status_full = "FULL" + path = "/sys/class/power_supply/BAT%d/uevent" + low_threshold = 10 +} + +tztime local { + format = "%Y-%m-%d %H:%M:%S" +} + +load { + format = "%5min" +} + +cpu_temperature 0 { + format = "T: %degrees °C" + path = "/sys/devices/platform/coretemp.0/temp1_input" +} + +memory { + format = "%used" + threshold_degraded = "10%" + 
format_degraded = "MEMORY: %free" +} + +disk "/home" { + format = "%free" +} + +disk "/usr/local" { + format = "%free" +} + +read_file uptime { + path = "/proc/uptime" +} diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt new file mode 100644 index 0000000..68e0493 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt @@ -0,0 +1,88 @@ + +=========================================================================== + This is a list of the latest RFCs only. + To get the full list of RFCs, please look up rfc-index.txt +=========================================================================== + +8197 A SIP Response Code for Unwanted Calls. H. Schulzrinne. July 2017. + (Format: TXT=19114 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8197) + +8198 Aggressive Use of DNSSEC-Validated Cache. K. Fujiwara, A. Kato, W. + Kumari. July 2017. (Format: TXT=27918 bytes) (Updates RFC4035) + (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8198) + +8199 YANG Module Classification. D. Bogdanovic, B. Claise, C. Moberg. + July 2017. (Format: TXT=23080 bytes) (Status: INFORMATIONAL) (DOI: + 10.17487/RFC8199) + +8200 Internet Protocol, Version 6 (IPv6) Specification. S. Deering, R. + Hinden. July 2017. (Format: TXT=93658 bytes) (Obsoletes RFC2460) + (Also STD0086) (Status: INTERNET STANDARD) (DOI: 10.17487/RFC8200) + +8201 Path MTU Discovery for IP version 6. J. McCann, S. Deering, J. + Mogul, R. Hinden, Ed.. July 2017. (Format: TXT=42751 bytes) + (Obsoletes RFC1981) (Also STD0087) (Status: INTERNET STANDARD) (DOI: + 10.17487/RFC8201) + +8202 IS-IS Multi-Instance. L. Ginsberg, S. Previdi, W. Henderickx. June + 2017. (Format: TXT=35114 bytes) (Obsoletes RFC6822) (Status: + PROPOSED STANDARD) (DOI: 10.17487/RFC8202) + +8203 BGP Administrative Shutdown Communication. J. Snijders, J. Heitz, J. + Scudder. July 2017. 
(Format: TXT=12532 bytes) (Updates RFC4486) + (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8203) + +8212 Default External BGP (EBGP) Route Propagation Behavior without + Policies. J. Mauch, J. Snijders, G. Hankins. July 2017. (Format: + TXT=12552 bytes) (Updates RFC4271) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8212) + +8213 Security of Messages Exchanged between Servers and Relay Agents. B. + Volz, Y. Pal. August 2017. (Format: TXT=17657 bytes) (Status: + PROPOSED STANDARD) (DOI: 10.17487/RFC8213) + +8214 Virtual Private Wire Service Support in Ethernet VPN. S. Boutros, A. + Sajassi, S. Salam, J. Drake, J. Rabadan. August 2017. (Format: + TXT=34563 bytes) (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8214) + +8215 Local-Use IPv4/IPv6 Translation Prefix. T. Anderson. August 2017. + (Format: TXT=14846 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8215) + +8217 Clarifications for When to Use the name-addr Production in SIP + Messages. R. Sparks. August 2017. (Format: TXT=12829 bytes) (Updates + RFC3261, RFC3325, RFC3515, RFC3892, RFC4508, RFC5002, RFC5318, + RFC5360, RFC5502) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8217) + +8218 Multipath Extension for the Optimized Link State Routing Protocol + Version 2 (OLSRv2). J. Yi, B. Parrein. August 2017. (Format: + TXT=56286 bytes) (Status: EXPERIMENTAL) (DOI: 10.17487/RFC8218) + +8219 Benchmarking Methodology for IPv6 Transition Technologies. M. + Georgescu, L. Pislaru, G. Lencse. August 2017. (Format: TXT=66085 + bytes) (Status: INFORMATIONAL) (DOI: 10.17487/RFC8219) + +8227 MPLS-TP Shared-Ring Protection (MSRP) Mechanism for Ring Topology. + W. Cheng, L. Wang, H. Li, H. van Helvoort, J. Dong. August 2017. + (Format: TXT=128880 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8227) + +8228 Guidance on Designing Label Generation Rulesets (LGRs) Supporting + Variant Labels. A. Freytag. August 2017. 
(Format: TXT=50900 bytes) + (Status: INFORMATIONAL) (DOI: 10.17487/RFC8228) + +8229 TCP Encapsulation of IKE and IPsec Packets. T. Pauly, S. Touati, R. + Mantha. August 2017. (Format: TXT=56294 bytes) (Status: PROPOSED + STANDARD) (DOI: 10.17487/RFC8229) + +8234 Updates to MPLS Transport Profile (MPLS-TP) Linear Protection in + Automatic Protection Switching (APS) Mode. J. Ryoo, T. Cheung, H. + van Helvoort, I. Busi, G. Wen. August 2017. (Format: TXT=16898 + bytes) (Updates RFC7271) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8234) + +=========================================================================== + - This file last updated Wed Aug 30 02:30:01 PDT 2017 + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt new file mode 100644 index 0000000..60a0e9b --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt @@ -0,0 +1,451 @@ + + + + + + +Network Working Group S. Hanks +Request for Comments: 1701 NetSmiths, Ltd. +Category: Informational T. Li + D. Farinacci + P. Traina + cisco Systems + October 1994 + + + Generic Routing Encapsulation (GRE) + +Status of this Memo + + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +Abstract + + This document specifies a protocol for performing encapsulation of an + arbitrary network layer protocol over another arbitrary network layer + protocol. + +Introduction + + A number of different proposals [RFC 1234, RFC 1226] currently exist + for the encapsulation of one protocol over another protocol. Other + types of encapsulations [RFC 1241, SDRP, RFC 1479] have been proposed + for transporting IP over IP for policy purposes. This memo describes + a protocol which is very similar to, but is more general than, the + above proposals. In attempting to be more general, many protocol + specific nuances have been ignored. 
The result is that this proposal + is may be less suitable for a situation where a specific "X over Y" + encapsulation has been described. It is the attempt of this protocol + to provide a simple, general purpose mechanism which is reduces the + problem of encapsulation from its current O(n^2) problem to a more + manageable state. This proposal also attempts to provide a + lightweight encapsulation for use in policy based routing. This memo + explicitly does not address the issue of when a packet should be + encapsulated. This memo acknowledges, but does not address problems + with mutual encapsulation [RFC 1326]. + + In the most general case, a system has a packet that needs to be + encapsulated and routed. We will call this the payload packet. The + payload is first encapsulated in a GRE packet, which possibly also + includes a route. The resulting GRE packet can then be encapsulated + in some other protocol and then forwarded. We will call this outer + + + +Hanks, Li, Farinacci & Traina [Page 1] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + protocol the delivery protocol. The algorithms for processing this + packet are discussed later. 
+ +Overall packet + + The entire encapsulated packet would then have the form: + + --------------------------------- + | | + | Delivery Header | + | | + --------------------------------- + | | + | GRE Header | + | | + --------------------------------- + | | + | Payload packet | + | | + --------------------------------- + +Packet header + + The GRE packet header has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|R|K|S|s|Recur| Flags | Ver | Protocol Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum (optional) | Offset (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Key (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Sequence Number (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Routing (optional) + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Flags and version (2 octets) + + The GRE flags are encoded in the first two octets. Bit 0 is the + most significant bit, bit 15 is the least significant bit. Bits + 13 through 15 are reserved for the Version field. Bits 5 through + 12 are reserved for future use and MUST be transmitted as zero. + + + + + + +Hanks, Li, Farinacci & Traina [Page 2] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + Checksum Present (bit 0) + + If the Checksum Present bit is set to 1, then the Checksum field + is present and contains valid information. + + If either the Checksum Present bit or the Routing Present bit are + set, BOTH the Checksum and Offset fields are present in the GRE + packet. + + Routing Present (bit 1) + + If the Routing Present bit is set to 1, then it indicates that the + Offset and Routing fields are present and contain valid + information. 
+ + If either the Checksum Present bit or the Routing Present bit are + set, BOTH the Checksum and Offset fields are present in the GRE + packet. + + Key Present (bit 2) + + If the Key Present bit is set to 1, then it indicates that the Key + field is present in the GRE header. Otherwise, the Key field is + not present in the GRE header. + + Sequence Number Present (bit 3) + + If the Sequence Number Present bit is set to 1, then it indicates + that the Sequence Number field is present. Otherwise, the + Sequence Number field is not present in the GRE header. + + Strict Source Route (bit 4) + + The meaning of the Strict Source route bit is defined in other + documents. It is recommended that this bit only be set to 1 if + all of the the Routing Information consists of Strict Source + Routes. + + Recursion Control (bits 5-7) + + Recursion control contains a three bit unsigned integer which + contains the number of additional encapsulations which are + permissible. This SHOULD default to zero. + + Version Number (bits 13-15) + + The Version Number field MUST contain the value 0. Other values + are outside of the scope of this document. + + + +Hanks, Li, Farinacci & Traina [Page 3] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + Protocol Type (2 octets) + + The Protocol Type field contains the protocol type of the payload + packet. In general, the value will be the Ethernet protocol type + field for the packet. Currently defined protocol types are listed + below. Additional values may be defined in other documents. + + Offset (2 octets) + + The offset field indicates the octet offset from the start of the + Routing field to the first octet of the active Source Route Entry + to be examined. This field is present if the Routing Present or + the Checksum Present bit is set to 1, and contains valid + information only if the Routing Present bit is set to 1. 
+ + Checksum (2 octets) + + The Checksum field contains the IP (one's complement) checksum of + the GRE header and the payload packet. This field is present if + the Routing Present or the Checksum Present bit is set to 1, and + contains valid information only if the Checksum Present bit is set + to 1. + + Key (4 octets) + + The Key field contains a four octet number which was inserted by + the encapsulator. It may be used by the receiver to authenticate + the source of the packet. The techniques for determining + authenticity are outside of the scope of this document. The Key + field is only present if the Key Present field is set to 1. + + Sequence Number (4 octets) + + The Sequence Number field contains an unsigned 32 bit integer + which is inserted by the encapsulator. It may be used by the + receiver to establish the order in which packets have been + transmitted from the encapsulator to the receiver. The exact + algorithms for the generation of the Sequence Number and the + semantics of their reception is outside of the scope of this + document. + + Routing (variable) + + The Routing field is optional and is present only if the Routing + Present bit is set to 1. + + + + + + +Hanks, Li, Farinacci & Traina [Page 4] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + The Routing field is a list of Source Route Entries (SREs). Each + SRE has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Routing Information ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The routing field is terminated with a "NULL" SRE containing an + address family of type 0x0000 and a length of 0. 
+ + Address Family (2 octets) + + The Address Family field contains a two octet value which indicates + the syntax and semantics of the Routing Information field. The + values for this field and the corresponding syntax and semantics for + Routing Information are defined in other documents. + + SRE Offset (1 octet) + + The SRE Offset field indicates the octet offset from the start of the + Routing Information field to the first octet of the active entry in + Source Route Entry to be examined. + + SRE Length (1 octet) + + The SRE Length field contains the number of octets in the SRE. If + the SRE Length is 0, this indicates this is the last SRE in the + Routing field. + + Routing Information (variable) + + The Routing Information field contains data which may be used in + routing this packet. The exact semantics of this field is defined in + other documents. + +Forwarding of GRE packets + + Normally, a system which is forwarding delivery layer packets will + not differentiate GRE packets from other packets in any way. + However, a GRE packet may be received by a system. In this case, the + system should use some delivery-specific means to determine that this + is a GRE packet. Once this is determined, the Key, Sequence Number + and Checksum fields if they contain valid information as indicated by + the corresponding flags may be checked. If the Routing Present bit + + + +Hanks, Li, Farinacci & Traina [Page 5] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + is set to 1, then the Address Family field should be checked to + determine the semantics and use of the SRE Length, SRE Offset and + Routing Information fields. The exact semantics for processing a SRE + for each Address Family is defined in other documents. + + Once all SREs have been processed, then the source route is complete, + the GRE header should be removed, the payload's TTL MUST be + decremented (if one exists) and the payload packet should be + forwarded as a normal packet. 
The exact forwarding method depends on + the Protocol Type field. + +Current List of Protocol Types + + The following are currently assigned protocol types for GRE. Future + protocol types must be taken from DIX ethernet encoding. For + historical reasons, a number of other values have been used for some + protocols. The following table of values MUST be used to identify + the following protocols: + + Protocol Family PTYPE + --------------- ----- + Reserved 0000 + SNA 0004 + OSI network layer 00FE + PUP 0200 + XNS 0600 + IP 0800 + Chaos 0804 + RFC 826 ARP 0806 + Frame Relay ARP 0808 + VINES 0BAD + VINES Echo 0BAE + VINES Loopback 0BAF + DECnet (Phase IV) 6003 + Transparent Ethernet Bridging 6558 + Raw Frame Relay 6559 + Apollo Domain 8019 + Ethertalk (Appletalk) 809B + Novell IPX 8137 + RFC 1144 TCP/IP compression 876B + IP Autonomous Systems 876C + Secure Data 876D + Reserved FFFF + + See the IANA list of Ether Types for the complete list of these + values. + + URL = ftp://ftp.isi.edu/in-notes/iana/assignments/ethernet-numbers. + + + +Hanks, Li, Farinacci & Traina [Page 6] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + +References + + RFC 1479 + Steenstrup, M. "Inter-Domain Policy Routing Protocol + Specification: Version 1", RFC1479, BBN Systems and Technologies, + July 1993. + + RFC 1226 + Kantor, B. "Internet Protocol Encapsulation of AX.25 Frames", RFC + 1226, University of California, San Diego, May 1991. + + RFC 1234 + Provan, D. "Tunneling IPX Traffic through IP Networks", RFC 1234, + Novell, Inc., June 1991. + + RFC 1241 + Woodburn, R., and D. Mills, "Scheme for an Internet Encapsulation + Protocol: Version 1", RFC 1241, SAIC, University of Delaware, July + 1991. + + RFC 1326 + Tsuchiya, P., "Mutual Encapsulation Considered Dangerous", RFC + 1326, Bellcore, May 1992. + + SDRP + Estrin, D., Li, T., and Y. Rekhter, "Source Demand Routing + Protocol Specification (Version 1)", Work in Progress. 
+ + RFC 1702 + Hanks, S., Li, T., Farinacci, D., and P. Traina, "Generic Routing + Encapsulation over IPv4 networks", RFC 1702, NetSmiths, Ltd., + cisco Systems, October 1994. + +Security Considerations + + Security issues are not discussed in this memo. + + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 7] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + +Acknowledgements + + The authors would like to acknowledge Yakov Rekhter (IBM) and Deborah + Estrin (USC) for their advice, encouragement and insightful comments. + +Authors' Addresses + + Stan Hanks + NetSmiths, Ltd. + 2025 Lincoln Highway + Edison NJ, 08817 + + EMail: stan@netsmiths.com + + + Tony Li + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: tli@cisco.com + + + Dino Farinacci + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: dino@cisco.com + + + Paul Traina + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: pst@cisco.com + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 8] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt new file mode 100644 index 0000000..50b57ae --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt @@ -0,0 +1,227 @@ + + + + + + +Network Working Group S. Hanks +Request for Comments: 1702 NetSmiths, Ltd. +Category: Informational T. Li + D. Farinacci + P. Traina + cisco Systems + October 1994 + + + Generic Routing Encapsulation over IPv4 networks + +Status of this Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +Introduction + + In an earlier memo [RFC 1701], we described GRE, a mechanism for + encapsulating arbitrary packets within an arbitrary transport + protocol. This is a companion memo which describes the use of GRE + with IP. 
This memo addresses the case of using IP as the delivery + protocol or the payload protocol and the special case of IP as both + the delivery and payload. This memo also describes using IP + addresses and autonomous system numbers as part of a GRE source + route. + +IP as a delivery protocol + + GRE packets which are encapsulated within IP will use IP protocol + type 47. + +IP as a payload protocol + + IP packets will be encapsulated with a Protocol Type field of 0x800. + + For the Address Family value of 0x800, the Routing Information field + will consist of a list of IP addresses and indicates an IP source + route. The first octet of the Routing Information field constitute a + 8 bit integer offset from the start of the Source Route Entry (SRE), + called the SRE Offset. The SRE Offset indicates the first octet of + the next IP address. The SRE Length field consists of the total + length of the IP Address List in octets. + + + + + + + +Hanks, Li, Farinacci & Traina [Page 1] + +RFC 1702 GRE over IPv4 networks October 1994 + + + This has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | IP Address List ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + For the Address Family value of 0xfffe, the Routing Information field + will consist of a list of Autonomous System numbers and indicates an + AS source route. The third octet of the Routing Information field + contains an 8 bit unsigned integer offset from the start of the + Source Route Entry (SRE), called the SRE Offset. The SRE Offset + indicates the first octet of the next AS number. THe SRE Length + field consists of the total length of the AS Number list in octets. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | AS Number List ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +IP as both delivery and payload protocol + + When IP is encapsulated in IP, the TTL, TOS, and IP security options + MAY be copied from the payload packet into the same fields in the + delivery packet. The payload packet's TTL MUST be decremented when + the packet is decapsulated to insure that no packet lives forever. + +IP source routes + + When a system is processing a SRE with an Address Family indicating + an IP source route, it MUST use the SRE Offset to determine the next + destination IP address. If the next IP destination is this system, + the SRE Offset field should be increased by four (the size of an IP + address). If the SRE Offset is equal to the SRE Length in this SRE, + then the Offset field in the GRE header should be adjusted to point + to the next SRE (if any). This should be repeated until the next IP + destination is not this system or until the entire SRE has been + processed. + + If the source route is incomplete, then the Strict Source Route bit + is checked. If the source route is a strict source route and the + next IP destination is NOT an adjacent system, the packet MUST be + + + +Hanks, Li, Farinacci & Traina [Page 2] + +RFC 1702 GRE over IPv4 networks October 1994 + + + dropped. Otherwise, the system should use the IP address indicated + by the Offset field to replace the destination address in the + delivery header and forward the packet. + +Autonomous system source routes + + When a system is processing a SRE with an Address Family indicating + an AS source route, it MUST use the SRE Offset field to determine the + next autonomous system. 
If the next autonomous system is the local + autonomous system, the SRE Offset field should be increased by two + (the size of an autonomous system number). If the SRE Offset is + equal to the SRE Length in this SRE, then the Offset field in the GRE + header should be adjusted to point to the next SRE (if any). This + should be repeated until the next autonomous system number is not + equal to the local autonomous system number or until the entire SRE + has been processed. + + If the source route is incomplete, then the Strict Source Route bit + is checked. If the source route is a strict source route and the + next autonomous system is NOT an adjacent autonomous system, the + packet should be dropped. Otherwise, the system should use the + autonomous system number indicated by the SRE Offset field to replace + the destination address in the delivery header and forward the + packet. The exact mechanism for determining the next delivery + destination address given the AS number is outside of the scope of + this document. + +Security Considerations + + Security issues are not discussed in this memo. + + + + + + + + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 3] + +RFC 1702 GRE over IPv4 networks October 1994 + + +Authors' Addresses + + Stan Hanks + NetSmiths, Ltd. + 2025 Lincoln Highway + Edison, NJ 08817 + + EMail: stan@netsmiths.com + + + Tony Li + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: tli@cisco.com + + + Dino Farinacci + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: dino@cisco.com + + + Paul Traina + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: pst@cisco.com + +References + + RFC 1701 + Hanks, S., Li, T, Farinacci, D., and P. Traina, "Generic Routing + Encapsulation", RFC 1701, NetSmiths, Ltd., and cisco Systems, + October 1994. 
+ + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 4] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt new file mode 100644 index 0000000..e31fae4 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt @@ -0,0 +1,171 @@ + + + + + + +Network Working Group S. Bradner +Request for Comments: 2119 Harvard University +BCP: 14 March 1997 +Category: Best Current Practice + + + Key words for use in RFCs to Indicate Requirement Levels + +Status of this Memo + + This document specifies an Internet Best Current Practices for the + Internet Community, and requests discussion and suggestions for + improvements. Distribution of this memo is unlimited. + +Abstract + + In many standards track documents several words are used to signify + the requirements in the specification. These words are often + capitalized. This document defines these words as they should be + interpreted in IETF documents. Authors who follow these guidelines + should incorporate this phrase near the beginning of their document: + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL + NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and + "OPTIONAL" in this document are to be interpreted as described in + RFC 2119. + + Note that the force of these words is modified by the requirement + level of the document in which they are used. + +1. MUST This word, or the terms "REQUIRED" or "SHALL", mean that the + definition is an absolute requirement of the specification. + +2. MUST NOT This phrase, or the phrase "SHALL NOT", mean that the + definition is an absolute prohibition of the specification. + +3. SHOULD This word, or the adjective "RECOMMENDED", mean that there + may exist valid reasons in particular circumstances to ignore a + particular item, but the full implications must be understood and + carefully weighed before choosing a different course. + +4. 
SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that + there may exist valid reasons in particular circumstances when the + particular behavior is acceptable or even useful, but the full + implications should be understood and the case carefully weighed + before implementing any behavior described with this label. + + + + + +Bradner Best Current Practice [Page 1] + +RFC 2119 RFC Key Words March 1997 + + +5. MAY This word, or the adjective "OPTIONAL", mean that an item is + truly optional. One vendor may choose to include the item because a + particular marketplace requires it or because the vendor feels that + it enhances the product while another vendor may omit the same item. + An implementation which does not include a particular option MUST be + prepared to interoperate with another implementation which does + include the option, though perhaps with reduced functionality. In the + same vein an implementation which does include a particular option + MUST be prepared to interoperate with another implementation which + does not include the option (except, of course, for the feature the + option provides.) + +6. Guidance in the use of these Imperatives + + Imperatives of the type defined in this memo must be used with care + and sparingly. In particular, they MUST only be used where it is + actually required for interoperation or to limit behavior which has + potential for causing harm (e.g., limiting retransmissions). For + example, they must not be used to try to impose a particular method + on implementors where the method is not required for + interoperability. + +7. Security Considerations + + These terms are frequently used to specify behavior with security + implications. The effects on security of not implementing a MUST or + SHOULD, or doing something the specification says MUST NOT or SHOULD + NOT be done may be very subtle.
Document authors should take the time + to elaborate the security implications of not following + recommendations or requirements as most implementors will not have + had the benefit of the experience and discussion that produced the + specification. + +8. Acknowledgments + + The definitions of these terms are an amalgam of definitions taken + from a number of RFCs. In addition, suggestions have been + incorporated from a number of people including Robert Ullmann, Thomas + Narten, Neal McBurnett, and Robert Elz. + + + + + + + + + + + + +Bradner Best Current Practice [Page 2] + +RFC 2119 RFC Key Words March 1997 + + +9. Author's Address + + Scott Bradner + Harvard University + 1350 Mass. Ave. + Cambridge, MA 02138 + + phone - +1 617 495 3864 + + email - sob@harvard.edu + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Bradner Best Current Practice [Page 3] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt new file mode 100644 index 0000000..614926a --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt @@ -0,0 +1,507 @@ + + + + + + +Network Working Group D. Farinacci +Request for Comments: 2784 T. Li +Category: Standards Track Procket Networks + S. Hanks + Enron Communications + D. Meyer + Cisco Systems + P. Traina + Juniper Networks + March 2000 + + + Generic Routing Encapsulation (GRE) + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. 
+ +Abstract + + This document specifies a protocol for encapsulation of an arbitrary + network layer protocol over another arbitrary network layer protocol. + +1. Introduction + + A number of different proposals [RFC1234, RFC1226] currently exist + for the encapsulation of one protocol over another protocol. Other + types of encapsulations [RFC1241, RFC1479] have been proposed for + transporting IP over IP for policy purposes. This memo describes a + protocol which is very similar to, but is more general than, the + above proposals. In attempting to be more general, many protocol + specific nuances have been ignored. The result is that this proposal + may be less suitable for a situation where a specific "X over Y" + encapsulation has been described. It is the attempt of this protocol + to provide a simple, general purpose mechanism which reduces the + problem of encapsulation from its current O(n^2) size to a more + manageable size. This memo purposely does not address the issue of + when a packet should be encapsulated. This memo acknowledges, but + does not address problems such as mutual encapsulation [RFC1326]. + + + + +Farinacci, et al. Standards Track [Page 1] + +RFC 2784 Generic Routing Encapsulation March 2000 + + + In the most general case, a system has a packet that needs to be + encapsulated and delivered to some destination. We will call this + the payload packet. The payload is first encapsulated in a GRE + packet. The resulting GRE packet can then be encapsulated in some + other protocol and then forwarded. We will call this outer protocol + the delivery protocol. The algorithms for processing this packet are + discussed later. + + Finally this specification describes the intersection of GRE + currently deployed by multiple vendors. + + The keywords MUST, MUST NOT, MAY, OPTIONAL, REQUIRED, RECOMMENDED, + SHALL, SHALL NOT, SHOULD, SHOULD NOT are to be interpreted as defined + in RFC 2119 [RFC2119]. + +2. 
Structure of a GRE Encapsulated Packet + + A GRE encapsulated packet has the form: + + --------------------------------- + | | + | Delivery Header | + | | + --------------------------------- + | | + | GRE Header | + | | + --------------------------------- + | | + | Payload packet | + | | + --------------------------------- + + This specification is generally concerned with the structure of the + GRE header, although special consideration is given to some of the + issues surrounding IPv4 payloads. + +2.1. GRE Header + + The GRE packet header has the form: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C| Reserved0 | Ver | Protocol Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum (optional) | Reserved1 (Optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + +Farinacci, et al. Standards Track [Page 2] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +2.2. Checksum Present (bit 0) + + If the Checksum Present bit is set to one, then the Checksum and the + Reserved1 fields are present and the Checksum field contains valid + information. Note that a compliant implementation MUST accept and + process this field. + +2.3. Reserved0 (bits 1-12) + + A receiver MUST discard a packet where any of bits 1-5 are non-zero, + unless that receiver implements RFC 1701. Bits 6-12 are reserved for + future use. These bits MUST be sent as zero and MUST be ignored on + receipt. + +2.3.1. Version Number (bits 13-15) + + The Version Number field MUST contain the value zero. + +2.4. Protocol Type (2 octets) + + The Protocol Type field contains the protocol type of the payload + packet. These Protocol Types are defined in [RFC1700] as "ETHER + TYPES" and in [ETYPES]. An implementation receiving a packet + containing a Protocol Type which is not listed in [RFC1700] or + [ETYPES] SHOULD discard the packet. + +2.5. 
Checksum (2 octets) + + The Checksum field contains the IP (one's complement) checksum of + all the 16 bit words in the GRE header and the payload packet. + For purposes of computing the checksum, the value of the checksum + field is zero. This field is present only if the Checksum Present bit + is set to one. + +2.6. Reserved1 (2 octets) + + The Reserved1 field is reserved for future use, and if present, MUST + be transmitted as zero. The Reserved1 field is present only when the + Checksum field is present (that is, Checksum Present bit is set to + one). + +3. IPv4 as a Payload + + When IPv4 is being carried as the GRE payload, the Protocol Type + field MUST be set to 0x800. + + + + + + +Farinacci, et al. Standards Track [Page 3] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +3.1. Forwarding Decapsulated IPv4 Payload Packets + + When a tunnel endpoint decapsulates a GRE packet which has an IPv4 + packet as the payload, the destination address in the IPv4 payload + packet header MUST be used to forward the packet and the TTL of the + payload packet MUST be decremented. Care should be taken when + forwarding such a packet, since if the destination address of the + payload packet is the encapsulator of the packet (i.e., the other end + of the tunnel), looping can occur. In this case, the packet MUST be + discarded. + +4. IPv4 as a Delivery Protocol + + The IPv4 protocol 47 [RFC1700] is used when GRE packets are + encapsulated in IPv4. See [RFC1122] for requirements relating to the + delivery of packets over IPv4 networks. + +5. Interoperation with RFC 1701 Compliant Implementations + + In RFC 1701, the field described here as Reserved0 contained a number + of flag bits which this specification deprecates. In particular, the + Routing Present, Key Present, Sequence Number Present, and Strict + Source Route bits have been deprecated, along with the Recursion + Control field.
As a result, the GRE header will never contain the + Key, Sequence Number or Routing fields specified in RFC 1701. + + There are, however, existing implementations of RFC 1701. The + following sections describe correct interoperation with such + implementations. + +5.1. RFC 1701 Compliant Receiver + + An implementation complying to this specification will transmit the + Reserved0 field set to zero. An RFC 1701 compliant receiver will + interpret this as having the Routing Present, Key Present, Sequence + Number Present, and Strict Source Route bits set to zero, and will + not expect the RFC 1701 Key, Sequence Number or Routing fields to be + present. + +5.2. RFC 1701 Compliant Transmitter + + An RFC 1701 transmitter may set any of the Routing Present, Key + Present, Sequence Number Present, and Strict Source Route bits to + one, and thus may transmit the RFC 1701 Key, Sequence Number or + Routing fields in the GRE header. As stated in Section 2.3, a packet + with non-zero bits in any of bits 1-5 MUST be discarded unless the + receiver implements RFC 1701. + + + + +Farinacci, et al. Standards Track [Page 4] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +6. Security Considerations + + Security in a network using GRE should be relatively similar to + security in a normal IPv4 network, as routing using GRE follows the + same routing that IPv4 uses natively. Route filtering will remain + unchanged. However packet filtering requires either that a firewall + look inside the GRE packet or that the filtering is done on the GRE + tunnel endpoints. In those environments in which this is considered + to be a security issue it may be desirable to terminate the tunnel at + the firewall. + +7. IANA Considerations + + This section considers the assignment of additional GRE Version + Numbers and Protocol Types. + +7.1. GRE Version Numbers + + This document specifies GRE version number 0. GRE version number 1 is + used by PPTP [RFC2637].
Additional GRE version numbers are assigned + by IETF Consensus as defined in RFC 2434 [RFC2434]. + +7.2. Protocol Types + + GRE uses an ETHER Type for the Protocol Type. New ETHER TYPES are + assigned by Xerox Systems Institute [RFC1700]. + +8. Acknowledgments + + This document is derived from the original ideas of the authors of + RFC 1701 and RFC 1702. Hitoshi Asaeda, Scott Bradner, Randy Bush, + Brian Carpenter, Bill Fenner, Andy Malis, Thomas Narten, Dave Thaler, + Tim Gleeson and others provided many constructive and insightful + comments. + + + + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 5] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +9. Appendix -- Known Issues + + This document specifies the behavior of currently deployed GRE + implementations. As such, it does not attempt to address the + following known issues: + + o Interaction Path MTU Discovery (PMTU) [RFC1191] + + Existing implementations of GRE, when using IPv4 as the Delivery + Header, do not implement Path MTU discovery and do not set the + Don't Fragment bit in the Delivery Header. This can cause large + packets to become fragmented within the tunnel and reassembled at + the tunnel exit (independent of whether the payload packet is using + PMTU). If a tunnel entry point were to use Path MTU discovery, + however, that tunnel entry point would also need to relay ICMP + unreachable error messages (in particular the "fragmentation needed + and DF set" code) back to the originator of the packet, which is + not a requirement in this specification. Failure to properly relay + Path MTU information to an originator can result in the following + behavior: the originator sets the don't fragment bit, the packet + gets dropped within the tunnel, but since the originator doesn't + receive proper feedback, it retransmits with the same PMTU, causing + subsequently transmitted packets to be dropped. 
+ + o IPv6 as Delivery and/or Payload Protocol + + This specification describes the intersection of GRE currently + deployed by multiple vendors. IPv6 as delivery and/or payload + protocol is not included in the currently deployed versions of GRE. + + o Interaction with ICMP + + o Interaction with the Differentiated Services Architecture + + o Multiple and Looping Encapsulations + +10. REFERENCES + + [ETYPES] ftp://ftp.isi.edu/in-notes/iana/assignments/ethernet- + numbers + + [RFC1122] Braden, R., "Requirements for Internet hosts - + communication layers", STD 3, RFC 1122, October 1989. + + [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, + November 1990. + + + + + +Farinacci, et al. Standards Track [Page 6] + +RFC 2784 Generic Routing Encapsulation March 2000 + + + [RFC1226] Kantor, B., "Internet Protocol Encapsulation of AX.25 + Frames", RFC 1226, May 1991. + + [RFC1234] Provan, D., "Tunneling IPX Traffic through IP Networks", + RFC 1234, June 1991. + + [RFC1241] Woodburn, R. and D. Mills, "Scheme for an Internet + Encapsulation Protocol: Version 1", RFC 1241, July 1991. + + [RFC1326] Tsuchiya, P., "Mutual Encapsulation Considered Dangerous", + RFC 1326, May 1992. + + [RFC1479] Steenstrup, M., "Inter-Domain Policy Routing Protocol + Specification: Version 1", RFC 1479, July 1993. + + [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC + 1700, October 1994. + + [RFC1701] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic + Routing Encapsulation", RFC 1701, October 1994. + + [RFC1702] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic + Routing Encapsulation over IPv4 networks", RFC 1702, + October 1994. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March, 1997. + + [RFC2408] Maughan, D., Schertler, M., Schneider, M. and J. Turner, + "Internet Security Association and Key Management Protocol + (ISAKMP)", RFC 2408, November 1998. + + [RFC2434] Narten, T. 
and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October, 1998. + + [RFC2637] Hamzeh, K., et al., "Point-to-Point Tunneling Protocol + (PPTP)", RFC 2637, July, 1999. + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 7] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +11. Authors' Addresses + + Dino Farinacci + Procket Networks + 3850 No. First St., Ste. C + San Jose, CA 95134 + + EMail: dino@procket.com + + + Tony Li + Procket Networks + 3850 No. First St., Ste. C + San Jose, CA 95134 + + Phone: +1 408 954 7903 + Fax: +1 408 987 6166 + EMail: tony1@home.net + + + Stan Hanks + Enron Communications + + EMail: stan_hanks@enron.net + + + David Meyer + Cisco Systems, Inc. + 170 Tasman Drive + San Jose, CA, 95134 + + EMail: dmm@cisco.com + + + Paul Traina + Juniper Networks + EMail: pst@juniper.net + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 8] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +12. Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. 
+ + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 9] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt new file mode 100644 index 0000000..b9ad20c --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt @@ -0,0 +1,7507 @@ + + + + + + +Network Working Group R. Stewart +Request for Comments: 2960 Q. Xie +Category: Standards Track Motorola + K. Morneault + C. Sharp + Cisco + H. Schwarzbauer + Siemens + T. Taylor + Nortel Networks + I. Rytina + Ericsson + M. Kalla + Telcordia + L. Zhang + UCLA + V. Paxson + ACIRI + October 2000 + + + Stream Control Transmission Protocol + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + This document describes the Stream Control Transmission Protocol + (SCTP). SCTP is designed to transport PSTN signaling messages over + IP networks, but is capable of broader applications. 
+ + SCTP is a reliable transport protocol operating on top of a + connectionless packet network such as IP. It offers the following + services to its users: + + -- acknowledged error-free non-duplicated transfer of user data, + -- data fragmentation to conform to discovered path MTU size, + + + + +Stewart, et al. Standards Track [Page 1] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + -- sequenced delivery of user messages within multiple streams, + with an option for order-of-arrival delivery of individual user + messages, + -- optional bundling of multiple user messages into a single SCTP + packet, and + -- network-level fault tolerance through supporting of multi- + homing at either or both ends of an association. + + The design of SCTP includes appropriate congestion avoidance behavior + and resistance to flooding and masquerade attacks. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 2] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Table of Contents + + 1. Introduction.................................................. 5 + 1.1 Motivation.................................................. 6 + 1.2 Architectural View of SCTP.................................. 6 + 1.3 Functional View of SCTP..................................... 7 + 1.3.1 Association Startup and Takedown........................ 8 + 1.3.2 Sequenced Delivery within Streams....................... 9 + 1.3.3 User Data Fragmentation................................. 9 + 1.3.4 Acknowledgement and Congestion Avoidance................ 9 + 1.3.5 Chunk Bundling ......................................... 10 + 1.3.6 Packet Validation....................................... 10 + 1.3.7 Path Management......................................... 11 + 1.4 Key Terms................................................... 11 + 1.5 Abbreviations............................................... 
15 + 1.6 Serial Number Arithmetic.................................... 15 + 2. Conventions.................................................... 16 + 3. SCTP packet Format............................................ 16 + 3.1 SCTP Common Header Field Descriptions....................... 17 + 3.2 Chunk Field Descriptions.................................... 18 + 3.2.1 Optional/Variable-length Parameter Format............... 20 + 3.3 SCTP Chunk Definitions...................................... 21 + 3.3.1 Payload Data (DATA)..................................... 22 + 3.3.2 Initiation (INIT)....................................... 24 + 3.3.2.1 Optional or Variable Length Parameters.............. 26 + 3.3.3 Initiation Acknowledgement (INIT ACK)................... 30 + 3.3.3.1 Optional or Variable Length Parameters.............. 33 + 3.3.4 Selective Acknowledgement (SACK)........................ 33 + 3.3.5 Heartbeat Request (HEARTBEAT)........................... 37 + 3.3.6 Heartbeat Acknowledgement (HEARTBEAT ACK)............... 38 + 3.3.7 Abort Association (ABORT)............................... 39 + 3.3.8 Shutdown Association (SHUTDOWN)......................... 40 + 3.3.9 Shutdown Acknowledgement (SHUTDOWN ACK)................. 40 + 3.3.10 Operation Error (ERROR)................................ 41 + 3.3.10.1 Invalid Stream Identifier.......................... 42 + 3.3.10.2 Missing Mandatory Parameter........................ 43 + 3.3.10.3 Stale Cookie Error................................. 43 + 3.3.10.4 Out of Resource.................................... 44 + 3.3.10.5 Unresolvable Address............................... 44 + 3.3.10.6 Unrecognized Chunk Type............................ 44 + 3.3.10.7 Invalid Mandatory Parameter........................ 45 + 3.3.10.8 Unrecognized Parameters............................ 45 + 3.3.10.9 No User Data....................................... 46 + 3.3.10.10 Cookie Received While Shutting Down............... 
46 + 3.3.11 Cookie Echo (COOKIE ECHO).............................. 46 + 3.3.12 Cookie Acknowledgement (COOKIE ACK).................... 47 + 3.3.13 Shutdown Complete (SHUTDOWN COMPLETE).................. 48 + 4. SCTP Association State Diagram................................. 48 + + + +Stewart, et al. Standards Track [Page 3] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5. Association Initialization..................................... 52 + 5.1 Normal Establishment of an Association...................... 52 + 5.1.1 Handle Stream Parameters................................ 54 + 5.1.2 Handle Address Parameters............................... 54 + 5.1.3 Generating State Cookie................................. 56 + 5.1.4 State Cookie Processing................................. 57 + 5.1.5 State Cookie Authentication............................. 57 + 5.1.6 An Example of Normal Association Establishment.......... 58 + 5.2 Handle Duplicate or unexpected INIT, INIT ACK, COOKIE ECHO, + and COOKIE ACK.............................................. 60 + 5.2.1 Handle Duplicate INIT in COOKIE-WAIT + or COOKIE-ECHOED States................................. 60 + 5.2.2 Unexpected INIT in States Other than CLOSED, + COOKIE-ECHOED, COOKIE-WAIT and SHUTDOWN-ACK-SENT........ 61 + 5.2.3 Unexpected INIT ACK..................................... 61 + 5.2.4 Handle a COOKIE ECHO when a TCB exists.................. 62 + 5.2.4.1 An Example of a Association Restart................. 64 + 5.2.5 Handle Duplicate COOKIE ACK............................. 66 + 5.2.6 Handle Stale COOKIE Error............................... 66 + 5.3 Other Initialization Issues................................. 67 + 5.3.1 Selection of Tag Value.................................. 67 + 6. User Data Transfer............................................. 67 + 6.1 Transmission of DATA Chunks................................. 69 + 6.2 Acknowledgement on Reception of DATA Chunks................. 
70 + 6.2.1 Tracking Peer's Receive Buffer Space.................... 73 + 6.3 Management Retransmission Timer............................. 75 + 6.3.1 RTO Calculation......................................... 75 + 6.3.2 Retransmission Timer Rules.............................. 76 + 6.3.3 Handle T3-rtx Expiration................................ 77 + 6.4 Multi-homed SCTP Endpoints.................................. 78 + 6.4.1 Failover from Inactive Destination Address.............. 79 + 6.5 Stream Identifier and Stream Sequence Number................ 80 + 6.6 Ordered and Unordered Delivery.............................. 80 + 6.7 Report Gaps in Received DATA TSNs........................... 81 + 6.8 Adler-32 Checksum Calculation............................... 82 + 6.9 Fragmentation............................................... 83 + 6.10 Bundling .................................................. 84 + 7. Congestion Control .......................................... 85 + 7.1 SCTP Differences from TCP Congestion Control................ 85 + 7.2 SCTP Slow-Start and Congestion Avoidance.................... 87 + 7.2.1 Slow-Start.............................................. 87 + 7.2.2 Congestion Avoidance.................................... 89 + 7.2.3 Congestion Control...................................... 89 + 7.2.4 Fast Retransmit on Gap Reports.......................... 90 + 7.3 Path MTU Discovery.......................................... 91 + 8. Fault Management.............................................. 92 + 8.1 Endpoint Failure Detection.................................. 92 + 8.2 Path Failure Detection...................................... 92 + + + +Stewart, et al. Standards Track [Page 4] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 8.3 Path Heartbeat.............................................. 93 + 8.4 Handle "Out of the blue" Packets............................ 95 + 8.5 Verification Tag............................................ 
96 + 8.5.1 Exceptions in Verification Tag Rules.................... 97 + 9. Termination of Association..................................... 98 + 9.1 Abort of an Association..................................... 98 + 9.2 Shutdown of an Association.................................. 98 + 10. Interface with Upper Layer....................................101 + 10.1 ULP-to-SCTP................................................101 + 10.2 SCTP-to-ULP................................................111 + 11. Security Considerations.......................................114 + 11.1 Security Objectives........................................114 + 11.2 SCTP Responses To Potential Threats........................115 + 11.2.1 Countering Insider Attacks.............................115 + 11.2.2 Protecting against Data Corruption in the Network......115 + 11.2.3 Protecting Confidentiality.............................115 + 11.2.4 Protecting against Blind Denial of Service Attacks.....116 + 11.2.4.1 Flooding...........................................116 + 11.2.4.2 Blind Masquerade...................................118 + 11.2.4.3 Improper Monopolization of Services................118 + 11.3 Protection against Fraud and Repudiation...................119 + 12. Recommended Transmission Control Block (TCB) Parameters.......120 + 12.1 Parameters necessary for the SCTP instance.................120 + 12.2 Parameters necessary per association (i.e. the TCB)........120 + 12.3 Per Transport Address Data.................................122 + 12.4 General Parameters Needed..................................123 + 13. IANA Considerations...........................................123 + 13.1 IETF-defined Chunk Extension...............................123 + 13.2 IETF-defined Chunk Parameter Extension.....................124 + 13.3 IETF-defined Additional Error Causes.......................124 + 13.4 Payload Protocol Identifiers...............................125 + 14. 
Suggested SCTP Protocol Parameter Values......................125 + 15. Acknowledgements..............................................126 + 16. Authors' Addresses............................................126 + 17. References....................................................128 + 18. Bibliography..................................................129 + Appendix A .......................................................131 + Appendix B .......................................................132 + Full Copyright Statement .........................................134 + +1. Introduction + + This section explains the reasoning behind the development of the + Stream Control Transmission Protocol (SCTP), the services it offers, + and the basic concepts needed to understand the detailed description + of the protocol. + + + + + +Stewart, et al. Standards Track [Page 5] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.1 Motivation + + TCP [RFC793] has performed immense service as the primary means of + reliable data transfer in IP networks. However, an increasing number + of recent applications have found TCP too limiting, and have + incorporated their own reliable data transfer protocol on top of UDP + [RFC768]. The limitations which users have wished to bypass include + the following: + + -- TCP provides both reliable data transfer and strict order-of- + transmission delivery of data. Some applications need reliable + transfer without sequence maintenance, while others would be + satisfied with partial ordering of the data. In both of these + cases the head-of-line blocking offered by TCP causes unnecessary + delay. + + -- The stream-oriented nature of TCP is often an inconvenience. + Applications must add their own record marking to delineate their + messages, and must make explicit use of the push facility to + ensure that a complete message is transferred in a reasonable + time. 
+ + -- The limited scope of TCP sockets complicates the task of + providing highly-available data transfer capability using multi- + homed hosts. + + -- TCP is relatively vulnerable to denial of service attacks, such + as SYN attacks. + + Transport of PSTN signaling across the IP network is an application + for which all of these limitations of TCP are relevant. While this + application directly motivated the development of SCTP, other + applications may find SCTP a good match to their requirements. + +1.2 Architectural View of SCTP + + SCTP is viewed as a layer between the SCTP user application ("SCTP + user" for short) and a connectionless packet network service such as + IP. The remainder of this document assumes SCTP runs on top of IP. + The basic service offered by SCTP is the reliable transfer of user + messages between peer SCTP users. It performs this service within + the context of an association between two SCTP endpoints. Section 10 + of this document sketches the API which should exist at the boundary + between the SCTP and the SCTP user layers. + + SCTP is connection-oriented in nature, but the SCTP association is a + broader concept than the TCP connection. SCTP provides the means for + each SCTP endpoint (Section 1.4) to provide the other endpoint + + + +Stewart, et al. Standards Track [Page 6] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + (during association startup) with a list of transport addresses + (i.e., multiple IP addresses in combination with an SCTP port) + through which that endpoint can be reached and from which it will + originate SCTP packets. The association spans transfers over all of + the possible source/destination combinations which may be generated + from each endpoint's lists. 
+ + _____________ _____________ + | SCTP User | | SCTP User | + | Application | | Application | + |-------------| |-------------| + | SCTP | | SCTP | + | Transport | | Transport | + | Service | | Service | + |-------------| |-------------| + | |One or more ---- One or more| | + | IP Network |IP address \/ IP address| IP Network | + | Service |appearances /\ appearances| Service | + |_____________| ---- |_____________| + + SCTP Node A |<-------- Network transport ------->| SCTP Node B + + Figure 1: An SCTP Association + +1.3 Functional View of SCTP + + The SCTP transport service can be decomposed into a number of + functions. These are depicted in Figure 2 and explained in the + remainder of this section. + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 7] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + SCTP User Application + + ----------------------------------------------------- + _____________ ____________________ + | | | Sequenced delivery | + | Association | | within streams | + | | |____________________| + | startup | + | | ____________________________ + | and | | User Data Fragmentation | + | | |____________________________| + | takedown | + | | ____________________________ + | | | Acknowledgement | + | | | and | + | | | Congestion Avoidance | + | | |____________________________| + | | + | | ____________________________ + | | | Chunk Bundling | + | | |____________________________| + | | + | | ________________________________ + | | | Packet Validation | + | | |________________________________| + | | + | | ________________________________ + | | | Path Management | + |_____________| |________________________________| + + Figure 2: Functional View of the SCTP Transport Service + +1.3.1 Association Startup and Takedown + + An association is initiated by a request from the SCTP user (see the + description of the ASSOCIATE (or SEND) primitive in Section 10). 
+ + A cookie mechanism, similar to one described by Karn and Simpson in + [RFC2522], is employed during the initialization to provide + protection against security attacks. The cookie mechanism uses a + four-way handshake, the last two legs of which are allowed to carry + user data for fast setup. The startup sequence is described in + Section 5 of this document. + + SCTP provides for graceful close (i.e., shutdown) of an active + association on request from the SCTP user. See the description of + the SHUTDOWN primitive in Section 10. SCTP also allows ungraceful + close (i.e., abort), either on request from the user (ABORT + + + +Stewart, et al. Standards Track [Page 8] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + primitive) or as a result of an error condition detected within the + SCTP layer. Section 9 describes both the graceful and the ungraceful + close procedures. + + SCTP does not support a half-open state (like TCP) wherein one side + may continue sending data while the other end is closed. When either + endpoint performs a shutdown, the association on each peer will stop + accepting new data from its user and only deliver data in queue at + the time of the graceful close (see Section 9). + +1.3.2 Sequenced Delivery within Streams + + The term "stream" is used in SCTP to refer to a sequence of user + messages that are to be delivered to the upper-layer protocol in + order with respect to other messages within the same stream. This is + in contrast to its usage in TCP, where it refers to a sequence of + bytes (in this document a byte is assumed to be eight bits). + + The SCTP user can specify at association startup time the number of + streams to be supported by the association. This number is + negotiated with the remote end (see Section 5.1.1). User messages + are associated with stream numbers (SEND, RECEIVE primitives, Section + 10). Internally, SCTP assigns a stream sequence number to each + message passed to it by the SCTP user. 
On the receiving side, SCTP + ensures that messages are delivered to the SCTP user in sequence + within a given stream. However, while one stream may be blocked + waiting for the next in-sequence user message, delivery from other + streams may proceed. + + SCTP provides a mechanism for bypassing the sequenced delivery + service. User messages sent using this mechanism are delivered to + the SCTP user as soon as they are received. + +1.3.3 User Data Fragmentation + + When needed, SCTP fragments user messages to ensure that the SCTP + packet passed to the lower layer conforms to the path MTU. On + receipt, fragments are reassembled into complete messages before + being passed to the SCTP user. + +1.3.4 Acknowledgement and Congestion Avoidance + + SCTP assigns a Transmission Sequence Number (TSN) to each user data + fragment or unfragmented message. The TSN is independent of any + stream sequence number assigned at the stream level. The receiving + + + + + + +Stewart, et al. Standards Track [Page 9] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + end acknowledges all TSNs received, even if there are gaps in the + sequence. In this way, reliable delivery is kept functionally + separate from sequenced stream delivery. + + The acknowledgement and congestion avoidance function is responsible + for packet retransmission when timely acknowledgement has not been + received. Packet retransmission is conditioned by congestion + avoidance procedures similar to those used for TCP. See Sections 6 + and 7 for a detailed description of the protocol procedures + associated with this function. + +1.3.5 Chunk Bundling + + As described in Section 3, the SCTP packet as delivered to the lower + layer consists of a common header followed by one or more chunks. + Each chunk may contain either user data or SCTP control information. + The SCTP user has the option to request bundling of more than one + user message into a single SCTP packet.
The chunk bundling function + of SCTP is responsible for assembly of the complete SCTP packet and + its disassembly at the receiving end. + + During times of congestion an SCTP implementation MAY still perform + bundling even if the user has requested that SCTP not bundle. The + user's disabling of bundling only affects SCTP implementations that + may delay a small period of time before transmission (to attempt to + encourage bundling). When the user layer disables bundling, this + small delay is prohibited but not bundling that is performed during + congestion or retransmission. + +1.3.6 Packet Validation + + A mandatory Verification Tag field and a 32 bit checksum field (see + Appendix B for a description of the Adler-32 checksum) are included + in the SCTP common header. The Verification Tag value is chosen by + each end of the association during association startup. Packets + received without the expected Verification Tag value are discarded, + as a protection against blind masquerade attacks and against stale + SCTP packets from a previous association. The Adler-32 checksum + should be set by the sender of each SCTP packet to provide additional + protection against data corruption in the network. The receiver of + an SCTP packet with an invalid Adler-32 checksum silently discards + the packet. + + + + + + + + + +Stewart, et al. Standards Track [Page 10] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.3.7 Path Management + + The sending SCTP user is able to manipulate the set of transport + addresses used as destinations for SCTP packets through the + primitives described in Section 10. The SCTP path management + function chooses the destination transport address for each outgoing + SCTP packet based on the SCTP user's instructions and the currently + perceived reachability status of the eligible destination set. 
The + path management function monitors reachability through heartbeats + when other packet traffic is inadequate to provide this information + and advises the SCTP user when reachability of any far-end transport + address changes. The path management function is also responsible + for reporting the eligible set of local transport addresses to the + far end during association startup, and for reporting the transport + addresses returned from the far end to the SCTP user. + + At association start-up, a primary path is defined for each SCTP + endpoint, and is used for normal sending of SCTP packets. + + On the receiving end, the path management is responsible for + verifying the existence of a valid SCTP association to which the + inbound SCTP packet belongs before passing it for further processing. + + Note: Path Management and Packet Validation are done at the same + time, so although described separately above, in reality they cannot + be performed as separate items. + +1.4 Key Terms + + Some of the language used to describe SCTP has been introduced in the + previous sections. This section provides a consolidated list of the + key terms and their definitions. + + o Active destination transport address: A transport address on a + peer endpoint which a transmitting endpoint considers available + for receiving user messages. + + o Bundling: An optional multiplexing operation, whereby more than + one user message may be carried in the same SCTP packet. Each + user message occupies its own DATA chunk. + + o Chunk: A unit of information within an SCTP packet, consisting of + a chunk header and chunk-specific content. + + o Congestion Window (cwnd): An SCTP variable that limits the data, + in number of bytes, a sender can send to a particular destination + transport address before receiving an acknowledgement. + + + + +Stewart, et al. 
Standards Track [Page 11] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Cumulative TSN Ack Point: The TSN of the last DATA chunk + acknowledged via the Cumulative TSN Ack field of a SACK. + + o Idle destination address: An address that has not had user + messages sent to it within some length of time, normally the + HEARTBEAT interval or greater. + + o Inactive destination transport address: An address which is + considered inactive due to errors and unavailable to transport + user messages. + + o Message = user message: Data submitted to SCTP by the Upper Layer + Protocol (ULP). + + o Message Authentication Code (MAC): An integrity check mechanism + based on cryptographic hash functions using a secret key. + Typically, message authentication codes are used between two + parties that share a secret key in order to validate information + transmitted between these parties. In SCTP it is used by an + endpoint to validate the State Cookie information that is returned + from the peer in the COOKIE ECHO chunk. The term "MAC" has + different meanings in different contexts. SCTP uses this term + with the same meaning as in [RFC2104]. + + o Network Byte Order: Most significant byte first, a.k.a., Big + Endian. + + o Ordered Message: A user message that is delivered in order with + respect to all previous user messages sent within the stream the + message was sent on. + + o Outstanding TSN (at an SCTP endpoint): A TSN (and the associated + DATA chunk) that has been sent by the endpoint but for which it + has not yet received an acknowledgement. + + o Path: The route taken by the SCTP packets sent by one SCTP + endpoint to a specific destination transport address of its peer + SCTP endpoint. Sending to different destination transport + addresses does not necessarily guarantee getting separate paths. + + o Primary Path: The primary path is the destination and source + address that will be put into a packet outbound to the peer + endpoint by default. 
The definition includes the source address + since an implementation MAY wish to specify both destination and + source address to better control the return path taken by reply + chunks and on which interface the packet is transmitted when the + data sender is multi-homed. + + + + +Stewart, et al. Standards Track [Page 12] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Receiver Window (rwnd): An SCTP variable a data sender uses to + store the most recently calculated receiver window of its peer, in + number of bytes. This gives the sender an indication of the space + available in the receiver's inbound buffer. + + o SCTP association: A protocol relationship between SCTP endpoints, + composed of the two SCTP endpoints and protocol state information + including Verification Tags and the currently active set of + Transmission Sequence Numbers (TSNs), etc. An association can be + uniquely identified by the transport addresses used by the + endpoints in the association. Two SCTP endpoints MUST NOT have + more than one SCTP association between them at any given time. + + o SCTP endpoint: The logical sender/receiver of SCTP packets. On a + multi-homed host, an SCTP endpoint is represented to its peers as + a combination of a set of eligible destination transport addresses + to which SCTP packets can be sent and a set of eligible source + transport addresses from which SCTP packets can be received. All + transport addresses used by an SCTP endpoint must use the same + port number, but can use multiple IP addresses. A transport + address used by an SCTP endpoint must not be used by another SCTP + endpoint. In other words, a transport address is unique to an + SCTP endpoint. + + o SCTP packet (or packet): The unit of data delivery across the + interface between SCTP and the connectionless packet network + (e.g., IP). An SCTP packet includes the common SCTP header, + possible SCTP control chunks, and user data encapsulated within + SCTP DATA chunks. 
+ + o SCTP user application (SCTP user): The logical higher-layer + application entity which uses the services of SCTP, also called + the Upper-layer Protocol (ULP). + + o Slow Start Threshold (ssthresh): An SCTP variable. This is the + threshold which the endpoint will use to determine whether to + perform slow start or congestion avoidance on a particular + destination transport address. Ssthresh is in number of bytes. + + o Stream: A uni-directional logical channel established from one to + another associated SCTP endpoint, within which all user messages + are delivered in sequence except for those submitted to the + unordered delivery service. + + Note: The relationship between stream numbers in opposite directions + is strictly a matter of how the applications use them. It is the + responsibility of the SCTP user to create and manage these + correlations if they are so desired. + + + +Stewart, et al. Standards Track [Page 13] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Stream Sequence Number: A 16-bit sequence number used internally + by SCTP to assure sequenced delivery of the user messages within a + given stream. One stream sequence number is attached to each user + message. + + o Tie-Tags: Verification Tags from a previous association. These + Tags are used within a State Cookie so that the newly restarting + association can be linked to the original association within the + endpoint that did not restart. + + o Transmission Control Block (TCB): An internal data structure + created by an SCTP endpoint for each of its existing SCTP + associations to other SCTP endpoints. TCB contains all the status + and operational information for the endpoint to maintain and + manage the corresponding association. + + o Transmission Sequence Number (TSN): A 32-bit sequence number used + internally by SCTP. 
One TSN is attached to each chunk containing + user data to permit the receiving SCTP endpoint to acknowledge its + receipt and detect duplicate deliveries. + + o Transport address: A Transport Address is traditionally defined + by Network Layer address, Transport Layer protocol and Transport + Layer port number. In the case of SCTP running over IP, a + transport address is defined by the combination of an IP address + and an SCTP port number (where SCTP is the Transport protocol). + + o Unacknowledged TSN (at an SCTP endpoint): A TSN (and the associated + DATA chunk) which has been received by the endpoint but for which + an acknowledgement has not yet been sent. Or in the opposite case, + for a packet that has been sent but no acknowledgement has been + received. + + o Unordered Message: Unordered messages are "unordered" with respect + to any other message; this includes both other unordered messages + as well as other ordered messages. An unordered message might be + delivered prior to or later than ordered messages sent on the same + stream. + + o User message: The unit of data delivery across the interface + between SCTP and its user. + + o Verification Tag: A 32 bit unsigned integer that is randomly + generated. The Verification Tag provides a key that allows a + receiver to verify that the SCTP packet belongs to the current + association and is not an old or stale packet from a previous + association. + + + + +Stewart, et al. Standards Track [Page 14] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.5.
Abbreviations + + MAC - Message Authentication Code [RFC2104] + + RTO - Retransmission Time-out + + RTT - Round-trip Time + + RTTVAR - Round-trip Time Variation + + SCTP - Stream Control Transmission Protocol + + SRTT - Smoothed RTT + + TCB - Transmission Control Block + + TLV - Type-Length-Value Coding Format + + TSN - Transmission Sequence Number + + ULP - Upper-layer Protocol + +1.6 Serial Number Arithmetic + + It is essential to remember that the actual Transmission Sequence + Number space is finite, though very large. This space ranges from 0 + to 2**32 - 1. Since the space is finite, all arithmetic dealing with + Transmission Sequence Numbers must be performed modulo 2**32. This + unsigned arithmetic preserves the relationship of sequence numbers as + they cycle from 2**32 - 1 to 0 again. There are some subtleties to + computer modulo arithmetic, so great care should be taken in + programming the comparison of such values. When referring to TSNs, + the symbol "=<" means "less than or equal" (modulo 2**32). + + Comparisons and arithmetic on TSNs in this document SHOULD use Serial + Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. + + An endpoint SHOULD NOT transmit a DATA chunk with a TSN that is more + than 2**31 - 1 above the beginning TSN of its current send window. + Doing so will cause problems in comparing TSNs. + + Transmission Sequence Numbers wrap around when they reach 2**32 - 1. + That is, the next TSN a DATA chunk MUST use after transmitting TSN = + 2**32 - 1 is TSN = 0. + + Any arithmetic done on Stream Sequence Numbers SHOULD use Serial + Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 16. + + + + +Stewart, et al. Standards Track [Page 15] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + All other arithmetic and comparisons in this document use normal + arithmetic. + +2.
Conventions + + The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, + SHOULD NOT, RECOMMENDED, NOT RECOMMENDED, MAY, and OPTIONAL, when + they appear in this document, are to be interpreted as described in + [RFC2119]. + +3. SCTP packet Format + + An SCTP packet is composed of a common header and chunks. A chunk + contains either control information or user data. + + The SCTP packet format is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Common Header | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk #1 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk #n | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Multiple chunks can be bundled into one SCTP packet up to the MTU + size, except for the INIT, INIT ACK, and SHUTDOWN COMPLETE chunks. + These chunks MUST NOT be bundled with any other chunk in a packet. + See Section 6.10 for more details on chunk bundling. + + If a user data message doesn't fit into one SCTP packet it can be + fragmented into multiple chunks using the procedure defined in + Section 6.9. + + All integer fields in an SCTP packet MUST be transmitted in network + byte order, unless otherwise stated. + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 16] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.1 SCTP Common Header Field Descriptions + + SCTP Common Header Format + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Port Number | Destination Port Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Verification Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Source Port Number: 16 bits (unsigned integer) + + This is the SCTP sender's port number. It can be used by the + receiver in combination with the source IP address, the SCTP + destination port and possibly the destination IP address to + identify the association to which this packet belongs. + + Destination Port Number: 16 bits (unsigned integer) + + This is the SCTP port number to which this packet is destined. + The receiving host will use this port number to de-multiplex the + SCTP packet to the correct receiving endpoint/application. + + Verification Tag: 32 bits (unsigned integer) + + The receiver of this packet uses the Verification Tag to validate + the sender of this SCTP packet. On transmit, the value of this + Verification Tag MUST be set to the value of the Initiate Tag + received from the peer endpoint during the association + initialization, with the following exceptions: + + - A packet containing an INIT chunk MUST have a zero Verification + Tag. + - A packet containing a SHUTDOWN-COMPLETE chunk with the T-bit + set MUST have the Verification Tag copied from the packet with + the SHUTDOWN-ACK chunk. + - A packet containing an ABORT chunk may have the verification + tag copied from the packet which caused the ABORT to be sent. + For details see Section 8.4 and 8.5. + + An INIT chunk MUST be the only chunk in the SCTP packet carrying it. 
+ + + + + + +Stewart, et al. Standards Track [Page 17] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Checksum: 32 bits (unsigned integer) + + This field contains the checksum of this SCTP packet. Its + calculation is discussed in Section 6.8. SCTP uses the Adler- + 32 algorithm as described in Appendix B for calculating the + checksum + +3.2 Chunk Field Descriptions + + The figure below illustrates the field format for the chunks to be + transmitted in the SCTP packet. Each chunk is formatted with a Chunk + Type field, a chunk-specific Flag field, a Chunk Length field, and a + Value field. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Chunk Value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Type: 8 bits (unsigned integer) + + This field identifies the type of information contained in the + Chunk Value field. It takes a value from 0 to 254. The value of + 255 is reserved for future use as an extension field. + + The values of Chunk Types are defined as follows: + + ID Value Chunk Type + ----- ---------- + 0 - Payload Data (DATA) + 1 - Initiation (INIT) + 2 - Initiation Acknowledgement (INIT ACK) + 3 - Selective Acknowledgement (SACK) + 4 - Heartbeat Request (HEARTBEAT) + 5 - Heartbeat Acknowledgement (HEARTBEAT ACK) + 6 - Abort (ABORT) + 7 - Shutdown (SHUTDOWN) + 8 - Shutdown Acknowledgement (SHUTDOWN ACK) + 9 - Operation Error (ERROR) + 10 - State Cookie (COOKIE ECHO) + 11 - Cookie Acknowledgement (COOKIE ACK) + 12 - Reserved for Explicit Congestion Notification Echo (ECNE) + 13 - Reserved for Congestion Window Reduced (CWR) + + + +Stewart, et al. 
Standards Track [Page 18] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 14 - Shutdown Complete (SHUTDOWN COMPLETE) + 15 to 62 - reserved by IETF + 63 - IETF-defined Chunk Extensions + 64 to 126 - reserved by IETF + 127 - IETF-defined Chunk Extensions + 128 to 190 - reserved by IETF + 191 - IETF-defined Chunk Extensions + 192 to 254 - reserved by IETF + 255 - IETF-defined Chunk Extensions + + Chunk Types are encoded such that the highest-order two bits specify + the action that must be taken if the processing endpoint does not + recognize the Chunk Type. + + 00 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it. + + 01 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it, and report the unrecognized + chunk in an ERROR chunk using the 'Unrecognized Chunk Type' + cause of error. + + 10 - Skip this chunk and continue processing. + + 11 - Skip this chunk and continue processing, but report in an ERROR + Chunk using the 'Unrecognized Chunk Type' cause of error. + + Note: The ECNE and CWR chunk types are reserved for future use of + Explicit Congestion Notification (ECN). + + Chunk Flags: 8 bits + + The usage of these bits depends on the chunk type as given by the + Chunk Type. Unless otherwise specified, they are set to zero on + transmit and are ignored on receipt. + + Chunk Length: 16 bits (unsigned integer) + + This value represents the size of the chunk in bytes including the + Chunk Type, Chunk Flags, Chunk Length, and Chunk Value fields. + Therefore, if the Chunk Value field is zero-length, the Length + field will be set to 4. The Chunk Length field does not count any + padding. + + + + + + + + +Stewart, et al. Standards Track [Page 19] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Value: variable length + + The Chunk Value field contains the actual information to be + transferred in the chunk.
The usage and format of this field is + dependent on the Chunk Type. + + The total length of a chunk (including Type, Length and Value fields) + MUST be a multiple of 4 bytes. If the length of the chunk is not a + multiple of 4 bytes, the sender MUST pad the chunk with all zero + bytes and this padding is not included in the chunk length field. + The sender should never pad with more than 3 bytes. The receiver + MUST ignore the padding bytes. + + SCTP defined chunks are described in detail in Section 3.3. The + guidelines for IETF-defined chunk extensions can be found in Section + 13.1 of this document. + +3.2.1 Optional/Variable-length Parameter Format + + Chunk values of SCTP control chunks consist of a chunk-type-specific + header of required fields, followed by zero or more parameters. The + optional and variable-length parameters contained in a chunk are + defined in a Type-Length-Value format as shown below. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Parameter Type | Parameter Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Parameter Value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Parameter Type: 16 bits (unsigned integer) + + The Type field is a 16 bit identifier of the type of parameter. + It takes a value of 0 to 65534. + + The value of 65535 is reserved for IETF-defined extensions. Values + other than those defined in specific SCTP chunk description are + reserved for use by IETF. + + + + + + + + + +Stewart, et al. Standards Track [Page 20] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Parameter Length: 16 bits (unsigned integer) + + The Parameter Length field contains the size of the parameter in + bytes, including the Parameter Type, Parameter Length, and + Parameter Value fields. 
Thus, a parameter with a zero-length + Parameter Value field would have a Length field of 4. The + Parameter Length does not include any padding bytes. + + Chunk Parameter Value: variable-length. + + The Parameter Value field contains the actual information to be + transferred in the parameter. + + The total length of a parameter (including Type, Parameter Length and + Value fields) MUST be a multiple of 4 bytes. If the length of the + parameter is not a multiple of 4 bytes, the sender pads the Parameter + at the end (i.e., after the Parameter Value field) with all zero + bytes. The length of the padding is not included in the parameter + length field. A sender SHOULD NOT pad with more than 3 bytes. The + receiver MUST ignore the padding bytes. + + The Parameter Types are encoded such that the highest-order two bits + specify the action that must be taken if the processing endpoint does + not recognize the Parameter Type. + + 00 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it. + + 01 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it, and report the unrecognized + parameter in an 'Unrecognized Parameter Type' (in either an + ERROR or in the INIT ACK). + + 10 - Skip this parameter and continue processing. + + 11 - Skip this parameter and continue processing but report the + unrecognized parameter in an 'Unrecognized Parameter Type' (in + either an ERROR or in the INIT ACK). + + The actual SCTP parameters are defined in the specific SCTP chunk + sections. The rules for IETF-defined parameter extensions are + defined in Section 13.2. + +3.3 SCTP Chunk Definitions + + This section defines the format of the different SCTP chunk types. + + + + + +Stewart, et al. 
Standards Track [Page 21] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.1 Payload Data (DATA) (0) + + The following format MUST be used for the DATA chunk: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 0 | Reserved|U|B|E| Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Stream Identifier S | Stream Sequence Number n | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Payload Protocol Identifier | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / User Data (seq n of Stream S) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Reserved: 5 bits + + Should be set to all '0's and ignored by the receiver. + + U bit: 1 bit + + The (U)nordered bit, if set to '1', indicates that this is an + unordered DATA chunk, and there is no Stream Sequence Number + assigned to this DATA chunk. Therefore, the receiver MUST ignore + the Stream Sequence Number field. + + After re-assembly (if necessary), unordered DATA chunks MUST be + dispatched to the upper layer by the receiver without any attempt + to re-order. + + If an unordered user message is fragmented, each fragment of the + message MUST have its U bit set to '1'. + + B bit: 1 bit + + The (B)eginning fragment bit, if set, indicates the first fragment + of a user message. + + E bit: 1 bit + + The (E)nding fragment bit, if set, indicates the last fragment of + a user message. + + + + +Stewart, et al. Standards Track [Page 22] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An unfragmented user message shall have both the B and E bits set to + '1'. 
Setting both B and E bits to '0' indicates a middle fragment of + a multi-fragment user message, as summarized in the following table: + + B E Description + ============================================================ + | 1 0 | First piece of a fragmented user message | + +----------------------------------------------------------+ + | 0 0 | Middle piece of a fragmented user message | + +----------------------------------------------------------+ + | 0 1 | Last piece of a fragmented user message | + +----------------------------------------------------------+ + | 1 1 | Unfragmented Message | + ============================================================ + | Table 1: Fragment Description Flags | + ============================================================ + + When a user message is fragmented into multiple chunks, the TSNs are + used by the receiver to reassemble the message. This means that the + TSNs for each fragment of a fragmented user message MUST be strictly + sequential. + + Length: 16 bits (unsigned integer) + + This field indicates the length of the DATA chunk in bytes from + the beginning of the type field to the end of the user data field + excluding any padding. A DATA chunk with no user data field will + have Length set to 16 (indicating 16 bytes). + + TSN : 32 bits (unsigned integer) + + This value represents the TSN for this DATA chunk. The valid + range of TSN is from 0 to 4294967295 (2**32 - 1). TSN wraps back + to 0 after reaching 4294967295. + + Stream Identifier S: 16 bits (unsigned integer) + + Identifies the stream to which the following user data belongs. + + Stream Sequence Number n: 16 bits (unsigned integer) + + This value represents the stream sequence number of the following + user data within the stream S. Valid range is 0 to 65535. + + When a user message is fragmented by SCTP for transport, the same + stream sequence number MUST be carried in each of the fragments of + the message. + + + + +Stewart, et al. 
Standards Track [Page 23] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Payload Protocol Identifier: 32 bits (unsigned integer) + + This value represents an application (or upper layer) specified + protocol identifier. This value is passed to SCTP by its upper + layer and sent to its peer. This identifier is not used by SCTP + but can be used by certain network entities as well as the peer + application to identify the type of information being carried in + this DATA chunk. This field must be sent even in fragmented DATA + chunks (to make sure it is available for agents in the middle of + the network). + + The value 0 indicates no application identifier is specified by + the upper layer for this payload data. + + User Data: variable length + + This is the payload user data. The implementation MUST pad the + end of the data to a 4 byte boundary with all-zero bytes. Any + padding MUST NOT be included in the length field. A sender MUST + never add more than 3 bytes of padding. + +3.3.2 Initiation (INIT) (1) + + This chunk is used to initiate a SCTP association between two + endpoints. 
The format of the INIT chunk is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 1 | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initiate Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit (a_rwnd) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Outbound Streams | Number of Inbound Streams | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initial TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Optional/Variable-Length Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The INIT chunk contains the following parameters. Unless otherwise + noted, each parameter MUST only be included once in the INIT chunk. + + + + + +Stewart, et al. Standards Track [Page 24] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Fixed Parameters Status + ---------------------------------------------- + Initiate Tag Mandatory + Advertised Receiver Window Credit Mandatory + Number of Outbound Streams Mandatory + Number of Inbound Streams Mandatory + Initial TSN Mandatory + + Variable Parameters Status Type Value + ------------------------------------------------------------- + IPv4 Address (Note 1) Optional 5 + IPv6 Address (Note 1) Optional 6 + Cookie Preservative Optional 9 + Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + Host Name Address (Note 3) Optional 11 + Supported Address Types (Note 4) Optional 12 + + Note 1: The INIT chunks can contain multiple addresses that can be + IPv4 and/or IPv6 in any combination. + + Note 2: The ECN capable field is reserved for future use of Explicit + Congestion Notification. 
+ + Note 3: An INIT chunk MUST NOT contain more than one Host Name + address parameter. Moreover, the sender of the INIT MUST NOT combine + any other address types with the Host Name address in the INIT. The + receiver of INIT MUST ignore any other address types if the Host Name + address parameter is present in the received INIT chunk. + + Note 4: This parameter, when present, specifies all the address types + the sending endpoint can support. The absence of this parameter + indicates that the sending endpoint can support any address type. + + The Chunk Flags field in INIT is reserved and all bits in it should + be set to 0 by the sender and ignored by the receiver. The sequence + of parameters within an INIT can be processed in any order. + + Initiate Tag: 32 bits (unsigned integer) + + The receiver of the INIT (the responding end) records the value of + the Initiate Tag parameter. This value MUST be placed into the + Verification Tag field of every SCTP packet that the receiver of + the INIT transmits within this association. + + The Initiate Tag is allowed to have any value except 0. See + Section 5.3.1 for more on the selection of the tag value. + + + + + +Stewart, et al. Standards Track [Page 25] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + If the value of the Initiate Tag in a received INIT chunk is found + to be 0, the receiver MUST treat it as an error and close the + association by transmitting an ABORT. + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This value represents the dedicated buffer space, in number of + bytes, the sender of the INIT has reserved in association with + this window. During the life of the association this buffer space + SHOULD NOT be lessened (i.e., dedicated buffers taken away from + this association); however, an endpoint MAY change the value of + a_rwnd it sends in SACK chunks. 
+ + Number of Outbound Streams (OS): 16 bits (unsigned integer) + + Defines the number of outbound streams the sender of this INIT + chunk wishes to create in this association. The value of 0 MUST + NOT be used. + + Note: A receiver of an INIT with the OS value set to 0 SHOULD + abort the association. + + Number of Inbound Streams (MIS) : 16 bits (unsigned integer) + + Defines the maximum number of streams the sender of this INIT + chunk allows the peer end to create in this association. The + value 0 MUST NOT be used. + + Note: There is no negotiation of the actual number of streams but + instead the two endpoints will use the min(requested, offered). + See Section 5.1.1 for details. + + Note: A receiver of an INIT with the MIS value of 0 SHOULD abort + the association. + + Initial TSN (I-TSN) : 32 bits (unsigned integer) + + Defines the initial TSN that the sender will use. The valid range + is from 0 to 4294967295. This field MAY be set to the value of + the Initiate Tag field. + +3.3.2.1 Optional/Variable Length Parameters in INIT + + The following parameters follow the Type-Length-Value format as + defined in Section 3.2.1. Any Type-Length-Value fields MUST come + after the fixed-length fields defined in the previous section. + + + + +Stewart, et al. Standards Track [Page 26] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IPv4 Address Parameter (5) + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 5 | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | IPv4 Address | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + IPv4 Address: 32 bits (unsigned integer) + + Contains an IPv4 address of the sending endpoint. It is binary + encoded. 
+ + IPv6 Address Parameter (6) + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 6 | Length = 20 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPv6 Address | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IPv6 Address: 128 bit (unsigned integer) + + Contains an IPv6 address of the sending endpoint. It is binary + encoded. + + Note: A sender MUST NOT use an IPv4-mapped IPv6 address [RFC2373] + but should instead use an IPv4 Address Parameter for an IPv4 + address. + + Combined with the Source Port Number in the SCTP common header, + the value passed in an IPv4 or IPv6 Address parameter indicates a + transport address the sender of the INIT will support for the + association being initiated. That is, during the lifetime of this + association, this IP address can appear in the source address + field of an IP datagram sent from the sender of the INIT, and can + be used as a destination address of an IP datagram sent from the + receiver of the INIT. + + + + + +Stewart, et al. Standards Track [Page 27] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + More than one IP Address parameter can be included in an INIT + chunk when the INIT sender is multi-homed. Moreover, a multi- + homed endpoint may have access to different types of network, thus + more than one address type can be present in one INIT chunk, i.e., + IPv4 and IPv6 addresses are allowed in the same INIT chunk. + + If the INIT contains at least one IP Address parameter, then the + source address of the IP datagram containing the INIT chunk and + any additional address(es) provided within the INIT can be used as + destinations by the endpoint receiving the INIT. 
If the INIT does + not contain any IP Address parameters, the endpoint receiving the + INIT MUST use the source address associated with the received IP + datagram as its sole destination address for the association. + + Note that not using any IP address parameters in the INIT and + INIT-ACK is an alternative to make an association more likely to + work across a NAT box. + + Cookie Preservative (9) + + The sender of the INIT shall use this parameter to suggest to the + receiver of the INIT for a longer life-span of the State Cookie. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 9 | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Suggested Cookie Life-span Increment (msec.) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Suggested Cookie Life-span Increment: 32 bits (unsigned integer) + + This parameter indicates to the receiver how much increment in + milliseconds the sender wishes the receiver to add to its default + cookie life-span. + + This optional parameter should be added to the INIT chunk by the + sender when it re-attempts establishing an association with a peer + to which its previous attempt of establishing the association failed + due to a stale cookie operation error. The receiver MAY choose to + ignore the suggested cookie life-span increase for its own security + reasons. + + + + + + + + +Stewart, et al. Standards Track [Page 28] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Host Name Address (11) + + The sender of INIT uses this parameter to pass its Host Name (in + place of its IP addresses) to its peer. The peer is responsible + for resolving the name. Using this parameter might make it more + likely for the association to work across a NAT box. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 11 | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Host Name / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Host Name: variable length + + This field contains a host name in "host name syntax" per RFC1123 + Section 2.1 [RFC1123]. The method for resolving the host name is + out of scope of SCTP. + + Note: At least one null terminator is included in the Host Name + string and must be included in the length. + + Supported Address Types (12) + + The sender of INIT uses this parameter to list all the address + types it can support. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 12 | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Type #1 | Address Type #2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ...... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Address Type: 16 bits (unsigned integer) + + This is filled with the type value of the corresponding address + TLV (e.g., IPv4 = 5, IPv6 = 6, Hostname = 11). + + + + + + + +Stewart, et al. Standards Track [Page 29] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.3 Initiation Acknowledgement (INIT ACK) (2): + + The INIT ACK chunk is used to acknowledge the initiation of an SCTP + association. + + The parameter part of INIT ACK is formatted similarly to the INIT + chunk. 
It uses two extra variable parameters: The State Cookie and + the Unrecognized Parameter: + + The format of the INIT ACK chunk is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 2 | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initiate Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Outbound Streams | Number of Inbound Streams | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initial TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Optional/Variable-Length Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Initiate Tag: 32 bits (unsigned integer) + + The receiver of the INIT ACK records the value of the Initiate Tag + parameter. This value MUST be placed into the Verification Tag + field of every SCTP packet that the INIT ACK receiver transmits + within this association. + + The Initiate Tag MUST NOT take the value 0. See Section 5.3.1 for + more on the selection of the Initiate Tag value. + + If the value of the Initiate Tag in a received INIT ACK chunk is + found to be 0, the receiver MUST treat it as an error and close + the association by transmitting an ABORT. + + + + + + + + + +Stewart, et al. Standards Track [Page 30] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This value represents the dedicated buffer space, in number of + bytes, the sender of the INIT ACK has reserved in association with + this window. During the life of the association this buffer space + SHOULD not be lessened (i.e. dedicated buffers taken away from + this association). 
+ + Number of Outbound Streams (OS): 16 bits (unsigned integer) + + Defines the number of outbound streams the sender of this INIT ACK + chunk wishes to create in this association. The value of 0 MUST + NOT be used. + + Note: A receiver of an INIT ACK with the OS value set to 0 SHOULD + destroy the association discarding its TCB. + + Number of Inbound Streams (MIS) : 16 bits (unsigned integer) + + Defines the maximum number of streams the sender of this INIT ACK + chunk allows the peer end to create in this association. The + value 0 MUST NOT be used. + + Note: There is no negotiation of the actual number of streams but + instead the two endpoints will use the min(requested, offered). + See Section 5.1.1 for details. + + Note: A receiver of an INIT ACK with the MIS value set to 0 + SHOULD destroy the association discarding its TCB. + + Initial TSN (I-TSN) : 32 bits (unsigned integer) + + Defines the initial TSN that the INIT-ACK sender will use. The + valid range is from 0 to 4294967295. This field MAY be set to the + value of the Initiate Tag field. + + Fixed Parameters Status + ---------------------------------------------- + Initiate Tag Mandatory + Advertised Receiver Window Credit Mandatory + Number of Outbound Streams Mandatory + Number of Inbound Streams Mandatory + Initial TSN Mandatory + + + + + + + +Stewart, et al. Standards Track [Page 31] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Variable Parameters Status Type Value + ------------------------------------------------------------- + State Cookie Mandatory 7 + IPv4 Address (Note 1) Optional 5 + IPv6 Address (Note 1) Optional 6 + Unrecognized Parameters Optional 8 + Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + Host Name Address (Note 3) Optional 11 + + Note 1: The INIT ACK chunks can contain any number of IP address + parameters that can be IPv4 and/or IPv6 in any combination. 
+ + Note 2: The ECN capable field is reserved for future use of Explicit + Congestion Notification. + + Note 3: The INIT ACK chunks MUST NOT contain more than one Host Name + address parameter. Moreover, the sender of the INIT ACK MUST NOT + combine any other address types with the Host Name address in the + INIT ACK. The receiver of the INIT ACK MUST ignore any other address + types if the Host Name address parameter is present. + + IMPLEMENTATION NOTE: An implementation MUST be prepared to receive an + INIT ACK that is quite large (more than 1500 bytes) due to the + variable size of the state cookie AND the variable address list. For + example, if a responder to the INIT has 1000 IPv4 addresses it wishes + to send, it would need at least 8,000 bytes to encode this in the + INIT ACK. + + In combination with the Source Port carried in the SCTP common + header, each IP Address parameter in the INIT ACK indicates to the + receiver of the INIT ACK a valid transport address supported by the + sender of the INIT ACK for the lifetime of the association being + initiated. + + If the INIT ACK contains at least one IP Address parameter, then the + source address of the IP datagram containing the INIT ACK and any + additional address(es) provided within the INIT ACK may be used as + destinations by the receiver of the INIT ACK. If the INIT ACK does + not contain any IP Address parameters, the receiver of the INIT ACK + MUST use the source address associated with the received IP datagram + as its sole destination address for the association. + + The State Cookie and Unrecognized Parameters use the Type-Length- + Value format as defined in Section 3.2.1 and are described below. + The other fields are defined the same as their counterparts in the + INIT chunk. + + + + + +Stewart, et al. 
Standards Track [Page 32] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.3.1 Optional or Variable Length Parameters + + State Cookie + + Parameter Type Value: 7 + + Parameter Length: variable size, depending on Size of Cookie + + Parameter Value: + + This parameter value MUST contain all the necessary state and + parameter information required for the sender of this INIT ACK + to create the association, along with a Message Authentication + Code (MAC). See Section 5.1.3 for details on State Cookie + definition. + + Unrecognized Parameters: + + Parameter Type Value: 8 + + Parameter Length: Variable Size. + + Parameter Value: + + This parameter is returned to the originator of the INIT chunk + when the INIT contains an unrecognized parameter which has a + value that indicates that it should be reported to the sender. + This parameter value field will contain unrecognized parameters + copied from the INIT chunk complete with Parameter Type, Length + and Value fields. + +3.3.4 Selective Acknowledgement (SACK) (3): + + This chunk is sent to the peer endpoint to acknowledge received DATA + chunks and to inform the peer endpoint of gaps in the received + subsequences of DATA chunks as represented by their TSNs. + + The SACK MUST contain the Cumulative TSN Ack and Advertised Receiver + Window Credit (a_rwnd) parameters. + + By definition, the value of the Cumulative TSN Ack parameter is the + last TSN received before a break in the sequence of received TSNs + occurs; the next TSN value following this one has not yet been + received at the endpoint sending the SACK. This parameter therefore + acknowledges receipt of all TSNs less than or equal to its value. + + The handling of a_rwnd by the receiver of the SACK is discussed in + detail in Section 6.2.1. + + + +Stewart, et al. Standards Track [Page 33] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The SACK also contains zero or more Gap Ack Blocks. 
Each Gap Ack + Block acknowledges a subsequence of TSNs received following a break + in the sequence of received TSNs. By definition, all TSNs + acknowledged by Gap Ack Blocks are greater than the value of the + Cumulative TSN Ack. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 3 |Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cumulative TSN Ack | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit (a_rwnd) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Gap Ack Blocks = N | Number of Duplicate TSNs = X | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gap Ack Block #1 Start | Gap Ack Block #1 End | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / / + \ ... \ + / / + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gap Ack Block #N Start | Gap Ack Block #N End | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Duplicate TSN 1 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / / + \ ... \ + / / + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Duplicate TSN X | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to all zeros on transmit and ignored on receipt. + + Cumulative TSN Ack: 32 bits (unsigned integer) + + This parameter contains the TSN of the last DATA chunk received in + sequence before a gap. + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This field indicates the updated receive buffer space in bytes of + the sender of this SACK, see Section 6.2.1 for details. + + + +Stewart, et al. 
Standards Track [Page 34] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Number of Gap Ack Blocks: 16 bits (unsigned integer) + + Indicates the number of Gap Ack Blocks included in this SACK. + + Number of Duplicate TSNs: 16 bits (unsigned integer) + + This field contains the number of duplicate TSNs the endpoint has + received. Each duplicate TSN is listed following the Gap Ack + Block list. + + Gap Ack Blocks: + + These fields contain the Gap Ack Blocks. They are repeated for + each Gap Ack Block up to the number of Gap Ack Blocks defined in + the Number of Gap Ack Blocks field. All DATA chunks with TSNs + greater than or equal to (Cumulative TSN Ack + Gap Ack Block + Start) and less than or equal to (Cumulative TSN Ack + Gap Ack + Block End) of each Gap Ack Block are assumed to have been received + correctly. + + Gap Ack Block Start: 16 bits (unsigned integer) + + Indicates the Start offset TSN for this Gap Ack Block. To + calculate the actual TSN number the Cumulative TSN Ack is added to + this offset number. This calculated TSN identifies the first TSN + in this Gap Ack Block that has been received. + + Gap Ack Block End: 16 bits (unsigned integer) + + Indicates the End offset TSN for this Gap Ack Block. To calculate + the actual TSN number the Cumulative TSN Ack is added to this + offset number. This calculated TSN identifies the TSN of the last + DATA chunk received in this Gap Ack Block. + + For example, assume the receiver has the following DATA chunks newly + arrived at the time when it decides to send a Selective ACK, + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 35] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + ---------- + | TSN=17 | + ---------- + | | <- still missing + ---------- + | TSN=15 | + ---------- + | TSN=14 | + ---------- + | | <- still missing + ---------- + | TSN=12 | + ---------- + | TSN=11 | + ---------- + | TSN=10 | + ---------- + + then, the parameter part of the SACK MUST be constructed as follows + (assuming the new a_rwnd is set to 4660 by the sender): + + +--------------------------------+ + | Cumulative TSN Ack = 12 | + +--------------------------------+ + | a_rwnd = 4660 | + +----------------+---------------+ + | num of block=2 | num of dup=0 | + +----------------+---------------+ + |block #1 strt=2 |block #1 end=3 | + +----------------+---------------+ + |block #2 strt=5 |block #2 end=5 | + +----------------+---------------+ + + + Duplicate TSN: 32 bits (unsigned integer) + + Indicates the number of times a TSN was received in duplicate + since the last SACK was sent. Every time a receiver gets a + duplicate TSN (before sending the SACK) it adds it to the list of + duplicates. The duplicate count is re-initialized to zero after + sending each SACK. + + For example, if a receiver were to get the TSN 19 three times it + would list 19 twice in the outbound SACK. After sending the SACK + if it received yet one more TSN 19 it would list 19 as a duplicate + once in the next outgoing SACK. + + + + + +Stewart, et al. Standards Track [Page 36] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.5 Heartbeat Request (HEARTBEAT) (4): + + An endpoint should send this chunk to its peer endpoint to probe the + reachability of a particular destination transport address defined in + the present association. + + The parameter field contains the Heartbeat Information which is a + variable length opaque data structure understood only by the sender. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 4 | Chunk Flags | Heartbeat Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Heartbeat Information TLV (Variable-Length) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Heartbeat Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and the Heartbeat Information field. + + Heartbeat Information: variable length + + Defined as a variable-length parameter using the format described + in Section 3.2.1, i.e.: + + Variable Parameters Status Type Value + ------------------------------------------------------------- + Heartbeat Info Mandatory 1 + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Heartbeat Info Type=1 | HB Info Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Sender-specific Heartbeat Info / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + + + +Stewart, et al. Standards Track [Page 37] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The Sender-specific Heartbeat Info field should normally include + information about the sender's current time when this HEARTBEAT + chunk is sent and the destination transport address to which this + HEARTBEAT is sent (see Section 8.3). + +3.3.6 Heartbeat Acknowledgement (HEARTBEAT ACK) (5): + + An endpoint should send this chunk to its peer endpoint as a response + to a HEARTBEAT chunk (see Section 8.3). A HEARTBEAT ACK is always + sent to the source IP address of the IP datagram containing the + HEARTBEAT chunk to which this ack is responding. 
+ + The parameter field contains a variable length opaque data structure. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 5 | Chunk Flags | Heartbeat Ack Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Heartbeat Information TLV (Variable-Length) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Heartbeat Ack Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and the Heartbeat Information field. + + Heartbeat Information: variable length + + This field MUST contain the Heartbeat Information parameter of + the Heartbeat Request to which this Heartbeat Acknowledgement is + responding. + + Variable Parameters Status Type Value + ------------------------------------------------------------- + Heartbeat Info Mandatory 1 + + + + + + + + + +Stewart, et al. Standards Track [Page 38] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.7 Abort Association (ABORT) (6): + + The ABORT chunk is sent to the peer of an association to close the + association. The ABORT chunk may contain Cause Parameters to inform + the receiver the reason of the abort. DATA chunks MUST NOT be + bundled with ABORT. Control chunks (except for INIT, INIT ACK and + SHUTDOWN COMPLETE) MAY be bundled with an ABORT but they MUST be + placed before the ABORT in the SCTP packet, or they will be ignored + by the receiver. + + If an endpoint receives an ABORT with a format error or for an + association that doesn't exist, it MUST silently discard it. + Moreover, under any circumstances, an endpoint that receives an ABORT + MUST NOT respond to that ABORT by sending an ABORT of its own. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 6 |Reserved |T| Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / zero or more Error Causes / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Reserved: 7 bits + + Set to 0 on transmit and ignored on receipt. + + T bit: 1 bit + + The T bit is set to 0 if the sender had a TCB that it destroyed. + If the sender did not have a TCB it should set this bit to 1. + + Note: Special rules apply to this chunk for verification, please see + Section 8.5.1 for details. + + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and all the Error Cause fields present. + + See Section 3.3.10 for Error Cause definitions. + + + + + + +Stewart, et al. Standards Track [Page 39] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.8 Shutdown Association (SHUTDOWN) (7): + + An endpoint in an association MUST use this chunk to initiate a + graceful close of the association with its peer. This chunk has the + following format. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 7 | Chunk Flags | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cumulative TSN Ack | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Length: 16 bits (unsigned integer) + + Indicates the length of the parameter. Set to 8. + + Cumulative TSN Ack: 32 bits (unsigned integer) + + This parameter contains the TSN of the last chunk received in + sequence before any gaps. 
+ + Note: Since the SHUTDOWN message does not contain Gap Ack Blocks, + it cannot be used to acknowledge TSNs received out of order. In a + SACK, lack of Gap Ack Blocks that were previously included + indicates that the data receiver reneged on the associated DATA + chunks. Since SHUTDOWN does not contain Gap Ack Blocks, the + receiver of the SHUTDOWN shouldn't interpret the lack of a Gap Ack + Block as a renege. (see Section 6.2 for information on reneging) + +3.3.9 Shutdown Acknowledgement (SHUTDOWN ACK) (8): + + This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + chunk at the completion of the shutdown process, see Section 9.2 for + details. + + The SHUTDOWN ACK chunk has no parameters. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 8 |Chunk Flags | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + +Stewart, et al. Standards Track [Page 40] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + +3.3.10 Operation Error (ERROR) (9): + + An endpoint sends this chunk to its peer endpoint to notify it of + certain error conditions. It contains one or more error causes. An + Operation Error is not considered fatal in and of itself, but may be + used with an ABORT chunk to report a fatal condition. It has the + following parameters: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 9 | Chunk Flags | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / one or more Error Causes / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. 
+ + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and all the Error Cause fields present. + + Error causes are defined as variable-length parameters using the + format described in 3.2.1, i.e.: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Cause-specific Information / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Cause Code: 16 bits (unsigned integer) + + Defines the type of error conditions being reported. + + + + + +Stewart, et al. Standards Track [Page 41] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Cause Code + Value Cause Code + --------- ---------------- + 1 Invalid Stream Identifier + 2 Missing Mandatory Parameter + 3 Stale Cookie Error + 4 Out of Resource + 5 Unresolvable Address + 6 Unrecognized Chunk Type + 7 Invalid Mandatory Parameter + 8 Unrecognized Parameters + 9 No User Data + 10 Cookie Received While Shutting Down + + Cause Length: 16 bits (unsigned integer) + + Set to the size of the parameter in bytes, including the Cause + Code, Cause Length, and Cause-Specific Information fields + + Cause-specific Information: variable length + + This field carries the details of the error condition. + + Sections 3.3.10.1 - 3.3.10.10 define error causes for SCTP. + Guidelines for the IETF to define new error cause values are + discussed in Section 13.3. + +3.3.10.1 Invalid Stream Identifier (1) + + Cause of error + --------------- + Invalid Stream Identifier: Indicates endpoint received a DATA chunk + sent to a nonexistent stream. 
+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=1 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Stream Identifier | (Reserved) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Stream Identifier: 16 bits (unsigned integer) + + Contains the Stream Identifier of the DATA chunk received in + error. + + + + + + + +Stewart, et al. Standards Track [Page 42] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Reserved: 16 bits + + This field is reserved. It is set to all 0's on transmit and + ignored on receipt. + +3.3.10.2 Missing Mandatory Parameter (2) + + Cause of error + --------------- + Missing Mandatory Parameter: Indicates that one or more mandatory + TLV parameters are missing in a received INIT or INIT ACK. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=2 | Cause Length=8+N*2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of missing params=N | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Missing Param Type #1 | Missing Param Type #2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Missing Param Type #N-1 | Missing Param Type #N | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Number of Missing params: 32 bits (unsigned integer) + + This field contains the number of parameters contained in the + Cause-specific Information field. + + Missing Param Type: 16 bits (unsigned integer) + + Each field will contain the missing mandatory parameter number. + +3.3.10.3 Stale Cookie Error (3) + + Cause of error + -------------- + Stale Cookie Error: Indicates the receipt of a valid State Cookie + that has expired. 
+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=3 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Measure of Staleness (usec.) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Measure of Staleness: 32 bits (unsigned integer) + + This field contains the difference, in microseconds, between the + current time and the time the State Cookie expired. + + + +Stewart, et al. Standards Track [Page 43] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The sender of this error cause MAY choose to report how long past + expiration the State Cookie is by including a non-zero value in + the Measure of Staleness field. If the sender does not wish to + provide this information it should set the Measure of Staleness + field to the value of zero. + +3.3.10.4 Out of Resource (4) + + Cause of error + --------------- + Out of Resource: Indicates that the sender is out of resource. This + is usually sent in combination with or within an ABORT. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=4 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +3.3.10.5 Unresolvable Address (5) + + Cause of error + --------------- + Unresolvable Address: Indicates that the sender is not able to + resolve the specified address parameter (e.g., type of address is not + supported by the sender). This is usually sent in combination with + or within an ABORT. 
+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=5 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unresolvable Address / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unresolvable Address: variable length + + The unresolvable address field contains the complete Type, Length + and Value of the address parameter (or Host Name parameter) that + contains the unresolvable address or host name. + +3.3.10.6 Unrecognized Chunk Type (6) + + Cause of error + --------------- + Unrecognized Chunk Type: This error cause is returned to the + originator of the chunk if the receiver does not understand the chunk + and the upper bits of the 'Chunk Type' are set to 01 or 11. + + + + + +Stewart, et al. Standards Track [Page 44] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=6 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unrecognized Chunk / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unrecognized Chunk: variable length + + The Unrecognized Chunk field contains the unrecognized Chunk from + the SCTP packet complete with Chunk Type, Chunk Flags and Chunk + Length. + +3.3.10.7 Invalid Mandatory Parameter (7) + + Cause of error + --------------- + Invalid Mandatory Parameter: This error cause is returned to the + originator of an INIT or INIT ACK chunk when one of the mandatory + parameters is set to an invalid value. 
+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=7 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +3.3.10.8 Unrecognized Parameters (8) + + Cause of error + --------------- + Unrecognized Parameters: This error cause is returned to the + originator of the INIT ACK chunk if the receiver does not recognize + one or more Optional TLV parameters in the INIT ACK chunk. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=8 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unrecognized Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unrecognized Parameters: variable length + + The Unrecognized Parameters field contains the unrecognized + parameters copied from the INIT ACK chunk complete with TLV. This + error cause is normally contained in an ERROR chunk bundled with + the COOKIE ECHO chunk when responding to the INIT ACK, when the + sender of the COOKIE ECHO chunk wishes to report unrecognized + parameters. + + + +Stewart, et al. Standards Track [Page 45] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.10.9 No User Data (9) + + Cause of error + --------------- + No User Data: This error cause is returned to the originator of a + DATA chunk if a received DATA chunk has no user data. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=9 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / TSN value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + TSN value: 32 bits (unsigned integer) + + The TSN value field contains the TSN of the DATA chunk received + with no user data field. 
+ + This cause code is normally returned in an ABORT chunk (see + Section 6.2) + +3.3.10.10 Cookie Received While Shutting Down (10) + + Cause of error + --------------- + Cookie Received While Shutting Down: A COOKIE ECHO was received + While the endpoint was in SHUTDOWN-ACK-SENT state. This error is + usually returned in an ERROR chunk bundled with the retransmitted + SHUTDOWN ACK. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=10 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +3.3.11 Cookie Echo (COOKIE ECHO) (10): + + This chunk is used only during the initialization of an association. + It is sent by the initiator of an association to its peer to complete + the initialization process. This chunk MUST precede any DATA chunk + sent within the association, but MAY be bundled with one or more DATA + chunks in the same packet. + + + + + + + + + +Stewart, et al. Standards Track [Page 46] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 10 |Chunk Flags | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Cookie / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bit + + Set to zero on transmit and ignored on receipt. + + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the 4 bytes of + the chunk header and the size of the Cookie. + + Cookie: variable size + + This field must contain the exact cookie received in the State + Cookie parameter from the previous INIT ACK. + + An implementation SHOULD make the cookie as small as possible to + insure interoperability. + +3.3.12 Cookie Acknowledgement (COOKIE ACK) (11): + + This chunk is used only during the initialization of an association. 
+ It is used to acknowledge the receipt of a COOKIE ECHO chunk. This + chunk MUST precede any DATA or SACK chunk sent within the + association, but MAY be bundled with one or more DATA chunks or SACK + chunk in the same SCTP packet. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 11 |Chunk Flags | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + + + + + + + +Stewart, et al. Standards Track [Page 47] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.13 Shutdown Complete (SHUTDOWN COMPLETE) (14): + + This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + ACK chunk at the completion of the shutdown process, see Section 9.2 + for details. + + The SHUTDOWN COMPLETE chunk has no parameters. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 14 |Reserved |T| Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Reserved: 7 bits + + Set to 0 on transmit and ignored on receipt. + + T bit: 1 bit + + The T bit is set to 0 if the sender had a TCB that it destroyed. + If the sender did not have a TCB it should set this bit to 1. + + Note: Special rules apply to this chunk for verification, please see + Section 8.5.1 for details. + +4. SCTP Association State Diagram + + During the lifetime of an SCTP association, the SCTP endpoint's + association progress from one state to another in response to various + events. The events that may potentially advance an association's + state include: + + o SCTP user primitive calls, e.g., [ASSOCIATE], [SHUTDOWN], [ABORT], + + o Reception of INIT, COOKIE ECHO, ABORT, SHUTDOWN, etc., control + chunks, or + + o Some timeout events. 
+ + The state diagram in the figures below illustrates state changes, + together with the causing events and resulting actions. Note that + some of the error conditions are not shown in the state diagram. + Full description of all special cases should be found in the text. + + + + + +Stewart, et al. Standards Track [Page 48] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Note: Chunk names are given in all capital letters, while parameter + names have the first letter capitalized, e.g., COOKIE ECHO chunk type + vs. State Cookie parameter. If more than one event/message can occur + which causes a state transition it is labeled (A), (B) etc. + + ----- -------- (frm any state) + / \ / rcv ABORT [ABORT] + rcv INIT | | | ---------- or ---------- + --------------- | v v delete TCB snd ABORT + generate Cookie \ +---------+ delete TCB + snd INIT ACK ---| CLOSED | + +---------+ + / \ [ASSOCIATE] + / \ --------------- + | | create TCB + | | snd INIT + | | strt init timer + rcv valid | | + COOKIE ECHO | v + (1) ---------------- | +------------+ + create TCB | | COOKIE-WAIT| (2) + snd COOKIE ACK | +------------+ + | | + | | rcv INIT ACK + | | ----------------- + | | snd COOKIE ECHO + | | stop init timer + | | strt cookie timer + | v + | +--------------+ + | | COOKIE-ECHOED| (3) + | +--------------+ + | | + | | rcv COOKIE ACK + | | ----------------- + | | stop cookie timer + v v + +---------------+ + | ESTABLISHED | + +---------------+ + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 49] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + (from the ESTABLISHED state only) + | + | + /--------+--------\ + [SHUTDOWN] / \ + -------------------| | + check outstanding | | + DATA chunks | | + v | + +---------+ | + |SHUTDOWN-| | rcv SHUTDOWN/check + |PENDING | | outstanding DATA + +---------+ | chunks + | |------------------ + No more outstanding | | + ---------------------| | + snd SHUTDOWN | | + strt shutdown timer | | + v v + +---------+ +-----------+ + (4) |SHUTDOWN-| | SHUTDOWN- | (5,6) + |SENT | | RECEIVED | + +---------+ +-----------+ + | \ | + (A) rcv SHUTDOWN ACK | \ | + ----------------------| \ | + stop shutdown timer | \rcv:SHUTDOWN | + send SHUTDOWN COMPLETE| \ (B) | + delete TCB | \ | + | \ | No more outstanding + | \ |----------------- + | \ | send SHUTDOWN ACK + (B)rcv SHUTDOWN | \ | strt shutdown timer + ----------------------| \ | + send SHUTDOWN ACK | \ | + start shutdown timer | \ | + move to SHUTDOWN- | \ | + ACK-SENT | | | + | v | + | +-----------+ + | | SHUTDOWN- | (7) + | | ACK-SENT | + | +----------+- + | | (C)rcv SHUTDOWN COMPLETE + | |----------------- + | | stop shutdown timer + | | delete TCB + | | + + + +Stewart, et al. Standards Track [Page 50] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + | | (D)rcv SHUTDOWN ACK + | |-------------- + | | stop shutdown timer + | | send SHUTDOWN COMPLETE + | | delete TCB + | | + \ +---------+ / + \-->| CLOSED |<--/ + +---------+ + + Figure 3: State Transition Diagram of SCTP + + Notes: + + 1) If the State Cookie in the received COOKIE ECHO is invalid (i.e., + failed to pass the integrity check), the receiver MUST silently + discard the packet. Or, if the received State Cookie is expired + (see Section 5.1.5), the receiver MUST send back an ERROR chunk. + In either case, the receiver stays in the CLOSED state. 
+ + 2) If the T1-init timer expires, the endpoint MUST retransmit INIT + and re-start the T1-init timer without changing state. This MUST + be repeated up to 'Max.Init.Retransmits' times. After that, the + endpoint MUST abort the initialization process and report the + error to SCTP user. + + 3) If the T1-cookie timer expires, the endpoint MUST retransmit + COOKIE ECHO and re-start the T1-cookie timer without changing + state. This MUST be repeated up to 'Max.Init.Retransmits' times. + After that, the endpoint MUST abort the initialization process and + report the error to SCTP user. + + 4) In SHUTDOWN-SENT state the endpoint MUST acknowledge any received + DATA chunks without delay. + + 5) In SHUTDOWN-RECEIVED state, the endpoint MUST NOT accept any new + send request from its SCTP user. + + 6) In SHUTDOWN-RECEIVED state, the endpoint MUST transmit or + retransmit data and leave this state when all data in queue is + transmitted. + + 7) In SHUTDOWN-ACK-SENT state, the endpoint MUST NOT accept any new + send request from its SCTP user. + + The CLOSED state is used to indicate that an association is not + created (i.e., doesn't exist). + + + + +Stewart, et al. Standards Track [Page 51] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5. Association Initialization + + Before the first data transmission can take place from one SCTP + endpoint ("A") to another SCTP endpoint ("Z"), the two endpoints must + complete an initialization process in order to set up an SCTP + association between them. + + The SCTP user at an endpoint should use the ASSOCIATE primitive to + initialize an SCTP association to another SCTP endpoint. + + IMPLEMENTATION NOTE: From an SCTP-user's point of view, an + association may be implicitly opened, without an ASSOCIATE primitive + (see 10.1 B) being invoked, by the initiating endpoint's sending of + the first user data to the destination endpoint. 
The initiating SCTP + will assume default values for all mandatory and optional parameters + for the INIT/INIT ACK. + + Once the association is established, unidirectional streams are open + for data transfer on both ends (see Section 5.1.1). + +5.1 Normal Establishment of an Association + + The initialization process consists of the following steps (assuming + that SCTP endpoint "A" tries to set up an association with SCTP + endpoint "Z" and "Z" accepts the new association): + + A) "A" first sends an INIT chunk to "Z". In the INIT, "A" must + provide its Verification Tag (Tag_A) in the Initiate Tag field. + Tag_A SHOULD be a random number in the range of 1 to 4294967295 + (see 5.3.1 for Tag value selection). After sending the INIT, "A" + starts the T1-init timer and enters the COOKIE-WAIT state. + + B) "Z" shall respond immediately with an INIT ACK chunk. The + destination IP address of the INIT ACK MUST be set to the source + IP address of the INIT to which this INIT ACK is responding. In + the response, besides filling in other parameters, "Z" must set + the Verification Tag field to Tag_A, and also provide its own + Verification Tag (Tag_Z) in the Initiate Tag field. + + Moreover, "Z" MUST generate and send along with the INIT ACK a + State Cookie. See Section 5.1.3 for State Cookie generation. + + Note: After sending out INIT ACK with the State Cookie parameter, + "Z" MUST NOT allocate any resources, nor keep any states for the + new association. Otherwise, "Z" will be vulnerable to resource + attacks. + + + + + +Stewart, et al. Standards Track [Page 52] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + C) Upon reception of the INIT ACK from "Z", "A" shall stop the T1- + init timer and leave COOKIE-WAIT state. "A" shall then send the + State Cookie received in the INIT ACK chunk in a COOKIE ECHO + chunk, start the T1-cookie timer, and enter the COOKIE-ECHOED + state. 
+ + Note: The COOKIE ECHO chunk can be bundled with any pending + outbound DATA chunks, but it MUST be the first chunk in the packet + and until the COOKIE ACK is returned the sender MUST NOT send any + other packets to the peer. + + D) Upon reception of the COOKIE ECHO chunk, Endpoint "Z" will reply + with a COOKIE ACK chunk after building a TCB and moving to the + ESTABLISHED state. A COOKIE ACK chunk may be bundled with any + pending DATA chunks (and/or SACK chunks), but the COOKIE ACK chunk + MUST be the first chunk in the packet. + + IMPLEMENTATION NOTE: An implementation may choose to send the + Communication Up notification to the SCTP user upon reception of a + valid COOKIE ECHO chunk. + + E) Upon reception of the COOKIE ACK, endpoint "A" will move from the + COOKIE-ECHOED state to the ESTABLISHED state, stopping the T1- + cookie timer. It may also notify its ULP about the successful + establishment of the association with a Communication Up + notification (see Section 10). + + An INIT or INIT ACK chunk MUST NOT be bundled with any other chunk. + They MUST be the only chunks present in the SCTP packets that carry + them. + + An endpoint MUST send the INIT ACK to the IP address from which it + received the INIT. + + Note: T1-init timer and T1-cookie timer shall follow the same rules + given in Section 6.3. + + If an endpoint receives an INIT, INIT ACK, or COOKIE ECHO chunk but + decides not to establish the new association due to missing mandatory + parameters in the received INIT or INIT ACK, invalid parameter + values, or lack of local resources, it MUST respond with an ABORT + chunk. It SHOULD also specify the cause of abort, such as the type + of the missing mandatory parameters, etc., by including the error + cause parameters with the ABORT chunk. The Verification Tag field in + the common header of the outbound SCTP packet containing the ABORT + chunk MUST be set to the Initiate Tag value of the peer. + + + + + +Stewart, et al. 
Standards Track [Page 53] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After the reception of the first DATA chunk in an association the + endpoint MUST immediately respond with a SACK to acknowledge the DATA + chunk. Subsequent acknowledgements should be done as described in + Section 6.2. + + When the TCB is created, each endpoint MUST set its internal + Cumulative TSN Ack Point to the value of its transmitted Initial TSN + minus one. + + IMPLEMENTATION NOTE: The IP addresses and SCTP port are generally + used as the key to find the TCB within an SCTP instance. + +5.1.1 Handle Stream Parameters + + In the INIT and INIT ACK chunks, the sender of the chunk shall + indicate the number of outbound streams (OS) it wishes to have in the + association, as well as the maximum inbound streams (MIS) it will + accept from the other endpoint. + + After receiving the stream configuration information from the other + side, each endpoint shall perform the following check: If the peer's + MIS is less than the endpoint's OS, meaning that the peer is + incapable of supporting all the outbound streams the endpoint wants + to configure, the endpoint MUST either use MIS outbound streams, or + abort the association and report to its upper layer the resources + shortage at its peer. + + After the association is initialized, the valid outbound stream + identifier range for either endpoint shall be 0 to min(local OS, + remote MIS)-1. + +5.1.2 Handle Address Parameters + + During the association initialization, an endpoint shall use the + following rules to discover and collect the destination transport + address(es) of its peer. + + A) If there are no address parameters present in the received INIT or + INIT ACK chunk, the endpoint shall take the source IP address from + which the chunk arrives and record it, in combination with the + SCTP source port number, as the only destination transport address + for this peer. 
+ + B) If there is a Host Name parameter present in the received INIT or + INIT ACK chunk, the endpoint shall resolve that host name to a + list of IP address(es) and derive the transport address(es) of + this peer by combining the resolved IP address(es) with the SCTP + source port. + + + +Stewart, et al. Standards Track [Page 54] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The endpoint MUST ignore any other IP address parameters if they + are also present in the received INIT or INIT ACK chunk. + + The time at which the receiver of an INIT resolves the host name + has potential security implications to SCTP. If the receiver of + an INIT resolves the host name upon the reception of the chunk, + and the mechanism the receiver uses to resolve the host name + involves potential long delay (e.g. DNS query), the receiver may + open itself up to resource attacks for the period of time while it + is waiting for the name resolution results before it can build the + State Cookie and release local resources. + + Therefore, in cases where the name translation involves potential + long delay, the receiver of the INIT MUST postpone the name + resolution till the reception of the COOKIE ECHO chunk from the + peer. In such a case, the receiver of the INIT SHOULD build the + State Cookie using the received Host Name (instead of destination + transport addresses) and send the INIT ACK to the source IP + address from which the INIT was received. + + The receiver of an INIT ACK shall always immediately attempt to + resolve the name upon the reception of the chunk. + + The receiver of the INIT or INIT ACK MUST NOT send user data + (piggy-backed or stand-alone) to its peer until the host name is + successfully resolved. + + If the name resolution is not successful, the endpoint MUST + immediately send an ABORT with "Unresolvable Address" error cause + to its peer. The ABORT shall be sent to the source IP address + from which the last peer packet was received. 
+ + C) If there are only IPv4/IPv6 addresses present in the received INIT + or INIT ACK chunk, the receiver shall derive and record all the + transport address(es) from the received chunk AND the source IP + address that sent the INIT or INIT ACK. The transport address(es) + are derived by the combination of SCTP source port (from the + common header) and the IP address parameter(s) carried in the INIT + or INIT ACK chunk and the source IP address of the IP datagram. + The receiver should use only these transport addresses as + destination transport addresses when sending subsequent packets to + its peer. + + IMPLEMENTATION NOTE: In some cases (e.g., when the implementation + doesn't control the source IP address that is used for + transmitting), an endpoint might need to include in its INIT or + INIT ACK all possible IP addresses from which packets to the peer + could be transmitted. + + + +Stewart, et al. Standards Track [Page 55] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After all transport addresses are derived from the INIT or INIT ACK + chunk using the above rules, the endpoint shall select one of the + transport addresses as the initial primary path. + + Note: The INIT-ACK MUST be sent to the source address of the INIT. + + The sender of INIT may include a 'Supported Address Types' parameter + in the INIT to indicate what types of address are acceptable. When + this parameter is present, the receiver of INIT (initiatee) MUST + either use one of the address types indicated in the Supported + Address Types parameter when responding to the INIT, or abort the + association with an "Unresolvable Address" error cause if it is + unwilling or incapable of using any of the address types indicated by + its peer. 
+ + IMPLEMENTATION NOTE: In the case that the receiver of an INIT ACK + fails to resolve the address parameter due to an unsupported type, it + can abort the initiation process and then attempt a re-initiation by + using a 'Supported Address Types' parameter in the new INIT to + indicate what types of address it prefers. + +5.1.3 Generating State Cookie + + When sending an INIT ACK as a response to an INIT chunk, the sender + of INIT ACK creates a State Cookie and sends it in the State Cookie + parameter of the INIT ACK. Inside this State Cookie, the sender + should include a MAC (see [RFC2104] for an example), a time stamp on + when the State Cookie is created, and the lifespan of the State + Cookie, along with all the information necessary for it to establish + the association. + + The following steps SHOULD be taken to generate the State Cookie: + + 1) Create an association TCB using information from both the received + INIT and the outgoing INIT ACK chunk, + + 2) In the TCB, set the creation time to the current time of day, and + the lifespan to the protocol parameter 'Valid.Cookie.Life', + + 3) From the TCB, identify and collect the minimal subset of + information needed to re-create the TCB, and generate a MAC using + this subset of information and a secret key (see [RFC2104] for an + example of generating a MAC), and + + 4) Generate the State Cookie by combining this subset of information + and the resultant MAC. + + + + + +Stewart, et al. Standards Track [Page 56] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After sending the INIT ACK with the State Cookie parameter, the + sender SHOULD delete the TCB and any other local resource related to + the new association, so as to prevent resource attacks. + + The hashing method used to generate the MAC is strictly a private + matter for the receiver of the INIT chunk. The use of a MAC is + mandatory to prevent denial of service attacks. 
The secret key + SHOULD be random ([RFC1750] provides some information on randomness + guidelines); it SHOULD be changed reasonably frequently, and the + timestamp in the State Cookie MAY be used to determine which key + should be used to verify the MAC. + + An implementation SHOULD make the cookie as small as possible to + ensure interoperability. + +5.1.4 State Cookie Processing + + When an endpoint (in the COOKIE-WAIT state) receives an INIT ACK + chunk with a State Cookie parameter, it MUST immediately send a + COOKIE ECHO chunk to its peer with the received State Cookie. The + sender MAY also add any pending DATA chunks to the packet after the + COOKIE ECHO chunk. + + The endpoint shall also start the T1-cookie timer after sending out + the COOKIE ECHO chunk. If the timer expires, the endpoint shall + retransmit the COOKIE ECHO chunk and restart the T1-cookie timer. + This is repeated until either a COOKIE ACK is received or + 'Max.Init.Retransmits' is reached causing the peer endpoint to be + marked unreachable (and thus the association enters the CLOSED + state). + +5.1.5 State Cookie Authentication + + When an endpoint receives a COOKIE ECHO chunk from another endpoint + with which it has no association, it shall take the following + actions: + + 1) Compute a MAC using the TCB data carried in the State Cookie and + the secret key (note the timestamp in the State Cookie MAY be used + to determine which secret key to use). Reference [RFC2104] can be + used as a guideline for generating the MAC, + + 2) Authenticate the State Cookie as one that it previously generated + by comparing the computed MAC against the one carried in the State + Cookie. If this comparison fails, the SCTP packet, including the + COOKIE ECHO and any DATA chunks, should be silently discarded, + + + + + +Stewart, et al.
Standards Track [Page 57] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 3) Compare the creation timestamp in the State Cookie to the current + local time. If the elapsed time is longer than the lifespan + carried in the State Cookie, then the packet, including the COOKIE + ECHO and any attached DATA chunks, SHOULD be discarded and the + endpoint MUST transmit an ERROR chunk with a "Stale Cookie" error + cause to the peer endpoint, + + 4) If the State Cookie is valid, create an association to the sender + of the COOKIE ECHO chunk with the information in the TCB data + carried in the COOKIE ECHO, and enter the ESTABLISHED state, + + 5) Send a COOKIE ACK chunk to the peer acknowledging reception of the + COOKIE ECHO. The COOKIE ACK MAY be bundled with an outbound DATA + chunk or SACK chunk; however, the COOKIE ACK MUST be the first + chunk in the SCTP packet. + + 6) Immediately acknowledge any DATA chunk bundled with the COOKIE + ECHO with a SACK (subsequent DATA chunk acknowledgement should + follow the rules defined in Section 6.2). As mentioned in step + 5), if the SACK is bundled with the COOKIE ACK, the COOKIE ACK + MUST appear first in the SCTP packet. + + If a COOKIE ECHO is received from an endpoint with which the receiver + of the COOKIE ECHO has an existing association, the procedures in + Section 5.2 should be followed. + +5.1.6 An Example of Normal Association Establishment + + In the following example, "A" initiates the association and then + sends a user message to "Z", then "Z" sends two user messages to "A" + later (assuming no bundling or fragmentation occurs): + + + + + + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 58] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + {app sets association with Z} + (build TCB) + INIT [I-Tag=Tag_A + & other info] --------\ + (Start T1-init timer) \ + (Enter COOKIE-WAIT state) \---> (compose temp TCB and Cookie_Z) + + /--- INIT ACK [Veri Tag=Tag_A, + / I-Tag=Tag_Z, + (Cancel T1-init timer) <------/ Cookie_Z, & other info] + (destroy temp TCB) + COOKIE ECHO [Cookie_Z] ------\ + (Start T1-init timer) \ + (Enter COOKIE-ECHOED state) \---> (build TCB enter ESTABLISHED + state) + + + /---- COOKIE-ACK + / + (Cancel T1-init timer, <-----/ + Enter ESTABLISHED state) + {app sends 1st user data; strm 0} + DATA [TSN=initial TSN_A + Strm=0,Seq=1 & user data]--\ + (Start T3-rtx timer) \ + \-> + /----- SACK [TSN Ack=init + TSN_A,Block=0] + (Cancel T3-rtx timer) <------/ + + ... + {app sends 2 messages;strm 0} + /---- DATA + / [TSN=init TSN_Z + <--/ Strm=0,Seq=1 & user data 1] + SACK [TSN Ack=init TSN_Z, /---- DATA + Block=0] --------\ / [TSN=init TSN_Z +1, + \/ Strm=0,Seq=2 & user data 2] + <------/\ + \ + \------> + + Figure 4: INITiation Example + + If the T1-init timer expires at "A" after the INIT or COOKIE ECHO + chunks are sent, the same INIT or COOKIE ECHO chunk with the same + Initiate Tag (i.e., Tag_A) or State Cookie shall be retransmitted and + + + +Stewart, et al. Standards Track [Page 59] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + the timer restarted. This shall be repeated Max.Init.Retransmits + times before "A" considers "Z" unreachable and reports the failure to + its upper layer (and thus the association enters the CLOSED state). + When retransmitting the INIT, the endpoint MUST follow the rules + defined in 6.3 to determine the proper timer value. 
+ +5.2 Handle Duplicate or Unexpected INIT, INIT ACK, COOKIE ECHO, and + COOKIE ACK + + During the lifetime of an association (in one of the possible + states), an endpoint may receive from its peer endpoint one of the + setup chunks (INIT, INIT ACK, COOKIE ECHO, and COOKIE ACK). The + receiver shall treat such a setup chunk as a duplicate and process it + as described in this section. + + Note: An endpoint will not receive the chunk unless the chunk was + sent to a SCTP transport address and is from a SCTP transport address + associated with this endpoint. Therefore, the endpoint processes + such a chunk as part of its current association. + + The following scenarios can cause duplicated or unexpected chunks: + + A) The peer has crashed without being detected, re-started itself and + sent out a new INIT chunk trying to restore the association, + + B) Both sides are trying to initialize the association at about the + same time, + + C) The chunk is from a stale packet that was used to establish the + present association or a past association that is no longer in + existence, + + D) The chunk is a false packet generated by an attacker, or + + E) The peer never received the COOKIE ACK and is retransmitting its + COOKIE ECHO. + + The rules in the following sections shall be applied in order to + identify and correctly handle these cases. + +5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State (Item B) + + This usually indicates an initialization collision, i.e., each + endpoint is attempting, at about the same time, to establish an + association with the other endpoint. + + Upon receipt of an INIT in the COOKIE-WAIT or COOKIE-ECHOED state, an + endpoint MUST respond with an INIT ACK using the same parameters it + + + +Stewart, et al. Standards Track [Page 60] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + sent in its original INIT chunk (including its Initiation Tag, + unchanged). 
These original parameters are combined with those from + the newly received INIT chunk. The endpoint shall also generate a + State Cookie with the INIT ACK. The endpoint uses the parameters + sent in its INIT to calculate the State Cookie. + + After that, the endpoint MUST NOT change its state, the T1-init timer + shall be left running and the corresponding TCB MUST NOT be + destroyed. The normal procedures for handling State Cookies when a + TCB exists will resolve the duplicate INITs to a single association. + + For an endpoint that is in the COOKIE-ECHOED state it MUST populate + its Tie-Tags with the Tag information of itself and its peer (see + section 5.2.2 for a description of the Tie-Tags). + +5.2.2 Unexpected INIT in States Other than CLOSED, COOKIE-ECHOED, + COOKIE-WAIT and SHUTDOWN-ACK-SENT + + Unless otherwise stated, upon reception of an unexpected INIT for + this association, the endpoint shall generate an INIT ACK with a + State Cookie. In the outbound INIT ACK the endpoint MUST copy its + current Verification Tag and peer's Verification Tag into a reserved + place within the state cookie. We shall refer to these locations as + the Peer's-Tie-Tag and the Local-Tie-Tag. The outbound SCTP packet + containing this INIT ACK MUST carry a Verification Tag value equal to + the Initiation Tag found in the unexpected INIT. And the INIT ACK + MUST contain a new Initiation Tag (randomly generated see Section + 5.3.1). Other parameters for the endpoint SHOULD be copied from the + existing parameters of the association (e.g. number of outbound + streams) into the INIT ACK and cookie. + + After sending out the INIT ACK, the endpoint shall take no further + actions, i.e., the existing association, including its current state, + and the corresponding TCB MUST NOT be changed. + + Note: Only when a TCB exists and the association is not in a COOKIE- + WAIT state are the Tie-Tags populated. For a normal association INIT + (i.e. 
the endpoint is in a COOKIE-WAIT state), the Tie-Tags MUST be + set to 0 (indicating that no previous TCB existed). The INIT ACK and + State Cookie are populated as specified in section 5.2.1. + +5.2.3 Unexpected INIT ACK + + If an INIT ACK is received by an endpoint in any state other than the + COOKIE-WAIT state, the endpoint should discard the INIT ACK chunk. + An unexpected INIT ACK usually indicates the processing of an old or + duplicated INIT chunk. + + + + +Stewart, et al. Standards Track [Page 61] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.2.4 Handle a COOKIE ECHO when a TCB exists + + When a COOKIE ECHO chunk is received by an endpoint in any state for + an existing association (i.e., not in the CLOSED state) the following + rules shall be applied: + + 1) Compute a MAC as described in Step 1 of Section 5.1.5, + + 2) Authenticate the State Cookie as described in Step 2 of Section + 5.1.5 (this is case C or D above). + + 3) Compare the timestamp in the State Cookie to the current time. If + the State Cookie is older than the lifespan carried in the State + Cookie and the Verification Tags contained in the State Cookie do + not match the current association's Verification Tags, the packet, + including the COOKIE ECHO and any DATA chunks, should be + discarded. The endpoint also MUST transmit an ERROR chunk with a + "Stale Cookie" error cause to the peer endpoint (this is case C or + D in section 5.2). + + If both Verification Tags in the State Cookie match the + Verification Tags of the current association, consider the State + Cookie valid (this is case E of section 5.2) even if the lifespan + is exceeded. + + 4) If the State Cookie proves to be valid, unpack the TCB into a + temporary TCB. + + 5) Refer to Table 2 to determine the correct action to be taken. + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 62] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + ++------------+------------+---------------+--------------+-------------+ +| Local Tag | Peer's Tag | Local-Tie-Tag |Peer's-Tie-Tag| Action/ | +| | | | | Description | ++------------+------------+---------------+--------------+-------------+ +| X | X | M | M | (A) | ++------------+------------+---------------+--------------+-------------+ +| M | X | A | A | (B) | ++------------+------------+---------------+--------------+-------------+ +| M | 0 | A | A | (B) | ++------------+------------+---------------+--------------+-------------+ +| X | M | 0 | 0 | (C) | ++------------+------------+---------------+--------------+-------------+ +| M | M | A | A | (D) | ++======================================================================+ +| Table 2: Handling of a COOKIE ECHO when a TCB exists | ++======================================================================+ + + Legend: + + X - Tag does not match the existing TCB + M - Tag matches the existing TCB. + 0 - No Tie-Tag in Cookie (unknown). + A - All cases, i.e. M, X or 0. + + Note: For any case not shown in Table 2, the cookie should be + silently discarded. + + Action + + A) In this case, the peer may have restarted. When the endpoint + recognizes this potential 'restart', the existing session is + treated the same as if it received an ABORT followed by a new + COOKIE ECHO with the following exceptions: + + - Any SCTP DATA Chunks MAY be retained (this is an implementation + specific option). + + - A notification of RESTART SHOULD be sent to the ULP instead of + a "COMMUNICATION LOST" notification. + + All the congestion control parameters (e.g., cwnd, ssthresh) + related to this peer MUST be reset to their initial values (see + Section 6.2.1). + + After this the endpoint shall enter the ESTABLISHED state. + + + + + + +Stewart, et al. 
Standards Track [Page 63] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes + the peer has restarted (Action A), it MUST NOT set up a new + association but instead resend the SHUTDOWN ACK and send an ERROR + chunk with a "Cookie Received while Shutting Down" error cause to + its peer. + + B) In this case, both sides may be attempting to start an association + at about the same time but the peer endpoint started its INIT + after responding to the local endpoint's INIT. Thus it may have + picked a new Verification Tag not being aware of the previous Tag + it had sent this endpoint. The endpoint should stay in or enter + the ESTABLISHED state but it MUST update its peer's Verification + Tag from the State Cookie, stop any init or cookie timers that may + be running and send a COOKIE ACK. + + C) In this case, the local endpoint's cookie has arrived late. + Before it arrived, the local endpoint sent an INIT and received an + INIT-ACK and finally sent a COOKIE ECHO with the peer's same tag + but a new tag of its own. The cookie should be silently + discarded. The endpoint SHOULD NOT change states and should leave + any timers running. + + D) When both local and remote tags match the endpoint should always + enter the ESTABLISHED state, if it has not already done so. It + should stop any init or cookie timers that may be running and send + a COOKIE ACK. + + Note: The "peer's Verification Tag" is the tag received in the + Initiate Tag field of the INIT or INIT ACK chunk. + +5.2.4.1 An Example of an Association Restart + + In the following example, "A" initiates the association after a + restart has occurred. Endpoint "Z" had no knowledge of the restart + until the exchange (i.e. Heartbeats had not yet detected the failure + of "A"). (assuming no bundling or fragmentation occurs): + + + + + + + + + + + + + + + +Stewart, et al.
Standards Track [Page 64] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Endpoint A Endpoint Z +<-------------- Association is established----------------------> +Tag=Tag_A Tag=Tag_Z +<---------------------------------------------------------------> +{A crashes and restarts} +{app sets up a association with Z} +(build TCB) +INIT [I-Tag=Tag_A' + & other info] --------\ +(Start T1-init timer) \ +(Enter COOKIE-WAIT state) \---> (find a existing TCB + compose temp TCB and Cookie_Z + with Tie-Tags to previous + association) + /--- INIT ACK [Veri Tag=Tag_A', + / I-Tag=Tag_Z', +(Cancel T1-init timer) <------/ Cookie_Z[TieTags= + Tag_A,Tag_Z + & other info] + (destroy temp TCB,leave original + in place) +COOKIE ECHO [Veri=Tag_Z', + Cookie_Z + Tie=Tag_A, + Tag_Z]----------\ +(Start T1-init timer) \ +(Enter COOKIE-ECHOED state) \---> (Find existing association, + Tie-Tags match old tags, + Tags do not match i.e. + case X X M M above, + Announce Restart to ULP + and reset association). + /---- COOKIE-ACK + / +(Cancel T1-init timer, <-----/ + Enter ESTABLISHED state) +{app sends 1st user data; strm 0} +DATA [TSN=initial TSN_A + Strm=0,Seq=1 & user data]--\ +(Start T3-rtx timer) \ + \-> + /----- SACK [TSN Ack=init TSN_A,Block=0] +(Cancel T3-rtx timer) <------/ + + Figure 5: A Restart Example + + + + + + +Stewart, et al. Standards Track [Page 65] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.2.5 Handle Duplicate COOKIE-ACK. + + At any state other than COOKIE-ECHOED, an endpoint should silently + discard a received COOKIE ACK chunk. + +5.2.6 Handle Stale COOKIE Error + + Receipt of an ERROR chunk with a "Stale Cookie" error cause indicates + one of a number of possible events: + + A) That the association failed to completely setup before the State + Cookie issued by the sender was processed. + + B) An old State Cookie was processed after setup completed. 
+ + C) An old State Cookie is received from someone that the receiver is + not interested in having an association with and the ABORT chunk + was lost. + + When processing an ERROR chunk with a "Stale Cookie" error cause an + endpoint should first examine if an association is in the process of + being setup, i.e. the association is in the COOKIE-ECHOED state. In + all cases if the association is not in the COOKIE-ECHOED state, the + ERROR chunk should be silently discarded. + + If the association is in the COOKIE-ECHOED state, the endpoint may + elect one of the following three alternatives. + + 1) Send a new INIT chunk to the endpoint to generate a new State + Cookie and re-attempt the setup procedure. + + 2) Discard the TCB and report to the upper layer the inability to + setup the association. + + 3) Send a new INIT chunk to the endpoint, adding a Cookie + Preservative parameter requesting an extension to the lifetime of + the State Cookie. When calculating the time extension, an + implementation SHOULD use the RTT information measured based on + the previous COOKIE ECHO / ERROR exchange, and should add no more + than 1 second beyond the measured RTT, due to long State Cookie + lifetimes making the endpoint more subject to a replay attack. + + + + + + + + + + +Stewart, et al. Standards Track [Page 66] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.3 Other Initialization Issues + +5.3.1 Selection of Tag Value + + Initiate Tag values should be selected from the range of 1 to 2**32 - + 1. It is very important that the Initiate Tag value be randomized to + help protect against "man in the middle" and "sequence number" + attacks. The methods described in [RFC1750] can be used for the + Initiate Tag randomization. Careful selection of Initiate Tags is + also necessary to prevent old duplicate packets from previous + associations being mistakenly processed as belonging to the current + association. 
+ + Moreover, the Verification Tag value used by either endpoint in a + given association MUST NOT change during the lifetime of an + association. A new Verification Tag value MUST be used each time the + endpoint tears-down and then re-establishes an association to the + same peer. + +6. User Data Transfer + + Data transmission MUST only happen in the ESTABLISHED, SHUTDOWN- + PENDING, and SHUTDOWN-RECEIVED states. The only exception to this is + that DATA chunks are allowed to be bundled with an outbound COOKIE + ECHO chunk when in COOKIE-WAIT state. + + DATA chunks MUST only be received according to the rules below in + ESTABLISHED, SHUTDOWN-PENDING, SHUTDOWN-SENT. A DATA chunk received + in CLOSED is out of the blue and SHOULD be handled per 8.4. A DATA + chunk received in any other state SHOULD be discarded. + + A SACK MUST be processed in ESTABLISHED, SHUTDOWN-PENDING, and + SHUTDOWN-RECEIVED. An incoming SACK MAY be processed in COOKIE- + ECHOED. A SACK in the CLOSED state is out of the blue and SHOULD be + processed according to the rules in 8.4. A SACK chunk received in + any other state SHOULD be discarded. + + + A SCTP receiver MUST be able to receive a minimum of 1500 bytes in + one SCTP packet. This means that a SCTP endpoint MUST NOT indicate + less than 1500 bytes in its Initial a_rwnd sent in the INIT or INIT + ACK. + + For transmission efficiency, SCTP defines mechanisms for bundling of + small user messages and fragmentation of large user messages. The + following diagram depicts the flow of user messages through SCTP. + + + + + +Stewart, et al. Standards Track [Page 67] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + In this section the term "data sender" refers to the endpoint that + transmits a DATA chunk and the term "data receiver" refers to the + endpoint that receives a DATA chunk. A data receiver will transmit + SACK chunks. 
+ + +--------------------------+ + | User Messages | + +--------------------------+ + SCTP user ^ | + ==================|==|======================================= + | v (1) + +------------------+ +--------------------+ + | SCTP DATA Chunks | |SCTP Control Chunks | + +------------------+ +--------------------+ + ^ | ^ | + | v (2) | v (2) + +--------------------------+ + | SCTP packets | + +--------------------------+ + SCTP ^ | + ===========================|==|=========================== + | v + Connectionless Packet Transfer Service (e.g., IP) + + Notes: + + 1) When converting user messages into DATA chunks, an endpoint + will fragment user messages larger than the current association + path MTU into multiple DATA chunks. The data receiver will + normally reassemble the fragmented message from DATA chunks + before delivery to the user (see Section 6.9 for details). + + 2) Multiple DATA and control chunks may be bundled by the sender + into a single SCTP packet for transmission, as long as the + final size of the packet does not exceed the current path MTU. + The receiver will unbundle the packet back into the original + chunks. Control chunks MUST come before DATA chunks in the + packet. + + Figure 6: Illustration of User Data Transfer + + The fragmentation and bundling mechanisms, as detailed in Sections + 6.9 and 6.10, are OPTIONAL to implement by the data sender, but they + MUST be implemented by the data receiver, i.e., an endpoint MUST + properly receive and process bundled or fragmented data. + + + + + + +Stewart, et al. Standards Track [Page 68] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +6.1 Transmission of DATA Chunks + + This document is specified as if there is a single retransmission + timer per destination transport address, but implementations MAY have + a retransmission timer for each DATA chunk. 
+ + The following general rules MUST be applied by the data sender for + transmission and/or retransmission of outbound DATA chunks: + + A) At any given time, the data sender MUST NOT transmit new data to + any destination transport address if its peer's rwnd indicates + that the peer has no buffer space (i.e. rwnd is 0, see Section + 6.2.1). However, regardless of the value of rwnd (including if it + is 0), the data sender can always have one DATA chunk in flight to + the receiver if allowed by cwnd (see rule B below). This rule + allows the sender to probe for a change in rwnd that the sender + missed due to the SACK having been lost in transit from the data + receiver to the data sender. + + B) At any given time, the sender MUST NOT transmit new data to a + given transport address if it has cwnd or more bytes of data + outstanding to that transport address. + + C) When the time comes for the sender to transmit, before sending new + DATA chunks, the sender MUST first transmit any outstanding DATA + chunks which are marked for retransmission (limited by the current + cwnd). + + D) Then, the sender can send out as many new DATA chunks as Rule A + and Rule B above allow. + + Multiple DATA chunks committed for transmission MAY be bundled in a + single packet. Furthermore, DATA chunks being retransmitted MAY be + bundled with new DATA chunks, as long as the resulting packet size + does not exceed the path MTU. A ULP may request that no bundling is + performed but this should only turn off any delays that a SCTP + implementation may be using to increase bundling efficiency. It does + not in itself stop all bundling from occurring (i.e. in case of + congestion or retransmission). + + Before an endpoint transmits a DATA chunk, if any received DATA + chunks have not been acknowledged (e.g., due to delayed ack), the + sender should create a SACK and bundle it with the outbound DATA + chunk, as long as the size of the final SCTP packet does not exceed + the current MTU. 
See Section 6.2. + + + + + + +Stewart, et al. Standards Track [Page 69] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IMPLEMENTATION NOTE: When the window is full (i.e., transmission is + disallowed by Rule A and/or Rule B), the sender MAY still accept send + requests from its upper layer, but MUST transmit no more DATA chunks + until some or all of the outstanding DATA chunks are acknowledged and + transmission is allowed by Rule A and Rule B again. + + Whenever a transmission or retransmission is made to any address, if + the T3-rtx timer of that address is not currently running, the sender + MUST start that timer. If the timer for that address is already + running, the sender MUST restart the timer if the earliest (i.e., + lowest TSN) outstanding DATA chunk sent to that address is being + retransmitted. Otherwise, the data sender MUST NOT restart the + timer. + + When starting or restarting the T3-rtx timer, the timer value must be + adjusted according to the timer rules defined in Sections 6.3.2, and + 6.3.3. + + Note: The data sender SHOULD NOT use a TSN that is more than 2**31 - + 1 above the beginning TSN of the current send window. + +6.2 Acknowledgement on Reception of DATA Chunks + + The SCTP endpoint MUST always acknowledge the reception of each valid + DATA chunk. + + The guidelines on delayed acknowledgement algorithm specified in + Section 4.2 of [RFC2581] SHOULD be followed. Specifically, an + acknowledgement SHOULD be generated for at least every second packet + (not every second DATA chunk) received, and SHOULD be generated + within 200 ms of the arrival of any unacknowledged DATA chunk. In + some situations it may be beneficial for an SCTP transmitter to be + more conservative than the algorithms detailed in this document + allow. However, an SCTP transmitter MUST NOT be more aggressive than + the following algorithms allow. 
+ + An SCTP receiver MUST NOT generate more than one SACK for every + incoming packet, other than to update the offered window as the + receiving application consumes new data. + + IMPLEMENTATION NOTE: The maximum delay for generating an + acknowledgement may be configured by the SCTP administrator, either + statically or dynamically, in order to meet the specific timing + requirement of the protocol being carried. + + An implementation MUST NOT allow the maximum delay to be configured + to be more than 500 ms. In other words an implementation MAY lower + this value below 500 ms but MUST NOT raise it above 500 ms. + + + +Stewart, et al. Standards Track [Page 70] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Acknowledgements MUST be sent in SACK chunks unless shutdown was + requested by the ULP in which case an endpoint MAY send an + acknowledgement in the SHUTDOWN chunk. A SACK chunk can acknowledge + the reception of multiple DATA chunks. See Section 3.3.4 for SACK + chunk format. In particular, the SCTP endpoint MUST fill in the + Cumulative TSN Ack field to indicate the latest sequential TSN (of a + valid DATA chunk) it has received. Any received DATA chunks with TSN + greater than the value in the Cumulative TSN Ack field SHOULD also be + reported in the Gap Ack Block fields. + + Note: The SHUTDOWN chunk does not contain Gap Ack Block fields. + Therefore, the endpoint should use a SACK instead of the SHUTDOWN + chunk to acknowledge DATA chunks received out of order. + + When a packet arrives with duplicate DATA chunk(s) and with no new + DATA chunk(s), the endpoint MUST immediately send a SACK with no + delay. If a packet arrives with duplicate DATA chunk(s) bundled with + new DATA chunks, the endpoint MAY immediately send a SACK. Normally + receipt of duplicate DATA chunks will occur when the original SACK + chunk was lost and the peer's RTO has expired. The duplicate TSN + number(s) SHOULD be reported in the SACK as duplicate.
+ + When an endpoint receives a SACK, it MAY use the Duplicate TSN + information to determine if SACK loss is occurring. Further use of + this data is for future study. + + The data receiver is responsible for maintaining its receive buffers. + The data receiver SHOULD notify the data sender in a timely manner of + changes in its ability to receive data. How an implementation + manages its receive buffers is dependent on many factors (e.g., + Operating System, memory management system, amount of memory, etc.). + However, the data sender strategy defined in Section 6.2.1 is based + on the assumption of receiver operation similar to the following: + + A) At initialization of the association, the endpoint tells the + peer how much receive buffer space it has allocated to the + association in the INIT or INIT ACK. The endpoint sets a_rwnd + to this value. + + B) As DATA chunks are received and buffered, decrement a_rwnd by + the number of bytes received and buffered. This is, in effect, + closing rwnd at the data sender and restricting the amount of + data it can transmit. + + C) As DATA chunks are delivered to the ULP and released from the + receive buffers, increment a_rwnd by the number of bytes + delivered to the upper layer. This is, in effect, opening up + rwnd on the data sender and allowing it to send more data. The + + + +Stewart, et al. Standards Track [Page 71] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + data receiver SHOULD NOT increment a_rwnd unless it has + released bytes from its receive buffer. For example, if the + receiver is holding fragmented DATA chunks in a reassembly + queue, it should not increment a_rwnd. + + D) When sending a SACK, the data receiver SHOULD place the current + value of a_rwnd into the a_rwnd field. The data receiver + SHOULD take into account that the data sender will not + retransmit DATA chunks that are acked via the Cumulative TSN + Ack (i.e., will drop from its retransmit queue). 
+ + Under certain circumstances, the data receiver may need to drop DATA + chunks that it has received but hasn't released from its receive + buffers (i.e., delivered to the ULP). These DATA chunks may have + been acked in Gap Ack Blocks. For example, the data receiver may be + holding data in its receive buffers while reassembling a fragmented + user message from its peer when it runs out of receive buffer space. + It may drop these DATA chunks even though it has acknowledged them in + Gap Ack Blocks. If a data receiver drops DATA chunks, it MUST NOT + include them in Gap Ack Blocks in subsequent SACKs until they are + received again via retransmission. In addition, the endpoint should + take into account the dropped data when calculating its a_rwnd. + + An endpoint SHOULD NOT revoke a SACK and discard data. Only in + extreme circumstance should an endpoint use this procedure (such as + out of buffer space). The data receiver should take into account + that dropping data that has been acked in Gap Ack Blocks can result + in suboptimal retransmission strategies in the data sender and thus + in suboptimal performance. + + The following example illustrates the use of delayed + acknowledgements: + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 72] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + + {App sends 3 messages; strm 0} + DATA [TSN=7,Strm=0,Seq=3] ------------> (ack delayed) + (Start T3-rtx timer) + + DATA [TSN=8,Strm=0,Seq=4] ------------> (send ack) + /------- SACK [TSN Ack=8,block=0] + (cancel T3-rtx timer) <-----/ + + DATA [TSN=9,Strm=0,Seq=5] ------------> (ack delayed) + (Start T3-rtx timer) + ... 
+ {App sends 1 message; strm 1} + (bundle SACK with DATA) + /----- SACK [TSN Ack=9,block=0] \ + / DATA [TSN=6,Strm=1,Seq=2] + (cancel T3-rtx timer) <------/ (Start T3-rtx timer) + + (ack delayed) + (send ack) + SACK [TSN Ack=6,block=0] -------------> (cancel T3-rtx timer) + + Figure 7: Delayed Acknowledgment Example + + If an endpoint receives a DATA chunk with no user data (i.e., the + Length field is set to 16) it MUST send an ABORT with error cause set + to "No User Data". + + An endpoint SHOULD NOT send a DATA chunk with no user data part. + +6.2.1 Processing a Received SACK + + Each SACK an endpoint receives contains an a_rwnd value. This value + represents the amount of buffer space the data receiver, at the time + of transmitting the SACK, has left of its total receive buffer space + (as specified in the INIT/INIT ACK). Using a_rwnd, Cumulative TSN + Ack and Gap Ack Blocks, the data sender can develop a representation + of the peer's receive buffer space. + + One of the problems the data sender must take into account when + processing a SACK is that a SACK can be received out of order. That + is, a SACK sent by the data receiver can pass an earlier SACK and be + received first by the data sender. If a SACK is received out of + order, the data sender can develop an incorrect view of the peer's + receive buffer space. + + + + + +Stewart, et al. Standards Track [Page 73] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Since there is no explicit identifier that can be used to detect + out-of-order SACKs, the data sender must use heuristics to determine + if a SACK is new. + + An endpoint SHOULD use the following rules to calculate the rwnd, + using the a_rwnd value, the Cumulative TSN Ack and Gap Ack Blocks in + a received SACK. + + A) At the establishment of the association, the endpoint initializes + the rwnd to the Advertised Receiver Window Credit (a_rwnd) the + peer specified in the INIT or INIT ACK. 
+ + B) Any time a DATA chunk is transmitted (or retransmitted) to a peer, + the endpoint subtracts the data size of the chunk from the rwnd of + that peer. + + C) Any time a DATA chunk is marked for retransmission (via either + T3-rtx timer expiration (Section 6.3.3) or via fast retransmit + (Section 7.2.4)), add the data size of those chunks to the rwnd. + + Note: If the implementation is maintaining a timer on each DATA + chunk then only DATA chunks whose timer expired would be marked + for retransmission. + + D) Any time a SACK arrives, the endpoint performs the following: + + i) If Cumulative TSN Ack is less than the Cumulative TSN Ack + Point, then drop the SACK. Since Cumulative TSN Ack is + monotonically increasing, a SACK whose Cumulative TSN Ack is + less than the Cumulative TSN Ack Point indicates an out-of- + order SACK. + + ii) Set rwnd equal to the newly received a_rwnd minus the + number of bytes still outstanding after processing the + Cumulative TSN Ack and the Gap Ack Blocks. + + iii) If the SACK is missing a TSN that was previously + acknowledged via a Gap Ack Block (e.g., the data receiver + reneged on the data), then mark the corresponding DATA chunk as + available for retransmit: Mark it as missing for fast + retransmit as described in Section 7.2.4 and if no retransmit + timer is running for the destination address to which the DATA + chunk was originally transmitted, then T3-rtx is started for + that destination address. + + + + + + + +Stewart, et al. Standards Track [Page 74] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +6.3 Management of Retransmission Timer + + An SCTP endpoint uses a retransmission timer T3-rtx to ensure data + delivery in the absence of any feedback from its peer. The duration + of this timer is referred to as RTO (retransmission timeout). + + When an endpoint's peer is multi-homed, the endpoint will calculate a + separate RTO for each different destination transport address of its + peer endpoint.
+ + The computation and management of RTO in SCTP follows closely how TCP + manages its retransmission timer. To compute the current RTO, an + endpoint maintains two state variables per destination transport + address: SRTT (smoothed round-trip time) and RTTVAR (round-trip time + variation). + +6.3.1 RTO Calculation + + The rules governing the computation of SRTT, RTTVAR, and RTO are as + follows: + + C1) Until an RTT measurement has been made for a packet sent to the + given destination transport address, set RTO to the protocol + parameter 'RTO.Initial'. + + C2) When the first RTT measurement R is made, set SRTT <- R, RTTVAR + <- R/2, and RTO <- SRTT + 4 * RTTVAR. + + C3) When a new RTT measurement R' is made, set + + RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| + SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' + + Note: The value of SRTT used in the update to RTTVAR is its value + before updating SRTT itself using the second assignment. + + After the computation, update RTO <- SRTT + 4 * RTTVAR. + + C4) When data is in flight and when allowed by rule C5 below, a new + RTT measurement MUST be made each round trip. Furthermore, new + RTT measurements SHOULD be made no more than once per round-trip + for a given destination transport address. There are two reasons + for this recommendation: First, it appears that measuring more + frequently often does not in practice yield any significant + benefit [ALLMAN99]; second, if measurements are made more often, + then the values of RTO.Alpha and RTO.Beta in rule C3 above should + be adjusted so that SRTT and RTTVAR still adjust to changes at + roughly the same rate (in terms of how many round trips it takes + + + +Stewart, et al. Standards Track [Page 75] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + them to reflect new values) as they would if making only one + measurement per round-trip and using RTO.Alpha and RTO.Beta as + given in rule C3.
However, the exact nature of these adjustments + remains a research issue. + + C5) Karn's algorithm: RTT measurements MUST NOT be made using packets + that were retransmitted (and thus for which it is ambiguous + whether the reply was for the first instance of the packet or a + later instance). + + C6) Whenever RTO is computed, if it is less than RTO.Min seconds then + it is rounded up to RTO.Min seconds. The reason for this rule is + that RTOs that do not have a high minimum value are susceptible + to unnecessary timeouts [ALLMAN99]. + + C7) A maximum value may be placed on RTO provided it is at least + RTO.Max seconds. + + There is no requirement for the clock granularity G used for + computing RTT measurements and the different state variables, other + than: + + G1) Whenever RTTVAR is computed, if RTTVAR = 0, then adjust RTTVAR <- + G. + + Experience [ALLMAN99] has shown that finer clock granularities (<= + 100 msec) perform somewhat better than more coarse granularities. + +6.3.2 Retransmission Timer Rules + + The rules for managing the retransmission timer are as follows: + + R1) Every time a DATA chunk is sent to any address (including a + retransmission), if the T3-rtx timer of that address is not + running, start it running so that it will expire after the RTO of + that address. The RTO used here is that obtained after any + doubling due to previous T3-rtx timer expirations on the + corresponding destination address as discussed in rule E2 below. + + R2) Whenever all outstanding data sent to an address have been + acknowledged, turn off the T3-rtx timer of that address. + + R3) Whenever a SACK is received that acknowledges the DATA chunk with + the earliest outstanding TSN for that address, restart T3-rtx + timer for that address with its current RTO (if there is still + outstanding data on that address). + + + + + +Stewart, et al.
Standards Track [Page 76] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + R4) Whenever a SACK is received missing a TSN that was previously + acknowledged via a Gap Ack Block, start T3-rtx for the + destination address to which the DATA chunk was originally + transmitted if it is not already running. + + The following example shows the use of various timer rules (assuming + the receiver uses delayed acks). + + Endpoint A Endpoint Z + {App begins to send} + Data [TSN=7,Strm=0,Seq=3] ------------> (ack delayed) + (Start T3-rtx timer) + {App sends 1 message; strm 1} + (bundle ack with data) + DATA [TSN=8,Strm=0,Seq=4] ----\ /-- SACK [TSN Ack=7,Block=0] + \ / DATA [TSN=6,Strm=1,Seq=2] + \ / (Start T3-rtx timer) + \ + / \ + (Re-start T3-rtx timer) <------/ \--> (ack delayed) + (ack delayed) + {send ack} + SACK [TSN Ack=6,Block=0] --------------> (Cancel T3-rtx timer) + .. + (send ack) + (Cancel T3-rtx timer) <-------------- SACK [TSN Ack=8,Block=0] + + Figure 8 - Timer Rule Examples + +6.3.3 Handle T3-rtx Expiration + + Whenever the retransmission timer T3-rtx expires for a destination + address, do the following: + + E1) For the destination address for which the timer expires, adjust + its ssthresh with rules defined in Section 7.2.3 and set the cwnd + <- MTU. + + E2) For the destination address for which the timer expires, set RTO + <- RTO * 2 ("back off the timer"). The maximum value discussed + in rule C7 above (RTO.max) may be used to provide an upper bound + to this doubling operation. + + E3) Determine how many of the earliest (i.e., lowest TSN) outstanding + DATA chunks for the address for which the T3-rtx has expired will + fit into a single packet, subject to the MTU constraint for the + path corresponding to the destination transport address to which + the retransmission is being sent (this may be different from the + + + +Stewart, et al. 
Standards Track [Page 77] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + address for which the timer expires [see Section 6.4]). Call + this value K. Bundle and retransmit those K DATA chunks in a + single packet to the destination endpoint. + + E4) Start the retransmission timer T3-rtx on the destination address + to which the retransmission is sent, if rule R1 above indicates + to do so. The RTO to be used for starting T3-rtx should be the + one for the destination address to which the retransmission is + sent, which, when the receiver is multi-homed, may be different + from the destination address for which the timer expired (see + Section 6.4 below). + + After retransmitting, once a new RTT measurement is obtained (which + can happen only when new data has been sent and acknowledged, per + rule C5, or for a measurement made from a HEARTBEAT [see Section + 8.3]), the computation in rule C3 is performed, including the + computation of RTO, which may result in "collapsing" RTO back down + after it has been subject to doubling (rule E2). + + Note: Any DATA chunks that were sent to the address for which the + T3-rtx timer expired but did not fit in one MTU (rule E3 above), + should be marked for retransmission and sent as soon as cwnd allows + (normally when a SACK arrives). + + The final rule for managing the retransmission timer concerns + failover (see Section 6.4.1): + + F1) Whenever an endpoint switches from the current destination + transport address to a different one, the current retransmission + timers are left running. As soon as the endpoint transmits a + packet containing DATA chunk(s) to the new transport address, + start the timer on that transport address, using the RTO value of + the destination address to which the data is being sent, if rule + R1 indicates to do so. 
+ +6.4 Multi-homed SCTP Endpoints + + An SCTP endpoint is considered multi-homed if there is more than one + transport address that can be used as a destination address to reach + that endpoint. + + Moreover, the ULP of an endpoint shall select one of the multiple + destination addresses of a multi-homed peer endpoint as the primary + path (see Sections 5.1.2 and 10.1 for details). + + By default, an endpoint SHOULD always transmit to the primary path, + unless the SCTP user explicitly specifies the destination transport + address (and possibly source transport address) to use. + + + +Stewart, et al. Standards Track [Page 78] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK, + etc.) to the same destination transport address from which it + received the DATA or control chunk to which it is replying. This + rule should also be followed if the endpoint is bundling DATA chunks + together with the reply chunk. + + However, when acknowledging multiple DATA chunks received in packets + from different source addresses in a single SACK, the SACK chunk may + be transmitted to one of the destination transport addresses from + which the DATA or control chunks being acknowledged were received. + + When a receiver of a duplicate DATA chunk sends a SACK to a multi- + homed endpoint it MAY be beneficial to vary the destination address + and not use the source address of the DATA chunk. The reason being + that receiving a duplicate from a multi-homed endpoint might indicate + that the return path (as specified in the source address of the DATA + chunk) for the SACK is broken. + + Furthermore, when its peer is multi-homed, an endpoint SHOULD try to + retransmit a chunk to an active destination transport address that is + different from the last destination address to which the DATA chunk + was sent. + + Retransmissions do not affect the total outstanding data count.
+ However, if the DATA chunk is retransmitted onto a different + destination address, both the outstanding data counts on the new + destination address and the old destination address to which the data + chunk was last sent shall be adjusted accordingly. + +6.4.1 Failover from Inactive Destination Address + + Some of the transport addresses of a multi-homed SCTP endpoint may + become inactive due to either the occurrence of certain error + conditions (see Section 8.2) or adjustments from the SCTP user. + + When there is outbound data to send and the primary path becomes + inactive (e.g., due to failures), or where the SCTP user explicitly + requests to send data to an inactive destination transport address, + before reporting an error to its ULP, the SCTP endpoint should try to + send the data to an alternate active destination transport address if + one exists. + + When retransmitting data, if the endpoint is multi-homed, it should + consider each source-destination address pair in its retransmission + selection policy. When retransmitting, the endpoint should attempt to + pick the most divergent source-destination pair from the original + source-destination pair to which the packet was transmitted. + + + + +Stewart, et al. Standards Track [Page 79] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Note: Rules for picking the most divergent source-destination pair + are an implementation decision and are not specified within this + document. + +6.5 Stream Identifier and Stream Sequence Number + + Every DATA chunk MUST carry a valid stream identifier. If an + endpoint receives a DATA chunk with an invalid stream identifier, it + shall acknowledge the reception of the DATA chunk following the + normal procedure, immediately send an ERROR chunk with cause set to + "Invalid Stream Identifier" (see Section 3.3.10) and discard the DATA + chunk. The endpoint may bundle the ERROR chunk in the same packet as + the SACK as long as the ERROR follows the SACK.
+ + The stream sequence number in all the streams shall start from 0 when + the association is established. Also, when the stream sequence + number reaches the value 65535 the next stream sequence number shall + be set to 0. + +6.6 Ordered and Unordered Delivery + + Within a stream, an endpoint MUST deliver DATA chunks received with + the U flag set to 0 to the upper layer according to the order of + their stream sequence number. If DATA chunks arrive out of order of + their stream sequence number, the endpoint MUST hold the received + DATA chunks from delivery to the ULP until they are re-ordered. + + However, an SCTP endpoint can indicate that no ordered delivery is + required for a particular DATA chunk transmitted within the stream by + setting the U flag of the DATA chunk to 1. + + When an endpoint receives a DATA chunk with the U flag set to 1, it + must bypass the ordering mechanism and immediately deliver the data + to the upper layer (after re-assembly if the user data is fragmented + by the data sender). + + This provides an effective way of transmitting "out-of-band" data in + a given stream. Also, a stream can be used as an "unordered" stream + by simply setting the U flag to 1 in all DATA chunks sent through + that stream. + + IMPLEMENTATION NOTE: When sending an unordered DATA chunk, an + implementation may choose to place the DATA chunk in an outbound + packet that is at the head of the outbound transmission queue if + possible. + + + + + + +Stewart, et al. Standards Track [Page 80] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The 'Stream Sequence Number' field in a DATA chunk with U flag set to + 1 has no significance. The sender can fill it with arbitrary value, + but the receiver MUST ignore the field. + + Note: When transmitting ordered and unordered data, an endpoint does + not increment its Stream Sequence Number when transmitting a DATA + chunk with U flag set to 1. 
+ +6.7 Report Gaps in Received DATA TSNs + + Upon the reception of a new DATA chunk, an endpoint shall examine the + continuity of the TSNs received. If the endpoint detects a gap in + the received DATA chunk sequence, it SHOULD send a SACK with Gap Ack + Blocks immediately. The data receiver continues sending a SACK after + receipt of each SCTP packet that doesn't fill the gap. + + Based on the Gap Ack Block from the received SACK, the endpoint can + calculate the missing DATA chunks and make decisions on whether to + retransmit them (see Section 6.2.1 for details). + + Multiple gaps can be reported in one single SACK (see Section 3.3.4). + + When its peer is multi-homed, the SCTP endpoint SHOULD always try to + send the SACK to the same destination address from which the last + DATA chunk was received. + + Upon the reception of a SACK, the endpoint MUST remove all DATA + chunks which have been acknowledged by the SACK's Cumulative TSN Ack + from its transmit queue. The endpoint MUST also treat all the DATA + chunks with TSNs not included in the Gap Ack Blocks reported by the + SACK as "missing". The number of "missing" reports for each + outstanding DATA chunk MUST be recorded by the data sender in order + to make retransmission decisions. See Section 7.2.4 for details. + + The following example shows the use of SACK to report a gap. + + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 81] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + {App sends 3 messages; strm 0} + DATA [TSN=6,Strm=0,Seq=2] ---------------> (ack delayed) + (Start T3-rtx timer) + + DATA [TSN=7,Strm=0,Seq=3] --------> X (lost) + + DATA [TSN=8,Strm=0,Seq=4] ---------------> (gap detected, + immediately send ack) + /----- SACK [TSN Ack=6,Block=1, + / Strt=2,End=2] + <-----/ + (remove 6 from out-queue, + and mark 7 as "1" missing report) + + Figure 9 - Reporting a Gap using SACK + + The maximum number of Gap Ack Blocks that can be reported within a + single SACK chunk is limited by the current path MTU. When a single + SACK can not cover all the Gap Ack Blocks needed to be reported due + to the MTU limitation, the endpoint MUST send only one SACK, + reporting the Gap Ack Blocks from the lowest to highest TSNs, within + the size limit set by the MTU, and leave the remaining highest TSN + numbers unacknowledged. + +6.8 Adler-32 Checksum Calculation + + When sending an SCTP packet, the endpoint MUST strengthen the data + integrity of the transmission by including the Adler-32 checksum + value calculated on the packet, as described below. + + After the packet is constructed (containing the SCTP common header + and one or more control or DATA chunks), the transmitter shall: + + 1) Fill in the proper Verification Tag in the SCTP common header and + initialize the checksum field to 0's. + + 2) Calculate the Adler-32 checksum of the whole packet, including the + SCTP common header and all the chunks. Refer to appendix B for + details of the Adler-32 algorithm. And, + + 3) Put the resultant value into the checksum field in the common + header, and leave the rest of the bits unchanged. + + When an SCTP packet is received, the receiver MUST first check the + Adler-32 checksum: + + 1) Store the received Adler-32 checksum value aside, + + + +Stewart, et al. 
Standards Track [Page 82] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 2) Replace the 32 bits of the checksum field in the received SCTP + packet with all '0's and calculate an Adler-32 checksum value of + the whole received packet. And, + + 3) Verify that the calculated Adler-32 checksum is the same as the + received Adler-32 checksum. If not, the receiver MUST treat the + packet as an invalid SCTP packet. + + The default procedure for handling invalid SCTP packets is to + silently discard them. + +6.9 Fragmentation and Reassembly + + An endpoint MAY support fragmentation when sending DATA chunks, but + MUST support reassembly when receiving DATA chunks. If an endpoint + supports fragmentation, it MUST fragment a user message if the size + of the user message to be sent causes the outbound SCTP packet size + to exceed the current MTU. If an implementation does not support + fragmentation of outbound user messages, the endpoint must return an + error to its upper layer and not attempt to send the user message. + + IMPLEMENTATION NOTE: In this error case, the Send primitive + discussed in Section 10.1 would need to return an error to the upper + layer. + + If its peer is multi-homed, the endpoint shall choose a size no + larger than the association Path MTU. The association Path MTU is + the smallest Path MTU of all destination addresses. + + Note: Once a message is fragmented it cannot be re-fragmented. + Instead if the PMTU has been reduced, then IP fragmentation must be + used. Please see Section 7.3 for details of PMTU discovery. + + When determining when to fragment, the SCTP implementation MUST take + into account the SCTP packet header as well as the DATA chunk + header(s). The implementation MUST also take into account the space + required for a SACK chunk if bundling a SACK chunk with the DATA + chunk. 
+ + Fragmentation takes the following steps: + + 1) The data sender MUST break the user message into a series of DATA + chunks such that each chunk plus SCTP overhead fits into an IP + datagram smaller than or equal to the association Path MTU. + + 2) The transmitter MUST then assign, in sequence, a separate TSN to + each of the DATA chunks in the series. The transmitter assigns + the same SSN to each of the DATA chunks. If the user indicates + + + +Stewart, et al. Standards Track [Page 83] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + that the user message is to be delivered using unordered delivery, + then the U flag of each DATA chunk of the user message MUST be set + to 1. + + 3) The transmitter MUST also set the B/E bits of the first DATA chunk + in the series to '10', the B/E bits of the last DATA chunk in the + series to '01', and the B/E bits of all other DATA chunks in the + series to '00'. + + An endpoint MUST recognize fragmented DATA chunks by examining the + B/E bits in each of the received DATA chunks, and queue the + fragmented DATA chunks for re-assembly. Once the user message is + reassembled, SCTP shall pass the re-assembled user message to the + specific stream for possible re-ordering and final dispatching. + + Note: If the data receiver runs out of buffer space while still + waiting for more fragments to complete the re-assembly of the + message, it should dispatch part of its inbound message through a + partial delivery API (see Section 10), freeing some of its receive + buffer space so that the rest of the message may be received. + +6.10 Bundling + + An endpoint bundles chunks by simply including multiple chunks in one + outbound SCTP packet. The total size of the resultant IP datagram, + including the SCTP packet and IP headers, MUST be less than or equal to + the current Path MTU. + + If its peer endpoint is multi-homed, the sending endpoint shall + choose a size no larger than the latest MTU of the current primary + path.
+ + When bundling control chunks with DATA chunks, an endpoint MUST place + control chunks first in the outbound SCTP packet. The transmitter + MUST transmit DATA chunks within a SCTP packet in increasing order of + TSN. + + Note: Since control chunks must be placed first in a packet and + since DATA chunks must be transmitted before SHUTDOWN or SHUTDOWN ACK + chunks, DATA chunks cannot be bundled with SHUTDOWN or SHUTDOWN ACK + chunks. + + Partial chunks MUST NOT be placed in an SCTP packet. + + + + + + + + +Stewart, et al. Standards Track [Page 84] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An endpoint MUST process received chunks in their order in the + packet. The receiver uses the chunk length field to determine the end + of a chunk and beginning of the next chunk taking account of the fact + that all chunks end on a 4 byte boundary. If the receiver detects a + partial chunk, it MUST drop the chunk. + + An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN COMPLETE with + any other chunks. + +7. Congestion control + + Congestion control is one of the basic functions in SCTP. For some + applications, it may be likely that adequate resources will be + allocated to SCTP traffic to assure prompt delivery of time-critical + data - thus it would appear to be unlikely, during normal operations, + that transmissions encounter severe congestion conditions. However + SCTP must operate under adverse operational conditions, which can + develop upon partial network failures or unexpected traffic surges. + In such situations SCTP must follow correct congestion control steps + to recover from congestion quickly in order to get data delivered as + soon as possible. In the absence of network congestion, these + preventive congestion control algorithms should show no impact on the + protocol performance. 
+ + IMPLEMENTATION NOTE: As long as its specific performance requirements + are met, an implementation is always allowed to adopt a more + conservative congestion control algorithm than the one defined below. + + The congestion control algorithms used by SCTP are based on + [RFC2581]. This section describes how the algorithms defined in + RFC2581 are adapted for use in SCTP. We first list differences in + protocol designs between TCP and SCTP, and then describe SCTP's + congestion control scheme. The description will use the same + terminology as in TCP congestion control whenever appropriate. + + SCTP congestion control is always applied to the entire association, + and not to individual streams. + +7.1 SCTP Differences from TCP Congestion control + + Gap Ack Blocks in the SCTP SACK carry the same semantic meaning as + the TCP SACK. TCP considers the information carried in the SACK as + advisory information only. SCTP considers the information carried in + the Gap Ack Blocks in the SACK chunk as advisory. In SCTP, any DATA + chunk that has been acknowledged by SACK, including DATA that arrived + at the receiving end out of order, is not considered fully delivered + until the Cumulative TSN Ack Point passes the TSN of the DATA chunk + (i.e., the DATA chunk has been acknowledged by the Cumulative TSN Ack + + + +Stewart, et al. Standards Track [Page 85] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + field in the SACK). Consequently, the value of cwnd controls the + amount of outstanding data, rather than (as in the case of non-SACK + TCP) the upper bound between the highest acknowledged sequence number + and the latest DATA chunk that can be sent within the congestion + window. SCTP SACK leads to different implementations of fast- + retransmit and fast-recovery than non-SACK TCP. As an example see + [FALL96]. + + The biggest difference between SCTP and TCP, however, is multi- + homing.
SCTP is designed to establish robust communication + associations between two endpoints each of which may be reachable by + more than one transport address. Potentially different addresses may + lead to different data paths between the two endpoints, thus ideally + one may need a separate set of congestion control parameters for each + of the paths. The treatment here of congestion control for multi- + homed receivers is new with SCTP and may require refinement in the + future. The current algorithms make the following assumptions: + + o The sender usually uses the same destination address until being + instructed by the upper layer otherwise; however, SCTP may change + to an alternate destination in the event an address is marked + inactive (see Section 8.2). Also, SCTP may retransmit to a + different transport address than the original transmission. + + o The sender keeps a separate congestion control parameter set for + each of the destination addresses it can send to (not each + source-destination pair but for each destination). The parameters + should decay if the address is not used for a long enough time + period. + + o For each of the destination addresses, an endpoint does slow-start + upon the first transmission to that address. + + Note: TCP guarantees in-sequence delivery of data to its upper-layer + protocol within a single TCP session. This means that when TCP + notices a gap in the received sequence number, it waits until the gap + is filled before delivering the data that was received with sequence + numbers higher than that of the missing data. On the other hand, + SCTP can deliver data to its upper-layer protocol even if there is a + gap in TSN if the Stream Sequence Numbers are in sequence for a + particular stream (i.e., the missing DATA chunks are for a different + stream) or if unordered delivery is indicated. Although this does + not affect cwnd, it might affect rwnd calculation. + + + + + + + + +Stewart, et al. 
Standards Track [Page 86] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +7.2 SCTP Slow-Start and Congestion Avoidance + + The slow start and congestion avoidance algorithms MUST be used by an + endpoint to control the amount of data being injected into the + network. The congestion control in SCTP is employed in regard to the + association, not to an individual stream. In some situations it may + be beneficial for an SCTP sender to be more conservative than the + algorithms allow; however, an SCTP sender MUST NOT be more aggressive + than the following algorithms allow. + + Like TCP, an SCTP endpoint uses the following three control variables + to regulate its transmission rate. + + o Receiver advertised window size (rwnd, in bytes), which is set by + the receiver based on its available buffer space for incoming + packets. + + Note: This variable is kept on the entire association. + + o Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. + + Note: This variable is maintained on a per-destination address + basis. + + o Slow-start threshold (ssthresh, in bytes), which is used by the + sender to distinguish slow start and congestion avoidance phases. + + Note: This variable is maintained on a per-destination address + basis. + + SCTP also requires one additional control variable, + partial_bytes_acked, which is used during congestion avoidance phase + to facilitate cwnd adjustment. + + Unlike TCP, an SCTP sender MUST keep a set of these control variables + cwnd, ssthresh and partial_bytes_acked for EACH destination address + of its peer (when its peer is multi-homed). Only one rwnd is kept + for the whole association (no matter if the peer is multi-homed or + has a single address). + +7.2.1 Slow-Start + + Beginning data transmission into a network with unknown conditions or + after a sufficiently long idle period requires SCTP to probe the + network to determine the available capacity. 
The slow start + algorithm is used for this purpose at the beginning of a transfer, or + after repairing loss detected by the retransmission timer. + + + +Stewart, et al. Standards Track [Page 87] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o The initial cwnd before DATA transmission or after a sufficiently + long idle period MUST be <= 2*MTU. + + o The initial cwnd after a retransmission timeout MUST be no more + than 1*MTU. + + o The initial value of ssthresh MAY be arbitrarily high (for + example, implementations MAY use the size of the receiver + advertised window). + + o Whenever cwnd is greater than zero, the endpoint is allowed to + have cwnd bytes of data outstanding on that transport address. + + o When cwnd is less than or equal to ssthresh an SCTP endpoint MUST + use the slow start algorithm to increase cwnd (assuming the + current congestion window is being fully utilized). If an + incoming SACK advances the Cumulative TSN Ack Point, cwnd MUST be + increased by at most the lesser of 1) the total size of the + previously outstanding DATA chunk(s) acknowledged, and 2) the + destination's path MTU. This protects against the ACK-Splitting + attack outlined in [SAVAGE99]. + + In instances where its peer endpoint is multi-homed, if an endpoint + receives a SACK that advances its Cumulative TSN Ack Point, then it + should update its cwnd (or cwnds) apportioned to the destination + addresses to which it transmitted the acknowledged data. However if + the received SACK does not advance the Cumulative TSN Ack Point, the + endpoint MUST NOT adjust the cwnd of any of the destination + addresses. + + Because an endpoint's cwnd is not tied to its Cumulative TSN Ack + Point, as duplicate SACKs come in, even though they may not advance + the Cumulative TSN Ack Point an endpoint can still use them to clock + out new data. 
That is, the data newly acknowledged by the SACK + diminishes the amount of data now in flight to less than cwnd; and so + the current, unchanged value of cwnd now allows new data to be sent. + On the other hand, the increase of cwnd must be tied to the + Cumulative TSN Ack Point advancement as specified above. Otherwise + the duplicate SACKs will not only clock out new data, but also will + adversely clock out more new data than what has just left the + network, during a time of possible congestion. + + o When the endpoint does not transmit data on a given transport + address, the cwnd of the transport address should be adjusted to + max(cwnd/2, 2*MTU) per RTO. + + + + + + +Stewart, et al. Standards Track [Page 88] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +7.2.2 Congestion Avoidance + + When cwnd is greater than ssthresh, cwnd should be incremented by + 1*MTU per RTT if the sender has cwnd or more bytes of data + outstanding for the corresponding transport address. + + In practice an implementation can achieve this goal in the following + way: + + o partial_bytes_acked is initialized to 0. + + o Whenever cwnd is greater than ssthresh, upon each SACK arrival + that advances the Cumulative TSN Ack Point, increase + partial_bytes_acked by the total number of bytes of all new chunks + acknowledged in that SACK including chunks acknowledged by the new + Cumulative TSN Ack and by Gap Ack Blocks. + + o When partial_bytes_acked is equal to or greater than cwnd and + before the arrival of the SACK the sender had cwnd or more bytes + of data outstanding (i.e., before arrival of the SACK, flightsize + was greater than or equal to cwnd), increase cwnd by MTU, and + reset partial_bytes_acked to (partial_bytes_acked - cwnd). + + o Same as in the slow start, when the sender does not transmit DATA + on a given transport address, the cwnd of the transport address + should be adjusted to max(cwnd / 2, 2*MTU) per RTO. 
+ + o When all of the data transmitted by the sender has been + acknowledged by the receiver, partial_bytes_acked is initialized + to 0. + +7.2.3 Congestion Control + + Upon detection of packet losses from SACK (see Section 7.2.4), an + endpoint should do the following: + + ssthresh = max(cwnd/2, 2*MTU) + cwnd = ssthresh + + Basically, a packet loss causes cwnd to be cut in half. + + When the T3-rtx timer expires on an address, SCTP should perform slow + start by: + + ssthresh = max(cwnd/2, 2*MTU) + cwnd = 1*MTU + + + + + +Stewart, et al. Standards Track [Page 89] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + and assure that no more than one SCTP packet will be in flight for + that address until the endpoint receives acknowledgement for + successful delivery of data to that address. + +7.2.4 Fast Retransmit on Gap Reports + + In the absence of data loss, an endpoint performs delayed + acknowledgement. However, whenever an endpoint notices a hole in the + arriving TSN sequence, it SHOULD start sending a SACK back every time + a packet arrives carrying data until the hole is filled. + + Whenever an endpoint receives a SACK that indicates some TSN(s) + missing, it SHOULD wait for 3 further miss indications (via + subsequent SACK's) on the same TSN(s) before taking action with + regard to Fast Retransmit. + + When the TSN(s) is reported as missing in the fourth consecutive + SACK, the data sender shall: + + 1) Mark the missing DATA chunk(s) for retransmission, + + 2) Adjust the ssthresh and cwnd of the destination address(es) to + which the missing DATA chunks were last sent, according to the + formula described in Section 7.2.3. + + 3) Determine how many of the earliest (i.e., lowest TSN) DATA chunks + marked for retransmission will fit into a single packet, subject + to constraint of the path MTU of the destination transport address + to which the packet is being sent. Call this value K. Retransmit + those K DATA chunks in a single packet.
+ + 4) Restart T3-rtx timer only if the last SACK acknowledged the lowest + outstanding TSN number sent to that address, or the endpoint is + retransmitting the first outstanding DATA chunk sent to that + address. + + Note: Before the above adjustments, if the received SACK also + acknowledges new DATA chunks and advances the Cumulative TSN Ack + Point, the cwnd adjustment rules defined in Sections 7.2.1 and 7.2.2 + must be applied first. + + A straightforward implementation of the above keeps a counter for + each TSN hole reported by a SACK. The counter increments for each + consecutive SACK reporting the TSN hole. After reaching 4 and + starting the fast retransmit procedure, the counter resets to 0. + + + + + + +Stewart, et al. Standards Track [Page 90] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Because cwnd in SCTP indirectly bounds the number of outstanding + TSN's, the effect of TCP fast-recovery is achieved automatically with + no adjustment to the congestion control window size. + +7.3 Path MTU Discovery + + [RFC1191] specifies "Path MTU Discovery", whereby an endpoint + maintains an estimate of the maximum transmission unit (MTU) along a + given Internet path and refrains from sending packets along that path + which exceed the MTU, other than occasional attempts to probe for a + change in the Path MTU (PMTU). RFC 1191 is thorough in its + discussion of the MTU discovery mechanism and strategies for + determining the current end-to-end MTU setting as well as detecting + changes in this value. [RFC1981] specifies the same mechanisms for + IPv6. An SCTP sender using IPv6 MUST use Path MTU Discovery unless + all packets are less than the minimum IPv6 MTU [RFC2460]. + + An endpoint SHOULD apply these techniques, and SHOULD do so on a + per-destination-address basis. + + There are 5 ways in which SCTP differs from the description in RFC + 1191 of applying MTU discovery to TCP: + + 1) SCTP associations can span multiple addresses.
An endpoint MUST + maintain separate MTU estimates for each destination address of + its peer. + + 2) Elsewhere in this document, when the term "MTU" is discussed, it + refers to the MTU associated with the destination address + corresponding to the context of the discussion. + + 3) Unlike TCP, SCTP does not have a notion of "Maximum Segment Size". + Accordingly, the MTU for each destination address SHOULD be + initialized to a value no larger than the link MTU for the local + interface to which packets for that remote destination address + will be routed. + + 4) Since data transmission in SCTP is naturally structured in terms + of TSNs rather than bytes (as is the case for TCP), the discussion + in Section 6.5 of RFC 1191 applies: When retransmitting an IP + datagram to a remote address for which the IP datagram appears too + large for the path MTU to that address, the IP datagram SHOULD be + retransmitted without the DF bit set, allowing it to possibly be + fragmented. Transmissions of new IP datagrams MUST have DF set. + + + + + + + +Stewart, et al. Standards Track [Page 91] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5) The sender should track an association PMTU which will be the + smallest PMTU discovered for all of the peer's destination + addresses. When fragmenting messages into multiple parts this + association PMTU should be used to calculate the size of each + fragment. This will allow retransmissions to be seamlessly sent + to an alternate address without encountering IP fragmentation. + + Other than these differences, the discussion of TCP's use of MTU + discovery in RFCs 1191 and 1981 applies to SCTP on a per- + destination-address basis. + + Note: For IPv6 destination addresses the DF bit does not exist, + instead the IP datagram must be fragmented as described in [RFC2460]. + +8. 
Fault Management + +8.1 Endpoint Failure Detection + + An endpoint shall keep a counter on the total number of consecutive + retransmissions to its peer (including retransmissions to all the + destination transport addresses of the peer if it is multi-homed). + If the value of this counter exceeds the limit indicated in the + protocol parameter 'Association.Max.Retrans', the endpoint shall + consider the peer endpoint unreachable and shall stop transmitting + any more data to it (and thus the association enters the CLOSED + state). In addition, the endpoint shall report the failure to the + upper layer, and optionally report back all outstanding user data + remaining in its outbound queue. The association is automatically + closed when the peer endpoint becomes unreachable. + + The counter shall be reset each time a DATA chunk sent to that peer + endpoint is acknowledged (by the reception of a SACK), or a + HEARTBEAT-ACK is received from the peer endpoint. + +8.2 Path Failure Detection + + When its peer endpoint is multi-homed, an endpoint should keep an + error counter for each of the destination transport addresses of the + peer endpoint. + + Each time the T3-rtx timer expires on any address, or when a + HEARTBEAT sent to an idle address is not acknowledged within an RTO, + the error counter of that destination address will be incremented. + When the value in the error counter exceeds the protocol parameter + 'Path.Max.Retrans' of that destination address, the endpoint should + mark the destination transport address as inactive, and a + notification SHOULD be sent to the upper layer. + + + + +Stewart, et al. Standards Track [Page 92] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + When an outstanding TSN is acknowledged or a HEARTBEAT sent to that + address is acknowledged with a HEARTBEAT ACK, the endpoint shall + clear the error counter of the destination transport address to which + the DATA chunk was last sent (or HEARTBEAT was sent).
When the peer + endpoint is multi-homed and the last chunk sent to it was a + retransmission to an alternate address, there exists an ambiguity as + to whether or not the acknowledgement should be credited to the + address of the last chunk sent. However, this ambiguity does not + seem to bear any significant consequence to SCTP behavior. If this + ambiguity is undesirable, the transmitter may choose not to clear the + error counter if the last chunk sent was a retransmission. + + Note: When configuring the SCTP endpoint, the user should avoid + having the value of 'Association.Max.Retrans' larger than the + summation of the 'Path.Max.Retrans' of all the destination addresses + for the remote endpoint. Otherwise, all the destination addresses + may become inactive while the endpoint still considers the peer + endpoint reachable. When this condition occurs, how the SCTP chooses + to function is implementation specific. + + When the primary path is marked inactive (due to excessive + retransmissions, for instance), the sender MAY automatically transmit + new packets to an alternate destination address if one exists and is + active. If more than one alternate address is active when the + primary path is marked inactive only ONE transport address SHOULD be + chosen and used as the new destination transport address. + +8.3 Path Heartbeat + + By default, an SCTP endpoint shall monitor the reachability of the + idle destination transport address(es) of its peer by sending a + HEARTBEAT chunk periodically to the destination transport + address(es). + + A destination transport address is considered "idle" if no new chunk + which can be used for updating path RTT (usually including first + transmission DATA, INIT, COOKIE ECHO, HEARTBEAT etc.) and no + HEARTBEAT has been sent to it within the current heartbeat period of + that address. This applies to both active and inactive destination + addresses. 
+ + The upper layer can optionally initiate the following functions: + + A) Disable heartbeat on a specific destination transport address of a + given association, + + B) Change the HB.interval, + + + + +Stewart, et al. Standards Track [Page 93] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + C) Re-enable heartbeat on a specific destination transport address of + a given association, and, + + D) Request an on-demand HEARTBEAT on a specific destination transport + address of a given association. + + The endpoint should increment the respective error counter of the + destination transport address each time a HEARTBEAT is sent to that + address and not acknowledged within one RTO. + + When the value of this counter reaches the protocol parameter + 'Path.Max.Retrans', the endpoint should mark the corresponding + destination address as inactive if it is not so marked, and may also + optionally report to the upper layer the change of reachability of + this destination address. After this, the endpoint should continue + HEARTBEAT on this destination address but should stop increasing the + counter. + + The sender of the HEARTBEAT chunk should include in the Heartbeat + Information field of the chunk the current time when the packet is + sent out and the destination address to which the packet is sent. + + IMPLEMENTATION NOTE: An alternative implementation of the heartbeat + mechanism that can be used is to increment the error counter variable + every time a HEARTBEAT is sent to a destination. Whenever a + HEARTBEAT ACK arrives, the sender SHOULD clear the error counter of + the destination that the HEARTBEAT was sent to. This in effect would + clear the previously stroked error (and any other error counts as + well). + + The receiver of the HEARTBEAT should immediately respond with a + HEARTBEAT ACK that contains the Heartbeat Information field copied + from the received HEARTBEAT chunk.
+ + Upon the receipt of the HEARTBEAT ACK, the sender of the HEARTBEAT + should clear the error counter of the destination transport address + to which the HEARTBEAT was sent, and mark the destination transport + address as active if it is not so marked. The endpoint may + optionally report to the upper layer when an inactive destination + address is marked as active due to the reception of the latest + HEARTBEAT ACK. The receiver of the HEARTBEAT ACK must also clear the + association overall error count as well (as defined in section 8.1). + + The receiver of the HEARTBEAT ACK should also perform an RTT + measurement for that destination transport address using the time + value carried in the HEARTBEAT ACK chunk. + + + + + +Stewart, et al. Standards Track [Page 94] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + On an idle destination address that is allowed to heartbeat, a + HEARTBEAT chunk is RECOMMENDED to be sent once per RTO of that + destination address plus the protocol parameter 'HB.interval', with + jittering of +/- 50%, and exponential back-off of the RTO if the + previous HEARTBEAT is unanswered. + + A primitive is provided for the SCTP user to change the HB.interval + and turn on or off the heartbeat on a given destination address. The + heartbeat interval set by the SCTP user is added to the RTO of that + destination (including any exponential backoff). Only one heartbeat + should be sent each time the heartbeat timer expires (if multiple + destinations are idle). It is an implementation decision on how to + choose which of the candidate idle destinations to heartbeat to (if + more than one destination is idle). + + Note: When tuning the heartbeat interval, there is a side effect that + SHOULD be taken into account. When this value is increased, i.e. + the HEARTBEAT takes longer, the detection of lost ABORT messages + takes longer as well.
If a peer endpoint ABORTs the association for + any reason and the ABORT chunk is lost, the local endpoint will only + discover the lost ABORT by sending a DATA chunk or HEARTBEAT chunk + (thus causing the peer to send another ABORT). This must be + considered when tuning the HEARTBEAT timer. If the HEARTBEAT is + disabled only sending DATA to the association will discover a lost + ABORT from the peer. + +8.4 Handle "Out of the blue" Packets + + An SCTP packet is called an "out of the blue" (OOTB) packet if it is + correctly formed, i.e., passed the receiver's Adler-32 check (see + Section 6.8), but the receiver is not able to identify the + association to which this packet belongs. + + The receiver of an OOTB packet MUST do the following: + + 1) If the OOTB packet is to or from a non-unicast address, silently + discard the packet. Otherwise, + + 2) If the OOTB packet contains an ABORT chunk, the receiver MUST + silently discard the OOTB packet and take no further action. + Otherwise, + + 3) If the packet contains an INIT chunk with a Verification Tag set + to '0', process it as described in Section 5.1. Otherwise, + + 4) If the packet contains a COOKIE ECHO in the first chunk, process + it as described in Section 5.1. Otherwise, + + + + +Stewart, et al. Standards Track [Page 95] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5) If the packet contains a SHUTDOWN ACK chunk, the receiver should + respond to the sender of the OOTB packet with a SHUTDOWN COMPLETE. + When sending the SHUTDOWN COMPLETE, the receiver of the OOTB + packet must fill in the Verification Tag field of the outbound + packet with the Verification Tag received in the SHUTDOWN ACK and + set the T-bit in the Chunk Flags to indicate that no TCB was + found. Otherwise, + + 6) If the packet contains a SHUTDOWN COMPLETE chunk, the receiver + should silently discard the packet and take no further action. 
+ Otherwise, + + 7) If the packet contains a "Stale cookie" ERROR or a COOKIE ACK the + SCTP Packet should be silently discarded. Otherwise, + + 8) The receiver should respond to the sender of the OOTB packet with + an ABORT. When sending the ABORT, the receiver of the OOTB packet + MUST fill in the Verification Tag field of the outbound packet + with the value found in the Verification Tag field of the OOTB + packet and set the T-bit in the Chunk Flags to indicate that no + TCB was found. After sending this ABORT, the receiver of the OOTB + packet shall discard the OOTB packet and take no further action. + +8.5 Verification Tag + + The Verification Tag rules defined in this section apply when sending + or receiving SCTP packets which do not contain an INIT, SHUTDOWN + COMPLETE, COOKIE ECHO (see Section 5.1), ABORT or SHUTDOWN ACK chunk. + The rules for sending and receiving SCTP packets containing one of + these chunk types are discussed separately in Section 8.5.1. + + When sending an SCTP packet, the endpoint MUST fill in the + Verification Tag field of the outbound packet with the tag value in + the Initiate Tag parameter of the INIT or INIT ACK received from its + peer. + + When receiving an SCTP packet, the endpoint MUST ensure that the + value in the Verification Tag field of the received SCTP packet + matches its own Tag. If the received Verification Tag value does not + match the receiver's own tag value, the receiver shall silently + discard the packet and shall not process it any further except for + those cases listed in Section 8.5.1 below. + + + + + + + + + +Stewart, et al. Standards Track [Page 96] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +8.5.1 Exceptions in Verification Tag Rules + + A) Rules for packet carrying INIT: + + - The sender MUST set the Verification Tag of the packet to 0. 
+ + - When an endpoint receives an SCTP packet with the Verification + Tag set to 0, it should verify that the packet contains only an + INIT chunk. Otherwise, the receiver MUST silently discard the + packet. + + B) Rules for packet carrying ABORT: + + - The endpoint shall always fill in the Verification Tag field of + the outbound packet with the destination endpoint's tag value + if it is known. + + - If the ABORT is sent in response to an OOTB packet, the + endpoint MUST follow the procedure described in Section 8.4. + + - The receiver MUST accept the packet if the Verification Tag + matches either its own tag, OR the tag of its peer. Otherwise, + the receiver MUST silently discard the packet and take no + further action. + + C) Rules for packet carrying SHUTDOWN COMPLETE: + + - When sending a SHUTDOWN COMPLETE, if the receiver of the + SHUTDOWN ACK has a TCB then the destination endpoint's tag MUST + be used. Only where no TCB exists should the sender use the + Verification Tag from the SHUTDOWN ACK. + + - The receiver of a SHUTDOWN COMPLETE shall accept the packet if + the Verification Tag field of the packet matches its own tag OR + it is set to its peer's tag and the T bit is set in the Chunk + Flags. Otherwise, the receiver MUST silently discard the packet + and take no further action. An endpoint MUST ignore the + SHUTDOWN COMPLETE if it is not in the SHUTDOWN-ACK-SENT state. + + D) Rules for packet carrying a COOKIE ECHO + + - When sending a COOKIE ECHO, the endpoint MUST use the value of + the Initial Tag received in the INIT ACK. + + - The receiver of a COOKIE ECHO follows the procedures in Section + 5. + + + + + +Stewart, et al. Standards Track [Page 97] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + E) Rules for packet carrying a SHUTDOWN ACK + + - If the receiver is in COOKIE-ECHOED or COOKIE-WAIT state the + procedures in section 8.4 SHOULD be followed, in other words it + should be treated as an Out Of The Blue packet. + +9. 
Termination of Association + + An endpoint should terminate its association when it exits from + service. An association can be terminated by either abort or + shutdown. An abort of an association is abortive by definition in + that any data pending on either end of the association is discarded + and not delivered to the peer. A shutdown of an association is + considered a graceful close where all data in queue by either + endpoint is delivered to the respective peers. However, in the case + of a shutdown, SCTP does not support a half-open state (like TCP) + wherein one side may continue sending data while the other end is + closed. When either endpoint performs a shutdown, the association on + each peer will stop accepting new data from its user and only deliver + data in queue at the time of sending or receiving the SHUTDOWN chunk. + +9.1 Abort of an Association + + When an endpoint decides to abort an existing association, it shall + send an ABORT chunk to its peer endpoint. The sender MUST fill in + the peer's Verification Tag in the outbound packet and MUST NOT + bundle any DATA chunk with the ABORT. + + An endpoint MUST NOT respond to any received packet that contains an + ABORT chunk (also see Section 8.4). + + An endpoint receiving an ABORT shall apply the special Verification + Tag check rules described in Section 8.5.1. + + After checking the Verification Tag, the receiving endpoint shall + remove the association from its record, and shall report the + termination to its upper layer. + +9.2 Shutdown of an Association + + Using the SHUTDOWN primitive (see Section 10.1), the upper layer of + an endpoint in an association can gracefully close the association. + This will allow all outstanding DATA chunks from the peer of the + shutdown initiator to be delivered before the association terminates. 
+ + Upon receipt of the SHUTDOWN primitive from its upper layer, the + endpoint enters SHUTDOWN-PENDING state and remains there until all + outstanding data has been acknowledged by its peer. The endpoint + + + +Stewart, et al. Standards Track [Page 98] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + accepts no new data from its upper layer, but retransmits data to the + far end if necessary to fill gaps. + + Once all its outstanding data has been acknowledged, the endpoint + shall send a SHUTDOWN chunk to its peer including in the Cumulative + TSN Ack field the last sequential TSN it has received from the peer. + It shall then start the T2-shutdown timer and enter the SHUTDOWN-SENT + state. If the timer expires, the endpoint must re-send the SHUTDOWN + with the updated last sequential TSN received from its peer. + + The rules in Section 6.3 MUST be followed to determine the proper + timer value for T2-shutdown. To indicate any gaps in TSN, the + endpoint may also bundle a SACK with the SHUTDOWN chunk in the same + SCTP packet. + + An endpoint should limit the number of retransmissions of the + SHUTDOWN chunk to the protocol parameter 'Association.Max.Retrans'. + If this threshold is exceeded the endpoint should destroy the TCB and + MUST report the peer endpoint unreachable to the upper layer (and + thus the association enters the CLOSED state). The reception of any + packet from its peer (i.e. as the peer sends all of its queued DATA + chunks) should clear the endpoint's retransmission count and restart + the T2-Shutdown timer, giving its peer ample opportunity to transmit + all of its queued DATA chunks that have not yet been sent. + + Upon the reception of the SHUTDOWN, the peer endpoint shall + + - enter the SHUTDOWN-RECEIVED state, + + - stop accepting new data from its SCTP user + + - verify, by checking the Cumulative TSN Ack field of the chunk, + that all its outstanding DATA chunks have been received by the + SHUTDOWN sender. 
+ + Once an endpoint has reached the SHUTDOWN-RECEIVED state it MUST NOT + send a SHUTDOWN in response to a ULP request, and should discard + subsequent SHUTDOWN chunks. + + If there are still outstanding DATA chunks left, the SHUTDOWN + receiver shall continue to follow normal data transmission procedures + defined in Section 6 until all outstanding DATA chunks are + acknowledged; however, the SHUTDOWN receiver MUST NOT accept new data + from its SCTP user. + + While in SHUTDOWN-SENT state, the SHUTDOWN sender MUST immediately + respond to each received packet containing one or more DATA chunk(s) + with a SACK, a SHUTDOWN chunk, and restart the T2-shutdown timer. If + + + +Stewart, et al. Standards Track [Page 99] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + it has no more outstanding DATA chunks, the SHUTDOWN receiver shall + send a SHUTDOWN ACK and start a T2-shutdown timer of its own, + entering the SHUTDOWN-ACK-SENT state. If the timer expires, the + endpoint must re-send the SHUTDOWN ACK. + + The sender of the SHUTDOWN ACK should limit the number of + retransmissions of the SHUTDOWN ACK chunk to the protocol parameter + 'Association.Max.Retrans'. If this threshold is exceeded the endpoint + should destroy the TCB and may report the peer endpoint unreachable + to the upper layer (and thus the association enters the CLOSED + state). + + Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall stop + the T2-shutdown timer, send a SHUTDOWN COMPLETE chunk to its peer, + and remove all record of the association. + + Upon reception of the SHUTDOWN COMPLETE chunk the endpoint will + verify that it is in SHUTDOWN-ACK-SENT state, if it is not the chunk + should be discarded. If the endpoint is in the SHUTDOWN-ACK-SENT + state the endpoint should stop the T2-shutdown timer and remove all + knowledge of the association (and thus the association enters the + CLOSED state).
+ + An endpoint SHOULD assure that all its outstanding DATA chunks have + been acknowledged before initiating the shutdown procedure. + + An endpoint should reject any new data request from its upper layer + if it is in SHUTDOWN-PENDING, SHUTDOWN-SENT, SHUTDOWN-RECEIVED, or + SHUTDOWN-ACK-SENT state. + + If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT + chunk (e.g., if the SHUTDOWN COMPLETE was lost) with source and + destination transport addresses (either in the IP addresses or in the + INIT chunk) that belong to this association, it should discard the + INIT chunk and retransmit the SHUTDOWN ACK chunk. + + Note: Receipt of an INIT with the same source and destination IP + addresses as used in transport addresses assigned to an endpoint but + with a different port number indicates the initialization of a + separate association. + + The sender of the INIT or COOKIE ECHO should respond to the receipt + of a SHUTDOWN-ACK with a stand-alone SHUTDOWN COMPLETE in an SCTP + packet with the Verification Tag field of its common header set to + the same tag that was received in the SHUTDOWN ACK packet. This is + considered an Out of the Blue packet as defined in Section 8.4. The + sender of the INIT lets T1-init continue running and remains in the + + + + +Stewart, et al. Standards Track [Page 100] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + COOKIE-WAIT or COOKIE-ECHOED state. Normal T1-init timer expiration + will cause the INIT or COOKIE chunk to be retransmitted and thus + start a new association. + + If a SHUTDOWN is received in COOKIE WAIT or COOKIE ECHOED states the + SHUTDOWN chunk SHOULD be silently discarded. + + If an endpoint is in SHUTDOWN-SENT state and receives a SHUTDOWN + chunk from its peer, the endpoint shall respond immediately with a + SHUTDOWN ACK to its peer, and move into a SHUTDOWN-ACK-SENT state + restarting its T2-shutdown timer. 
+ + If an endpoint is in the SHUTDOWN-ACK-SENT state and receives a + SHUTDOWN ACK, it shall stop the T2-shutdown timer, send a SHUTDOWN + COMPLETE chunk to its peer, and remove all record of the association. + +10. Interface with Upper Layer + + The Upper Layer Protocols (ULP) shall request for services by passing + primitives to SCTP and shall receive notifications from SCTP for + various events. + + The primitives and notifications described in this section should be + used as a guideline for implementing SCTP. The following functional + description of ULP interface primitives is shown for illustrative + purposes. Different SCTP implementations may have different ULP + interfaces. However, all SCTPs must provide a certain minimum set of + services to guarantee that all SCTP implementations can support the + same protocol hierarchy. + +10.1 ULP-to-SCTP + + The following sections functionally characterize a ULP/SCTP + interface. The notation used is similar to most procedure or + function calls in high level languages. + + The ULP primitives described below specify the basic functions the + SCTP must perform to support inter-process communication. Individual + implementations must define their own exact format, and may provide + combinations or subsets of the basic functions in single calls. + + A) Initialize + + Format: INITIALIZE ([local port], [local eligible address list]) -> + local SCTP instance name + + + + + + +Stewart, et al. Standards Track [Page 101] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + This primitive allows SCTP to initialize its internal data structures + and allocate necessary resources for setting up its operation + environment. Once SCTP is initialized, ULP can communicate directly + with other endpoints without re-invoking this primitive. + + SCTP will return a local SCTP instance name to the ULP. + + Mandatory attributes: + + None. 
+ + Optional attributes: + + The following types of attributes may be passed along with the + primitive: + + o local port - SCTP port number, if ULP wants it to be specified; + + o local eligible address list - An address list that the local SCTP + endpoint should bind. By default, if an address list is not + included, all IP addresses assigned to the host should be used by + the local endpoint. + + IMPLEMENTATION NOTE: If this optional attribute is supported by an + implementation, it will be the responsibility of the implementation + to enforce that the IP source address field of any SCTP packets sent + out by this endpoint contains one of the IP addresses indicated in + the local eligible address list. + + B) Associate + + Format: ASSOCIATE(local SCTP instance name, destination transport addr, + outbound stream count) + -> association id [,destination transport addr list] [,outbound stream + count] + + This primitive allows the upper layer to initiate an association to a + specific peer endpoint. + + The peer endpoint shall be specified by one of the transport + addresses which defines the endpoint (see Section 1.4). If the local + SCTP instance has not been initialized, the ASSOCIATE is considered + an error. + + An association id, which is a local handle to the SCTP association, + will be returned on successful establishment of the association. If + SCTP is not able to open an SCTP association with the peer endpoint, + an error is returned. + + + +Stewart, et al. Standards Track [Page 102] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Other association parameters may be returned, including the complete + destination transport addresses of the peer as well as the outbound + stream count of the local endpoint. One of the transport addresses + from the returned destination addresses will be selected by the local + endpoint as default primary path for sending SCTP packets to this + peer.
The returned "destination transport addr list" can be used by + the ULP to change the default primary path or to force sending a + packet to a specific transport address. + + IMPLEMENTATION NOTE: If ASSOCIATE primitive is implemented as a + blocking function call, the ASSOCIATE primitive can return + association parameters in addition to the association id upon + successful establishment. If ASSOCIATE primitive is implemented as a + non-blocking call, only the association id shall be returned and + association parameters shall be passed using the COMMUNICATION UP + notification. + + Mandatory attributes: + + o local SCTP instance name - obtained from the INITIALIZE operation. + + o destination transport addr - specified as one of the transport + addresses of the peer endpoint with which the association is to be + established. + + o outbound stream count - the number of outbound streams the ULP + would like to open towards this peer endpoint. + + Optional attributes: + + None. + + C) Shutdown + + Format: SHUTDOWN(association id) + -> result + + Gracefully closes an association. Any locally queued user data will + be delivered to the peer. The association will be terminated only + after the peer acknowledges all the SCTP packets sent. A success + code will be returned on successful termination of the association. + If attempting to terminate the association results in a failure, an + error code shall be returned. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + + + +Stewart, et al. Standards Track [Page 103] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Optional attributes: + + None. + + D) Abort + + Format: ABORT(association id [, cause code]) + -> result + + Ungracefully closes an association. Any locally queued user data + will be discarded and an ABORT chunk is sent to the peer. A success + code will be returned on successful abortion of the association. 
If + attempting to abort the association results in a failure, an error + code shall be returned. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + Optional attributes: + + o cause code - reason of the abort to be passed to the peer. + + E) Send + + Format: SEND(association id, buffer address, byte count [,context] + [,stream id] [,life time] [,destination transport address] + [,unorder flag] [,no-bundle flag] [,payload protocol-id] ) + -> result + + This is the main method to send user data via SCTP. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o buffer address - the location where the user message to be + transmitted is stored; + + o byte count - The size of the user data in number of bytes; + + Optional attributes: + + o context - an optional 32 bit integer that will be carried in the + sending failure notification to the ULP if the transportation of + this User Message fails. + + + +Stewart, et al. Standards Track [Page 104] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o stream id - to indicate which stream to send the data on. If not + specified, stream 0 will be used. + + o life time - specifies the life time of the user data. The user + data will not be sent by SCTP after the life time expires. This + parameter can be used to avoid efforts to transmit stale user + messages. SCTP notifies the ULP if the data cannot be initiated + to transport (i.e. sent to the destination via SCTP's send + primitive) within the life time variable. However, the user data + will be transmitted if SCTP has attempted to transmit a chunk + before the life time expired. + + IMPLEMENTATION NOTE: In order to better support the data lifetime + option, the transmitter may hold back the assigning of the TSN number + to an outbound DATA chunk to the last moment.
And, for + implementation simplicity, once a TSN number has been assigned the + sender should consider the send of this DATA chunk as committed, + overriding any lifetime option attached to the DATA chunk. + + o destination transport address - specified as one of the + destination transport addresses of the peer endpoint to which this + packet should be sent. Whenever possible, SCTP should use this + destination transport address for sending the packets, instead of + the current primary path. + + o unorder flag - this flag, if present, indicates that the user + would like the data delivered in an unordered fashion to the peer + (i.e., the U flag is set to 1 on all DATA chunks carrying this + message). + + o no-bundle flag - instructs SCTP not to bundle this user data with + other outbound DATA chunks. SCTP MAY still bundle even when this + flag is present, when faced with network congestion. + + o payload protocol-id - A 32 bit unsigned integer that is to be + passed to the peer indicating the type of payload protocol data + being transmitted. This value is passed as opaque data by SCTP. + + F) Set Primary + + Format: SETPRIMARY(association id, destination transport address, + [source transport address] ) + -> result + + Instructs the local SCTP to use the specified destination transport + address as primary path for sending packets. + + + + + +Stewart, et al. Standards Track [Page 105] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The result of attempting this operation shall be returned. If the + specified destination transport address is not present in the + "destination transport address list" returned earlier in an associate + command or communication up notification, an error shall be returned. 
+ + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - specified as one of the transport + addresses of the peer endpoint, which should be used as primary + address for sending packets. This overrides the current primary + address information maintained by the local SCTP endpoint. + + Optional attributes: + + o source transport address - optionally, some implementations may + allow you to set the default source address placed in all outgoing + IP datagrams. + + G) Receive + + Format: RECEIVE(association id, buffer address, buffer size + [,stream id]) + -> byte count [,transport address] [,stream id] [,stream sequence + number] [,partial flag] [,delivery number] [,payload protocol-id] + + This primitive shall read the first user message in the SCTP in-queue + into the buffer specified by ULP, if there is one available. The + size of the message read, in bytes, will be returned. It may, + depending on the specific implementation, also return other + information such as the sender's address, the stream id on which it + is received, whether there are more messages available for retrieval, + etc. For ordered messages, their stream sequence number may also be + returned. + + Depending upon the implementation, if this primitive is invoked when + no message is available the implementation should return an + indication of this condition or should block the invoking process + until data does become available. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o buffer address - the memory location indicated by the ULP to store + the received message. + + + + +Stewart, et al. Standards Track [Page 106] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - to indicate which stream to receive the data on. 
+ + o stream sequence number - the stream sequence number assigned by + the sending SCTP peer. + + o partial flag - if this returned flag is set to 1, then this + Receive contains a partial delivery of the whole message. When + this flag is set, the stream id and stream sequence number MUST + accompany this receive. When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - A 32 bit unsigned integer that is received + from the peer indicating the type of payload protocol of the + received data. This value is passed as opaque data by SCTP. + + H) Status + + Format: STATUS(association id) + -> status data + + This primitive should return a data block containing the following + information: + association connection state, + destination transport address list, + destination transport address reachability states, + current receiver window size, + current congestion window sizes, + number of unacknowledged DATA chunks, + number of DATA chunks pending receipt, + primary path, + most recent SRTT on primary path, + RTO on primary path, + SRTT and RTO on other destination addresses, etc. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + Optional attributes: + + None. + + + + + +Stewart, et al. Standards Track [Page 107] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + I) Change Heartbeat + + Format: CHANGEHEARTBEAT(association id, destination transport address, + new state [,interval]) + -> result + + Instructs the local endpoint to enable or disable heartbeat on the + specified destination transport address. + + The result of attempting this operation shall be returned. + + Note: Even when enabled, heartbeat will not take place if the + destination transport address is not idle. 
+ + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - specified as one of the transport + addresses of the peer endpoint. + + o new state - the new state of heartbeat for this destination + transport address (either enabled or disabled). + + Optional attributes: + + o interval - if present, indicates the frequency of the heartbeat if + this is to enable heartbeat on a destination transport address. + This value is added to the RTO of the destination transport + address. This value, if present, affects all destinations. + + J) Request HeartBeat + + Format: REQUESTHEARTBEAT(association id, destination transport + address) + -> result + + Instructs the local endpoint to perform a HeartBeat on the specified + destination transport address of the given association. The returned + result should indicate whether the transmission of the HEARTBEAT + chunk to the destination address is successful. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which a heartbeat should be issued. + + + +Stewart, et al. Standards Track [Page 108] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + K) Get SRTT Report + + Format: GETSRTTREPORT(association id, destination transport address) + -> srtt result + + Instructs the local SCTP to report the current SRTT measurement on + the specified destination transport address of the given association. + The returned result can be an integer containing the most recent SRTT + in milliseconds. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which the SRTT measurement is to be reported.
+ + L) Set Failure Threshold + + Format: SETFAILURETHRESHOLD(association id, destination transport + address, failure threshold) + -> result + + This primitive allows the local SCTP to customize the reachability + failure detection threshold 'Path.Max.Retrans' for the specified + destination address. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which the failure detection threshold is to be set. + + o failure threshold - the new value of 'Path.Max.Retrans' for the + destination address. + + M) Set Protocol Parameters + + Format: SETPROTOCOLPARAMETERS(association id, [,destination transport + address,] protocol parameter list) + -> result + + This primitive allows the local SCTP to customize the protocol + parameters. + + + + + + +Stewart, et al. Standards Track [Page 109] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o protocol parameter list - The specific names and values of the + protocol parameters (e.g., Association.Max.Retrans [see Section + 14]) that the SCTP user wishes to customize. + + Optional attributes: + + o destination transport address - some of the protocol parameters + may be set on a per destination transport address basis. + + N) Receive unsent message + + Format: RECEIVE_UNSENT(data retrieval id, buffer address, buffer size + [,stream id] [, stream sequence number] [,partial flag] + [,payload protocol-id]) + + o data retrieval id - The identification passed to the ULP in the + failure notification. + + o buffer address - the memory location indicated by the ULP to store + the received message. + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - this is a return value that is set to indicate + which stream the data was sent to. 
+ + o stream sequence number - this value is returned indicating + the stream sequence number that was associated with the message. + + o partial flag - if this returned flag is set to 1, then this + message is a partial delivery of the whole message. When + this flag is set, the stream id and stream sequence number MUST + accompany this receive. When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - The 32 bit unsigned integer that was set to + be sent to the peer indicating the type of payload protocol of the + received data. + + + + + + +Stewart, et al. Standards Track [Page 110] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + O) Receive unacknowledged message + + Format: RECEIVE_UNACKED(data retrieval id, buffer address, buffer size + [,stream id] [, stream sequence number] [,partial flag] + [,payload protocol-id]) + + o data retrieval id - The identification passed to the ULP in the + failure notification. + + o buffer address - the memory location indicated by the ULP to store + the received message. + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - this is a return value that is set to indicate which + stream the data was sent to. + + o stream sequence number - this value is returned indicating the + stream sequence number that was associated with the message. + + o partial flag - if this returned flag is set to 1, then this + message is a partial delivery of the whole message. When this + flag is set, the stream id and stream sequence number MUST + accompany this receive. When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - The 32 bit unsigned integer that was set to + be sent to the peer indicating the type of payload protocol of the + received data.
+ + P) Destroy SCTP instance + + Format: DESTROY(local SCTP instance name) + + o local SCTP instance name - this is the value that was passed to + the application in the initialize primitive and it indicates which + SCTP instance is to be destroyed. + +10.2 SCTP-to-ULP + + It is assumed that the operating system or application environment + provides a means for the SCTP to asynchronously signal the ULP + process. When SCTP does signal a ULP process, certain information + is passed to the ULP. + + + + +Stewart, et al. Standards Track [Page 111] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IMPLEMENTATION NOTE: In some cases this may be done through a + separate socket or error channel. + + A) DATA ARRIVE notification + + SCTP shall invoke this notification on the ULP when a user message is + successfully received and ready for retrieval. + + The following may optionally be passed with the notification: + + o association id - local handle to the SCTP association + + o stream id - to indicate which stream the data is received on. + + B) SEND FAILURE notification + + If a message cannot be delivered SCTP shall invoke this notification + on the ULP. + + The following may optionally be passed with the notification: + + o association id - local handle to the SCTP association + + o data retrieval id - an identification used to retrieve unsent and + unacknowledged data. + + o cause code - indicating the reason of the failure, e.g., size too + large, message life-time expiration, etc. + + o context - optional information associated with this message (see E + in Section 10.1). + + C) NETWORK STATUS CHANGE notification + + When a destination transport address is marked inactive (e.g., when + SCTP detects a failure), or marked active (e.g., when SCTP detects a + recovery), SCTP shall invoke this notification on the ULP.
+ + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o destination transport address - This indicates the destination + transport address of the peer endpoint affected by the change; + + o new-status - This indicates the new status. + + + + + +Stewart, et al. Standards Track [Page 112] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + D) COMMUNICATION UP notification + + This notification is used when SCTP becomes ready to send or receive + user messages, or when a lost communication to an endpoint is + restored. + + IMPLEMENTATION NOTE: If ASSOCIATE primitive is implemented as a + blocking function call, the association parameters are returned as a + result of the ASSOCIATE primitive itself. In that case, + COMMUNICATION UP notification is optional at the association + initiator's side. + + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o status - This indicates what type of event has occurred + + o destination transport address list - the complete set of transport + addresses of the peer + + o outbound stream count - the maximum number of streams allowed to + be used in this association by the ULP + + o inbound stream count - the number of streams the peer endpoint has + requested with this association (this may not be the same number + as 'outbound stream count'). + + E) COMMUNICATION LOST notification + + When SCTP loses communication to an endpoint completely (e.g., via + Heartbeats) or detects that the endpoint has performed an abort + operation, it shall invoke this notification on the ULP. + + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o status - This indicates what type of event has occurred; The status + may indicate a failure OR a normal termination event + occurred in response to a shutdown or abort request. 
+ + The following may be passed with the notification: + + o data retrieval id - an identification used to retrieve unsent and + unacknowledged data. + + o last-acked - the TSN last acked by that peer endpoint; + + + +Stewart, et al. Standards Track [Page 113] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o last-sent - the TSN last sent to that peer endpoint; + + F) COMMUNICATION ERROR notification + + When SCTP receives an ERROR chunk from its peer and decides to notify + its ULP, it can invoke this notification on the ULP. + + The following can be passed with the notification: + + o association id - local handle to the SCTP association + + o error info - this indicates the type of error and optionally some + additional information received through the ERROR chunk. + + G) RESTART notification + + When SCTP detects that the peer has restarted, it may send this + notification to its ULP. + + The following can be passed with the notification: + + o association id - local handle to the SCTP association + + H) SHUTDOWN COMPLETE notification + + When SCTP completes the shutdown procedures (section 9.2) this + notification is passed to the upper layer. + + The following can be passed with the notification: + + o association id - local handle to the SCTP association + +11. Security Considerations + +11.1 Security Objectives + + As a common transport protocol designed to reliably carry time- + sensitive user messages, such as billing or signaling messages for + telephony services, between two networked endpoints, SCTP has the + following security objectives. + + - availability of reliable and timely data transport services + - integrity of the user-to-user information carried by SCTP + + + + + + + + +Stewart, et al. Standards Track [Page 114] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +11.2 SCTP Responses To Potential Threats + + SCTP may potentially be used in a wide variety of risk situations. 
+ It is important for operator(s) of systems running SCTP to analyze + their particular situations and decide on the appropriate counter- + measures. + + Operators of systems running SCTP should consult [RFC2196] for + guidance in securing their site. + +11.2.1 Countering Insider Attacks + + The principles of [RFC2196] should be applied to minimize the risk of + theft of information or sabotage by insiders. Such procedures + include publication of security policies, control of access at the + physical, software, and network levels, and separation of services. + +11.2.2 Protecting against Data Corruption in the Network + + Where the risk of undetected errors in datagrams delivered by the + lower layer transport services is considered to be too great, + additional integrity protection is required. If this additional + protection were provided in the application-layer, the SCTP header + would remain vulnerable to deliberate integrity attacks. While the + existing SCTP mechanisms for detection of packet replays are + considered sufficient for normal operation, stronger protections are + needed to protect SCTP when the operating environment contains + significant risk of deliberate attacks from a sophisticated + adversary. + + In order to promote software code-reuse, to avoid re-inventing the + wheel, and to avoid gratuitous complexity to SCTP, the IP + Authentication Header [RFC2402] SHOULD be used when the threat + environment requires stronger integrity protections, but does not + require confidentiality. + + A widely implemented BSD Sockets API extension exists for + applications to request IP security services, such as AH or ESP from + an operating system kernel. Applications can use such an API to + request AH whenever AH use is appropriate. + +11.2.3 Protecting Confidentiality + + In most cases, the risk of breach of confidentiality applies to the + signaling data payload, not to the SCTP or lower-layer protocol + overheads. 
If that is true, encryption of the SCTP user data only + might be considered. As with the supplementary checksum service, + user data encryption MAY be performed by the SCTP user application. + + + +Stewart, et al. Standards Track [Page 115] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Alternately, the user application MAY use an implementation-specific + API to request that the IP Encapsulating Security Payload (ESP) + [RFC2406] be used to provide confidentiality and integrity. + + Particularly for mobile users, the requirement for confidentiality + might include the masking of IP addresses and ports. In this case + ESP SHOULD be used instead of application-level confidentiality. If + ESP is used to protect confidentiality of SCTP traffic, an ESP + cryptographic transform that includes cryptographic integrity + protection MUST be used, because if there is a confidentiality threat + there will also be a strong integrity threat. + + Whenever ESP is in use, application-level encryption is not generally + required. + + Regardless of where confidentiality is provided, the ISAKMP [RFC2408] + and the Internet Key Exchange (IKE) [RFC2409] SHOULD be used for key + management. + + Operators should consult [RFC2401] for more information on the + security services available at and immediately above the Internet + Protocol layer. + +11.2.4 Protecting against Blind Denial of Service Attacks + + A blind attack is one where the attacker is unable to intercept or + otherwise see the content of data flows passing to and from the + target SCTP node. Blind denial of service attacks may take the form + of flooding, masquerade, or improper monopolization of services. + +11.2.4.1 Flooding + + The objective of flooding is to cause loss of service and incorrect + behavior at target systems through resource exhaustion, interference + with legitimate transactions, and exploitation of buffer-related + software bugs. 
Flooding may be directed either at the SCTP node or + at resources in the intervening IP Access Links or the Internet. + Where the latter entities are the target, flooding will manifest + itself as loss of network services, including potentially the breach + of any firewalls in place. + + In general, protection against flooding begins at the equipment + design level, where it includes measures such as: + + - avoiding commitment of limited resources before determining that + the request for service is legitimate + + + + + +Stewart, et al. Standards Track [Page 116] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + - giving priority to completion of processing in progress over the + acceptance of new work + + - identification and removal of duplicate or stale queued requests + for service. + + - not responding to unexpected packets sent to non-unicast + addresses. + + Network equipment should be capable of generating an alarm and log if + a suspicious increase in traffic occurs. The log should provide + information such as the identity of the incoming link and source + address(es) used which will help the network or SCTP system operator + to take protective measures. Procedures should be in place for the + operator to act on such alarms if a clear pattern of abuse emerges. + + The design of SCTP is resistant to flooding attacks, particularly in + its use of a four-way start-up handshake, its use of a cookie to + defer commitment of resources at the responding SCTP node until the + handshake is completed, and its use of a Verification Tag to prevent + insertion of extraneous packets into the flow of an established + association. + + The IP Authentication Header and Encapsulating Security Payload might + be useful in reducing the risk of certain kinds of denial of service + attacks. + + The Host Name feature in the INIT chunk could be used to + flood a target DNS server.
A large backlog of DNS queries, resolving + the Host Name received in the INIT chunk to IP addresses, could be + accomplished by sending INIT's to multiple hosts in a given domain. + In addition, an attacker could use the Host Name feature in an + indirect attack on a third party by sending large numbers of INITs to + random hosts containing the host name of the target. In addition to + the strain on DNS resources, this could also result in large numbers + of INIT ACKs being sent to the target. One method to protect against + this type of attack is to verify that the IP addresses received from + DNS include the source IP address of the original INIT. If the list + of IP addresses received from DNS does not include the source IP + address of the INIT, the endpoint MAY silently discard the INIT. + This last option will not protect against the attack against the DNS. + + + + + + + + + + +Stewart, et al. Standards Track [Page 117] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +11.2.4.2 Blind Masquerade + + Masquerade can be used to deny service in several ways: + + - by tying up resources at the target SCTP node to which the + impersonated node has limited access. For example, the target + node may by policy permit a maximum of one SCTP association with + the impersonated SCTP node. The masquerading attacker may attempt + to establish an association purporting to come from the + impersonated node so that the latter cannot do so when it requires + it. + + - by deliberately allowing the impersonation to be detected, thereby + provoking counter-measures which cause the impersonated node to be + locked out of the target SCTP node. + + - by interfering with an established association by inserting + extraneous content such as a SHUTDOWN request. + + SCTP reduces the risk of blind masquerade attacks through IP spoofing + by use of the four-way startup handshake. Man-in-the-middle + masquerade attacks are discussed in Section 11.3 below. 
Because the + initial exchange is memoryless, no lockout mechanism is triggered by + blind masquerade attacks. In addition, the INIT ACK containing the + State Cookie is transmitted back to the IP address from which it + received the INIT. Thus the attacker would not receive the INIT ACK + containing the State Cookie. SCTP protects against insertion of + extraneous packets into the flow of an established association by use + of the Verification Tag. + + Logging of received INIT requests and abnormalities such as + unexpected INIT ACKs might be considered as a way to detect patterns + of hostile activity. However, the potential usefulness of such + logging must be weighed against the increased SCTP startup processing + it implies, rendering the SCTP node more vulnerable to flooding + attacks. Logging is pointless without the establishment of operating + procedures to review and analyze the logs on a routine basis. + +11.2.4.3 Improper Monopolization of Services + + Attacks under this heading are performed openly and legitimately by + the attacker. They are directed against fellow users of the target + SCTP node or of the shared resources between the attacker and the + target node. Possible attacks include the opening of a large number + of associations between the attacker's node and the target, or + transfer of large volumes of information within a legitimately- + established association. + + + + +Stewart, et al. Standards Track [Page 118] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Policy limits should be placed on the number of associations per + adjoining SCTP node. SCTP user applications should be capable of + detecting large volumes of illegitimate or "no-op" messages within a + given association and either logging or terminating the association + as a result, based on local policy. 
+ +11.3 Protection against Fraud and Repudiation + + The objective of fraud is to obtain services without authorization + and specifically without paying for them. In order to achieve this + objective, the attacker must induce the SCTP user application at the + target SCTP node to provide the desired service while accepting + invalid billing data or failing to collect it. Repudiation is a + related problem, since it may occur as a deliberate act of fraud or + simply because the repudiating party kept inadequate records of + service received. + + Potential fraudulent attacks include interception and misuse of + authorizing information such as credit card numbers, blind masquerade + and replay, and man-in-the-middle attacks which modify the packets + passing through a target SCTP association in real time. + + The interception attack is countered by the confidentiality measures + discussed in Section 11.2.3 above. + + Section 11.2.4.2 describes how SCTP is resistant to blind masquerade + attacks, as a result of the four-way startup handshake and the + Verification Tag. The Verification Tag and TSN together are + protections against blind replay attacks, where the replay is into an + existing association. + + However, SCTP does not protect against man-in-the-middle attacks + where the attacker is able to intercept and alter the packets sent + and received in an association. For example, the INIT ACK will have + sufficient information sent on the wire for an adversary in the + middle to hijack an existing SCTP association. Where a significant + possibility of such attacks is seen to exist, or where possible + repudiation is an issue, the use of the IPSEC AH service is + recommended to ensure both the integrity and the authenticity of the + SCTP packets passed. + + SCTP also provides no protection against attacks originating at or + beyond the SCTP node and taking place within the context of an + existing association. 
Prevention of such attacks should be covered + by appropriate security policies at the host site, as discussed in + Section 11.2.1. + + + + + +Stewart, et al. Standards Track [Page 119] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +12. Recommended Transmission Control Block (TCB) Parameters + + This section details a recommended set of parameters that should be + contained within the TCB for an implementation. This section is for + illustrative purposes and should not be deemed as requirements on an + implementation or as an exhaustive list of all parameters inside an + SCTP TCB. Each implementation may need its own additional parameters + for optimization. + +12.1 Parameters necessary for the SCTP instance + + Associations: A list of current associations and mappings to the data + consumers for each association. This may be in the + form of a hash table or other implementation dependent + structure. The data consumers may be process + identification information such as file descriptors, + named pipe pointer, or table pointers dependent on how + SCTP is implemented. + + Secret Key: A secret key used by this endpoint to compute the MAC. + This SHOULD be a cryptographic quality random number + with a sufficient length. Discussion in [RFC1750] can + be helpful in selection of the key. + + Address List: The list of IP addresses that this instance has bound. + This information is passed to one's peer(s) in INIT and + INIT ACK chunks. + + SCTP Port: The local SCTP port number the endpoint is bound to. + +12.2 Parameters necessary per association (i.e. the TCB) + + Peer : Tag value to be sent in every packet and is received + Verification: in the INIT or INIT ACK chunk. + Tag : + + My : Tag expected in every inbound packet and sent in the + Verification: INIT or INIT ACK chunk. + Tag : + + State : A state variable indicating what state the association + : is in, i.e. 
COOKIE-WAIT, COOKIE-ECHOED, ESTABLISHED, + : SHUTDOWN-PENDING, SHUTDOWN-SENT, SHUTDOWN-RECEIVED, + : SHUTDOWN-ACK-SENT. + + Note: No "CLOSED" state is illustrated since if an + association is "CLOSED" its TCB SHOULD be removed. + + + + +Stewart, et al. Standards Track [Page 120] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Peer : A list of SCTP transport addresses that the peer is + Transport : bound to. This information is derived from the INIT or + Address : INIT ACK and is used to associate an inbound packet + List : with a given association. Normally this information is + : hashed or keyed for quick lookup and access of the TCB. + + Primary : This is the current primary destination transport + Path : address of the peer endpoint. It may also specify a + : source transport address on this endpoint. + + Overall : The overall association error count. + Error Count : + + Overall : The threshold for this association that if the Overall + Error : Error Count reaches will cause this association to be + Threshold : torn down. + + Peer Rwnd : Current calculated value of the peer's rwnd. + + Next TSN : The next TSN number to be assigned to a new DATA chunk. + : This is sent in the INIT or INIT ACK chunk to the peer + : and incremented each time a DATA chunk is assigned a + : TSN (normally just prior to transmit or during + : fragmentation). + + Last Rcvd : This is the last TSN received in sequence. This value + TSN : is set initially by taking the peer's Initial TSN, + : received in the INIT or INIT ACK chunk, and + : subtracting one from it. + + Mapping : An array of bits or bytes indicating which out of + Array : order TSN's have been received (relative to the + : Last Rcvd TSN). If no gaps exist, i.e. no out of order + : packets have been received, this array will be set to + : all zero. This structure may be in the form of a + : circular buffer or bit array. 
+ + Ack State : This flag indicates if the next received packet + : is to be responded to with a SACK. This is initialized + : to 0. When a packet is received it is incremented. + : If this value reaches 2 or more, a SACK is sent and the + : value is reset to 0. Note: This is used only when no + : DATA chunks are received out of order. When DATA chunks + : are out of order, SACK's are not delayed (see Section + : 6). + + + + + + +Stewart, et al. Standards Track [Page 121] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Inbound : An array of structures to track the inbound streams. + Streams : Normally including the next sequence number expected + : and possibly the stream number. + + Outbound : An array of structures to track the outbound streams. + Streams : Normally including the next sequence number to + : be sent on the stream. + + Reasm Queue : A re-assembly queue. + + Local : The list of local IP addresses bound in to this + Transport : association. + Address : + List : + + Association : The smallest PMTU discovered for all of the + PMTU : peer's transport addresses. + +12.3 Per Transport Address Data + + For each destination transport address in the peer's address list + derived from the INIT or INIT ACK chunk, a number of data elements + needs to be maintained including: + + Error count : The current error count for this destination. + + Error : Current error threshold for this destination i.e. + Threshold : what value marks the destination down if Error count + : reaches this value. + + cwnd : The current congestion window. + + ssthresh : The current ssthresh value. + + RTO : The current retransmission timeout value. + + SRTT : The current smoothed round trip time. + + RTTVAR : The current RTT variation. + + partial : The tracking method for increase of cwnd when in + bytes acked : congestion avoidance mode (see Section 6.2.2) + + state : The current state of this destination, i.e. DOWN, UP, + : ALLOW-HB, NO-HEARTBEAT, etc. 
+ + PMTU : The current known path MTU. + + + + +Stewart, et al. Standards Track [Page 122] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Per : A timer used by each destination. + Destination : + Timer : + + RTO-Pending : A flag used to track if one of the DATA chunks sent to + this address is currently being used to compute a + RTT. If this flag is 0, the next DATA chunk sent to this + destination should be used to compute a RTT and this + flag should be set. Every time the RTT calculation + completes (i.e. the DATA chunk is SACK'd) clear this + flag. + + last-time : The time this destination was last sent to. This can be + used : used to determine if a HEARTBEAT is needed. + +12.4 General Parameters Needed + + Out Queue : A queue of outbound DATA chunks. + + In Queue : A queue of inbound DATA chunks. + +13. IANA Considerations + + This protocol will require port reservation like TCP for the use of + "well known" servers within the Internet. All current TCP ports + shall be automatically reserved in the SCTP port address space. New + requests should follow IANA's current mechanisms for TCP. + + This protocol may also be extended through IANA in three ways: + + -- through definition of additional chunk types, + -- through definition of additional parameter types, or + -- through definition of additional cause codes within + ERROR chunks + + In the case where a particular ULP using SCTP desires to have its own + ports, the ULP should be responsible for registering with IANA for + getting its ports assigned. + +13.1 IETF-defined Chunk Extension + + The definition and use of new chunk types is an integral part of + SCTP. Thus, new chunk types are assigned by IANA through an IETF + Consensus action as defined in [RFC2434]. + + The documentation for a new chunk code type must include the + following information: + + + + +Stewart, et al. 
Standards Track [Page 123] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + a) A long and short name for the new chunk type; + + b) A detailed description of the structure of the chunk, which MUST + conform to the basic structure defined in Section 3.2; + + c) A detailed definition and description of intended use of each + field within the chunk, including the chunk flags if any; + + d) A detailed procedural description of the use of the new chunk type + within the operation of the protocol. + + The last chunk type (255) is reserved for future extension if + necessary. + +13.2 IETF-defined Chunk Parameter Extension + + The assignment of new chunk parameter type codes is done through an + IETF Consensus action as defined in [RFC2434]. Documentation of the + chunk parameter MUST contain the following information: + + a) Name of the parameter type. + + b) Detailed description of the structure of the parameter field. + This structure MUST conform to the general type-length-value + format described in Section 3.2.1. + + c) Detailed definition of each component of the parameter value. + + d) Detailed description of the intended use of this parameter type, + and an indication of whether and under what circumstances multiple + instances of this parameter type may be found within the same + chunk. + +13.3 IETF-defined Additional Error Causes + + Additional cause codes may be allocated in the range 11 to 65535 + through a Specification Required action as defined in [RFC2434]. + Provided documentation must include the following information: + + a) Name of the error condition. + + b) Detailed description of the conditions under which an SCTP + endpoint should issue an ERROR (or ABORT) with this cause code. + + c) Expected action by the SCTP endpoint which receives an ERROR (or + ABORT) chunk containing this cause code. + + + + + +Stewart, et al. 
Standards Track [Page 124] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + d) Detailed description of the structure and content of data fields + which accompany this cause code. + + The initial word (32 bits) of a cause code parameter MUST conform to + the format shown in Section 3.3.10, i.e.: + + -- first two bytes contain the cause code value + -- last two bytes contain length of the Cause Parameter. + +13.4 Payload Protocol Identifiers + + Except for value 0 which is reserved by SCTP to indicate an + unspecified payload protocol identifier in a DATA chunk, SCTP will + not be responsible for standardizing or verifying any payload + protocol identifiers; SCTP simply receives the identifier from the + upper layer and carries it with the corresponding payload data. + + The upper layer, i.e., the SCTP user, SHOULD standardize any specific + protocol identifier with IANA if it is so desired. The use of any + specific payload protocol identifier is out of the scope of SCTP. + +14. Suggested SCTP Protocol Parameter Values + + The following protocol parameters are RECOMMENDED: + + RTO.Initial - 3 seconds + RTO.Min - 1 second + RTO.Max - 60 seconds + RTO.Alpha - 1/8 + RTO.Beta - 1/4 + Valid.Cookie.Life - 60 seconds + Association.Max.Retrans - 10 attempts + Path.Max.Retrans - 5 attempts (per destination address) + Max.Init.Retransmits - 8 attempts + HB.interval - 30 seconds + + IMPLEMENTATION NOTE: The SCTP implementation may allow ULP to + customize some of these protocol parameters (see Section 10). + + Note: RTO.Min SHOULD be set as recommended above. + + + + + + + + + + + +Stewart, et al. Standards Track [Page 125] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +15. Acknowledgements + + The authors wish to thank Mark Allman, R.J. Atkinson, Richard Band, + Scott Bradner, Steve Bellovin, Peter Butler, Ram Dantu, R. 
+ Ezhirpavai, Mike Fisk, Sally Floyd, Atsushi Fukumoto, Matt Holdrege, + Henry Houh, Christian Huitema, Gary Lehecka, Jonathan Lee, David + Lehmann, John Loughney, Daniel Luan, Barry Nagelberg, Thomas Narten, + Erik Nordmark, Lyndon Ong, Shyamal Prasad, Kelvin Porter, Heinz + Prantner, Jarno Rajahalme, Raymond E. Reeves, Renee Revis, Ivan Arias + Rodriguez, A. Sankar, Greg Sidebottom, Brian Wyld, La Monte Yarroll, + and many others for their invaluable comments. + +16. Authors' Addresses + + Randall R. Stewart + 24 Burning Bush Trail. + Crystal Lake, IL 60012 + USA + + Phone: +1-815-477-2127 + EMail: rrs@cisco.com + + + Qiaobing Xie + Motorola, Inc. + 1501 W. Shure Drive, #2309 + Arlington Heights, IL 60004 + USA + + Phone: +1-847-632-3028 + EMail: qxie1@email.mot.com + + + Ken Morneault + Cisco Systems Inc. + 13615 Dulles Technology Drive + Herndon, VA. 20171 + USA + + Phone: +1-703-484-3323 + EMail: kmorneau@cisco.com + + + + + + + + + + +Stewart, et al. Standards Track [Page 126] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chip Sharp + Cisco Systems Inc. + 7025 Kit Creek Road + Research Triangle Park, NC 27709 + USA + + Phone: +1-919-392-3121 + EMail: chsharp@cisco.com + + + Hanns Juergen Schwarzbauer + SIEMENS AG + Hofmannstr. 51 + 81359 Munich + Germany + + Phone: +49-89-722-24236 + EMail: HannsJuergen.Schwarzbauer@icn.siemens.de + + + Tom Taylor + Nortel Networks + 1852 Lorraine Ave. + Ottawa, Ontario + Canada K1H 6Z8 + + Phone: +1-613-736-0961 + EMail: taylor@nortelnetworks.com + + + Ian Rytina + Ericsson Australia + 37/360 Elizabeth Street + Melbourne, Victoria 3000 + Australia + + Phone: +61-3-9301-6164 + EMail: ian.rytina@ericsson.com + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 127] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Malleswar Kalla + Telcordia Technologies + 3 Corporate Place + PYA-2J-341 + Piscataway, NJ 08854 + USA + + Phone: +1-732-699-3728 + EMail: mkalla@telcordia.com + + Lixia Zhang + UCLA Computer Science Department + 4531G Boelter Hall + Los Angeles, CA 90095-1596 + USA + + Phone: +1-310-825-2695 + EMail: lixia@cs.ucla.edu + + Vern Paxson + ACIRI + 1947 Center St., Suite 600, + Berkeley, CA 94704-1198 + USA + + Phone: +1-510-666-2882 + EMail: vern@aciri.org + +17. References + + [RFC768] Postel, J. (ed.), "User Datagram Protocol", STD 6, RFC + 768, August 1980. + + [RFC793] Postel, J. (ed.), "Transmission Control Protocol", STD 7, + RFC 793, September 1981. + + [RFC1123] Braden, R., "Requirements for Internet hosts - application + and support", STD 3, RFC 1123, October 1989. + + [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, + November 1990. + + [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC + 1700, October 1994. + + [RFC1981] McCann, J., Deering, S. and J. Mogul, "Path MTU Discovery + for IP version 6", RFC 1981, August 1996. + + + + +Stewart, et al. Standards Track [Page 128] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + [RFC1982] Elz, R. and R. Bush, "Serial Number Arithmetic", RFC 1982, + August 1996. + + [RFC2026] Bradner, S., "The Internet Standards Process -- Revision + 3", BCP 9, RFC 2026, October 1996. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2401] Kent, S. and R. Atkinson, "Security Architecture for the + Internet Protocol", RFC 2401, November 1998. + + [RFC2402] Kent, S. and R. Atkinson, "IP Authentication Header", RFC + 2402, November 1998. + + [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security + Payload (ESP)", RFC 2406, November 1998. + + [RFC2408] Maughan, D., Schertler, M., Schneider, M. and J. 
Turner, + "Internet Security Association and Key Management + Protocol", RFC 2408, November 1998. + + [RFC2409] Harkins, D. and D. Carrel, "The Internet Key Exchange + (IKE)", RFC 2409, November 1998. + + [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October 1998. + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + + [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion + Control", RFC 2581, April 1999. + +18. Bibliography + + [ALLMAN99] Allman, M. and Paxson, V., "On Estimating End-to-End + Network Path Properties", Proc. SIGCOMM'99, 1999. + + [FALL96] Fall, K. and Floyd, S., Simulation-based Comparisons of + Tahoe, Reno, and SACK TCP, Computer Communications Review, + V. 26 N. 3, July 1996, pp. 5-21. + + [RFC1750] Eastlake, D. (ed.), "Randomness Recommendations for + Security", RFC 1750, December 1994. + + + + + +Stewart, et al. Standards Track [Page 129] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + [RFC1950] Deutsch P. and J. Gailly, "ZLIB Compressed Data Format + Specification version 3.3", RFC 1950, May 1996. + + [RFC2104] Krawczyk, H., Bellare, M. and R. Canetti, "HMAC: Keyed- + Hashing for Message Authentication", RFC 2104, March 1997. + + [RFC2196] Fraser, B., "Site Security Handbook", FYI 8, RFC 2196, + September 1997. + + [RFC2522] Karn, P. and W. Simpson, "Photuris: Session-Key Management + Protocol", RFC 2522, March 1999. + + [SAVAGE99] Savage, S., Cardwell, N., Wetherall, D., and Anderson, T., + "TCP Congestion Control with a Misbehaving Receiver", ACM + Computer Communication Review, 29(5), October 1999. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 130] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Appendix A: Explicit Congestion Notification + + ECN (Ramakrishnan, K., Floyd, S., "Explicit Congestion Notification", + RFC 2481, January 1999) describes a proposed extension to IP that + details a method to become aware of congestion outside of datagram + loss. This is an optional feature that an implementation MAY choose + to add to SCTP. This appendix details the minor differences + implementers will need to be aware of if they choose to implement + this feature. In general RFC 2481 should be followed with the + following exceptions. + + Negotiation: + + RFC2481 details negotiation of ECN during the SYN and SYN-ACK stages + of a TCP connection. The sender of the SYN sets two bits in the TCP + flags, and the sender of the SYN-ACK sets only 1 bit. The reasoning + behind this is to assure both sides are truly ECN capable. For SCTP + this is not necessary. To indicate that an endpoint is ECN capable + an endpoint SHOULD add to the INIT and/or INIT ACK chunk the TLV + reserved for ECN. This TLV contains no parameters, and thus has the + following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Parameter Type = 32768 | Parameter Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + ECN-Echo: + + RFC 2481 details a specific bit for a receiver to send back in its + TCP acknowledgements to notify the sender of the Congestion + Experienced (CE) bit having arrived from the network. For SCTP this + same indication is made by including the ECNE chunk. This chunk + contains one data element, i.e. 
the lowest TSN associated with the IP + datagram marked with the CE bit, and looks as follows: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type=12 | Flags=00000000| Chunk Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Lowest TSN Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Note: The ECNE is considered a Control chunk. + + + + + +Stewart, et al. Standards Track [Page 131] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + CWR: + + RFC 2481 details a specific bit for a sender to send in the header of + its next outbound TCP segment to indicate to its peer that it has + reduced its congestion window. This is termed the CWR bit. For + SCTP the same indication is made by including the CWR chunk. + This chunk contains one data element, i.e. the TSN number that + was sent in the ECNE chunk. This element represents the lowest + TSN number in the datagram that was originally marked with the + CE bit. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type=13 | Flags=00000000| Chunk Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Lowest TSN Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Note: The CWR is considered a Control chunk. + +Appendix B Adler 32 bit checksum calculation + + The Adler-32 checksum calculation given in this appendix is copied from + [RFC1950]. + + Adler-32 is composed of two sums accumulated per byte: s1 is the sum + of all bytes, s2 is the sum of all s1 values. Both sums are done + modulo 65521. s1 is initialized to 1, s2 to zero. The Adler-32 + checksum is stored as s2*65536 + s1 in network byte order. + + The following C code computes the Adler-32 checksum of a data buffer. 
+ It is written for clarity, not for speed. The sample code is in the + ANSI C programming language. Non C users may find it easier to read + with these hints: + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 132] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + & Bitwise AND operator. + >> Bitwise right shift operator. When applied to an + unsigned quantity, as here, right shift inserts zero bit(s) + at the left. + << Bitwise left shift operator. Left shift inserts zero + bit(s) at the right. + ++ "n++" increments the variable n. + % modulo operator: a % b is the remainder of a divided by b. + #define BASE 65521 /* largest prime smaller than 65536 */ + /* + Update a running Adler-32 checksum with the bytes buf[0..len-1] + and return the updated checksum. The Adler-32 checksum should be + initialized to 1. + + Usage example: + + unsigned long adler = 1L; + + while (read_buffer(buffer, length) != EOF) { + adler = update_adler32(adler, buffer, length); + } + if (adler != original_adler) error(); + */ + unsigned long update_adler32(unsigned long adler, + unsigned char *buf, int len) + { + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int n; + + for (n = 0; n < len; n++) { + s1 = (s1 + buf[n]) % BASE; + s2 = (s2 + s1) % BASE; + } + return (s2 << 16) + s1; + } + + /* Return the adler32 of the bytes buf[0..len-1] */ + unsigned long adler32(unsigned char *buf, int len) + { + return update_adler32(1L, buf, len); + } + + + + + + + + + +Stewart, et al. Standards Track [Page 133] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. 
+ + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 134] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2991.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2991.txt new file mode 100644 index 0000000..284ca15 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2991.txt @@ -0,0 +1,507 @@ + + + + + + +Network Working Group D. Thaler +Request for Comments: 2991 Microsoft +Category: Informational C. 
Hopps + NextHop Technologies + November 2000 + + + Multipath Issues in Unicast and Multicast Next-Hop Selection + +Status of this Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + Various routing protocols, including Open Shortest Path First (OSPF) + and Intermediate System to Intermediate System (ISIS), explicitly + allow "Equal-Cost Multipath" (ECMP) routing. Some router + implementations also allow equal-cost multipath usage with RIP and + other routing protocols. The effect of multipath routing on a + forwarder is that the forwarder potentially has several next-hops for + any given destination and must use some method to choose which next- + hop should be used for a given data packet. + +1. Introduction + + Various routing protocols, including OSPF and ISIS, explicitly allow + "Equal-Cost Multipath" routing. Some router implementations also + allow equal-cost multipath usage with RIP and other routing + protocols. Using equal-cost multipath means that if multiple equal- + cost routes to the same destination exist, they can be discovered and + used to provide load balancing among redundant paths. + + The effect of multipath routing on a forwarder is that the forwarder + potentially has several next-hops for any given destination and must + use some method to choose which next-hop should be used for a given + data packet. This memo summarizes current practices, problems, and + solutions. + + + + + + + +Thaler & Hopps Informational [Page 1] + +RFC 2991 Multipath Issues November 2000 + + +2. Concerns + + Several router implementations allow multipath forwarding. This is + sometimes done naively via round-robin, where each packet matching a + given destination route is forwarded using the subsequent next-hop, + in a round-robin fashion. 
This does provide a form of load + balancing, but there are several problems with approaches such as + round-robin or random: + + Variable Path MTU + Since each of the redundant paths may have a different MTU, + this means that the overall path MTU can change on a packet- + by-packet basis, negating the usefulness of path MTU discovery. + + Variable Latencies + Since each of the redundant paths may have a different latency + involved, having packets take separate paths can cause packets + to always arrive out of order, increasing delivery latency and + buffering requirements. + + Packet reordering causes TCP to believe that loss has taken + place when packets with higher sequence numbers arrive before + an earlier one. When three or more packets are received before + a "late" packet, TCP enters a mode called "fast-retransmit" [6] + which consumes extra bandwidth (which could potentially cause + more loss, decreasing throughput) as it attempts to + unnecessarily retransmit the delayed packet(s). Hence, + reordering can be detrimental to network performance. + + Debugging + Common debugging utilities such as ping and traceroute are much + less reliable in the presence of multiple paths and may even + present completely wrong results. + + In multicast routing, the problem with multiple paths is that + multicast routing protocols prevent loops and duplicates by + constructing a single tree to all receivers of the same group + address. Multicast routing protocols deployed today (DVMRP, PIM-DM, + PIM-SM) [2] construct shortest-path trees rooted at either the + source, or another router known as a Core or Rendezvous Point. + Hence, the way they ensure that duplicates will not arise is that a + given tree must use only a single next-hop towards the root of the + tree. + + + + + + + + +Thaler & Hopps Informational [Page 2] + +RFC 2991 Multipath Issues November 2000 + + +3. 
Requirements + + In the remainder of this document, we will use the term "flow" to + represent the granularity at which the router keeps state (if at all) + for classes of traffic. The exact definition of a flow may depend on + the actual implementation. For example, a flow might be identified + solely by destination address, or it might be identified by (source + address, destination address, protocol id) triplet. Hence "flow" is + not necessarily synonymous with the term "microflow" as used in RFC + 2474 [7], which also includes port numbers. Indeed, including + transport-layer information in the next-hop selection process can + actually be problematic. For example, if packets are fragmented, the + transport-layer information may not be available in every packet. + Furthermore, having the choice of path depend on transport-layer + fields may negate the benefit of caching information such as MTU for + use in subsequent connections between the same endpoints. + + All of the problems outlined in the previous section arise when + packets in the same unicast or multicast "flow" are split among + multiple paths. The natural solution is therefore to ensure that + packets for the same flow always use the same path. + + Two additional features are desirable: + + Minimal disruption + When multipath is used, meaning that multiple routes contribute + valid next-hops, the chances are higher of routes being added + and deleted from consideration than when only the "best" route + is used (in which case metric changes in alternate routes have + no effect on traffic paths). Since a higher number of routes + may actually be used for forwarding when multipath is in use, + the potential for packet reordering and packet loss due to + route flaps can be much greater than when not using multipath. + Hence, it is desirable to minimize the number of active flows + affected by the addition or deletion of another next-hop. 
+ + Fast implementation + The amount of additional computation required to forward a + packet should be small. For example, when doing round-robin, + this computation might consist of incrementing (modulo the + number of next-hops) a next-hop index. + +4. Solutions + + We now provide three possible methods for improving the performance + of multipath and then discuss their applicability to unicast and + multicast forwarding. + + + + +Thaler & Hopps Informational [Page 3] + +RFC 2991 Multipath Issues November 2000 + + + Modulo-N Hash + To select a next-hop from the list of N next-hops, the router + performs a modulo-N hash over the packet header fields that + identify a flow. This has the advantage of being fast, at the + expense of (N-1)/N of all flows changing paths whenever a + next-hop is added or removed. + + Hash-Threshold + The router first selects a key by performing a hash over the + packet header fields that identify the flow. The N next-hops + have been assigned unique regions in the hash function's output + space. By comparing the hash value against region boundaries + the router can determine which region the hash value belongs to + and thus which next-hop to use. This method has the advantage + of only affecting flows near the region boundaries (or + thresholds) when next-hops are added or removed. For ECMP + hash-threshold's lookup can be done with a simple division + (hash_value / fixed_region_size). When a next-hop is added or + removed, between 1/4 and 1/2 of all flows change paths. An + analysis of this method can be found in [3]. + + Highest Random Weight (HRW) + The router computes a key for EACH next-hop by performing a + hash over the packet header fields that identify the flow, as + well as over the address of the next-hop. The router then + chooses the next-hop with the highest resulting key value [4]. 
+ This has the advantage of minimizing the number of flows + affected by a next-hop addition or deletion (only 1/N of them), + but is approximately N times as expensive as a modulo-N hash. + + The applicability of these three alternatives depends on (at least) + two factors: whether the forwarder maintains per-flow state, and how + precious CPU is to a multipath forwarder. + + Some routers may maintain per-flow state for reasons other than for + supporting multipath. For example, routers typically keep per-flow + state for multicast flows so that they can maintain the list of + interfaces to which packets in the flow should be copied. + + If per-flow state is maintained in a multipath forwarder, then + computation of the next-hop can be done by the router at state + creation time. This entails no additional computations at packet + forwarding time compared with normal forwarding to a single next-hop, + since the next-hop is precomputed. In this case, any method can be + used, including round-robin, random, modulo-N, hash-threshold or HRW. + Hash functions such as modulo-N, hash-threshold and HRW are better if + the forwarder state may be deleted for any reason during the lifetime + of a flow since subsequent next-hop computations by the router will + + + +Thaler & Hopps Informational [Page 4] + +RFC 2991 Multipath Issues November 2000 + + + always select the same path. This also improves the usefulness of + debugging utilities such as traceroute. Finally, to maximize the + stability of paths (and hence the usefulness of traceroute, etc.), + the use of HRW is recommended over the other methods mentioned + herein. + + If per-flow state is not maintained by the forwarder, then using + multiple next-hops requires that the next-hop be calculated at packet + arrival time. When CPU is more precious than stability of flow + paths, hash-threshold is recommended over the other methods mentioned + herein. + +4.1. 
Unicast Forwarding + + Depending on the implementation, unicast forwarding may or may not + keep per-flow state. We recommend that where forwarder + implementations keep flow state, routers should use HRW at state + creation time (and next-hop deletion time) to select the next-hop, + and that forwarders without per-flow state use hash-threshold. + +4.2. Multicast Forwarding + + Today's multicast forwarding engines use a cache of forwarding + entries indexed by group (or group prefix) and source (or source + prefix). This means that today's multicast forwarders always keep + per-flow state, although for some multicast routing protocols, the + "flow" may be fairly coarse (e.g., traffic from all sources to the + same destination). Since per-flow state is kept by the forwarder, it + is recommended that the router always use HRW to select the next-hop. + + Routers using explicit-joining protocols such as PIM-SM [5] should + thus use the multipath information when determining to which neighbor + a join message should be sent. For example, when multiple next-hops + exist for a given Rendezvous Point (RP) toward which a (*,G) Join + should be sent, it is recommended that HRW be used to select the + next-hop to use for each group. + +5. Applicability + + The algorithms discussed above (except round-robin) all rely on some + form of hash function. Equal flow distribution is achieved when the + hash function is uniformly distributed. Since the commonly used hash + functions only become uniformly distributed when the number of inputs + is relatively large, these algorithms are more applicable to routers + used to route many flows, than in, for example, a small business + setting. + + + + + +Thaler & Hopps Informational [Page 5] + +RFC 2991 Multipath Issues November 2000 + + +6. Redundant Parallel Links + + A related problem occurs when multiple parallel links are used + between the same pair of routers.
A common solution is to bundle the + two links together into a "super"-link which is then used for routing. + For multicast forwarding, this results in the two links being reduced + to a single next-hop (over the combined link) which can be used to + prevent duplicates. When a unicast or multicast packet is queued to + the combined link, some method, such as those discussed earlier, is + still required to determine the physical link on which to transmit + the packet. If the parallel links are identical, then most of the + concerns discussed in this document are avoided with the combined + link. The exception is packet reordering, which can still occur with + round-robin, adversely affecting TCP. + +7. Security Considerations + + This document discusses issues with various methods of choosing a + next-hop from among multiple valid next-hops. As such, it does not + directly impact the security of the Internet infrastructure or its + applications. + + One issue that is worth mentioning, however, is that when next-hop + selection is predictable, an attacker can synthesize traffic that + will all hash the same, making it possible to launch a denial-of- + service attack that overloads a particular path. Since a special + case of this is when the same (single) next-hop is always selected, + such an attack is easiest when multipath is not being used. + Introducing multipath routing can make such an attack more difficult; + the more unpredictable the hash is, the harder it becomes to conduct + a denial-of-service attack against any single link. + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 6] + +RFC 2991 Multipath Issues November 2000 + + +8. References + + [1] Moy, J., "OSPF Version 2", STD 54, RFC 2328, April 1998. + + [2] Maufer, T., "Deploying IP Multicast in the Enterprise", + Prentice-Hall, 1998. + + [3] Hopps, C., "Analysis of an Equal-Cost Multi-Path Algorithm", RFC + 2992, November 2000. + + [4] Thaler, D., and C.V.
Ravishankar, "Using Name-Based Mappings to + Increase Hit Rates", IEEE/ACM Transactions on Networking, + February 1998. + + [5] Estrin, D., Farinacci, D., Helmy, A., Thaler, D., Deering, S., + Handley, M., Jacobson, V., Liu, C., Sharma, P. and L. Wei, + "Protocol Independent Multicast-Sparse Mode (PIM-SM): Protocol + Specification", RFC 2362, June 1998. + + [6] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion Control", + RFC 2581, April 1999. + + [7] Nichols, K., Blake, S., Baker, F. and D. Black., "Definition of + the Differentiated Services Field (DS Field) in the IPv4 and + IPv6 Headers", RFC 2474, December 1998. + + + + + + + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 7] + +RFC 2991 Multipath Issues November 2000 + + +9. Authors' Addresses + + Dave Thaler + Microsoft + One Microsoft Way + Redmond, WA 98052 + + Phone: +1 425 703 8835 + EMail: dthaler@dthaler.microsoft.com + + + Christian E. Hopps + NextHop Technologies, Inc. + 517 W. William Street + Ann Arbor, MI 48103-4943 + U.S.A + + Phone: +1 734 936 0291 + EMail: chopps@nexthop.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 8] + +RFC 2991 Multipath Issues November 2000 + + +10. Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. 
However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 9] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc3986.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc3986.txt new file mode 100644 index 0000000..c56ed4e --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc3986.txt @@ -0,0 +1,3419 @@ + + + + + + +Network Working Group T. Berners-Lee +Request for Comments: 3986 W3C/MIT +STD: 66 R. Fielding +Updates: 1738 Day Software +Obsoletes: 2732, 2396, 1808 L. Masinter +Category: Standards Track Adobe Systems + January 2005 + + + Uniform Resource Identifier (URI): Generic Syntax + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. 
Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + A Uniform Resource Identifier (URI) is a compact sequence of + characters that identifies an abstract or physical resource. This + specification defines the generic URI syntax and a process for + resolving URI references that might be in relative form, along with + guidelines and security considerations for the use of URIs on the + Internet. The URI syntax defines a grammar that is a superset of all + valid URIs, allowing an implementation to parse the common components + of a URI reference without knowing the scheme-specific requirements + of every possible identifier. This specification does not define a + generative grammar for URIs; that task is performed by the individual + specifications of each URI scheme. + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 1] + +RFC 3986 URI Generic Syntax January 2005 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 1.1. Overview of URIs . . . . . . . . . . . . . . . . . . . . 4 + 1.1.1. Generic Syntax . . . . . . . . . . . . . . . . . 6 + 1.1.2. Examples . . . . . . . . . . . . . . . . . . . . 7 + 1.1.3. URI, URL, and URN . . . . . . . . . . . . . . . 7 + 1.2. Design Considerations . . . . . . . . . . . . . . . . . 8 + 1.2.1. Transcription . . . . . . . . . . . . . . . . . 8 + 1.2.2. Separating Identification from Interaction . . . 9 + 1.2.3. Hierarchical Identifiers . . . . . . . . . . . . 10 + 1.3. Syntax Notation . . . . . . . . . . . . . . . . . . . . 11 + 2. Characters . . . . . . . . . . . . . . . . . . . . . . . . . . 11 + 2.1. Percent-Encoding . . . . . . . . . . . . . . . . . . . . 12 + 2.2. Reserved Characters . . . . . . . . . . . . . . . . . . 12 + 2.3. 
Unreserved Characters . . . . . . . . . . . . . . . . . 13 + 2.4. When to Encode or Decode . . . . . . . . . . . . . . . . 14 + 2.5. Identifying Data . . . . . . . . . . . . . . . . . . . . 14 + 3. Syntax Components . . . . . . . . . . . . . . . . . . . . . . 16 + 3.1. Scheme . . . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2. Authority . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2.1. User Information . . . . . . . . . . . . . . . . 18 + 3.2.2. Host . . . . . . . . . . . . . . . . . . . . . . 18 + 3.2.3. Port . . . . . . . . . . . . . . . . . . . . . . 22 + 3.3. Path . . . . . . . . . . . . . . . . . . . . . . . . . . 22 + 3.4. Query . . . . . . . . . . . . . . . . . . . . . . . . . 23 + 3.5. Fragment . . . . . . . . . . . . . . . . . . . . . . . . 24 + 4. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 4.1. URI Reference . . . . . . . . . . . . . . . . . . . . . 25 + 4.2. Relative Reference . . . . . . . . . . . . . . . . . . . 26 + 4.3. Absolute URI . . . . . . . . . . . . . . . . . . . . . . 27 + 4.4. Same-Document Reference . . . . . . . . . . . . . . . . 27 + 4.5. Suffix Reference . . . . . . . . . . . . . . . . . . . . 27 + 5. Reference Resolution . . . . . . . . . . . . . . . . . . . . . 28 + 5.1. Establishing a Base URI . . . . . . . . . . . . . . . . 28 + 5.1.1. Base URI Embedded in Content . . . . . . . . . . 29 + 5.1.2. Base URI from the Encapsulating Entity . . . . . 29 + 5.1.3. Base URI from the Retrieval URI . . . . . . . . 30 + 5.1.4. Default Base URI . . . . . . . . . . . . . . . . 30 + 5.2. Relative Resolution . . . . . . . . . . . . . . . . . . 30 + 5.2.1. Pre-parse the Base URI . . . . . . . . . . . . . 31 + 5.2.2. Transform References . . . . . . . . . . . . . . 31 + 5.2.3. Merge Paths . . . . . . . . . . . . . . . . . . 32 + 5.2.4. Remove Dot Segments . . . . . . . . . . . . . . 33 + 5.3. Component Recomposition . . . . . . . . . . . . . . . . 35 + 5.4. Reference Resolution Examples . . . . . . . . . . . . . 
35 + 5.4.1. Normal Examples . . . . . . . . . . . . . . . . 36 + 5.4.2. Abnormal Examples . . . . . . . . . . . . . . . 36 + + + +Berners-Lee, et al. Standards Track [Page 2] + +RFC 3986 URI Generic Syntax January 2005 + + + 6. Normalization and Comparison . . . . . . . . . . . . . . . . . 38 + 6.1. Equivalence . . . . . . . . . . . . . . . . . . . . . . 38 + 6.2. Comparison Ladder . . . . . . . . . . . . . . . . . . . 39 + 6.2.1. Simple String Comparison . . . . . . . . . . . . 39 + 6.2.2. Syntax-Based Normalization . . . . . . . . . . . 40 + 6.2.3. Scheme-Based Normalization . . . . . . . . . . . 41 + 6.2.4. Protocol-Based Normalization . . . . . . . . . . 42 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 7.1. Reliability and Consistency . . . . . . . . . . . . . . 43 + 7.2. Malicious Construction . . . . . . . . . . . . . . . . . 43 + 7.3. Back-End Transcoding . . . . . . . . . . . . . . . . . . 44 + 7.4. Rare IP Address Formats . . . . . . . . . . . . . . . . 45 + 7.5. Sensitive Information . . . . . . . . . . . . . . . . . 45 + 7.6. Semantic Attacks . . . . . . . . . . . . . . . . . . . . 45 + 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 46 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 46 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 46 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 46 + 10.2. Informative References . . . . . . . . . . . . . . . . . 47 + A. Collected ABNF for URI . . . . . . . . . . . . . . . . . . . . 49 + B. Parsing a URI Reference with a Regular Expression . . . . . . 50 + C. Delimiting a URI in Context . . . . . . . . . . . . . . . . . 51 + D. Changes from RFC 2396 . . . . . . . . . . . . . . . . . . . . 53 + D.1. Additions . . . . . . . . . . . . . . . . . . . . . . . 53 + D.2. Modifications . . . . . . . . . . . . . . . . . . . . . 53 + Index . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 56 + Authors' Addresses . . . 
. . . . . . . . . . . . . . . . . . . . . 60 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 61 + + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 3] + +RFC 3986 URI Generic Syntax January 2005 + + +1. Introduction + + A Uniform Resource Identifier (URI) provides a simple and extensible + means for identifying a resource. This specification of URI syntax + and semantics is derived from concepts introduced by the World Wide + Web global information initiative, whose use of these identifiers + dates from 1990 and is described in "Universal Resource Identifiers + in WWW" [RFC1630]. The syntax is designed to meet the + recommendations laid out in "Functional Recommendations for Internet + Resource Locators" [RFC1736] and "Functional Requirements for Uniform + Resource Names" [RFC1737]. + + This document obsoletes [RFC2396], which merged "Uniform Resource + Locators" [RFC1738] and "Relative Uniform Resource Locators" + [RFC1808] in order to define a single, generic syntax for all URIs. + It obsoletes [RFC2732], which introduced syntax for an IPv6 address. + It excludes portions of RFC 1738 that defined the specific syntax of + individual URI schemes; those portions will be updated as separate + documents. The process for registration of new URI schemes is + defined separately by [BCP35]. Advice for designers of new URI + schemes can be found in [RFC2718]. All significant changes from RFC + 2396 are noted in Appendix D. + + This specification uses the terms "character" and "coded character + set" in accordance with the definitions provided in [BCP19], and + "character encoding" in place of what [BCP19] refers to as a + "charset". + +1.1. Overview of URIs + + URIs are characterized as follows: + + Uniform + + Uniformity provides several benefits. It allows different types + of resource identifiers to be used in the same context, even when + the mechanisms used to access those resources may differ. 
It + allows uniform semantic interpretation of common syntactic + conventions across different types of resource identifiers. It + allows introduction of new types of resource identifiers without + interfering with the way that existing identifiers are used. It + allows the identifiers to be reused in many different contexts, + thus permitting new applications or protocols to leverage a pre- + existing, large, and widely used set of resource identifiers. + + + + + + + +Berners-Lee, et al. Standards Track [Page 4] + +RFC 3986 URI Generic Syntax January 2005 + + + Resource + + This specification does not limit the scope of what might be a + resource; rather, the term "resource" is used in a general sense + for whatever might be identified by a URI. Familiar examples + include an electronic document, an image, a source of information + with a consistent purpose (e.g., "today's weather report for Los + Angeles"), a service (e.g., an HTTP-to-SMS gateway), and a + collection of other resources. A resource is not necessarily + accessible via the Internet; e.g., human beings, corporations, and + bound books in a library can also be resources. Likewise, + abstract concepts can be resources, such as the operators and + operands of a mathematical equation, the types of a relationship + (e.g., "parent" or "employee"), or numeric values (e.g., zero, + one, and infinity). + + Identifier + + An identifier embodies the information required to distinguish + what is being identified from all other things within its scope of + identification. Our use of the terms "identify" and "identifying" + refer to this purpose of distinguishing one resource from all + other resources, regardless of how that purpose is accomplished + (e.g., by name, address, or context). These terms should not be + mistaken as an assumption that an identifier defines or embodies + the identity of what is referenced, though that may be the case + for some identifiers. 
Nor should it be assumed that a system + using URIs will access the resource identified: in many cases, + URIs are used to denote resources without any intention that they + be accessed. Likewise, the "one" resource identified might not be + singular in nature (e.g., a resource might be a named set or a + mapping that varies over time). + + A URI is an identifier consisting of a sequence of characters + matching the syntax rule named <URI> in Section 3. It enables + uniform identification of resources via a separately defined + extensible set of naming schemes (Section 3.1). How that + identification is accomplished, assigned, or enabled is delegated to + each scheme specification. + + This specification does not place any limits on the nature of a + resource, the reasons why an application might seek to refer to a + resource, or the kinds of systems that might use URIs for the sake of + identifying resources. This specification does not require that a + URI persists in identifying the same resource over time, though that + is a common goal of all URI schemes. Nevertheless, nothing in this + + + + + +Berners-Lee, et al. Standards Track [Page 5] + +RFC 3986 URI Generic Syntax January 2005 + + + specification prevents an application from limiting itself to + particular types of resources, or to a subset of URIs that maintains + characteristics desired by that application. + + URIs have a global scope and are interpreted consistently regardless + of context, though the result of that interpretation may be in + relation to the end-user's context. For example, "http://localhost/" + has the same interpretation for every user of that reference, even + though the network interface corresponding to "localhost" may be + different for each end-user: interpretation is independent of access.
+ However, an action made on the basis of that reference will take + place in relation to the end-user's context, which implies that an + action intended to refer to a globally unique thing must use a URI + that distinguishes that resource from all other things. URIs that + identify in relation to the end-user's local context should only be + used when the context itself is a defining aspect of the resource, + such as when an on-line help manual refers to a file on the end- + user's file system (e.g., "file:///etc/hosts"). + +1.1.1. Generic Syntax + + Each URI begins with a scheme name, as defined in Section 3.1, that + refers to a specification for assigning identifiers within that + scheme. As such, the URI syntax is a federated and extensible naming + system wherein each scheme's specification may further restrict the + syntax and semantics of identifiers using that scheme. + + This specification defines those elements of the URI syntax that are + required of all URI schemes or are common to many URI schemes. It + thus defines the syntax and semantics needed to implement a scheme- + independent parsing mechanism for URI references, by which the + scheme-dependent handling of a URI can be postponed until the + scheme-dependent semantics are needed. Likewise, protocols and data + formats that make use of URI references can refer to this + specification as a definition for the range of syntax allowed for all + URIs, including those schemes that have yet to be defined. This + decouples the evolution of identification schemes from the evolution + of protocols, data formats, and implementations that make use of + URIs. + + A parser of the generic URI syntax can parse any URI reference into + its major components. Once the scheme is determined, further + scheme-specific parsing can be performed on the components. In other + words, the URI generic syntax is a superset of the syntax of all URI + schemes. + + + + + + +Berners-Lee, et al. 
Standards Track [Page 6] + +RFC 3986 URI Generic Syntax January 2005 + + +1.1.2. Examples + + The following example URIs illustrate several URI schemes and + variations in their common syntax components: + + ftp://ftp.is.co.za/rfc/rfc1808.txt + + http://www.ietf.org/rfc/rfc2396.txt + + ldap://[2001:db8::7]/c=GB?objectClass?one + + mailto:John.Doe@example.com + + news:comp.infosystems.www.servers.unix + + tel:+1-816-555-1212 + + telnet://192.0.2.16:80/ + + urn:oasis:names:specification:docbook:dtd:xml:4.1.2 + + +1.1.3. URI, URL, and URN + + A URI can be further classified as a locator, a name, or both. The + term "Uniform Resource Locator" (URL) refers to the subset of URIs + that, in addition to identifying a resource, provide a means of + locating the resource by describing its primary access mechanism + (e.g., its network "location"). The term "Uniform Resource Name" + (URN) has been used historically to refer to both URIs under the + "urn" scheme [RFC2141], which are required to remain globally unique + and persistent even when the resource ceases to exist or becomes + unavailable, and to any other URI with the properties of a name. + + An individual scheme does not have to be classified as being just one + of "name" or "locator". Instances of URIs from any given scheme may + have the characteristics of names or locators or both, often + depending on the persistence and care in the assignment of + identifiers by the naming authority, rather than on any quality of + the scheme. Future specifications and related documentation should + use the general term "URI" rather than the more restrictive terms + "URL" and "URN" [RFC3305]. + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 7] + +RFC 3986 URI Generic Syntax January 2005 + + +1.2. Design Considerations + +1.2.1. Transcription + + The URI syntax has been designed with global transcription as one of + its main considerations. 
A URI is a sequence of characters from a + very limited set: the letters of the basic Latin alphabet, digits, + and a few special characters. A URI may be represented in a variety + of ways; e.g., ink on paper, pixels on a screen, or a sequence of + character encoding octets. The interpretation of a URI depends only + on the characters used and not on how those characters are + represented in a network protocol. + + The goal of transcription can be described by a simple scenario. + Imagine two colleagues, Sam and Kim, sitting in a pub at an + international conference and exchanging research ideas. Sam asks Kim + for a location to get more information, so Kim writes the URI for the + research site on a napkin. Upon returning home, Sam takes out the + napkin and types the URI into a computer, which then retrieves the + information to which Kim referred. + + There are several design considerations revealed by the scenario: + + o A URI is a sequence of characters that is not always represented + as a sequence of octets. + + o A URI might be transcribed from a non-network source and thus + should consist of characters that are most likely able to be + entered into a computer, within the constraints imposed by + keyboards (and related input devices) across languages and + locales. + + o A URI often has to be remembered by people, and it is easier for + people to remember a URI when it consists of meaningful or + familiar components. + + These design considerations are not always in alignment. For + example, it is often the case that the most meaningful name for a URI + component would require characters that cannot be typed into some + systems. The ability to transcribe a resource identifier from one + medium to another has been considered more important than having a + URI consist of the most meaningful of components. 
+ + In local or regional contexts and with improving technology, users + might benefit from being able to use a wider range of characters; + such use is not defined by this specification. Percent-encoded + octets (Section 2.1) may be used within a URI to represent characters + outside the range of the US-ASCII coded character set if this + + + +Berners-Lee, et al. Standards Track [Page 8] + +RFC 3986 URI Generic Syntax January 2005 + + + representation is allowed by the scheme or by the protocol element in + which the URI is referenced. Such a definition should specify the + character encoding used to map those characters to octets prior to + being percent-encoded for the URI. + +1.2.2. Separating Identification from Interaction + + A common misunderstanding of URIs is that they are only used to refer + to accessible resources. The URI itself only provides + identification; access to the resource is neither guaranteed nor + implied by the presence of a URI. Instead, any operation associated + with a URI reference is defined by the protocol element, data format + attribute, or natural language text in which it appears. + + Given a URI, a system may attempt to perform a variety of operations + on the resource, as might be characterized by words such as "access", + "update", "replace", or "find attributes". Such operations are + defined by the protocols that make use of URIs, not by this + specification. However, we do use a few general terms for describing + common operations on URIs. URI "resolution" is the process of + determining an access mechanism and the appropriate parameters + necessary to dereference a URI; this resolution may require several + iterations. To use that access mechanism to perform an action on the + URI's resource is to "dereference" the URI. 
+ + When URIs are used within information retrieval systems to identify + sources of information, the most common form of URI dereference is + "retrieval": making use of a URI in order to retrieve a + representation of its associated resource. A "representation" is a + sequence of octets, along with representation metadata describing + those octets, that constitutes a record of the state of the resource + at the time when the representation is generated. Retrieval is + achieved by a process that might include using the URI as a cache key + to check for a locally cached representation, resolution of the URI + to determine an appropriate access mechanism (if any), and + dereference of the URI for the sake of applying a retrieval + operation. Depending on the protocols used to perform the retrieval, + additional information might be supplied about the resource (resource + metadata) and its relation to other resources. + + URI references in information retrieval systems are designed to be + late-binding: the result of an access is generally determined when it + is accessed and may vary over time or due to other aspects of the + interaction. These references are created in order to be used in the + future: what is being identified is not some specific result that was + obtained in the past, but rather some characteristic that is expected + to be true for future results. In such cases, the resource referred + to by the URI is actually a sameness of characteristics as observed + + + +Berners-Lee, et al. Standards Track [Page 9] + +RFC 3986 URI Generic Syntax January 2005 + + + over time, perhaps elucidated by additional comments or assertions + made by the resource provider. + + Although many URI schemes are named after protocols, this does not + imply that use of these URIs will result in access to the resource + via the named protocol. URIs are often used simply for the sake of + identification. 
Even when a URI is used to retrieve a representation + of a resource, that access might be through gateways, proxies, + caches, and name resolution services that are independent of the + protocol associated with the scheme name. The resolution of some + URIs may require the use of more than one protocol (e.g., both DNS + and HTTP are typically used to access an "http" URI's origin server + when a representation isn't found in a local cache). + +1.2.3. Hierarchical Identifiers + + The URI syntax is organized hierarchically, with components listed in + order of decreasing significance from left to right. For some URI + schemes, the visible hierarchy is limited to the scheme itself: + everything after the scheme component delimiter (":") is considered + opaque to URI processing. Other URI schemes make the hierarchy + explicit and visible to generic parsing algorithms. + + The generic syntax uses the slash ("/"), question mark ("?"), and + number sign ("#") characters to delimit components that are + significant to the generic parser's hierarchical interpretation of an + identifier. In addition to aiding the readability of such + identifiers through the consistent use of familiar syntax, this + uniform representation of hierarchy across naming schemes allows + scheme-independent references to be made relative to that hierarchy. + + It is often the case that a group or "tree" of documents has been + constructed to serve a common purpose, wherein the vast majority of + URI references in these documents point to resources within the tree + rather than outside it. Similarly, documents located at a particular + site are much more likely to refer to other resources at that site + than to resources at remote sites. Relative referencing of URIs + allows document trees to be partially independent of their location + and access scheme. 
For instance, it is possible for a single set of + hypertext documents to be simultaneously accessible and traversable + via each of the "file", "http", and "ftp" schemes if the documents + refer to each other with relative references. Furthermore, such + document trees can be moved, as a whole, without changing any of the + relative references. + + A relative reference (Section 4.2) refers to a resource by describing + the difference within a hierarchical name space between the reference + context and the target URI. The reference resolution algorithm, + + + +Berners-Lee, et al. Standards Track [Page 10] + +RFC 3986 URI Generic Syntax January 2005 + + + presented in Section 5, defines how such a reference is transformed + to the target URI. As relative references can only be used within + the context of a hierarchical URI, designers of new URI schemes + should use a syntax consistent with the generic syntax's hierarchical + components unless there are compelling reasons to forbid relative + referencing within that scheme. + + NOTE: Previous specifications used the terms "partial URI" and + "relative URI" to denote a relative reference to a URI. As some + readers misunderstood those terms to mean that relative URIs are a + subset of URIs rather than a method of referencing URIs, this + specification simply refers to them as relative references. + + All URI references are parsed by generic syntax parsers when used. + However, because hierarchical processing has no effect on an absolute + URI used in a reference unless it contains one or more dot-segments + (complete path segments of "." or "..", as described in Section 3.3), + URI scheme specifications can define opaque identifiers by + disallowing use of slash characters, question mark characters, and + the URIs "scheme:." and "scheme:..". + +1.3. 
Syntax Notation + + This specification uses the Augmented Backus-Naur Form (ABNF) + notation of [RFC2234], including the following core ABNF syntax rules + defined by that specification: ALPHA (letters), CR (carriage return), + DIGIT (decimal digits), DQUOTE (double quote), HEXDIG (hexadecimal + digits), LF (line feed), and SP (space). The complete URI syntax is + collected in Appendix A. + +2. Characters + + The URI syntax provides a method of encoding data, presumably for the + sake of identifying a resource, as a sequence of characters. The URI + characters are, in turn, frequently encoded as octets for transport + or presentation. This specification does not mandate any particular + character encoding for mapping between URI characters and the octets + used to store or transmit those characters. When a URI appears in a + protocol element, the character encoding is defined by that protocol; + without such a definition, a URI is assumed to be in the same + character encoding as the surrounding text. + + The ABNF notation defines its terminal values to be non-negative + integers (codepoints) based on the US-ASCII coded character set + [ASCII]. Because a URI is a sequence of characters, we must invert + that relation in order to understand the URI syntax. Therefore, the + + + + + +Berners-Lee, et al. Standards Track [Page 11] + +RFC 3986 URI Generic Syntax January 2005 + + + integer values used by the ABNF must be mapped back to their + corresponding characters via US-ASCII in order to complete the syntax + rules. + + A URI is composed from a limited set of characters consisting of + digits, letters, and a few graphic symbols. A reserved subset of + those characters may be used to delimit syntax components within a + URI while the remaining characters, including both the unreserved set + and those reserved characters not acting as delimiters, define each + component's identifying data. + +2.1. 
Percent-Encoding + + A percent-encoding mechanism is used to represent a data octet in a + component when that octet's corresponding character is outside the + allowed set or is being used as a delimiter of, or within, the + component. A percent-encoded octet is encoded as a character + triplet, consisting of the percent character "%" followed by the two + hexadecimal digits representing that octet's numeric value. For + example, "%20" is the percent-encoding for the binary octet + "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space + character (SP). Section 2.4 describes when percent-encoding and + decoding is applied. + + pct-encoded = "%" HEXDIG HEXDIG + + The uppercase hexadecimal digits 'A' through 'F' are equivalent to + the lowercase digits 'a' through 'f', respectively. If two URIs + differ only in the case of hexadecimal digits used in percent-encoded + octets, they are equivalent. For consistency, URI producers and + normalizers should use uppercase hexadecimal digits for all percent- + encodings. + +2.2. Reserved Characters + + URIs include components and subcomponents that are delimited by + characters in the "reserved" set. These characters are called + "reserved" because they may (or may not) be defined as delimiters by + the generic syntax, by each scheme-specific syntax, or by the + implementation-specific syntax of a URI's dereferencing algorithm. + If data for a URI component would conflict with a reserved + character's purpose as a delimiter, then the conflicting data must be + percent-encoded before the URI is formed. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 12] + +RFC 3986 URI Generic Syntax January 2005 + + + reserved = gen-delims / sub-delims + + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + + sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + The purpose of reserved characters is to provide a set of delimiting + characters that are distinguishable from other data within a URI. + URIs that differ in the replacement of a reserved character with its + corresponding percent-encoded octet are not equivalent. Percent- + encoding a reserved character, or decoding a percent-encoded octet + that corresponds to a reserved character, will change how the URI is + interpreted by most applications. Thus, characters in the reserved + set are protected from normalization and are therefore safe to be + used by scheme-specific and producer-specific algorithms for + delimiting data subcomponents within a URI. + + A subset of the reserved characters (gen-delims) is used as + delimiters of the generic URI components described in Section 3. A + component's ABNF syntax rule will not use the reserved or gen-delims + rule names directly; instead, each syntax rule lists the characters + allowed within that component (i.e., not delimiting it), and any of + those characters that are also in the reserved set are "reserved" for + use as subcomponent delimiters within the component. Only the most + common subcomponents are defined by this specification; other + subcomponents may be defined by a URI scheme's specification, or by + the implementation-specific syntax of a URI's dereferencing + algorithm, provided that such subcomponents are delimited by + characters in the reserved set allowed within that component. + + URI producing applications should percent-encode data octets that + correspond to characters in the reserved set unless these characters + are specifically allowed by the URI scheme to represent data in that + component. If a reserved character is found in a URI component and + no delimiting role is known for that character, then it must be + interpreted as representing the data octet corresponding to that + character's encoding in US-ASCII. + +2.3. 
Unreserved Characters + + Characters that are allowed in a URI but do not have a reserved + purpose are called unreserved. These include uppercase and lowercase + letters, decimal digits, hyphen, period, underscore, and tilde. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + + + + + +Berners-Lee, et al. Standards Track [Page 13] + +RFC 3986 URI Generic Syntax January 2005 + + + URIs that differ in the replacement of an unreserved character with + its corresponding percent-encoded US-ASCII octet are equivalent: they + identify the same resource. However, URI comparison implementations + do not always perform normalization prior to comparison (see Section + 6). For consistency, percent-encoded octets in the ranges of ALPHA + (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), + underscore (%5F), or tilde (%7E) should not be created by URI + producers and, when found in a URI, should be decoded to their + corresponding unreserved characters by URI normalizers. + +2.4. When to Encode or Decode + + Under normal circumstances, the only time when octets within a URI + are percent-encoded is during the process of producing the URI from + its component parts. This is when an implementation determines which + of the reserved characters are to be used as subcomponent delimiters + and which can be safely used as data. Once produced, a URI is always + in its percent-encoded form. + + When a URI is dereferenced, the components and subcomponents + significant to the scheme-specific dereferencing process (if any) + must be parsed and separated before the percent-encoded octets within + those components can be safely decoded, as otherwise the data may be + mistaken for component delimiters. The only exception is for + percent-encoded octets corresponding to characters in the unreserved + set, which can be decoded at any time. 
For example, the octet + corresponding to the tilde ("~") character is often encoded as "%7E" + by older URI processing implementations; the "%7E" can be replaced by + "~" without changing its interpretation. + + Because the percent ("%") character serves as the indicator for + percent-encoded octets, it must be percent-encoded as "%25" for that + octet to be used as data within a URI. Implementations must not + percent-encode or decode the same string more than once, as decoding + an already decoded string might lead to misinterpreting a percent + data octet as the beginning of a percent-encoding, or vice versa in + the case of percent-encoding an already percent-encoded string. + +2.5. Identifying Data + + URI characters provide identifying data for each of the URI + components, serving as an external interface for identification + between systems. Although the presence and nature of the URI + production interface is hidden from clients that use its URIs (and is + thus beyond the scope of the interoperability requirements defined by + this specification), it is a frequent source of confusion and errors + in the interpretation of URI character issues. Implementers have to + be aware that there are multiple character encodings involved in the + + + +Berners-Lee, et al. Standards Track [Page 14] + +RFC 3986 URI Generic Syntax January 2005 + + + production and transmission of URIs: local name and data encoding, + public interface encoding, URI character encoding, data format + encoding, and protocol encoding. + + Local names, such as file system names, are stored with a local + character encoding. URI producing applications (e.g., origin + servers) will typically use the local encoding as the basis for + producing meaningful names. 
The URI producer will transform the + local encoding to one that is suitable for a public interface and + then transform the public interface encoding into the restricted set + of URI characters (reserved, unreserved, and percent-encodings). + Those characters are, in turn, encoded as octets to be used as a + reference within a data format (e.g., a document charset), and such + data formats are often subsequently encoded for transmission over + Internet protocols. + + For most systems, an unreserved character appearing within a URI + component is interpreted as representing the data octet corresponding + to that character's encoding in US-ASCII. Consumers of URIs assume + that the letter "X" corresponds to the octet "01011000", and even + when that assumption is incorrect, there is no harm in making it. A + system that internally provides identifiers in the form of a + different character encoding, such as EBCDIC, will generally perform + character translation of textual identifiers to UTF-8 [STD63] (or + some other superset of the US-ASCII character encoding) at an + internal interface, thereby providing more meaningful identifiers + than those resulting from simply percent-encoding the original + octets. + + For example, consider an information service that provides data, + stored locally using an EBCDIC-based file system, to clients on the + Internet through an HTTP server. When an author creates a file with + the name "Laguna Beach" on that file system, the "http" URI + corresponding to that resource is expected to contain the meaningful + string "Laguna%20Beach". If, however, that server produces URIs by + using an overly simplistic raw octet mapping, then the result would + be a URI containing "%D3%81%87%A4%95%81@%C2%85%81%83%88". An + internal transcoding interface fixes this problem by transcoding the + local name to a superset of US-ASCII prior to producing the URI. 
+ Naturally, proper interpretation of an incoming URI on such an + interface requires that percent-encoded octets be decoded (e.g., + "%20" to SP) before the reverse transcoding is applied to obtain the + local name. + + In some cases, the internal interface between a URI component and the + identifying data that it has been crafted to represent is much less + direct than a character encoding translation. For example, portions + of a URI might reflect a query on non-ASCII data, or numeric + + + +Berners-Lee, et al. Standards Track [Page 15] + +RFC 3986 URI Generic Syntax January 2005 + + + coordinates on a map. Likewise, a URI scheme may define components + with additional encoding requirements that are applied prior to + forming the component and producing the URI. + + When a new URI scheme defines a component that represents textual + data consisting of characters from the Universal Character Set [UCS], + the data should first be encoded as octets according to the UTF-8 + character encoding [STD63]; then only those octets that do not + correspond to characters in the unreserved set should be percent- + encoded. For example, the character A would be represented as "A", + the character LATIN CAPITAL LETTER A WITH GRAVE would be represented + as "%C3%80", and the character KATAKANA LETTER A would be represented + as "%E3%82%A2". + +3. Syntax Components + + The generic URI syntax consists of a hierarchical sequence of + components referred to as the scheme, authority, path, query, and + fragment. + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + The scheme and path components are required, though the path may be + empty (no characters). When authority is present, the path must + either be empty or begin with a slash ("/") character. When + authority is not present, the path cannot begin with two slash + characters ("//"). 
These restrictions result in five different ABNF + rules for a path (Section 3.3), only one of which will match any + given URI reference. + + The following are two example URIs and their component parts: + + foo://example.com:8042/over/there?name=ferret#nose + \_/ \______________/\_________/ \_________/ \__/ + | | | | | + scheme authority path query fragment + | _____________________|__ + / \ / \ + urn:example:animal:ferret:nose + + + + + + + +Berners-Lee, et al. Standards Track [Page 16] + +RFC 3986 URI Generic Syntax January 2005 + + +3.1. Scheme + + Each URI begins with a scheme name that refers to a specification for + assigning identifiers within that scheme. As such, the URI syntax is + a federated and extensible naming system wherein each scheme's + specification may further restrict the syntax and semantics of + identifiers using that scheme. + + Scheme names consist of a sequence of characters beginning with a + letter and followed by any combination of letters, digits, plus + ("+"), period ("."), or hyphen ("-"). Although schemes are case- + insensitive, the canonical form is lowercase and documents that + specify schemes must do so with lowercase letters. An implementation + should accept uppercase letters as equivalent to lowercase in scheme + names (e.g., allow "HTTP" as well as "http") for the sake of + robustness but should only produce lowercase scheme names for + consistency. + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + Individual schemes are not specified by this document. The process + for registration of new URI schemes is defined separately by [BCP35]. + The scheme registry maintains the mapping between scheme names and + their specifications. Advice for designers of new URI schemes can be + found in [RFC2718]. URI scheme specifications must define their own + syntax so that all strings matching their scheme-specific syntax will + also match the grammar, as described in Section 4.3. 
+ + When presented with a URI that violates one or more scheme-specific + restrictions, the scheme-specific resolution process should flag the + reference as an error rather than ignore the unused parts; doing so + reduces the number of equivalent URIs and helps detect abuses of the + generic syntax, which might indicate that the URI has been + constructed to mislead the user (Section 7.6). + +3.2. Authority + + Many URI schemes include a hierarchical element for a naming + authority so that governance of the name space defined by the + remainder of the URI is delegated to that authority (which may, in + turn, delegate it further). The generic syntax provides a common + means for distinguishing an authority based on a registered name or + server address, along with optional port and user information. + + The authority component is preceded by a double slash ("//") and is + terminated by the next slash ("/"), question mark ("?"), or number + sign ("#") character, or by the end of the URI. + + + + +Berners-Lee, et al. Standards Track [Page 17] + +RFC 3986 URI Generic Syntax January 2005 + + + authority = [ userinfo "@" ] host [ ":" port ] + + URI producers and normalizers should omit the ":" delimiter that + separates host from port if the port component is empty. Some + schemes do not allow the userinfo and/or port subcomponents. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. Non- + validating parsers (those that merely separate a URI reference into + its major components) will often ignore the subcomponent structure of + authority, treating it as an opaque string from the double-slash to + the first terminating delimiter, until such time as the URI is + dereferenced. + +3.2.1. User Information + + The userinfo subcomponent may consist of a user name and, optionally, + scheme-specific information about how to gain authorization to access + the resource. 
The user information, if present, is followed by a + commercial at-sign ("@") that delimits it from the host. + + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + Use of the format "user:password" in the userinfo field is + deprecated. Applications should not render as clear text any data + after the first colon (":") character found within a userinfo + subcomponent unless the data after the colon is the empty string + (indicating no password). Applications may choose to ignore or + reject such data when it is received as part of a reference and + should reject the storage of such data in unencrypted form. The + passing of authentication information in clear text has proven to be + a security risk in almost every case where it has been used. + + Applications that render a URI for the sake of user feedback, such as + in graphical hypertext browsing, should render userinfo in a way that + is distinguished from the rest of a URI, when feasible. Such + rendering will assist the user in cases where the userinfo has been + misleadingly crafted to look like a trusted domain name + (Section 7.6). + +3.2.2. Host + + The host subcomponent of authority is identified by an IP literal + encapsulated within square brackets, an IPv4 address in dotted- + decimal form, or a registered name. The host subcomponent is case- + insensitive. The presence of a host subcomponent within a URI does + not imply that the scheme requires access to the given host on the + Internet. In many cases, the host syntax is used only for the sake + + + +Berners-Lee, et al. Standards Track [Page 18] + +RFC 3986 URI Generic Syntax January 2005 + + + of reusing the existing registration process created and deployed for + DNS, thus obtaining a globally unique name without the cost of + deploying another registry. However, such use comes with its own + costs: domain name ownership may change over time for reasons not + anticipated by the URI producer. 
In other cases, the data within the + host component identifies a registered name that has nothing to do + with an Internet host. We use the name "host" for the ABNF rule + because that is its most common purpose, not its only purpose. + + host = IP-literal / IPv4address / reg-name + + The syntax rule for host is ambiguous because it does not completely + distinguish between an IPv4address and a reg-name. In order to + disambiguate the syntax, we apply the "first-match-wins" algorithm: + If host matches the rule for IPv4address, then it should be + considered an IPv4 address literal and not a reg-name. Although host + is case-insensitive, producers and normalizers should use lowercase + for registered names and hexadecimal addresses for the sake of + uniformity, while only using uppercase letters for percent-encodings. + + A host identified by an Internet Protocol literal address, version 6 + [RFC3513] or later, is distinguished by enclosing the IP literal + within square brackets ("[" and "]"). This is the only place where + square bracket characters are allowed in the URI syntax. In + anticipation of future, as-yet-undefined IP literal address formats, + an implementation may use an optional version flag to indicate such a + format explicitly rather than rely on heuristic determination. + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + The version flag does not indicate the IP version; rather, it + indicates future versions of the literal format. As such, + implementations must not provide the version flag for the existing + IPv4 and IPv6 literal address forms described below. If a URI + containing an IP-literal that starts with "v" (case-insensitive), + indicating that the version flag is present, is dereferenced by an + application that does not know the meaning of that version flag, then + the application should return an appropriate error for "address + mechanism not supported". 
+ + A host identified by an IPv6 literal address is represented inside + the square brackets without a preceding version flag. The ABNF + provided here is a translation of the text definition of an IPv6 + literal address provided in [RFC3513]. This syntax does not support + IPv6 scoped addressing zone identifiers. + + + + +Berners-Lee, et al. Standards Track [Page 19] + +RFC 3986 URI Generic Syntax January 2005 + + + A 128-bit IPv6 address is divided into eight 16-bit pieces. Each + piece is represented numerically in case-insensitive hexadecimal, + using one to four hexadecimal digits (leading zeroes are permitted). + The eight encoded pieces are given most-significant first, separated + by colon characters. Optionally, the least-significant two pieces + may instead be represented in IPv4 address textual format. A + sequence of one or more consecutive zero-valued 16-bit pieces within + the address may be elided, omitting all their digits and leaving + exactly two consecutive colons in their place to mark the elision. + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + ls32 = ( h16 ":" h16 ) / IPv4address + ; least-significant 32 bits of address + + h16 = 1*4HEXDIG + ; 16 bits of address represented in hexadecimal + + A host identified by an IPv4 literal address is represented in + dotted-decimal notation (a sequence of four decimal numbers in the + range 0 to 255, separated by "."), as described in [RFC1123] by + reference to [RFC0952]. Note that other forms of dotted notation may + be interpreted on some platforms, as described in Section 7.4, but + only the dotted-decimal form of four octets is allowed by this + grammar. + + IPv4address = dec-octet "." dec-octet "." 
dec-octet "." dec-octet + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + A host identified by a registered name is a sequence of characters + usually intended for lookup within a locally defined host or service + name registry, though the URI's scheme-specific semantics may require + that a specific registry (or fixed name table) be used instead. The + most common name registry mechanism is the Domain Name System (DNS). + A registered name intended for lookup in the DNS uses the syntax + + + +Berners-Lee, et al. Standards Track [Page 20] + +RFC 3986 URI Generic Syntax January 2005 + + + defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123]. + Such a name consists of a sequence of domain labels separated by ".", + each domain label starting and ending with an alphanumeric character + and possibly also containing "-" characters. The rightmost domain + label of a fully qualified domain name in DNS may be followed by a + single "." and should be if it is necessary to distinguish between + the complete domain name and some local domain. + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + If the URI scheme defines a default for host, then that default + applies when the host subcomponent is undefined or when the + registered name is empty (zero length). For example, the "file" URI + scheme is defined so that no authority, an empty host, and + "localhost" all mean the end-user's machine, whereas the "http" + scheme considers a missing authority or empty host invalid. + + This specification does not mandate a particular registered name + lookup technology and therefore does not restrict the syntax of reg- + name beyond what is necessary for interoperability. 
Instead, it + delegates the issue of registered name syntax conformance to the + operating system of each application performing URI resolution, and + that operating system decides what it will allow for the purpose of + host identification. A URI resolution implementation might use DNS, + host tables, yellow pages, NetInfo, WINS, or any other system for + lookup of registered names. However, a globally scoped naming + system, such as DNS fully qualified domain names, is necessary for + URIs intended to have global scope. URI producers should use names + that conform to the DNS syntax, even when use of DNS is not + immediately apparent, and should limit these names to no more than + 255 characters in length. + + The reg-name syntax allows percent-encoded octets in order to + represent non-ASCII registered names in a uniform way that is + independent of the underlying name resolution technology. Non-ASCII + characters must first be encoded according to UTF-8 [STD63], and then + each octet of the corresponding UTF-8 sequence must be percent- + encoded to be represented as URI characters. URI producing + applications must not use percent-encoding in host unless it is used + to represent a UTF-8 character sequence. When a non-ASCII registered + name represents an internationalized domain name intended for + resolution via the DNS, the name must be transformed to the IDNA + encoding [RFC3490] prior to name lookup. URI producers should + provide these registered names in the IDNA encoding, rather than a + percent-encoding, if they wish to maximize interoperability with + legacy URI resolvers. + + + + + +Berners-Lee, et al. Standards Track [Page 21] + +RFC 3986 URI Generic Syntax January 2005 + + +3.2.3. Port + + The port subcomponent of authority is designated by an optional port + number in decimal following the host and delimited from it by a + single colon (":") character. + + port = *DIGIT + + A scheme may define a default port. 
For example, the "http" scheme + defines a default port of "80", corresponding to its reserved TCP + port number. The type of port designated by the port number (e.g., + TCP, UDP, SCTP) is defined by the URI scheme. URI producers and + normalizers should omit the port component and its ":" delimiter if + port is empty or if its value would be the same as that of the + scheme's default. + +3.3. Path + + The path component contains data, usually organized in hierarchical + form, that, along with data in the non-hierarchical query component + (Section 3.4), serves to identify a resource within the scope of the + URI's scheme and naming authority (if any). The path is terminated + by the first question mark ("?") or number sign ("#") character, or + by the end of the URI. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. If a URI + does not contain an authority component, then the path cannot begin + with two slash characters ("//"). In addition, a URI reference + (Section 4.1) may be a relative-path reference, in which case the + first path segment cannot contain a colon (":") character. The ABNF + requires five separate rules to disambiguate these cases, only one of + which will match the path substring within a given URI reference. We + use the generic term "path component" to describe the URI substring + matched by the parser to one of these rules. + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + + + +Berners-Lee, et al. 
Standards Track [Page 22] + +RFC 3986 URI Generic Syntax January 2005 + + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + A path consists of a sequence of path segments separated by a slash + ("/") character. A path is always defined for a URI, though the + defined path may be empty (zero length). Use of the slash character + to indicate hierarchy is only required when a URI will be used as the + context for relative references. For example, the URI + <mailto:fred@example.com> has a path of "fred@example.com", whereas + the URI <foo://info.example.com?fred> has an empty path. + + The path segments "." and "..", also known as dot-segments, are + defined for relative reference within the path name hierarchy. They + are intended for use at the beginning of a relative-path reference + (Section 4.2) to indicate relative position within the hierarchical + tree of names. This is similar to their role within some operating + systems' file directory structures to indicate the current directory + and parent directory, respectively. However, unlike in a file + system, these dot-segments are only interpreted within the URI path + hierarchy and are removed as part of the resolution process (Section + 5.2). + + Aside from dot-segments in hierarchical paths, a path segment is + considered opaque by the generic syntax. URI producing applications + often use the reserved characters allowed in a segment to delimit + scheme-specific or dereference-handler-specific subcomponents. For + example, the semicolon (";") and equals ("=") reserved characters are + often used to delimit parameters and parameter values applicable to + that segment. The comma (",") reserved character is often used for + similar purposes. 
For example, one URI producer might use a segment + such as "name;v=1.1" to indicate a reference to version 1.1 of + "name", whereas another might use a segment such as "name,1.1" to + indicate the same. Parameter types may be defined by scheme-specific + semantics, but in most cases the syntax of a parameter is specific to + the implementation of the URI's dereferencing algorithm. + +3.4. Query + + The query component contains non-hierarchical data that, along with + data in the path component (Section 3.3), serves to identify a + resource within the scope of the URI's scheme and naming authority + (if any). The query component is indicated by the first question + mark ("?") character and terminated by a number sign ("#") character + or by the end of the URI. + + + +Berners-Lee, et al. Standards Track [Page 23] + +RFC 3986 URI Generic Syntax January 2005 + + + query = *( pchar / "/" / "?" ) + + The characters slash ("/") and question mark ("?") may represent data + within the query component. Beware that some older, erroneous + implementations may not handle such data correctly when it is used as + the base URI for relative references (Section 5.1), apparently + because they fail to distinguish query data from path data when + looking for hierarchical separators. However, as query components + are often used to carry identifying information in the form of + "key=value" pairs and one frequently used value is a reference to + another URI, it is sometimes better for usability to avoid percent- + encoding those characters. + +3.5. Fragment + + The fragment identifier component of a URI allows indirect + identification of a secondary resource by reference to a primary + resource and additional identifying information. The identified + secondary resource may be some portion or subset of the primary + resource, some view on representations of the primary resource, or + some other resource defined or described by those representations. 
A + fragment identifier component is indicated by the presence of a + number sign ("#") character and terminated by the end of the URI. + + fragment = *( pchar / "/" / "?" ) + + The semantics of a fragment identifier are defined by the set of + representations that might result from a retrieval action on the + primary resource. The fragment's format and resolution is therefore + dependent on the media type [RFC2046] of a potentially retrieved + representation, even though such a retrieval is only performed if the + URI is dereferenced. If no such representation exists, then the + semantics of the fragment are considered unknown and are effectively + unconstrained. Fragment identifier semantics are independent of the + URI scheme and thus cannot be redefined by scheme specifications. + + Individual media types may define their own restrictions on or + structures within the fragment identifier syntax for specifying + different types of subsets, views, or external references that are + identifiable as secondary resources by that media type. If the + primary resource has multiple representations, as is often the case + for resources whose representation is selected based on attributes of + the retrieval request (a.k.a., content negotiation), then whatever is + identified by the fragment should be consistent across all of those + representations. Each representation should either define the + fragment so that it corresponds to the same secondary resource, + regardless of how it is represented, or should leave the fragment + undefined (i.e., not found). + + + +Berners-Lee, et al. Standards Track [Page 24] + +RFC 3986 URI Generic Syntax January 2005 + + + As with any URI, use of a fragment identifier component does not + imply that a retrieval action will take place. A URI with a fragment + identifier may be used to refer to the secondary resource without any + implication that the primary resource is accessible or will ever be + accessed. 
+ + Fragment identifiers have a special role in information retrieval + systems as the primary form of client-side indirect referencing, + allowing an author to specifically identify aspects of an existing + resource that are only indirectly provided by the resource owner. As + such, the fragment identifier is not used in the scheme-specific + processing of a URI; instead, the fragment identifier is separated + from the rest of the URI prior to a dereference, and thus the + identifying information within the fragment itself is dereferenced + solely by the user agent, regardless of the URI scheme. Although + this separate handling is often perceived to be a loss of + information, particularly for accurate redirection of references as + resources move over time, it also serves to prevent information + providers from denying reference authors the right to refer to + information within a resource selectively. Indirect referencing also + provides additional flexibility and extensibility to systems that use + URIs, as new media types are easier to define and deploy than new + schemes of identification. + + The characters slash ("/") and question mark ("?") are allowed to + represent data within the fragment identifier. Beware that some + older, erroneous implementations may not handle this data correctly + when it is used as the base URI for relative references (Section + 5.1). + +4. Usage + + When applications make reference to a URI, they do not always use the + full form of reference defined by the "URI" syntax rule. To save + space and take advantage of hierarchical locality, many Internet + protocol elements and media type formats allow an abbreviation of a + URI, whereas others restrict the syntax to a particular form of URI. + We define the most common forms of reference syntax in this + specification because they impact and depend upon the design of the + generic syntax, requiring a uniform parsing algorithm in order to be + interpreted consistently. + +4.1. 
URI Reference + + URI-reference is used to denote the most common usage of a resource + identifier. + + URI-reference = URI / relative-ref + + + +Berners-Lee, et al. Standards Track [Page 25] + +RFC 3986 URI Generic Syntax January 2005 + + + A URI-reference is either a URI or a relative reference. If the + URI-reference's prefix does not match the syntax of a scheme followed + by its colon separator, then the URI-reference is a relative + reference. + + A URI-reference is typically parsed first into the five URI + components, in order to determine what components are present and + whether the reference is relative. Then, each component is parsed + for its subparts and their validation. The ABNF of URI-reference, + along with the "first-match-wins" disambiguation rule, is sufficient + to define a validating parser for the generic syntax. Readers + familiar with regular expressions should see Appendix B for an + example of a non-validating URI-reference parser that will take any + given string and extract the URI components. + +4.2. Relative Reference + + A relative reference takes advantage of the hierarchical syntax + (Section 1.2.3) to express a URI reference relative to the name space + of another hierarchical URI. + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + The URI referred to by a relative reference, also known as the target + URI, is obtained by applying the reference resolution algorithm of + Section 5. + + A relative reference that begins with two slash characters is termed + a network-path reference; such references are rarely used. A + relative reference that begins with a single slash character is + termed an absolute-path reference. A relative reference that does + not begin with a slash character is termed a relative-path reference. 
+ + A path segment that contains a colon character (e.g., "this:that") + cannot be used as the first segment of a relative-path reference, as + it would be mistaken for a scheme name. Such a segment must be + preceded by a dot-segment (e.g., "./this:that") to make a relative- + path reference. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 26] + +RFC 3986 URI Generic Syntax January 2005 + + +4.3. Absolute URI + + Some protocol elements allow only the absolute form of a URI without + a fragment identifier. For example, defining a base URI for later + use by relative references calls for an absolute-URI syntax rule that + does not allow a fragment. + + absolute-URI = scheme ":" hier-part [ "?" query ] + + URI scheme specifications must define their own syntax so that all + strings matching their scheme-specific syntax will also match the + <absolute-URI> grammar. Scheme specifications will not define + fragment identifier syntax or usage, regardless of its applicability + to resources identifiable via that scheme, as fragment identification + is orthogonal to scheme definition. However, scheme specifications + are encouraged to include a wide range of examples, including + examples that show use of the scheme's URIs with fragment identifiers + when such usage is appropriate. + +4.4. Same-Document Reference + + When a URI reference refers to a URI that is, aside from its fragment + component (if any), identical to the base URI (Section 5.1), that + reference is called a "same-document" reference. The most frequent + examples of same-document references are relative references that are + empty or include only the number sign ("#") separator followed by a + fragment identifier. + + When a same-document reference is dereferenced for a retrieval + action, the target of that reference is defined to be within the same + entity (representation, document, or message) as the reference; + therefore, a dereference should not result in a new retrieval action. 
+ + Normalization of the base and target URIs prior to their comparison, + as described in Sections 6.2.2 and 6.2.3, is allowed but rarely + performed in practice. Normalization may increase the set of same- + document references, which may be of benefit to some caching + applications. As such, reference authors should not assume that a + slightly different, though equivalent, reference URI will (or will + not) be interpreted as a same-document reference by any given + application. + +4.5. Suffix Reference + + The URI syntax is designed for unambiguous reference to resources and + extensibility via the URI scheme. However, as URI identification and + usage have become commonplace, traditional media (television, radio, + newspapers, billboards, etc.) have increasingly used a suffix of the + + + +Berners-Lee, et al. Standards Track [Page 27] + +RFC 3986 URI Generic Syntax January 2005 + + + URI as a reference, consisting of only the authority and path + portions of the URI, such as + + www.w3.org/Addressing/ + + or simply a DNS registered name on its own. Such references are + primarily intended for human interpretation rather than for machines, + with the assumption that context-based heuristics are sufficient to + complete the URI (e.g., most registered names beginning with "www" + are likely to have a URI prefix of "http://"). Although there is no + standard set of heuristics for disambiguating a URI suffix, many + client implementations allow them to be entered by the user and + heuristically resolved. + + Although this practice of using suffix references is common, it + should be avoided whenever possible and should never be used in + situations where long-term references are expected. The heuristics + noted above will change over time, particularly when a new URI scheme + becomes popular, and are often incorrect when used out of context. + Furthermore, they can lead to security issues along the lines of + those described in [RFC1535]. 
+ + As a URI suffix has the same syntax as a relative-path reference, a + suffix reference cannot be used in contexts where a relative + reference is expected. As a result, suffix references are limited to + places where there is no defined base URI, such as dialog boxes and + off-line advertisements. + +5. Reference Resolution + + This section defines the process of resolving a URI reference within + a context that allows relative references so that the result is a + string matching the <URI> syntax rule of Section 3. + +5.1. Establishing a Base URI + + The term "relative" implies that a "base URI" exists against which + the relative reference is applied. Aside from fragment-only + references (Section 4.4), relative references are only usable when a + base URI is known. A base URI must be established by the parser + prior to parsing URI references that might be relative. A base URI + must conform to the <absolute-URI> syntax rule (Section 4.3). If the + base URI is obtained from a URI reference, then that reference must + be converted to absolute form and stripped of any fragment component + prior to its use as a base URI. + + + + + + +Berners-Lee, et al. Standards Track [Page 28] + +RFC 3986 URI Generic Syntax January 2005 + + + The base URI of a reference can be established in one of four ways, + discussed below in order of precedence. The order of precedence can + be thought of in terms of layers, where the innermost defined base + URI has the highest precedence. This can be visualized graphically + as follows: + + .----------------------------------------------------------. + | .----------------------------------------------------. | + | | .----------------------------------------------. | | + | | | .----------------------------------------. | | | + | | | | .----------------------------------. 
| | | | + | | | | | <relative-reference> | | | | | + | | | | `----------------------------------' | | | | + | | | | (5.1.1) Base URI embedded in content | | | | + | | | `----------------------------------------' | | | + | | | (5.1.2) Base URI of the encapsulating entity | | | + | | | (message, representation, or none) | | | + | | `----------------------------------------------' | | + | | (5.1.3) URI used to retrieve the entity | | + | `----------------------------------------------------' | + | (5.1.4) Default Base URI (application-dependent) | + `----------------------------------------------------------' + +5.1.1. Base URI Embedded in Content + + Within certain media types, a base URI for relative references can be + embedded within the content itself so that it can be readily obtained + by a parser. This can be useful for descriptive documents, such as + tables of contents, which may be transmitted to others through + protocols other than their usual retrieval context (e.g., email or + USENET news). + + It is beyond the scope of this specification to specify how, for each + media type, a base URI can be embedded. The appropriate syntax, when + available, is described by the data format specification associated + with each media type. + +5.1.2. Base URI from the Encapsulating Entity + + If no base URI is embedded, the base URI is defined by the + representation's retrieval context. For a document that is enclosed + within another entity, such as a message or archive, the retrieval + context is that entity. Thus, the default base URI of a + representation is the base URI of the entity in which the + representation is encapsulated. + + + + + + +Berners-Lee, et al. Standards Track [Page 29] + +RFC 3986 URI Generic Syntax January 2005 + + + A mechanism for embedding a base URI within MIME container types + (e.g., the message and multipart types) is defined by MHTML + [RFC2557]. 
Protocols that do not use the MIME message header syntax, + but that do allow some form of tagged metadata to be included within + messages, may define their own syntax for defining a base URI as part + of a message. + +5.1.3. Base URI from the Retrieval URI + + If no base URI is embedded and the representation is not encapsulated + within some other entity, then, if a URI was used to retrieve the + representation, that URI shall be considered the base URI. Note that + if the retrieval was the result of a redirected request, the last URI + used (i.e., the URI that resulted in the actual retrieval of the + representation) is the base URI. + +5.1.4. Default Base URI + + If none of the conditions described above apply, then the base URI is + defined by the context of the application. As this definition is + necessarily application-dependent, failing to define a base URI by + using one of the other methods may result in the same content being + interpreted differently by different types of applications. + + A sender of a representation containing relative references is + responsible for ensuring that a base URI for those references can be + established. Aside from fragment-only references, relative + references can only be used reliably in situations where the base URI + is well defined. + +5.2. Relative Resolution + + This section describes an algorithm for converting a URI reference + that might be relative to a given base URI into the parsed components + of the reference's target. The components can then be recomposed, as + described in Section 5.3, to form the target URI. This algorithm + provides definitive results that can be used to test the output of + other implementations. Applications may implement relative reference + resolution by using some other algorithm, provided that the results + match what would be given by this one. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 30] + +RFC 3986 URI Generic Syntax January 2005 + + +5.2.1. 
Pre-parse the Base URI + + The base URI (Base) is established according to the procedure of + Section 5.1 and parsed into the five main components described in + Section 3. Note that only the scheme component is required to be + present in a base URI; the other components may be empty or + undefined. A component is undefined if its associated delimiter does + not appear in the URI reference; the path component is never + undefined, though it may be empty. + + Normalization of the base URI, as described in Sections 6.2.2 and + 6.2.3, is optional. A URI reference must be transformed to its + target URI before it can be normalized. + +5.2.2. Transform References + + For each URI reference (R), the following pseudocode describes an + algorithm for transforming R into its target URI (T): + + -- The URI reference is parsed into the five URI components + -- + (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R); + + -- A non-strict parser may ignore a scheme in the reference + -- if it is identical to the base URI's scheme. + -- + if ((not strict) and (R.scheme == Base.scheme)) then + undefine(R.scheme); + endif; + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. 
Standards Track [Page 31] + +RFC 3986 URI Generic Syntax January 2005 + + + if defined(R.scheme) then + T.scheme = R.scheme; + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if defined(R.authority) then + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if (R.path == "") then + T.path = Base.path; + if defined(R.query) then + T.query = R.query; + else + T.query = Base.query; + endif; + else + if (R.path starts-with "/") then + T.path = remove_dot_segments(R.path); + else + T.path = merge(Base.path, R.path); + T.path = remove_dot_segments(T.path); + endif; + T.query = R.query; + endif; + T.authority = Base.authority; + endif; + T.scheme = Base.scheme; + endif; + + T.fragment = R.fragment; + +5.2.3. Merge Paths + + The pseudocode above refers to a "merge" routine for merging a + relative-path reference with the path of the base URI. This is + accomplished as follows: + + o If the base URI has a defined authority component and an empty + path, then return a string consisting of "/" concatenated with the + reference's path; otherwise, + + + + + + + + +Berners-Lee, et al. Standards Track [Page 32] + +RFC 3986 URI Generic Syntax January 2005 + + + o return a string consisting of the reference's path component + appended to all but the last segment of the base URI's path (i.e., + excluding any characters after the right-most "/" in the base URI + path, or excluding the entire base URI path if it does not contain + any "/" characters). + +5.2.4. Remove Dot Segments + + The pseudocode also refers to a "remove_dot_segments" routine for + interpreting and removing the special "." and ".." complete path + segments from a referenced path. This is done after the path is + extracted from a reference, whether or not the path was relative, in + order to remove any invalid or extraneous dot-segments prior to + forming the target URI. 
Although there are many ways to accomplish + this removal process, we describe a simple method using two string + buffers. + + 1. The input buffer is initialized with the now-appended path + components and the output buffer is initialized to the empty + string. + + 2. While the input buffer is not empty, loop as follows: + + A. If the input buffer begins with a prefix of "../" or "./", + then remove that prefix from the input buffer; otherwise, + + B. if the input buffer begins with a prefix of "/./" or "/.", + where "." is a complete path segment, then replace that + prefix with "/" in the input buffer; otherwise, + + C. if the input buffer begins with a prefix of "/../" or "/..", + where ".." is a complete path segment, then replace that + prefix with "/" in the input buffer and remove the last + segment and its preceding "/" (if any) from the output + buffer; otherwise, + + D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, + + E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if + any) and any subsequent characters up to, but not including, + the next "/" character or the end of the input buffer. + + 3. Finally, the output buffer is returned as the result of + remove_dot_segments. + + + + + +Berners-Lee, et al. Standards Track [Page 33] + +RFC 3986 URI Generic Syntax January 2005 + + + Note that dot-segments are intended for use in URI references to + express an identifier relative to the hierarchy of names in the base + URI. The remove_dot_segments algorithm respects that hierarchy by + removing extra dot-segments rather than treat them as an error or + leaving them to be misinterpreted by dereference implementations. + + The following illustrates how the above steps are applied for two + examples of merged paths, showing the state of the two buffers after + each step. 
+ + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : /a/b/c/./../../g + 2E: /a /b/c/./../../g + 2E: /a/b /c/./../../g + 2E: /a/b/c /./../../g + 2B: /a/b/c /../../g + 2C: /a/b /../g + 2C: /a /g + 2E: /a/g + + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : mid/content=5/../6 + 2E: mid /content=5/../6 + 2E: mid/content=5 /../6 + 2C: mid /6 + 2E: mid/6 + + Some applications may find it more efficient to implement the + remove_dot_segments algorithm by using two segment stacks rather than + strings. + + Note: Beware that some older, erroneous implementations will fail + to separate a reference's query component from its path component + prior to merging the base and reference paths, resulting in an + interoperability failure if the query component contains the + strings "/../" or "/./". + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 34] + +RFC 3986 URI Generic Syntax January 2005 + + +5.3. Component Recomposition + + Parsed URI components can be recomposed to obtain the corresponding + URI reference string. Using pseudocode, this would be: + + result = "" + + if defined(scheme) then + append scheme to result; + append ":" to result; + endif; + + if defined(authority) then + append "//" to result; + append authority to result; + endif; + + append path to result; + + if defined(query) then + append "?" to result; + append query to result; + endif; + + if defined(fragment) then + append "#" to result; + append fragment to result; + endif; + + return result; + + Note that we are careful to preserve the distinction between a + component that is undefined, meaning that its separator was not + present in the reference, and a component that is empty, meaning that + the separator was present and was immediately followed by the next + component separator or the end of the reference. + +5.4. Reference Resolution Examples + + Within a representation with a well defined base URI of + + http://a/b/c/d;p?q + + a relative reference is transformed to its target URI as follows. 
+ + + + + + + +Berners-Lee, et al. Standards Track [Page 35] + +RFC 3986 URI Generic Syntax January 2005 + + +5.4.1. Normal Examples + + "g:h" = "g:h" + "g" = "http://a/b/c/g" + "./g" = "http://a/b/c/g" + "g/" = "http://a/b/c/g/" + "/g" = "http://a/g" + "//g" = "http://g" + "?y" = "http://a/b/c/d;p?y" + "g?y" = "http://a/b/c/g?y" + "#s" = "http://a/b/c/d;p?q#s" + "g#s" = "http://a/b/c/g#s" + "g?y#s" = "http://a/b/c/g?y#s" + ";x" = "http://a/b/c/;x" + "g;x" = "http://a/b/c/g;x" + "g;x?y#s" = "http://a/b/c/g;x?y#s" + "" = "http://a/b/c/d;p?q" + "." = "http://a/b/c/" + "./" = "http://a/b/c/" + ".." = "http://a/b/" + "../" = "http://a/b/" + "../g" = "http://a/b/g" + "../.." = "http://a/" + "../../" = "http://a/" + "../../g" = "http://a/g" + +5.4.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur in + normal practice, all URI parsers should be capable of resolving them + consistently. Each example uses the same base as that above. + + Parsers must be careful in handling cases where there are more ".." + segments in a relative-path reference than there are hierarchical + levels in the base URI's path. Note that the ".." syntax cannot be + used to change the authority component of a URI. + + "../../../g" = "http://a/g" + "../../../../g" = "http://a/g" + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 36] + +RFC 3986 URI Generic Syntax January 2005 + + + Similarly, parsers must remove the dot-segments "." and ".." when + they are complete components of a path, but not when they are only + part of a segment. + + "/./g" = "http://a/g" + "/../g" = "http://a/g" + "g." = "http://a/b/c/g." + ".g" = "http://a/b/c/.g" + "g.." = "http://a/b/c/g.." + "..g" = "http://a/b/c/..g" + + Less likely are cases where the relative reference uses unnecessary + or nonsensical forms of the "." and ".." complete path segments. + + "./../g" = "http://a/b/g" + "./g/." 
= "http://a/b/c/g/" + "g/./h" = "http://a/b/c/g/h" + "g/../h" = "http://a/b/c/h" + "g;x=1/./y" = "http://a/b/c/g;x=1/y" + "g;x=1/../y" = "http://a/b/c/y" + + Some applications fail to separate the reference's query and/or + fragment components from the path component before merging it with + the base path and removing dot-segments. This error is rarely + noticed, as typical usage of a fragment never includes the hierarchy + ("/") character and the query component is not normally used within + relative references. + + "g?y/./x" = "http://a/b/c/g?y/./x" + "g?y/../x" = "http://a/b/c/g?y/../x" + "g#s/./x" = "http://a/b/c/g#s/./x" + "g#s/../x" = "http://a/b/c/g#s/../x" + + Some parsers allow the scheme name to be present in a relative + reference if it is the same as the base URI scheme. This is + considered to be a loophole in prior specifications of partial URI + [RFC1630]. Its use should be avoided but is allowed for backward + compatibility. + + "http:g" = "http:g" ; for strict parsers + / "http://a/b/c/g" ; for backward compatibility + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 37] + +RFC 3986 URI Generic Syntax January 2005 + + +6. Normalization and Comparison + + One of the most common operations on URIs is simple comparison: + determining whether two URIs are equivalent without using the URIs to + access their respective resource(s). A comparison is performed every + time a response cache is accessed, a browser checks its history to + color a link, or an XML parser processes tags within a namespace. + Extensive normalization prior to comparison of URIs is often used by + spiders and indexing engines to prune a search space or to reduce + duplication of request actions and response storage. + + URI comparison is performed for some particular purpose. 
Protocols + or implementations that compare URIs for different purposes will + often be subject to differing design trade-offs in regards to how + much effort should be spent in reducing aliased identifiers. This + section describes various methods that may be used to compare URIs, + the trade-offs between them, and the types of applications that might + use them. + +6.1. Equivalence + + Because URIs exist to identify resources, presumably they should be + considered equivalent when they identify the same resource. However, + this definition of equivalence is not of much practical use, as there + is no way for an implementation to compare two resources unless it + has full knowledge or control of them. For this reason, + determination of equivalence or difference of URIs is based on string + comparison, perhaps augmented by reference to additional rules + provided by URI scheme definitions. We use the terms "different" and + "equivalent" to describe the possible outcomes of such comparisons, + but there are many application-dependent versions of equivalence. + + Even though it is possible to determine that two URIs are equivalent, + URI comparison is not sufficient to determine whether two URIs + identify different resources. For example, an owner of two different + domain names could decide to serve the same resource from both, + resulting in two different URIs. Therefore, comparison methods are + designed to minimize false negatives while strictly avoiding false + positives. + + In testing for equivalence, applications should not directly compare + relative references; the references should be converted to their + respective target URIs before comparison. When URIs are compared to + select (or avoid) a network action, such as retrieval of a + representation, fragment components (if any) should be excluded from + the comparison. + + + + + +Berners-Lee, et al. Standards Track [Page 38] + +RFC 3986 URI Generic Syntax January 2005 + + +6.2. 
Comparison Ladder + + A variety of methods are used in practice to test URI equivalence. + These methods fall into a range, distinguished by the amount of + processing required and the degree to which the probability of false + negatives is reduced. As noted above, false negatives cannot be + eliminated. In practice, their probability can be reduced, but this + reduction requires more processing and is not cost-effective for all + applications. + + If this range of comparison practices is considered as a ladder, the + following discussion will climb the ladder, starting with practices + that are cheap but have a relatively higher chance of producing false + negatives, and proceeding to those that have higher computational + cost and lower risk of false negatives. + +6.2.1. Simple String Comparison + + If two URIs, when considered as character strings, are identical, + then it is safe to conclude that they are equivalent. This type of + equivalence test has very low computational cost and is in wide use + in a variety of applications, particularly in the domain of parsing. + + Testing strings for equivalence requires some basic precautions. + This procedure is often referred to as "bit-for-bit" or + "byte-for-byte" comparison, which is potentially misleading. Testing + strings for equality is normally based on pair comparison of the + characters that make up the strings, starting from the first and + proceeding until both strings are exhausted and all characters are + found to be equal, until a pair of characters compares unequal, or + until one of the strings is exhausted before the other. + + This character comparison requires that each pair of characters be + put in comparable form. For example, should one URI be stored in a + byte array in EBCDIC encoding and the second in a Java String object + (UTF-16), bit-for-bit comparisons applied naively will produce + errors. 
It is better to speak of equality on a character-for- + character basis rather than on a byte-for-byte or bit-for-bit basis. + In practical terms, character-by-character comparisons should be done + codepoint-by-codepoint after conversion to a common character + encoding. + + False negatives are caused by the production and use of URI aliases. + Unnecessary aliases can be reduced, regardless of the comparison + method, by consistently providing URI references in an already- + normalized form (i.e., a form identical to what would be produced + after normalization is applied, as described below). + + + + +Berners-Lee, et al. Standards Track [Page 39] + +RFC 3986 URI Generic Syntax January 2005 + + + Protocols and data formats often limit some URI comparisons to simple + string comparison, based on the theory that people and + implementations will, in their own best interest, be consistent in + providing URI references, or at least consistent enough to negate any + efficiency that might be obtained from further normalization. + +6.2.2. Syntax-Based Normalization + + Implementations may use logic based on the definitions provided by + this specification to reduce the probability of false negatives. + This processing is moderately higher in cost than character-for- + character string comparison. For example, an application using this + approach could reasonably consider the following two URIs equivalent: + + example://a/b/c/%7Bfoo%7D + eXAMPLE://a/./b/../b/%63/%7bfoo%7d + + Web user agents, such as browsers, typically apply this type of URI + normalization when determining whether a cached response is + available. Syntax-based normalization includes such techniques as + case normalization, percent-encoding normalization, and removal of + dot-segments. + +6.2.2.1. 
Case Normalization + + For all URIs, the hexadecimal digits within a percent-encoding + triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore + should be normalized to use uppercase letters for the digits A-F. + + When a URI uses components of the generic syntax, the component + syntax equivalence rules always apply; namely, that the scheme and + host are case-insensitive and therefore should be normalized to + lowercase. For example, the URI <HTTP://www.EXAMPLE.com/> is + equivalent to <http://www.example.com/>. The other generic syntax + components are assumed to be case-sensitive unless specifically + defined otherwise by the scheme (see Section 6.2.3). + +6.2.2.2. Percent-Encoding Normalization + + The percent-encoding mechanism (Section 2.1) is a frequent source of + variance among otherwise identical URIs. In addition to the case + normalization issue noted above, some URI producers percent-encode + octets that do not require percent-encoding, resulting in URIs that + are equivalent to their non-encoded counterparts. These URIs should + be normalized by decoding any percent-encoded octet that corresponds + to an unreserved character, as described in Section 2.3. + + + + + +Berners-Lee, et al. Standards Track [Page 40] + +RFC 3986 URI Generic Syntax January 2005 + + +6.2.2.3. Path Segment Normalization + + The complete path segments "." and ".." are intended only for use + within relative references (Section 4.1) and are removed as part of + the reference resolution process (Section 5.2). However, some + deployed implementations incorrectly assume that reference resolution + is not necessary when the reference is already a URI and thus fail to + remove dot-segments when they occur in non-relative paths. URI + normalizers should remove dot-segments by applying the + remove_dot_segments algorithm to the path, as described in + Section 5.2.4. + +6.2.3.
Scheme-Based Normalization + + The syntax and semantics of URIs vary from scheme to scheme, as + described by the defining specification for each scheme. + Implementations may use scheme-specific rules, at further processing + cost, to reduce the probability of false negatives. For example, + because the "http" scheme makes use of an authority component, has a + default port of "80", and defines an empty path to be equivalent to + "/", the following four URIs are equivalent: + + http://example.com + http://example.com/ + http://example.com:/ + http://example.com:80/ + + In general, a URI that uses the generic syntax for authority with an + empty path should be normalized to a path of "/". Likewise, an + explicit ":port", for which the port is empty or the default for the + scheme, is equivalent to one where the port and its ":" delimiter are + elided and thus should be removed by scheme-based normalization. For + example, the second URI above is the normal form for the "http" + scheme. + + Another case where normalization varies by scheme is in the handling + of an empty authority component or empty host subcomponent. For many + scheme specifications, an empty authority or host is considered an + error; for others, it is considered equivalent to "localhost" or the + end-user's host. When a scheme defines a default for authority and a + URI reference to that default is desired, the reference should be + normalized to an empty authority for the sake of uniformity, brevity, + and internationalization. If, however, either the userinfo or port + subcomponents are non-empty, then the host should be given explicitly + even if it matches the default. + + Normalization should not remove delimiters when their associated + component is empty unless licensed to do so by the scheme + + + +Berners-Lee, et al. Standards Track [Page 41] + +RFC 3986 URI Generic Syntax January 2005 + + + specification. For example, the URI "http://example.com/?" 
cannot be + assumed to be equivalent to any of the examples above. Likewise, the + presence or absence of delimiters within a userinfo subcomponent is + usually significant to its interpretation. The fragment component is + not subject to any scheme-based normalization; thus, two URIs that + differ only by the suffix "#" are considered different regardless of + the scheme. + + Some schemes define additional subcomponents that consist of case- + insensitive data, giving an implicit license to normalizers to + convert this data to a common case (e.g., all lowercase). For + example, URI schemes that define a subcomponent of path to contain an + Internet hostname, such as the "mailto" URI scheme, cause that + subcomponent to be case-insensitive and thus subject to case + normalization (e.g., "mailto:Joe@Example.COM" is equivalent to + "mailto:Joe@example.com", even though the generic syntax considers + the path component to be case-sensitive). + + Other scheme-specific normalizations are possible. + +6.2.4. Protocol-Based Normalization + + Substantial effort to reduce the incidence of false negatives is + often cost-effective for web spiders. Therefore, they implement even + more aggressive techniques in URI comparison. For example, if they + observe that a URI such as + + http://example.com/data + + redirects to a URI differing only in the trailing slash + + http://example.com/data/ + + they will likely regard the two as equivalent in the future. This + kind of technique is only appropriate when equivalence is clearly + indicated by both the result of accessing the resources and the + common conventions of their scheme's dereference algorithm (in this + case, use of redirection by HTTP origin servers to avoid problems + with relative references). + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 42] + +RFC 3986 URI Generic Syntax January 2005 + + +7. Security Considerations + + A URI does not in itself pose a security threat. 
However, as URIs + are often used to provide a compact set of instructions for access to + network resources, care must be taken to properly interpret the data + within a URI, to prevent that data from causing unintended access, + and to avoid including data that should not be revealed in plain + text. + +7.1. Reliability and Consistency + + There is no guarantee that once a URI has been used to retrieve + information, the same information will be retrievable by that URI in + the future. Nor is there any guarantee that the information + retrievable via that URI in the future will be observably similar to + that retrieved in the past. The URI syntax does not constrain how a + given scheme or authority apportions its namespace or maintains it + over time. Such guarantees can only be obtained from the person(s) + controlling that namespace and the resource in question. A specific + URI scheme may define additional semantics, such as name persistence, + if those semantics are required of all naming authorities for that + scheme. + +7.2. Malicious Construction + + It is sometimes possible to construct a URI so that an attempt to + perform a seemingly harmless, idempotent operation, such as the + retrieval of a representation, will in fact cause a possibly damaging + remote operation. The unsafe URI is typically constructed by + specifying a port number other than that reserved for the network + protocol in question. The client unwittingly contacts a site running + a different protocol service, and data within the URI contains + instructions that, when interpreted according to this other protocol, + cause an unexpected operation. A frequent example of such abuse has + been the use of a protocol-based scheme with a port component of + "25", thereby fooling user agent software into sending an unintended + or impersonating message via an SMTP server. 
+ + Applications should prevent dereference of a URI that specifies a TCP + port number within the "well-known port" range (0 - 1023) unless the + protocol being used to dereference that URI is compatible with the + protocol expected on that well-known port. Although IANA maintains a + registry of well-known ports, applications should make such + restrictions user-configurable to avoid preventing the deployment of + new services. + + + + + + +Berners-Lee, et al. Standards Track [Page 43] + +RFC 3986 URI Generic Syntax January 2005 + + + When a URI contains percent-encoded octets that match the delimiters + for a given resolution or dereference protocol (for example, CR and + LF characters for the TELNET protocol), these percent-encodings must + not be decoded before transmission across that protocol. Transfer of + the percent-encoding, which might violate the protocol, is less + harmful than allowing decoded octets to be interpreted as additional + operations or parameters, perhaps triggering an unexpected and + possibly harmful remote operation. + +7.3. Back-End Transcoding + + When a URI is dereferenced, the data within it is often parsed by + both the user agent and one or more servers. In HTTP, for example, a + typical user agent will parse a URI into its five major components, + access the authority's server, and send it the data within the + authority, path, and query components. A typical server will take + that information, parse the path into segments and the query into + key/value pairs, and then invoke implementation-specific handlers to + respond to the request. As a result, a common security concern for + server implementations that handle a URI, either as a whole or split + into separate components, is proper interpretation of the octet data + represented by the characters and percent-encodings within that URI. + + Percent-encoded octets must be decoded at some point during the + dereference process. 
Applications must split the URI into its + components and subcomponents prior to decoding the octets, as + otherwise the decoded octets might be mistaken for delimiters. + Security checks of the data within a URI should be applied after + decoding the octets. Note, however, that the "%00" percent-encoding + (NUL) may require special handling and should be rejected if the + application is not expecting to receive raw data within a component. + + Special care should be taken when the URI path interpretation process + involves the use of a back-end file system or related system + functions. File systems typically assign an operational meaning to + special characters, such as the "/", "\", ":", "[", and "]" + characters, and to special device names like ".", "..", "...", "aux", + "lpt", etc. In some cases, merely testing for the existence of such + a name will cause the operating system to pause or invoke unrelated + system calls, leading to significant security concerns regarding + denial of service and unintended data transfer. It would be + impossible for this specification to list all such significant + characters and device names. Implementers should research the + reserved names and characters for the types of storage device that + may be attached to their applications and restrict the use of data + obtained from URI components accordingly. + + + + + +Berners-Lee, et al. Standards Track [Page 44] + +RFC 3986 URI Generic Syntax January 2005 + + +7.4. Rare IP Address Formats + + Although the URI syntax for IPv4address only allows the common + dotted-decimal form of IPv4 address literal, many implementations + that process URIs make use of platform-dependent system routines, + such as gethostbyname() and inet_aton(), to translate the string + literal to an actual IP address. Unfortunately, such system routines + often allow and process a much larger set of formats than those + described in Section 3.2.2. 
+ + For example, many implementations allow dotted forms of three + numbers, wherein the last part is interpreted as a 16-bit quantity + and placed in the right-most two bytes of the network address (e.g., + a Class B network). Likewise, a dotted form of two numbers means + that the last part is interpreted as a 24-bit quantity and placed in + the right-most three bytes of the network address (Class A), and a + single number (without dots) is interpreted as a 32-bit quantity and + stored directly in the network address. Adding further to the + confusion, some implementations allow each dotted part to be + interpreted as decimal, octal, or hexadecimal, as specified in the C + language (i.e., a leading 0x or 0X implies hexadecimal; a leading 0 + implies octal; otherwise, the number is interpreted as decimal). + + These additional IP address formats are not allowed in the URI syntax + due to differences between platform implementations. However, they + can become a security concern if an application attempts to filter + access to resources based on the IP address in string literal format. + If this filtering is performed, literals should be converted to + numeric form and filtered based on the numeric value, and not on a + prefix or suffix of the string form. + +7.5. Sensitive Information + + URI producers should not provide a URI that contains a username or + password that is intended to be secret. URIs are frequently + displayed by browsers, stored in clear text bookmarks, and logged by + user agent history and intermediary applications (proxies). A + password appearing within the userinfo component is deprecated and + should be considered an error (or simply ignored) except in those + rare cases where the 'password' parameter is intended to be public. + +7.6. 
Semantic Attacks + + Because the userinfo subcomponent is rarely used and appears before + the host in the authority component, it can be used to construct a + URI intended to mislead a human user by appearing to identify one + (trusted) naming authority while actually identifying a different + authority hidden behind the noise. For example + + + +Berners-Lee, et al. Standards Track [Page 45] + +RFC 3986 URI Generic Syntax January 2005 + + + ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm + + might lead a human user to assume that the host is 'cnn.example.com', + whereas it is actually '10.0.0.1'. Note that a misleading userinfo + subcomponent could be much longer than the example above. + + A misleading URI, such as that above, is an attack on the user's + preconceived notions about the meaning of a URI rather than an attack + on the software itself. User agents may be able to reduce the impact + of such attacks by distinguishing the various components of the URI + when they are rendered, such as by using a different color or tone to + render userinfo if any is present, though there is no panacea. More + information on URI-based semantic attacks can be found in [Siedzik]. + +8. IANA Considerations + + URI scheme names, as defined by <scheme> in Section 3.1, form a + registered namespace that is managed by IANA according to the + procedures defined in [BCP35]. No IANA actions are required by this + document. + +9. Acknowledgements + + This specification is derived from RFC 2396 [RFC2396], RFC 1808 + [RFC1808], and RFC 1738 [RFC1738]; the acknowledgements in those + documents still apply. It also incorporates the update (with + corrections) for IPv6 literals in the host syntax, as defined by + Robert M. Hinden, Brian E. Carpenter, and Larry Masinter in + [RFC2732]. In addition, contributions by Gisle Aas, Reese Anschultz, + Daniel Barclay, Tim Bray, Mike Brown, Rob Cameron, Jeremy Carroll, + Dan Connolly, Adam M.
Costello, John Cowan, Jason Diamond, Martin + Duerst, Stefan Eissing, Clive D.W. Feather, Al Gilman, Tony Hammond, + Elliotte Harold, Pat Hayes, Henry Holtzman, Ian B. Jacobs, Michael + Kay, John C. Klensin, Graham Klyne, Dan Kohn, Bruce Lilly, Andrew + Main, Dave McAlpin, Ira McDonald, Michael Mealling, Ray Merkert, + Stephen Pollei, Julian Reschke, Tomas Rokicki, Miles Sabin, Kai + Schaetzl, Mark Thomson, Ronald Tschalaer, Norm Walsh, Marc Warne, + Stuart Williams, and Henry Zongaro are gratefully acknowledged. + +10. References + +10.1. Normative References + + [ASCII] American National Standards Institute, "Coded Character + Set -- 7-bit American Standard Code for Information + Interchange", ANSI X3.4, 1986. + + + + + +Berners-Lee, et al. Standards Track [Page 46] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [STD63] Yergeau, F., "UTF-8, a transformation format of + ISO 10646", STD 63, RFC 3629, November 2003. + + [UCS] International Organization for Standardization, + "Information Technology - Universal Multiple-Octet Coded + Character Set (UCS)", ISO/IEC 10646:2003, December 2003. + +10.2. Informative References + + [BCP19] Freed, N. and J. Postel, "IANA Charset Registration + Procedures", BCP 19, RFC 2978, October 2000. + + [BCP35] Petke, R. and I. King, "Registration Procedures for URL + Scheme Names", BCP 35, RFC 2717, November 1999. + + [RFC0952] Harrenstien, K., Stahl, M., and E. Feinler, "DoD Internet + host table specification", RFC 952, October 1985. + + [RFC1034] Mockapetris, P., "Domain names - concepts and facilities", + STD 13, RFC 1034, November 1987. + + [RFC1123] Braden, R., "Requirements for Internet Hosts - Application + and Support", STD 3, RFC 1123, October 1989. + + [RFC1535] Gavron, E., "A Security Problem and Proposed Correction + With Widely Deployed DNS Software", RFC 1535, + October 1993. 
+ + [RFC1630] Berners-Lee, T., "Universal Resource Identifiers in WWW: A + Unifying Syntax for the Expression of Names and Addresses + of Objects on the Network as used in the World-Wide Web", + RFC 1630, June 1994. + + [RFC1736] Kunze, J., "Functional Recommendations for Internet + Resource Locators", RFC 1736, February 1995. + + [RFC1737] Sollins, K. and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, December 1994. + + [RFC1738] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, December 1994. + + [RFC1808] Fielding, R., "Relative Uniform Resource Locators", + RFC 1808, June 1995. + + + + +Berners-Lee, et al. Standards Track [Page 47] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, + November 1996. + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [RFC2396] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifiers (URI): Generic Syntax", RFC 2396, + August 1998. + + [RFC2518] Goland, Y., Whitehead, E., Faizi, A., Carter, S., and D. + Jensen, "HTTP Extensions for Distributed Authoring -- + WEBDAV", RFC 2518, February 1999. + + [RFC2557] Palme, J., Hopmann, A., and N. Shelness, "MIME + Encapsulation of Aggregate Documents, such as HTML + (MHTML)", RFC 2557, March 1999. + + [RFC2718] Masinter, L., Alvestrand, H., Zigmond, D., and R. Petke, + "Guidelines for new URL Schemes", RFC 2718, November 1999. + + [RFC2732] Hinden, R., Carpenter, B., and L. Masinter, "Format for + Literal IPv6 Addresses in URL's", RFC 2732, December 1999. + + [RFC3305] Mealling, M. and R. Denenberg, "Report from the Joint + W3C/IETF URI Planning Interest Group: Uniform Resource + Identifiers (URIs), URLs, and Uniform Resource Names + (URNs): Clarifications and Recommendations", RFC 3305, + August 2002. + + [RFC3490] Faltstrom, P., Hoffman, P., and A. 
Costello, + "Internationalizing Domain Names in Applications (IDNA)", + RFC 3490, March 2003. + + [RFC3513] Hinden, R. and S. Deering, "Internet Protocol Version 6 + (IPv6) Addressing Architecture", RFC 3513, April 2003. + + [Siedzik] Siedzik, R., "Semantic Attacks: What's in a URL?", + April 2001, <http://www.giac.org/practical/gsec/Rachael_Siedzik_GSEC.pdf>. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 48] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix A. Collected ABNF for URI + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + URI-reference = URI / relative-ref + + absolute-URI = scheme ":" hier-part [ "?" query ] + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + host = IP-literal / IPv4address / reg-name + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + + + + + + + +Berners-Lee, et al.
Standards Track [Page 49] + +RFC 3986 URI Generic Syntax January 2005 + + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + query = *( pchar / "/" / "?" ) + + fragment = *( pchar / "/" / "?" ) + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +Appendix B. Parsing a URI Reference with a Regular Expression + + As the "first-match-wins" algorithm is identical to the "greedy" + disambiguation method used by POSIX regular expressions, it is + natural and commonplace to use a regular expression for parsing the + potential five components of a URI reference. + + The following line is the regular expression for breaking-down a + well-formed URI reference into its components. + + + +Berners-Lee, et al. Standards Track [Page 50] + +RFC 3986 URI Generic Syntax January 2005 + + + ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+ 12 3 4 5 6 7 8 9 + + The numbers in the second line above are only to assist readability; + they indicate the reference points for each subexpression (i.e., each + paired parenthesis). We refer to the value matched for subexpression + <n> as $<n>. For example, matching the above expression to + + http://www.ics.uci.edu/pub/ietf/uri/#Related + + results in the following subexpression matches: + + $1 = http: + $2 = http + $3 = //www.ics.uci.edu + $4 = www.ics.uci.edu + $5 = /pub/ietf/uri/ + $6 = <undefined> + $7 = <undefined> + $8 = #Related + $9 = Related + + where <undefined> indicates that the component is not present, as is + the case for the query component in the above example. Therefore, we + can determine the value of the five components as + + scheme = $2 + authority = $4 + path = $5 + query = $7 + fragment = $9 + + Going in the opposite direction, we can recreate a URI reference from + its components by using the algorithm of Section 5.3. + +Appendix C. Delimiting a URI in Context + + URIs are often transmitted through formats that do not provide a + clear context for their interpretation. For example, there are many + occasions when a URI is included in plain text; examples include text + sent in email, USENET news, and on printed paper. In such cases, it + is important to be able to delimit the URI from the rest of the text, + and in particular from punctuation marks that might be mistaken for + part of the URI. + + In practice, URIs are delimited in a variety of ways, but usually + within double-quotes "http://example.com/", angle brackets + <http://example.com/>, or just by using whitespace: + + + +Berners-Lee, et al. Standards Track [Page 51] + +RFC 3986 URI Generic Syntax January 2005 + + + http://example.com/ + + These wrappers do not form part of the URI. + + In some cases, extra whitespace (spaces, line-breaks, tabs, etc.) may + have to be added to break a long URI across lines. The whitespace + should be ignored when the URI is extracted. + + No whitespace should be introduced after a hyphen ("-") character.
+ Because some typesetters and printers may (erroneously) introduce a + hyphen at the end of line when breaking it, the interpreter of a URI + containing a line break immediately after a hyphen should ignore all + whitespace around the line break and should be aware that the hyphen + may or may not actually be part of the URI. + + Using <> angle brackets around each URI is especially recommended as + a delimiting style for a reference that contains embedded whitespace. + + The prefix "URL:" (with or without a trailing space) was formerly + recommended as a way to help distinguish a URI from other bracketed + designators, though it is not commonly used in practice and is no + longer recommended. + + For robustness, software that accepts user-typed URI should attempt + to recognize and strip both delimiters and embedded whitespace. + + For example, the text + + Yes, Jim, I found it under "http://www.w3.org/Addressing/", + but you can probably pick it up from <ftp://foo.example.com/rfc/>. Note the warning in <http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING>. + + contains the URI references + + http://www.w3.org/Addressing/ + ftp://foo.example.com/rfc/ + http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 52] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix D. Changes from RFC 2396 + +D.1. Additions + + An ABNF rule for URI has been introduced to correspond to one common + usage of the term: an absolute URI with optional fragment. + + IPv6 (and later) literals have been added to the list of possible + identifiers for the host portion of an authority component, as + described by [RFC2732], with the addition of "[" and "]" to the + reserved set and a version flag to anticipate future versions of IP + literals. Square brackets are now specified as reserved within the + authority component and are not allowed outside their use as + delimiters for an IP literal within host.
In order to make this + change without changing the technical definition of the path, query, + and fragment components, those rules were redefined to directly + specify the characters allowed. + + As [RFC2732] defers to [RFC3513] for definition of an IPv6 literal + address, which, unfortunately, lacks an ABNF description of + IPv6address, we created a new ABNF rule for IPv6address that matches + the text representations defined by Section 2.2 of [RFC3513]. + Likewise, the definition of IPv4address has been improved in order to + limit each decimal octet to the range 0-255. + + Section 6, on URI normalization and comparison, has been completely + rewritten and extended by using input from Tim Bray and discussion + within the W3C Technical Architecture Group. + +D.2. Modifications + + The ad-hoc BNF syntax of RFC 2396 has been replaced with the ABNF of + [RFC2234]. This change required all rule names that formerly + included underscore characters to be renamed with a dash instead. In + addition, a number of syntax rules have been eliminated or simplified + to make the overall grammar more comprehensible. Specifications that + refer to the obsolete grammar rules may be understood by replacing + those rules according to the following table: + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 53] + +RFC 3986 URI Generic Syntax January 2005 + + + +----------------+--------------------------------------------------+ + | obsolete rule | translation | + +----------------+--------------------------------------------------+ + | absoluteURI | absolute-URI | + | relativeURI | relative-part [ "?" query ] | + | hier_part | ( "//" authority path-abempty / | + | | path-absolute ) [ "?" query ] | + | | | + | opaque_part | path-rootless [ "?" 
query ] | + | net_path | "//" authority path-abempty | + | abs_path | path-absolute | + | rel_path | path-rootless | + | rel_segment | segment-nz-nc | + | reg_name | reg-name | + | server | authority | + | hostport | host [ ":" port ] | + | hostname | reg-name | + | path_segments | path-abempty | + | param | *<pchar excluding ";"> | + | | | + | uric | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," / "/" | + | | | + | uric_no_slash | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," | + | | | + | mark | "-" / "_" / "." / "!" / "~" / "*" / "'" | + | | / "(" / ")" | + | | | + | escaped | pct-encoded | + | hex | HEXDIG | + | alphanum | ALPHA / DIGIT | + +----------------+--------------------------------------------------+ + + Use of the above obsolete rules for the definition of scheme-specific + syntax is deprecated. + + Section 2, on characters, has been rewritten to explain what + characters are reserved, when they are reserved, and why they are + reserved, even when they are not used as delimiters by the generic + syntax. The mark characters that are typically unsafe to decode, + including the exclamation mark ("!"), asterisk ("*"), single-quote + ("'"), and open and close parentheses ("(" and ")"), have been moved + to the reserved set in order to clarify the distinction between + reserved and unreserved and, hopefully, to answer the most common + question of scheme designers. Likewise, the section on + percent-encoded characters has been rewritten, and URI normalizers + are now given license to decode any percent-encoded octets + + + +Berners-Lee, et al. Standards Track [Page 54] + +RFC 3986 URI Generic Syntax January 2005 + + + corresponding to unreserved characters. In general, the terms + "escaped" and "unescaped" have been replaced with "percent-encoded" + and "decoded", respectively, to reduce confusion with other forms of + escape mechanisms. 
+ + The ABNF for URI and URI-reference has been redesigned to make them + more friendly to LALR parsers and to reduce complexity. As a result, + the layout form of syntax description has been removed, along with + the uric, uric_no_slash, opaque_part, net_path, abs_path, rel_path, + path_segments, rel_segment, and mark rules. All references to + "opaque" URIs have been replaced with a better description of how the + path component may be opaque to hierarchy. The relativeURI rule has + been replaced with relative-ref to avoid unnecessary confusion over + whether they are a subset of URI. The ambiguity regarding the + parsing of URI-reference as a URI or a relative-ref with a colon in + the first segment has been eliminated through the use of five + separate path matching rules. + + The fragment identifier has been moved back into the section on + generic syntax components and within the URI and relative-ref rules, + though it remains excluded from absolute-URI. The number sign ("#") + character has been moved back to the reserved set as a result of + reintegrating the fragment syntax. + + The ABNF has been corrected to allow the path component to be empty. + This also allows an absolute-URI to consist of nothing after the + "scheme:", as is present in practice with the "dav:" namespace + [RFC2518] and with the "about:" scheme used internally by many WWW + browser implementations. The ambiguity regarding the boundary + between authority and path has been eliminated through the use of + five separate path matching rules. + + Registry-based naming authorities that use the generic syntax are now + defined within the host rule. This change allows current + implementations, where whatever name provided is simply fed to the + local name resolution mechanism, to be consistent with the + specification. It also removes the need to re-specify DNS name + formats here. 
Furthermore, it allows the host component to contain + percent-encoded octets, which is necessary to enable + internationalized domain names to be provided in URIs, processed in + their native character encodings at the application layers above URI + processing, and passed to an IDNA library as a registered name in the + UTF-8 character encoding. The server, hostport, hostname, + domainlabel, toplabel, and alphanum rules have been removed. + + The resolving relative references algorithm of [RFC2396] has been + rewritten with pseudocode for this revision to improve clarity and + fix the following issues: + + + +Berners-Lee, et al. Standards Track [Page 55] + +RFC 3986 URI Generic Syntax January 2005 + + + o [RFC2396] section 5.2, step 6a, failed to account for a base URI + with no path. + + o Restored the behavior of [RFC1808] where, if the reference + contains an empty path and a defined query component, the target + URI inherits the base URI's path component. + + o The determination of whether a URI reference is a same-document + reference has been decoupled from the URI parser, simplifying the + URI processing interface within applications in a way consistent + with the internal architecture of deployed URI processing + implementations. The determination is now based on comparison to + the base URI after transforming a reference to absolute form, + rather than on the format of the reference itself. This change + may result in more references being considered "same-document" + under this specification than there would be under the rules given + in RFC 2396, especially when normalization is used to reduce + aliases. However, it does not change the status of existing + same-document references. + + o Separated the path merge routine into two routines: merge, for + describing combination of the base URI path with a relative-path + reference, and remove_dot_segments, for describing how to remove + the special "." and ".." segments from a composed path. 
The + remove_dot_segments algorithm is now applied to all URI reference + paths in order to match common implementations and to improve the + normalization of URIs in practice. This change only impacts the + parsing of abnormal references and same-scheme references wherein + the base URI has a non-hierarchical path. + +Index + + A + ABNF 11 + absolute 27 + absolute-path 26 + absolute-URI 27 + access 9 + authority 17, 18 + + B + base URI 28 + + C + character encoding 4 + character 4 + characters 8, 11 + coded character set 4 + + + +Berners-Lee, et al. Standards Track [Page 56] + +RFC 3986 URI Generic Syntax January 2005 + + + D + dec-octet 20 + dereference 9 + dot-segments 23 + + F + fragment 16, 24 + + G + gen-delims 13 + generic syntax 6 + + H + h16 20 + hier-part 16 + hierarchical 10 + host 18 + + I + identifier 5 + IP-literal 19 + IPv4 20 + IPv4address 19, 20 + IPv6 19 + IPv6address 19, 20 + IPvFuture 19 + + L + locator 7 + ls32 20 + + M + merge 32 + + N + name 7 + network-path 26 + + P + path 16, 22, 26 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + path-abempty 16, 22, 26 + path-absolute 16, 22, 26 + path-empty 16, 22, 26 + + + +Berners-Lee, et al. Standards Track [Page 57] + +RFC 3986 URI Generic Syntax January 2005 + + + path-rootless 16, 22 + pchar 23 + pct-encoded 12 + percent-encoding 12 + port 22 + + Q + query 16, 23 + + R + reg-name 21 + registered name 20 + relative 10, 28 + relative-path 26 + relative-ref 26 + remove_dot_segments 33 + representation 9 + reserved 12 + resolution 9, 28 + resource 5 + retrieval 9 + + S + same-document 27 + sameness 9 + scheme 16, 17 + segment 22, 23 + segment-nz 23 + segment-nz-nc 23 + sub-delims 13 + suffix 27 + + T + transcription 8 + + U + uniform 4 + unreserved 13 + URI grammar + absolute-URI 27 + ALPHA 11 + authority 18 + CR 11 + dec-octet 20 + DIGIT 11 + DQUOTE 11 + fragment 24 + gen-delims 13 + + + +Berners-Lee, et al. 
Standards Track [Page 58] + +RFC 3986 URI Generic Syntax January 2005 + + + h16 20 + HEXDIG 11 + hier-part 16 + host 19 + IP-literal 19 + IPv4address 20 + IPv6address 20 + IPvFuture 19 + LF 11 + ls32 20 + OCTET 11 + path 22 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + pchar 23 + pct-encoded 12 + port 22 + query 24 + reg-name 21 + relative-ref 26 + reserved 13 + scheme 17 + segment 23 + segment-nz 23 + segment-nz-nc 23 + SP 11 + sub-delims 13 + unreserved 13 + URI 16 + URI-reference 25 + userinfo 18 + URI 16 + URI-reference 25 + URL 7 + URN 7 + userinfo 18 + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 59] + +RFC 3986 URI Generic Syntax January 2005 + + +Authors' Addresses + + Tim Berners-Lee + World Wide Web Consortium + Massachusetts Institute of Technology + 77 Massachusetts Avenue + Cambridge, MA 02139 + USA + + Phone: +1-617-253-5702 + Fax: +1-617-258-5999 + EMail: timbl@w3.org + URI: http://www.w3.org/People/Berners-Lee/ + + + Roy T. Fielding + Day Software + 5251 California Ave., Suite 110 + Irvine, CA 92617 + USA + + Phone: +1-949-679-2960 + Fax: +1-949-679-2972 + EMail: fielding@gbiv.com + URI: http://roy.gbiv.com/ + + + Larry Masinter + Adobe Systems Incorporated + 345 Park Ave + San Jose, CA 95110 + USA + + Phone: +1-408-536-3024 + EMail: LMM@acm.org + URI: http://larry.masinter.net/ + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 60] + +RFC 3986 URI Generic Syntax January 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. 
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the IETF's procedures with respect to rights in IETF Documents can + be found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + +Berners-Lee, et al. 
Standards Track [Page 61] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt new file mode 100644 index 0000000..f0b1513 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt @@ -0,0 +1,2579 @@ + + + + + + +Network Working Group M. Duerst +Request for Comments: 3987 W3C +Category: Standards Track M. Suignard + Microsoft Corporation + January 2005 + + + Internationalized Resource Identifiers (IRIs) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + This document defines a new protocol element, the Internationalized + Resource Identifier (IRI), as a complement to the Uniform Resource + Identifier (URI). An IRI is a sequence of characters from the + Universal Character Set (Unicode/ISO 10646). A mapping from IRIs to + URIs is defined, which means that IRIs can be used instead of URIs, + where appropriate, to identify resources. + + The approach of defining a new protocol element was chosen instead of + extending or changing the definition of URIs. This was done in order + to allow a clear distinction and to avoid incompatibilities with + existing software. Guidelines are provided for the use and + deployment of IRIs in various protocols, formats, and software + components that currently deal with URIs. + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 1.1. Overview and Motivation . . . . . . . . . . . . . . . . 3 + 1.2. Applicability . . . . . . . . . . . . . . . . . . . . . 3 + 1.3. Definitions . . . . . . . . . . . . . . . . . . . . . . 4 + 1.4. Notation . . 
. . . . . . . . . . . . . . . . . . . . . . 5 + 2. IRI Syntax . . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 2.1. Summary of IRI Syntax . . . . . . . . . . . . . . . . . 6 + 2.2. ABNF for IRI References and IRIs . . . . . . . . . . . . 7 + + + + +Duerst & Suignard Standards Track [Page 1] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + 3. Relationship between IRIs and URIs . . . . . . . . . . . . . . 10 + 3.1. Mapping of IRIs to URIs . . . . . . . . . . . . . . . . 10 + 3.2. Converting URIs to IRIs . . . . . . . . . . . . . . . . 14 + 3.2.1. Examples . . . . . . . . . . . . . . . . . . . . 15 + 4. Bidirectional IRIs for Right-to-Left Languages. . . . . . . . 16 + 4.1. Logical Storage and Visual Presentation . . . . . . . . 17 + 4.2. Bidi IRI Structure . . . . . . . . . . . . . . . . . . . 18 + 4.3. Input of Bidi IRIs . . . . . . . . . . . . . . . . . . . 19 + 4.4. Examples . . . . . . . . . . . . . . . . . . . . . . . . 19 + 5. Normalization and Comparison . . . . . . . . . . . . . . . . . 21 + 5.1. Equivalence . . . . . . . . . . . . . . . . . . . . . . 22 + 5.2. Preparation for Comparison . . . . . . . . . . . . . . . 22 + 5.3. Comparison Ladder . . . . . . . . . . . . . . . . . . . 23 + 5.3.1. Simple String Comparison . . . . . . . . . . . . 23 + 5.3.2. Syntax-Based Normalization . . . . . . . . . . . 24 + 5.3.3. Scheme-Based Normalization . . . . . . . . . . . 27 + 5.3.4. Protocol-Based Normalization . . . . . . . . . . 28 + 6. Use of IRIs . . . . . . . . . . . . . . . . . . . . . . . . . 29 + 6.1. Limitations on UCS Characters Allowed in IRIs . . . . . 29 + 6.2. Software Interfaces and Protocols . . . . . . . . . . . 29 + 6.3. Format of URIs and IRIs in Documents and Protocols . . . 30 + 6.4. Use of UTF-8 for Encoding Original Characters .. . . . . 30 + 6.5. Relative IRI References . . . . . . . . . . . . . . . . 32 + 7. URI/IRI Processing Guidelines (informative) . . . . . . . . . 32 + 7.1. URI/IRI Software Interfaces . . . . . 
. . . . . . . . . 32 + 7.2. URI/IRI Entry . . . . . . . . . . . . . . . . . . . . . 33 + 7.3. URI/IRI Transfer between Applications . . . . . . . . . 33 + 7.4. URI/IRI Generation . . . . . . . . . . . . . . . . . . . 34 + 7.5. URI/IRI Selection . . . . . . . . . . . . . . . . . . . 34 + 7.6. Display of URIs/IRIs . . . . . . . . . . . . . . . . . . 35 + 7.7. Interpretation of URIs and IRIs . . . . . . . . . . . . 36 + 7.8. Upgrading Strategy . . . . . . . . . . . . . . . . . . . 36 + 8. Security Considerations . . . . . . . . . . . . . . . . . . . 37 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 39 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 40 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 40 + 10.2. Informative References . . . . . . . . . . . . . . . . . 41 + A. Design Alternatives . . . . . . . . . . . . . . . . . . . . . 44 + A.1. New Scheme(s) . . . . . . . . . . . . . . . . . . . . . 44 + A.2. Character Encodings Other Than UTF-8 . . . . . . . . . . 44 + A.3. New Encoding Convention . . . . . . . . . . . . . . . . 44 + A.4. Indicating Character Encodings in the URI/IRI . . . . . 45 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 45 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 46 + + + + + + + +Duerst & Suignard Standards Track [Page 2] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +1. Introduction + +1.1. Overview and Motivation + + A Uniform Resource Identifier (URI) is defined in [RFC3986] as a + sequence of characters chosen from a limited subset of the repertoire + of US-ASCII [ASCII] characters. + + The characters in URIs are frequently used for representing words of + natural languages. This usage has many advantages: Such URIs are + easier to memorize, easier to interpret, easier to transcribe, easier + to create, and easier to guess. 
For most languages other than + English, however, the natural script uses characters other than A - + Z. For many people, handling Latin characters is as difficult as + handling the characters of other scripts is for those who use only + the Latin alphabet. Many languages with non-Latin scripts are + transcribed with Latin letters. These transcriptions are now often + used in URIs, but they introduce additional ambiguities. + + The infrastructure for the appropriate handling of characters from + local scripts is now widely deployed in local versions of operating + system and application software. Software that can handle a wide + variety of scripts and languages at the same time is increasingly + common. Also, increasing numbers of protocols and formats can carry + a wide range of characters. + + This document defines a new protocol element called Internationalized + Resource Identifier (IRI) by extending the syntax of URIs to a much + wider repertoire of characters. It also defines "internationalized" + versions corresponding to other constructs from [RFC3986], such as + URI references. The syntax of IRIs is defined in section 2, and the + relationship between IRIs and URIs in section 3. + + Using characters outside of A - Z in IRIs brings some difficulties. + Section 4 discusses the special case of bidirectional IRIs, section 5 + various forms of equivalence between IRIs, and section 6 the use of + IRIs in different situations. Section 7 gives additional informative + guidelines, and section 8 security considerations. + +1.2. Applicability + + IRIs are designed to be compatible with recommendations for new URI + schemes [RFC2718]. The compatibility is provided by specifying a + well-defined and deterministic mapping from the IRI character + sequence to the functionally equivalent URI character sequence. 
+ Practical use of IRIs (or IRI references) in place of URIs (or URI + references) depends on the following conditions being met: + + + + +Duerst & Suignard Standards Track [Page 3] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + a. A protocol or format element should be explicitly designated to + be able to carry IRIs. The intent is not to introduce IRIs into + contexts that are not defined to accept them. For example, XML + schema [XMLSchema] has an explicit type "anyURI" that includes + IRIs and IRI references. Therefore, IRIs and IRI references can + be in attributes and elements of type "anyURI". On the other + hand, in the HTTP protocol [RFC2616], the Request URI is defined + as a URI, which means that direct use of IRIs is not allowed in + HTTP requests. + + b. The protocol or format carrying the IRIs should have a mechanism + to represent the wide range of characters used in IRIs, either + natively or by some protocol- or format-specific escaping + mechanism (for example, numeric character references in [XML1]). + + c. The URI corresponding to the IRI in question has to encode + original characters into octets using UTF-8. For new URI + schemes, this is recommended in [RFC2718]. It can apply to a + whole scheme (e.g., IMAP URLs [RFC2192] and POP URLs [RFC2384], + or the URN syntax [RFC2141]). It can apply to a specific part of + a URI, such as the fragment identifier (e.g., [XPointer]). It + can apply to a specific URI or part(s) thereof. For details, + please see section 6.4. + +1.3. Definitions + + The following definitions are used in this document; they follow the + terms in [RFC2130], [RFC2277], and [ISO10646]. + + character: A member of a set of elements used for the organization, + control, or representation of data. For example, "LATIN CAPITAL + LETTER A" names a character. + + octet: An ordered sequence of eight bits considered as a unit. + + character repertoire: A set of characters (in the mathematical + sense). 
+ + sequence of characters: A sequence of characters (one after another). + + sequence of octets: A sequence of octets (one after another). + + character encoding: A method of representing a sequence of characters + as a sequence of octets (maybe with variants). Also, a method of + (unambiguously) converting a sequence of octets into a sequence of + characters. + + + + + +Duerst & Suignard Standards Track [Page 4] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + charset: The name of a parameter or attribute used to identify a + character encoding. + + UCS: Universal Character Set. The coded character set defined by + ISO/IEC 10646 [ISO10646] and the Unicode Standard [UNIV4]. + + IRI reference: Denotes the common usage of an Internationalized + Resource Identifier. An IRI reference may be absolute or + relative. However, the "IRI" that results from such a reference + only includes absolute IRIs; any relative IRI references are + resolved to their absolute form. Note that in [RFC2396] URIs did + not include fragment identifiers, but in [RFC3986] fragment + identifiers are part of URIs. + + running text: Human text (paragraphs, sentences, phrases) with syntax + according to orthographic conventions of a natural language, as + opposed to syntax defined for ease of processing by machines + (e.g., markup, programming languages). + + protocol element: Any portion of a message that affects processing of + that message by the protocol in question. + + presentation element: A presentation form corresponding to a protocol + element; for example, using a wider range of characters. + + create (a URI or IRI): With respect to URIs and IRIs, the term is + used for the initial creation. This may be the initial creation + of a resource with a certain identifier, or the initial exposition + of a resource under a particular identifier. 
+ + generate (a URI or IRI): With respect to URIs and IRIs, the term is + used when the IRI is generated by derivation from other + information. + +1.4. Notation + + RFCs and Internet Drafts currently do not allow any characters + outside the US-ASCII repertoire. Therefore, this document uses + various special notations to denote such characters in examples. + + In text, characters outside US-ASCII are sometimes referenced by + using a prefix of 'U+', followed by four to six hexadecimal digits. + + To represent characters outside US-ASCII in examples, this document + uses two notations: 'XML Notation' and 'Bidi Notation'. + + + + + + +Duerst & Suignard Standards Track [Page 5] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + XML Notation uses a leading '&#x', a trailing ';', and the + hexadecimal number of the character in the UCS in between. For + example, &#x44F; stands for CYRILLIC CAPITAL LETTER YA. In this + notation, an actual '&' is denoted by '&amp;'. + + Bidi Notation is used for bidirectional examples: Lowercase letters + stand for Latin letters or other letters that are written left to + right, whereas uppercase letters represent Arabic or Hebrew letters + that are written right to left. + + To denote actual octets in examples (as opposed to percent-encoded + octets), the two hex digits denoting the octet are enclosed in "<" + and ">". For example, the octet often denoted as 0xc9 is denoted + here as <c9>. + + In this document, the key words "MUST", "MUST NOT", "REQUIRED", + "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", + and "OPTIONAL" are to be interpreted as described in [RFC2119]. + +2. IRI Syntax + + This section defines the syntax of Internationalized Resource + Identifiers (IRIs). + + As with URIs, an IRI is defined as a sequence of characters, not as a + sequence of octets. This definition accommodates the fact that IRIs + may be written on paper or read over the radio as well as stored or + transmitted digitally. 
The same IRI may be represented as different + sequences of octets in different protocols or documents if these + protocols or documents use different character encodings (and/or + transfer encodings). Using the same character encoding as the + containing protocol or document ensures that the characters in the + IRI can be handled (e.g., searched, converted, displayed) in the same + way as the rest of the protocol or document. + +2.1. Summary of IRI Syntax + + IRIs are defined similarly to URIs in [RFC3986], but the class of + unreserved characters is extended by adding the characters of the UCS + (Universal Character Set, [ISO10646]) beyond U+007F, subject to the + limitations given in the syntax rules below and in section 6.1. + + Otherwise, the syntax and use of components and reserved characters + is the same as that in [RFC3986]. All the operations defined in + [RFC3986], such as the resolution of relative references, can be + applied to IRIs by IRI-processing software in exactly the same way as + they are for URIs by URI-processing software. + + + + +Duerst & Suignard Standards Track [Page 6] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Characters outside the US-ASCII repertoire are not reserved and + therefore MUST NOT be used for syntactical purposes, such as to + delimit components in newly defined schemes. For example, U+00A2, + CENT SIGN, is not allowed as a delimiter in IRIs, because it is in + the 'iunreserved' category. This is similar to the fact that it is + not possible to use '-' as a delimiter in URIs, because it is in the + 'unreserved' category. + +2.2. ABNF for IRI References and IRIs + + Although it might be possible to define IRI references and IRIs + merely by their transformation to URI references and URIs, they can + also be accepted and processed directly. Therefore, an ABNF + definition for IRI references (which are the most general concept and + the start of the grammar) and IRIs is given here. 
The syntax of this + ABNF is described in [RFC2234]. Character numbers are taken from the + UCS, without implying any actual binary encoding. Terminals in the + ABNF are characters, not bytes. + + The following grammar closely follows the URI grammar in [RFC3986], + except that the range of unreserved characters is expanded to include + UCS characters, with the restriction that private UCS characters can + occur only in query parts. The grammar is split into two parts: + Rules that differ from [RFC3986] because of the above-mentioned + expansion, and rules that are the same as those in [RFC3986]. For + rules that are different than those in [RFC3986], the names of the + non-terminals have been changed as follows. If the non-terminal + contains 'URI', this has been changed to 'IRI'. Otherwise, an 'i' + has been prefixed. + + The following rules are different from those in [RFC3986]: + + IRI = scheme ":" ihier-part [ "?" iquery ] + [ "#" ifragment ] + + ihier-part = "//" iauthority ipath-abempty + / ipath-absolute + / ipath-rootless + / ipath-empty + + IRI-reference = IRI / irelative-ref + + absolute-IRI = scheme ":" ihier-part [ "?" iquery ] + + irelative-ref = irelative-part [ "?" 
iquery ] [ "#" ifragment ] + + irelative-part = "//" iauthority ipath-abempty + / ipath-absolute + + + +Duerst & Suignard Standards Track [Page 7] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + / ipath-noscheme + / ipath-empty + + iauthority = [ iuserinfo "@" ] ihost [ ":" port ] + iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) + ihost = IP-literal / IPv4address / ireg-name + + ireg-name = *( iunreserved / pct-encoded / sub-delims ) + + ipath = ipath-abempty ; begins with "/" or is empty + / ipath-absolute ; begins with "/" but not "//" + / ipath-noscheme ; begins with a non-colon segment + / ipath-rootless ; begins with a segment + / ipath-empty ; zero characters + + ipath-abempty = *( "/" isegment ) + ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] + ipath-noscheme = isegment-nz-nc *( "/" isegment ) + ipath-rootless = isegment-nz *( "/" isegment ) + ipath-empty = 0<ipchar> + + isegment = *ipchar + isegment-nz = 1*ipchar + isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims + / "@" ) + ; non-zero-length segment without any colon ":" + + ipchar = iunreserved / pct-encoded / sub-delims / ":" + / "@" + + iquery = *( ipchar / iprivate / "/" / "?" ) + + ifragment = *( ipchar / "/" / "?" ) + + iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar + + ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF + / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD + / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD + / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD + / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD + / %xD0000-DFFFD / %xE1000-EFFFD + + iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD + + Some productions are ambiguous. The "first-match-wins" (a.k.a. + "greedy") algorithm applies. For details, see [RFC3986]. 
+ + + + +Duerst & Suignard Standards Track [Page 8] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The following rules are the same as those in [RFC3986]: + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + This syntax does not support IPv6 scoped addressing zone identifiers. + + + + + + + + + + + +Duerst & Suignard Standards Track [Page 9] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +3. Relationship between IRIs and URIs + + IRIs are meant to replace URIs in identifying resources for + protocols, formats, and software components that use a UCS-based + character repertoire. These protocols and components may never need + to use URIs directly, especially when the resource identifier is used + simply for identification purposes. 
However, when the resource + identifier is used for resource retrieval, it is in many cases + necessary to determine the associated URI, because currently most + retrieval mechanisms are only defined for URIs. In this case, IRIs + can serve as presentation elements for URI protocol elements. An + example would be an address bar in a Web user agent. (Additional + rationale is given in section 3.1.) + +3.1. Mapping of IRIs to URIs + + This section defines how to map an IRI to a URI. Everything in this + section also applies to IRI references and URI references, as well as + to components thereof (for example, fragment identifiers). + + This mapping has two purposes: + + Syntaxical. Many URI schemes and components define additional + syntactical restrictions not captured in section 2.2. + Scheme-specific restrictions are applied to IRIs by converting + IRIs to URIs and checking the URIs against the scheme-specific + restrictions. + + Interpretational. URIs identify resources in various ways. IRIs also + identify resources. When the IRI is used solely for + identification purposes, it is not necessary to map the IRI to a + URI (see section 5). However, when an IRI is used for resource + retrieval, the resource that the IRI locates is the same as the + one located by the URI obtained after converting the IRI according + to the procedure defined here. This means that there is no need + to define resolution separately on the IRI level. + + Applications MUST map IRIs to URIs by using the following two steps. + + Step 1. Generate a UCS character sequence from the original IRI + format. This step has the following three variants, + depending on the form of the input: + + a. If the IRI is written on paper, read aloud, or otherwise + represented as a sequence of characters independent of + any character encoding, represent the IRI as a sequence + of characters from the UCS normalized according to + Normalization Form C (NFC, [UTR15]). 
+ + + +Duerst & Suignard Standards Track [Page 10] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + b. If the IRI is in some digital representation (e.g., an + octet stream) in some known non-Unicode character + encoding, convert the IRI to a sequence of characters + from the UCS normalized according to NFC. + + c. If the IRI is in a Unicode-based character encoding (for + example, UTF-8 or UTF-16), do not normalize (see section + 5.3.2.2 for details). Apply step 2 directly to the + encoded Unicode character sequence. + + Step 2. For each character in 'ucschar' or 'iprivate', apply steps + 2.1 through 2.3 below. + + 2.1. Convert the character to a sequence of one or more octets + using UTF-8 [RFC3629]. + + 2.2. Convert each octet to %HH, where HH is the hexadecimal + notation of the octet value. Note that this is identical + to the percent-encoding mechanism in section 2.1 of + [RFC3986]. To reduce variability, the hexadecimal notation + SHOULD use uppercase letters. + + 2.3. Replace the original character with the resulting character + sequence (i.e., a sequence of %HH triplets). + + The above mapping from IRIs to URIs produces URIs fully conforming to + [RFC3986]. The mapping is also an identity transformation for URIs + and is idempotent; applying the mapping a second time will not + change anything. Every URI is by definition an IRI. + + Systems accepting IRIs MAY convert the ireg-name component of an IRI + as follows (before step 2 above) for schemes known to use domain + names in ireg-name, if the scheme definition does not allow + percent-encoding for ireg-name: + + Replace the ireg-name part of the IRI by the part converted using the + ToASCII operation specified in section 4.1 of [RFC3490] on each + dot-separated label, and by using U+002E (FULL STOP) as a label + separator, with the flag UseSTD3ASCIIRules set to TRUE, and with the + flag AllowUnassigned set to FALSE for creating IRIs and set to TRUE + otherwise. 
+ + + + + + + + + + +Duerst & Suignard Standards Track [Page 11] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The ToASCII operation may fail, but this would mean that the IRI + cannot be resolved. This conversion SHOULD be used when the goal is + to maximize interoperability with legacy URI resolvers. For example, + the IRI + + "http://r&#xE9;sum&#xE9;.example.org" + + may be converted to + + "http://xn--rsum-bpad.example.org" + + instead of + + "http://r%C3%A9sum%C3%A9.example.org". + + An IRI with a scheme that is known to use domain names in ireg-name, + but where the scheme definition does not allow percent-encoding for + ireg-name, meets scheme-specific restrictions if either the + straightforward conversion or the conversion using the ToASCII + operation on ireg-name result in an URI that meets the scheme- + specific restrictions. + + Such an IRI resolves to the URI obtained after converting the IRI and + uses the ToASCII operation on ireg-name. Implementations do not have + to do this conversion as long as they produce the same result. + + Note: The difference between variants b and c in step 1 (using + normalization with NFC, versus not using any normalization) + accounts for the fact that in many non-Unicode character + encodings, some text cannot be represented directly. For example, + the word "Vietnam" is natively written "Vi&#x1EC7;t Nam" + (containing a LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW) + in NFC, but a direct transcoding from the windows-1258 character + encoding leads to "Vi&#xEA;&#x323;t Nam" (containing a LATIN SMALL + LETTER E WITH CIRCUMFLEX followed by a COMBINING DOT BELOW). + Direct transcoding of other 8-bit encodings of Vietnamese may lead + to other representations. + + Note: The uniform treatment of the whole IRI in step 2 is important + to make processing independent of URI scheme. See [Gettys] for an + in-depth discussion. 
+ + Note: In practice, whether the general mapping (steps 1 and 2) or the + ToASCII operation of [RFC3490] is used for ireg-name will not be + noticed if mapping from IRI to URI and resolution is tightly + integrated (e.g., carried out in the same user agent). But + + + + + +Duerst & Suignard Standards Track [Page 12] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + conversion using [RFC3490] may be able to better deal with + backwards compatibility issues in case mapping and resolution are + separated, as in the case of using an HTTP proxy. + + Note: Internationalized Domain Names may be contained in parts of an + IRI other than the ireg-name part. It is the responsibility of + scheme-specific implementations (if the Internationalized Domain + Name is part of the scheme syntax) or of server-side + implementations (if the Internationalized Domain Name is part of + 'iquery') to apply the necessary conversions at the appropriate + point. Example: Trying to validate the Web page at + http://résumé.example.org would lead to an IRI of + http://validator.w3.org/check?uri=http%3A%2F%2Frésumé. + example.org, which would convert to a URI of + http://validator.w3.org/check?uri=http%3A%2F%2Fr%C3%A9sum%C3%A9. + example.org. The server side implementation would be responsible + for making the necessary conversions to be able to retrieve the + Web page. + + Systems accepting IRIs MAY also deal with the printable characters in + US-ASCII that are not allowed in URIs, namely "<", ">", '"', space, + "{", "}", "|", "\", "^", and "`", in step 2 above. If these + characters are found but are not converted, then the conversion + SHOULD fail. Please note that the number sign ("#"), the percent + sign ("%"), and the square bracket characters ("[", "]") are not part + of the above list and MUST NOT be converted. 
Protocols and formats + that have used earlier definitions of IRIs including these characters + MAY require percent-encoding of these characters as a preprocessing + step to extract the actual IRI from a given field. This + preprocessing MAY also be used by applications allowing the user to + enter an IRI. + + Note: In this process (in step 2.3), characters allowed in URI + references and existing percent-encoded sequences are not encoded + further. (This mapping is similar to, but different from, the + encoding applied when arbitrary content is included in some part + of a URI.) For example, an IRI of + "http://www.example.org/red%09ros&#xE9;#red" (in XML notation) is + converted to + "http://www.example.org/red%09ros%C3%A9#red", not to something + like + "http%3A%2F%2Fwww.example.org%2Fred%2509ros%C3%A9%23red". + + Note: Some older software transcoding to UTF-8 may produce illegal + output for some input, in particular for characters outside the + BMP (Basic Multilingual Plane). As an example, for the IRI with + non-BMP characters (in XML Notation): + "http://example.com/&#x10300;&#x10301;&#x10302;" + + + +Duerst & Suignard Standards Track [Page 13] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + which contains the first three letters of the Old Italic alphabet, + the correct conversion to a URI is + "http://example.com/%F0%90%8C%80%F0%90%8C%81%F0%90%8C%82" + +3.2. Converting URIs to IRIs + + In some situations, converting a URI into an equivalent IRI may be + desirable. This section gives a procedure for this conversion. The + conversion described in this section will always result in an IRI + that maps back to the URI used as an input for the conversion (except + for potential case differences in percent-encoding and for potential + percent-encoded unreserved characters). However, the IRI resulting + from this conversion may not be exactly the same as the original IRI + (if there ever was one). 
+ + URI-to-IRI conversion removes percent-encodings, but not all + percent-encodings can be eliminated. There are several reasons for + this: + + 1. Some percent-encodings are necessary to distinguish percent- + encoded and unencoded uses of reserved characters. + + 2. Some percent-encodings cannot be interpreted as sequences of + UTF-8 octets. + + (Note: The octet patterns of UTF-8 are highly regular. + Therefore, there is a very high probability, but no guarantee, + that percent-encodings that can be interpreted as sequences of + UTF-8 octets actually originated from UTF-8. For a detailed + discussion, see [Duerst97].) + + 3. The conversion may result in a character that is not appropriate + in an IRI. See sections 2.2, 4.1, and 6.1 for further details. + + Conversion from a URI to an IRI is done by using the following steps + (or any other algorithm that produces the same result): + + 1. Represent the URI as a sequence of octets in US-ASCII. + + 2. Convert all percent-encodings ("%" followed by two hexadecimal + digits) to the corresponding octets, except those corresponding + to "%", characters in "reserved", and characters in US-ASCII not + allowed in URIs. + + 3. Re-percent-encode any octet produced in step 2 that is not part + of a strictly legal UTF-8 octet sequence. + + + + + +Duerst & Suignard Standards Track [Page 14] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + 4. Re-percent-encode all octets produced in step 3 that in UTF-8 + represent characters that are not appropriate according to + sections 2.2, 4.1, and 6.1. + + 5. Interpret the resulting octet sequence as a sequence of characters + encoded in UTF-8. + + This procedure will convert as many percent-encoded characters as + possible to characters in an IRI. Because there are some choices + when step 4 is applied (see section 6.1), results may vary. 
+ + Conversions from URIs to IRIs MUST NOT use any character encoding + other than UTF-8 in steps 3 and 4, even if it might be possible to + guess from the context that another character encoding than UTF-8 was + used in the URI. For example, the URI + "http://www.example.org/r%E9sum%E9.html" might with some guessing be + interpreted to contain two e-acute characters encoded as iso-8859-1. + It must not be converted to an IRI containing these e-acute + characters. Otherwise, in the future the IRI will be mapped to + "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different + URI from "http://www.example.org/r%E9sum%E9.html". + +3.2.1. Examples + + This section shows various examples of converting URIs to IRIs. Each + example shows the result after each of the steps 1 through 5 is + applied. XML Notation is used for the final result. Octets are + denoted by "<" followed by two hexadecimal digits followed by ">". + + The following example contains the sequence "%C3%BC", which is a + strictly legal UTF-8 sequence, and which is converted into the actual + character U+00FC, LATIN SMALL LETTER U WITH DIAERESIS (also known as + u-umlaut). + + 1. http://www.example.org/D%C3%BCrst + + 2. http://www.example.org/D<c3><bc>rst + + 3. http://www.example.org/D<c3><bc>rst + + 4. http://www.example.org/D<c3><bc>rst + + 5. http://www.example.org/D&#xFC;rst + + The following example contains the sequence "%FC", which might + represent U+00FC, LATIN SMALL LETTER U WITH DIAERESIS, in the + iso-8859-1 character encoding. (It might represent other characters + in other character encodings. For example, the octet <fc> in + + + +Duerst & Suignard Standards Track [Page 15] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + iso-8859-5 represents U+045C, CYRILLIC SMALL LETTER KJE.) Because + <fc> is not part of a strictly legal UTF-8 sequence, it is + re-percent-encoded in step 3. + + 1. http://www.example.org/D%FCrst + + 2. http://www.example.org/D<fc>rst + + 3. http://www.example.org/D%FCrst + + 4. 
http://www.example.org/D%FCrst + + 5. http://www.example.org/D%FCrst + + The following example contains "%e2%80%ae", which is the percent- + encoded UTF-8 character encoding of U+202E, RIGHT-TO-LEFT OVERRIDE. + Section 4.1 forbids the direct use of this character in an IRI. + Therefore, the corresponding octets are re-percent-encoded in step 4. + This example shows that the case (upper- or lowercase) of letters + used in percent-encodings may not be preserved. The example also + contains a punycode-encoded domain name label (xn--99zt52a), which is + not converted. + + 1. http://xn--99zt52a.example.org/%e2%80%ae + + 2. http://xn--99zt52a.example.org/<e2><80><ae> + + 3. http://xn--99zt52a.example.org/<e2><80><ae> + + 4. http://xn--99zt52a.example.org/%E2%80%AE + + 5. http://xn--99zt52a.example.org/%E2%80%AE + + Implementations with scheme-specific knowledge MAY convert + punycode-encoded domain name labels to the corresponding characters + by using the ToUnicode procedure. Thus, for the example above, the + label "xn--99zt52a" may be converted to U+7D0D U+8C46 (Japanese + Natto), leading to the overall IRI of + "http://&#x7D0D;&#x8C46;.example.org/%E2%80%AE". + +4. Bidirectional IRIs for Right-to-Left Languages + + Some UCS characters, such as those used in the Arabic and Hebrew + scripts, have an inherent right-to-left (rtl) writing direction. + IRIs containing these characters (called bidirectional IRIs or Bidi + IRIs) require additional attention because of the non-trivial + + + + + +Duerst & Suignard Standards Track [Page 16] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + relation between logical representation (used for digital + representation and for reading/spelling) and visual representation + (used for display/printing). + + Because of the complex interaction between the logical + representation, the visual representation, and the syntax of a Bidi + IRI, a balance is needed between various requirements. The main + requirements are + + 1. 
user-predictable conversion between visual and logical + representation; + + 2. the ability to include a wide range of characters in various + parts of the IRI; and + + 3. minor or no changes or restrictions for implementations. + +4.1. Logical Storage and Visual Presentation + + When stored or transmitted in digital representation, bidirectional + IRIs MUST be in full logical order and MUST conform to the IRI syntax + rules (which includes the rules relevant to their scheme). This + ensures that bidirectional IRIs can be processed in the same way as + other IRIs. + + Bidirectional IRIs MUST be rendered by using the Unicode + Bidirectional Algorithm [UNIV4], [UNI9]. Bidirectional IRIs MUST be + rendered in the same way as they would be if they were in a + left-to-right embedding; i.e., as if they were preceded by U+202A, + LEFT-TO-RIGHT EMBEDDING (LRE), and followed by U+202C, POP + DIRECTIONAL FORMATTING (PDF). Setting the embedding direction can + also be done in a higher-level protocol (e.g., the dir='ltr' + attribute in HTML). + + There is no requirement to use the above embedding if the display is + still the same without the embedding. For example, a bidirectional + IRI in a text with left-to-right base directionality (such as used + for English or Cyrillic) that is preceded and followed by whitespace + and strong left-to-right characters does not need an embedding. + Also, a bidirectional relative IRI reference that only contains + strong right-to-left characters and weak characters and that starts + and ends with a strong right-to-left character and appears in a text + with right-to-left base directionality (such as used for Arabic or + Hebrew) and is preceded and followed by whitespace and strong + characters does not need an embedding. 
+ + + + + + +Duerst & Suignard Standards Track [Page 17] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + In some other cases, using U+200E, LEFT-TO-RIGHT MARK (LRM), may be + sufficient to force the correct display behavior. However, the + details of the Unicode Bidirectional algorithm are not always easy to + understand. Implementers are strongly advised to err on the side of + caution and to use embedding in all cases where they are not + completely sure that the display behavior is unaffected without the + embedding. + + The Unicode Bidirectional Algorithm ([UNI9], section 4.3) permits + higher-level protocols to influence bidirectional rendering. Such + changes by higher-level protocols MUST NOT be used if they change the + rendering of IRIs. + + The bidirectional formatting characters that may be used before or + after the IRI to ensure correct display are not themselves part of + the IRI. IRIs MUST NOT contain bidirectional formatting characters + (LRM, RLM, LRE, RLE, LRO, RLO, and PDF). They affect the visual + rendering of the IRI but do not appear themselves. It would + therefore not be possible to input an IRI with such characters + correctly. + +4.2. Bidi IRI Structure + + The Unicode Bidirectional Algorithm is designed mainly for running + text. To make sure that it does not affect the rendering of + bidirectional IRIs too much, some restrictions on bidirectional IRIs + are necessary. These restrictions are given in terms of delimiters + (structural characters, mostly punctuation such as "@", ".", ":", and + "/") and components (usually consisting mostly of letters and + digits). + + The following syntax rules from section 2.2 correspond to components + for the purpose of Bidi behavior: iuserinfo, ireg-name, isegment, + isegment-nz, isegment-nz-nc, ireg-name, iquery, and ifragment. 
+ + Specifications that define the syntax of any of the above components + MAY divide them further and define smaller parts to be components + according to this document. As an example, the restrictions of + [RFC3490] on bidirectional domain names correspond to treating each + label of a domain name as a component for schemes with ireg-name as a + domain name. Even where the components are not defined formally, it + may be helpful to think about some syntax in terms of components and + to apply the relevant restrictions. For example, for the usual + name/value syntax in query parts, it is convenient to treat each name + and each value as a component. As another example, the extensions in + a resource name can be treated as separate components. + + + + + +Duerst & Suignard Standards Track [Page 18] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + For each component, the following restrictions apply: + + 1. A component SHOULD NOT use both right-to-left and left-to-right + characters. + + 2. A component using right-to-left characters SHOULD start and end + with right-to-left characters. + + The above restrictions are given as shoulds, rather than as musts. + For IRIs that are never presented visually, they are not relevant. + However, for IRIs in general, they are very important to ensure + consistent conversion between visual presentation and logical + representation, in both directions. + + Note: In some components, the above restrictions may actually be + strictly enforced. For example, [RFC3490] requires that these + restrictions apply to the labels of a host name for those schemes + where ireg-name is a host name. In some other components (for + example, path components) following these restrictions may not be + too difficult. For other components, such as parts of the query + part, it may be very difficult to enforce the restrictions because + the values of query parameters may be arbitrary character + sequences. 
+ + If the above restrictions cannot be satisfied otherwise, the affected + component can always be mapped to URI notation as described in + section 3.1. Please note that the whole component has to be mapped + (see also Example 9 below). + +4.3. Input of Bidi IRIs + + Bidi input methods MUST generate Bidi IRIs in logical order while + rendering them according to section 4.1. During input, rendering + SHOULD be updated after every new character is input to avoid end- + user confusion. + +4.4. Examples + + This section gives examples of bidirectional IRIs, in Bidi Notation. + It shows legal IRIs with the relationship between logical and visual + representation and explains how certain phenomena in this + relationship may look strange to somebody not familiar with + bidirectional behavior, but familiar to users of Arabic and Hebrew. + It also shows what happens if the restrictions given in section 4.2 + are not followed. The examples below can be seen at [BidiEx], in + Arabic, Hebrew, and Bidi Notation variants. + + + + + +Duerst & Suignard Standards Track [Page 19] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + To read the bidi text in the examples, read the visual representation + from left to right until you encounter a block of rtl text. Read the + rtl block (including slashes and other special characters) from right + to left, then continue at the next unread ltr character. + + Example 1: A single component with rtl characters is inverted: + Logical representation: "http://ab.CDEFGH.ij/kl/mn/op.html" + Visual representation: "http://ab.HGFEDC.ij/kl/mn/op.html" + Components can be read one by one, and each component can be read in + its natural direction. 
+ + Example 2: More than one consecutive component with rtl characters is + inverted as a whole: + Logical representation: "http://ab.CDE.FGH/ij/kl/mn/op.html" + Visual representation: "http://ab.HGF.EDC/ij/kl/mn/op.html" + A sequence of rtl components is read rtl, in the same way as a + sequence of rtl words is read rtl in a bidi text. + + Example 3: All components of an IRI (except for the scheme) are rtl. + All rtl components are inverted overall: + Logical representation: "http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV" + Visual representation: "http://VU#TS=RQ;PO=NM?LK/JI/HG/FE.DC.BA" + The whole IRI (except the scheme) is read rtl. Delimiters between + rtl components stay between the respective components; delimiters + between ltr and rtl components don't move. + + Example 4: Each of several sequences of rtl components is inverted on + its own: + Logical representation: "http://AB.CD.ef/gh/IJ/KL.html" + Visual representation: "http://DC.BA.ef/gh/LK/JI.html" + Each sequence of rtl components is read rtl, in the same way as each + sequence of rtl words in an ltr text is read rtl. + + Example 5: Example 2, applied to components of different kinds: + Logical representation: "http://ab.cd.EF/GH/ij/kl.html" + Visual representation: "http://ab.cd.HG/FE/ij/kl.html" + The inversion of the domain name label and the path component may be + unexpected, but it is consistent with other bidi behavior. For + reassurance that the domain component really is "ab.cd.EF", it may be + helpful to read aloud the visual representation following the bidi + algorithm. After "http://ab.cd." one reads the RTL block + "E-F-slash-G-H", which corresponds to the logical representation. + + Example 6: Same as Example 5, with more rtl components: + Logical representation: "http://ab.CD.EF/GH/IJ/kl.html" + Visual representation: "http://ab.JI/HG/FE.DC/kl.html" + The inversion of the domain name labels and the path components may + be easier to identify because the delimiters also move. 
+ + + +Duerst & Suignard Standards Track [Page 20] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Example 7: A single rtl component includes digits: + Logical representation: "http://ab.CDE123FGH.ij/kl/mn/op.html" + Visual representation: "http://ab.HGF123EDC.ij/kl/mn/op.html" + Numbers are written ltr in all cases but are treated as an additional + embedding inside a run of rtl characters. This is completely + consistent with usual bidirectional text. + + Example 8 (not allowed): Numbers are at the start or end of an rtl + component: + Logical representation: "http://ab.cd.ef/GH1/2IJ/KL.html" + Visual representation: "http://ab.cd.ef/LK/JI1/2HG.html" + The sequence "1/2" is interpreted by the bidi algorithm as a + fraction, fragmenting the components and leading to confusion. There + are other characters that are interpreted in a special way close to + numbers; in particular, "+", "-", "#", "$", "%", ",", ".", and ":". + + Example 9 (not allowed): The numbers in the previous example are + percent-encoded: + Logical representation: "http://ab.cd.ef/GH%31/%32IJ/KL.html", + Visual representation (Hebrew): "http://ab.cd.ef/%31HG/LK/JI%32.html" + Visual representation (Arabic): "http://ab.cd.ef/31%HG/%LK/JI32.html" + Depending on whether the uppercase letters represent Arabic or + Hebrew, the visual representation is different. + + Example 10 (allowed but not recommended): + Logical representation: "http://ab.CDEFGH.123/kl/mn/op.html" + Visual representation: "http://ab.123.HGFEDC/kl/mn/op.html" + Components consisting of only numbers are allowed (it would be rather + difficult to prohibit them), but these may interact with adjacent RTL + components in ways that are not easy to predict. + +5. Normalization and Comparison + + Note: The structure and much of the material for this section is + taken from section 6 of [RFC3986]; the differences are due to the + specifics of IRIs. 
+ + One of the most common operations on IRIs is simple comparison: + Determining whether two IRIs are equivalent without using the IRIs or + the mapped URIs to access their respective resource(s). A comparison + is performed whenever a response cache is accessed, a browser checks + its history to color a link, or an XML parser processes tags within a + namespace. Extensive normalization prior to comparison of IRIs may + be used by spiders and indexing engines to prune a search space or + reduce duplication of request actions and response storage. + + + + + + +Duerst & Suignard Standards Track [Page 21] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + IRI comparison is performed for some particular purpose. Protocols + or implementations that compare IRIs for different purposes will + often be subject to differing design trade-offs in regards to how + much effort should be spent in reducing aliased identifiers. This + section describes various methods that may be used to compare IRIs, + the trade-offs between them, and the types of applications that might + use them. + +5.1. Equivalence + + Because IRIs exist to identify resources, presumably they should be + considered equivalent when they identify the same resource. However, + this definition of equivalence is not of much practical use, as there + is no way for an implementation to compare two resources unless it + has full knowledge or control of them. For this reason, determination + of equivalence or difference of IRIs is based on string comparison, + perhaps augmented by reference to additional rules provided by URI + scheme definitions. We use the terms "different" and "equivalent" to + describe the possible outcomes of such comparisons, but there are + many application-dependent versions of equivalence. + + Even though it is possible to determine that two IRIs are equivalent, + IRI comparison is not sufficient to determine whether two IRIs + identify different resources. 
For example, an owner of two different + domain names could decide to serve the same resource from both, + resulting in two different IRIs. Therefore, comparison methods are + designed to minimize false negatives while strictly avoiding false + positives. + + In testing for equivalence, applications should not directly compare + relative references; the references should be converted to their + respective target IRIs before comparison. When IRIs are compared to + select (or avoid) a network action, such as retrieval of a + representation, fragment components (if any) should be excluded from + the comparison. + + Applications using IRIs as identity tokens with no relationship to a + protocol MUST use the Simple String Comparison (see section 5.3.1). + All other applications MUST select one of the comparison practices + from the Comparison Ladder (see section 5.3 or, after IRI-to-URI + conversion, select one of the comparison practices from the URI + comparison ladder in [RFC3986], section 6.2) + +5.2. Preparation for Comparison + + Any kind of IRI comparison REQUIRES that all escapings or encodings + in the protocol or format that carries an IRI are resolved. This is + usually done when the protocol or format is parsed. Examples of such + + + +Duerst & Suignard Standards Track [Page 22] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + escapings or encodings are entities and numeric character references + in [HTML4] and [XML1]. As an example, + "http://example.org/ros&eacute;" (in HTML), + "http://example.org/ros&#233;" (in HTML or XML), and + "http://example.org/ros&#xE9;" (in HTML or XML) are all resolved into + what is denoted in this document (see section 1.4) as + "http://example.org/ros&#xE9;" (the "&#xE9;" here standing for the + actual e-acute character, to compensate for the fact that this + document cannot contain non-ASCII characters). 
+ + Similar considerations apply to encodings such as Transfer Codings in + HTTP (see [RFC2616]) and Content Transfer Encodings in MIME + ([RFC2045]), although in these cases, the encoding is based not on + characters but on octets, and additional care is required to make + sure that characters, and not just arbitrary octets, are compared + (see section 5.3.1). + +5.3. Comparison Ladder + + In practice, a variety of methods are used, to test IRI equivalence. + These methods fall into a range distinguished by the amount of + processing required and the degree to which the probability of false + negatives is reduced. As noted above, false negatives cannot be + eliminated. In practice, their probability can be reduced, but this + reduction requires more processing and is not cost-effective for all + applications. + + If this range of comparison practices is considered as a ladder, the + following discussion will climb the ladder, starting with practices + that are cheap but have a relatively higher chance of producing false + negatives, and proceeding to those that have higher computational + cost and lower risk of false negatives. + +5.3.1. Simple String Comparison + + If two IRIs, when considered as character strings, are identical, + then it is safe to conclude that they are equivalent. This type of + equivalence test has very low computational cost and is in wide use + in a variety of applications, particularly in the domain of parsing. + It is also used when a definitive answer to the question of IRI + equivalence is needed that is independent of the scheme used and that + can be calculated quickly and without accessing a network. An + example of such a case is XML Namespaces ([XMLNamespace]). + + Testing strings for equivalence requires some basic precautions. This + procedure is often referred to as "bit-for-bit" or "byte-for-byte" + comparison, which is potentially misleading. 
Testing strings for + equality is normally based on pair comparison of the characters that + + + +Duerst & Suignard Standards Track [Page 23] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + make up the strings, starting from the first and proceeding until + both strings are exhausted and all characters are found to be equal, + until a pair of characters compares unequal, or until one of the + strings is exhausted before the other. + + This character comparison requires that each pair of characters be + put in comparable encoding form. For example, should one IRI be + stored in a byte array in UTF-8 encoding form and the second in a + UTF-16 encoding form, bit-for-bit comparisons applied naively will + produce errors. It is better to speak of equality on a + character-for-character rather than on a byte-for-byte or bit-for-bit + basis. In practical terms, character-by-character comparisons should + be done codepoint by codepoint after conversion to a common character + encoding form. When comparing character by character, the comparison + function MUST NOT map IRIs to URIs, because such a mapping would + create additional spurious equivalences. It follows that an IRI + SHOULD NOT be modified when being transported if there is any chance + that this IRI might be used as an identifier. + + False negatives are caused by the production and use of IRI aliases. + Unnecessary aliases can be reduced, regardless of the comparison + method, by consistently providing IRI references in an already + normalized form (i.e., a form identical to what would be produced + after normalization is applied, as described below). Protocols and + data formats often limit some IRI comparisons to simple string + comparison, based on the theory that people and implementations will, + in their own best interest, be consistent in providing IRI + references, or at least be consistent enough to negate any efficiency + that might be obtained from further normalization. 
+ +5.3.2. Syntax-Based Normalization + + Implementations may use logic based on the definitions provided by + this specification to reduce the probability of false negatives. This + processing is moderately higher in cost than character-for-character + string comparison. For example, an application using this approach + could reasonably consider the following two IRIs equivalent: + + example://a/b/c/%7Bfoo%7D/rosé + eXAMPLE://a/./b/../b/%63/%7bfoo%7d/ros%C3%A9 + + Web user agents, such as browsers, typically apply this type of IRI + normalization when determining whether a cached response is + available. Syntax-based normalization includes such techniques as + case normalization, character normalization, percent-encoding + normalization, and removal of dot-segments. + + + + + +Duerst & Suignard Standards Track [Page 24] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +5.3.2.1. Case Normalization + + For all IRIs, the hexadecimal digits within a percent-encoding + triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore + should be normalized to use uppercase letters for the digits A - F. + + When an IRI uses components of the generic syntax, the component + syntax equivalence rules always apply; namely, that the scheme and + US-ASCII only host are case insensitive and therefore should be + normalized to lowercase. For example, the URI + "HTTP://www.EXAMPLE.com/" is equivalent to "http://www.example.com/". + Case equivalence for non-ASCII characters in IRI components that are + IDNs is discussed in section 5.3.3. The other generic syntax + components are assumed to be case sensitive unless specifically + defined otherwise by the scheme. + + Creating schemes that allow case-insensitive syntax components + containing non-ASCII characters should be avoided. Case normalization + of non-ASCII characters can be culturally dependent and is always a + complex operation. 
The only exception concerns non-ASCII host names + for which the character normalization includes a mapping step derived + from case folding. + +5.3.2.2. Character Normalization + + The Unicode Standard [UNIV4] defines various equivalences between + sequences of characters for various purposes. Unicode Standard Annex + #15 [UTR15] defines various Normalization Forms for these + equivalences, in particular Normalization Form C (NFC, Canonical + Decomposition, followed by Canonical Composition) and Normalization + Form KC (NFKC, Compatibility Decomposition, followed by Canonical + Composition). + + Equivalence of IRIs MUST rely on the assumption that IRIs are + appropriately pre-character-normalized rather than apply character + normalization when comparing two IRIs. The exceptions are conversion + from a non-digital form, and conversion from a non-UCS-based + character encoding to a UCS-based character encoding. In these cases, + NFC or a normalizing transcoder using NFC MUST be used for + interoperability. To avoid false negatives and problems with + transcoding, IRIs SHOULD be created by using NFC. Using NFKC may + avoid even more problems; for example, by choosing half-width Latin + letters instead of full-width ones, and full-width instead of + half-width Katakana. + + As an example, "http://www.example.org/r&#xE9;sum&#xE9;.html" (in XML + Notation) is in NFC. On the other hand, + "http://www.example.org/re&#x301;sume&#x301;.html" is not in NFC. + + + +Duerst & Suignard Standards Track [Page 25] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The former uses precombined e-acute characters, and the latter uses + "e" characters followed by combining acute accents. Both usages are + defined as canonically equivalent in [UNIV4]. + + Note: Because it is unknown how a particular sequence of characters + is being treated with respect to character normalization, it would + be inappropriate to allow third parties to normalize an IRI + arbitrarily. 
This does not contradict the recommendation that + when a resource is created, its IRI should be as character + normalized as possible (i.e., NFC or even NFKC). This is similar + to the uppercase/lowercase problems. Some parts of a URI are case + insensitive (domain name). For others, it is unclear whether they + are case sensitive, case insensitive, or something in between + (e.g., case sensitive, but with a multiple choice selection if the + wrong case is used, instead of a direct negative result). The + best recipe is that the creator use a reasonable capitalization + and, when transferring the URI, capitalization never be changed. + + Various IRI schemes may allow the usage of Internationalized Domain + Names (IDN) [RFC3490] either in the ireg-name part or elsewhere. + Character Normalization also applies to IDNs, as discussed in section + 5.3.3. + +5.3.2.3. Percent-Encoding Normalization + + The percent-encoding mechanism (section 2.1 of [RFC3986]) is a + frequent source of variance among otherwise identical IRIs. In + addition to the case normalization issue noted above, some IRI + producers percent-encode octets that do not require percent-encoding, + resulting in IRIs that are equivalent to their non encoded + counterparts. These IRIs should be normalized by decoding any + percent-encoded octet sequence that corresponds to an unreserved + character, as described in section 2.3 of [RFC3986]. + + For actual resolution, differences in percent-encoding (except for + the percent-encoding of reserved characters) MUST always result in + the same resource. For example, "http://example.org/~user", + "http://example.org/%7euser", and "http://example.org/%7Euser", must + resolve to the same resource. 
+ + If this kind of equivalence is to be tested, the percent-encoding of + both IRIs to be compared has to be aligned; for example, by + converting both IRIs to URIs (see section 3.1), eliminating escape + differences in the resulting URIs, and making sure that the case of + the hexadecimal characters in the percent-encoding is always the same + (preferably uppercase). If the IRI is to be passed to another + + + + + +Duerst & Suignard Standards Track [Page 26] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + application or used further in some other way, its original form MUST + be preserved. The conversion described here should be performed only + for local comparison. + +5.3.2.4. Path Segment Normalization + + The complete path segments "." and ".." are intended only for use + within relative references (section 4.1 of [RFC3986]) and are removed + as part of the reference resolution process (section 5.2 of + [RFC3986]). However, some implementations may incorrectly assume + that reference resolution is not necessary when the reference is + already an IRI, and thus fail to remove dot-segments when they occur + in non-relative paths. IRI normalizers should remove dot-segments by + applying the remove_dot_segments algorithm to the path, as described + in section 5.2.4 of [RFC3986]. + +5.3.3. Scheme-Based Normalization + + The syntax and semantics of IRIs vary from scheme to scheme, as + described by the defining specification for each scheme. + Implementations may use scheme-specific rules, at further processing + cost, to reduce the probability of false negatives. 
For example, + because the "http" scheme makes use of an authority component, has a + default port of "80", and defines an empty path to be equivalent to + "/", the following four IRIs are equivalent: + + http://example.com + http://example.com/ + http://example.com:/ + http://example.com:80/ + + In general, an IRI that uses the generic syntax for authority with an + empty path should be normalized to a path of "/". Likewise, an + explicit ":port", for which the port is empty or the default for the + scheme, is equivalent to one where the port and its ":" delimiter are + elided and thus should be removed by scheme-based normalization. For + example, the second IRI above is the normal form for the "http" + scheme. + + Another case where normalization varies by scheme is in the handling + of an empty authority component or empty host subcomponent. For many + scheme specifications, an empty authority or host is considered an + error; for others, it is considered equivalent to "localhost" or the + end-user's host. When a scheme defines a default for authority and + an IRI reference to that default is desired, the reference should be + normalized to an empty authority for the sake of uniformity, brevity, + + + + + +Duerst & Suignard Standards Track [Page 27] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + and internationalization. If, however, either the userinfo or port + subcomponents are non-empty, then the host should be given explicitly + even if it matches the default. + + Normalization should not remove delimiters when their associated + component is empty unless it is licensed to do so by the scheme + specification. For example, the IRI "http://example.com/?" cannot be + assumed to be equivalent to any of the examples above. Likewise, the + presence or absence of delimiters within a userinfo subcomponent is + usually significant to its interpretation. 
The fragment component is + not subject to any scheme-based normalization; thus, two IRIs that + differ only by the suffix "#" are considered different regardless of + the scheme. + + Some IRI schemes may allow the usage of Internationalized Domain + Names (IDN) [RFC3490] either in their ireg-name part or elsewhere. + When in use in IRIs, those names SHOULD be validated by using the + ToASCII operation defined in [RFC3490], with the flags + "UseSTD3ASCIIRules" and "AllowUnassigned". An IRI containing an + invalid IDN cannot successfully be resolved. Validated IDN + components of IRIs SHOULD be character normalized by using the + Nameprep process [RFC3491]; however, for legibility purposes, they + SHOULD NOT be converted into ASCII Compatible Encoding (ACE). + + Scheme-based normalization may also consider IDN components and their + conversions to punycode as equivalent. As an example, + "http://résumé.example.org" may be considered equivalent to + "http://xn--rsum-bpad.example.org". + + Other scheme-specific normalizations are possible. + +5.3.4. Protocol-Based Normalization + + Substantial effort to reduce the incidence of false negatives is + often cost-effective for web spiders. Consequently, they implement + even more aggressive techniques in IRI comparison. For example, if + they observe that an IRI such as + + http://example.com/data + + redirects to an IRI differing only in the trailing slash + + http://example.com/data/ + + they will likely regard the two as equivalent in the future. This + kind of technique is only appropriate when equivalence is clearly + indicated by both the result of accessing the resources and the + + + + +Duerst & Suignard Standards Track [Page 28] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + common conventions of their scheme's dereference algorithm (in this + case, use of redirection by HTTP origin servers to avoid problems + with relative references). + +6. Use of IRIs + +6.1. 
Limitations on UCS Characters Allowed in IRIs + + This section discusses limitations on characters and character + sequences usable for IRIs beyond those given in section 2.2 and + section 4.1. The considerations in this section are relevant when + IRIs are created and when URIs are converted to IRIs. + + a. The repertoire of characters allowed in each IRI component is + limited by the definition of that component. For example, the + definition of the scheme component does not allow characters + beyond US-ASCII. + + (Note: In accordance with URI practice, generic IRI software + cannot and should not check for such limitations.) + + b. The UCS contains many areas of characters for which there are + strong visual look-alikes. Because of the likelihood of + transcription errors, these also should be avoided. This + includes the full-width equivalents of Latin characters, + half-width Katakana characters for Japanese, and many others. It + also includes many look-alikes of "space", "delims", and + "unwise", characters excluded in [RFC3491]. + + Additional information is available from [UNIXML]. [UNIXML] is + written in the context of running text rather than in that of + identifiers. Nevertheless, it discusses many of the categories of + characters not appropriate for IRIs. + +6.2. Software Interfaces and Protocols + + Although an IRI is defined as a sequence of characters, software + interfaces for URIs typically function on sequences of octets or + other kinds of code units. Thus, software interfaces and protocols + MUST define which character encoding is used. + + Intermediate software interfaces between IRI-capable components and + URI-only components MUST map the IRIs per section 3.1, when + transferring from IRI-capable to URI-only components. This mapping + SHOULD be applied as late as possible. It SHOULD NOT be applied + between components that are known to be able to handle IRIs. 
+ + + + + +Duerst & Suignard Standards Track [Page 29] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +6.3. Format of URIs and IRIs in Documents and Protocols + + Document formats that transport URIs may have to be upgraded to allow + the transport of IRIs. In cases where the document as a whole has a + native character encoding, IRIs MUST also be encoded in this + character encoding and converted accordingly by a parser or + interpreter. IRI characters not expressible in the native character + encoding SHOULD be escaped by using the escaping conventions of the + document format if such conventions are available. Alternatively, + they MAY be percent-encoded according to section 3.1. For example, in + HTML or XML, numeric character references SHOULD be used. If a + document as a whole has a native character encoding and that + character encoding is not UTF-8, then IRIs MUST NOT be placed into + the document in the UTF-8 character encoding. + + Note: Some formats already accommodate IRIs, although they use + different terminology. HTML 4.0 [HTML4] defines the conversion from + IRIs to URIs as error-avoiding behavior. XML 1.0 [XML1], XLink + [XLink], XML Schema [XMLSchema], and specifications based upon them + allow IRIs. Also, it is expected that all relevant new W3C formats + and protocols will be required to handle IRIs [CharMod]. + +6.4. Use of UTF-8 for Encoding Original Characters + + This section discusses details and gives examples for point c) in + section 1.2. To be able to use IRIs, the URI corresponding to the + IRI in question has to encode original characters into octets by + using UTF-8. This can be specified for all URIs of a URI scheme or + can apply to individual URIs for schemes that do not specify how to + encode original characters. It can apply to the whole URI, or only + to some part. For background information on encoding characters into + URIs, see also section 2.5 of [RFC3986]. 
+ + For new URI schemes, using UTF-8 is recommended in [RFC2718]. + Examples where UTF-8 is already used are the URN syntax [RFC2141], + IMAP URLs [RFC2192], and POP URLs [RFC2384]. On the other hand, + because the HTTP URL scheme does not specify how to encode original + characters, only some HTTP URLs can have corresponding but different + IRIs. + + For example, for a document with a URI of + "http://www.example.org/r%C3%A9sum%C3%A9.html", it is possible to + construct a corresponding IRI (in XML notation; see section 1.4): + "http://www.example.org/r&#xE9;sum&#xE9;.html" ("&#xE9;" stands for + the e-acute character, and "%C3%A9" is the UTF-8 encoded and + percent-encoded representation of that character). On the other + hand, for a document with a URI of + + + + +Duerst & Suignard Standards Track [Page 30] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + "http://www.example.org/r%E9sum%E9.html", the percent-encoding octets + cannot be converted to actual characters in an IRI, as the + percent-encoding is not based on UTF-8. + + This means that for most URI schemes, there is no need to upgrade + their scheme definition in order for them to work with IRIs. The + main case where upgrading makes sense is when a scheme definition, or + a particular component of a scheme, is strictly limited to the use of + US-ASCII characters with no provision to include non-ASCII + characters/octets via percent-encoding, or if a scheme definition + currently uses highly scheme-specific provisions for the encoding of + non-ASCII characters. An example of this is the mailto: scheme + [RFC2368]. + + This specification does not upgrade any scheme specifications in any + way; this has to be done separately. Also, note that there is no + such thing as an "IRI scheme"; all IRIs use URI schemes, and all URI + schemes can be used with IRIs, even though in some cases only by + using URIs directly as IRIs, without any conversion. 
+ + URI schemes can impose restrictions on the syntax of scheme-specific + URIs; i.e., URIs that are admissible under the generic URI syntax + [RFC3986] may not be admissible due to narrower syntactic constraints + imposed by a URI scheme specification. URI scheme definitions cannot + broaden the syntactic restrictions of the generic URI syntax; + otherwise, it would be possible to generate URIs that satisfied the + scheme-specific syntactic constraints without satisfying the + syntactic constraints of the generic URI syntax. However, additional + syntactic constraints imposed by URI scheme specifications are + applicable to IRI, as the corresponding URI resulting from the + mapping defined in section 3.1 MUST be a valid URI under the + syntactic restrictions of generic URI syntax and any narrower + restrictions imposed by the corresponding URI scheme specification. + + The requirement for the use of UTF-8 applies to all parts of a URI + (with the potential exception of the ireg-name part; see section + 3.1). However, it is possible that the capability of IRIs to + represent a wide range of characters directly is used just in some + parts of the IRI (or IRI reference). The other parts of the IRI may + only contain US-ASCII characters, or they may not be based on UTF-8. + They may be based on another character encoding, or they may directly + encode raw binary data (see also [RFC2397]). + + For example, it is possible to have a URI reference of + "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the + document name is encoded in iso-8859-1 based on server settings, but + where the fragment identifier is encoded in UTF-8 according to + + + + +Duerst & Suignard Standards Track [Page 31] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [XPointer]. The IRI corresponding to the above URI would be (in XML + notation) + "http://www.example.org/r%E9sum%E9.xml#r&#xE9;sum&#xE9;". + + Similar considerations apply to query parts. 
The functionality of + IRIs (namely, to be able to include non-ASCII characters) can only be + used if the query part is encoded in UTF-8. + +6.5. Relative IRI References + + Processing of relative IRI references against a base is handled + straightforwardly; the algorithms of [RFC3986] can be applied + directly, treating the characters additionally allowed in IRI + references in the same way that unreserved characters are in URI + references. + +7. URI/IRI Processing Guidelines (Informative) + + This informative section provides guidelines for supporting IRIs in + the same software components and operations that currently process + URIs: Software interfaces that handle URIs, software that allows + users to enter URIs, software that creates or generates URIs, + software that displays URIs, formats and protocols that transport + URIs, and software that interprets URIs. These may all require + modification before functioning properly with IRIs. The + considerations in this section also apply to URI references and IRI + references. + +7.1. URI/IRI Software Interfaces + + Software interfaces that handle URIs, such as URI-handling APIs and + protocols transferring URIs, need interfaces and protocol elements + that are designed to carry IRIs. + + In case the current handling in an API or protocol is based on + US-ASCII, UTF-8 is recommended as the character encoding for IRIs, as + it is compatible with US-ASCII, is in accordance with the + recommendations of [RFC2277], and makes converting to URIs easy. In + any case, the API or protocol definition must clearly define the + character encoding to be used. + + The transfer from URI-only to IRI-capable components requires no + mapping, although the conversion described in section 3.2 above may + be performed. It is preferable not to perform this inverse + conversion when there is a chance that this cannot be done correctly. 
+ + + + + + +Duerst & Suignard Standards Track [Page 32] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +7.2. URI/IRI Entry + + Some components allow users to enter URIs into the system by typing + or dictation, for example. This software must be updated to allow + for IRI entry. + + A person viewing a visual representation of an IRI (as a sequence of + glyphs, in some order, in some visual display) or hearing an IRI will + use an entry method for characters in the user's language to input + the IRI. Depending on the script and the input method used, this may + be a more or less complicated process. + + The process of IRI entry must ensure, as much as possible, that the + restrictions defined in section 2.2 are met. This may be done by + choosing appropriate input methods or variants/settings thereof, by + appropriately converting the characters being input, by eliminating + characters that cannot be converted, and/or by issuing a warning or + error message to the user. + + As an example of variant settings, input method editors for East + Asian Languages usually allow the input of Latin letters and related + characters in full-width or half-width versions. For IRI input, the + input method editor should be set so that it produces half-width + Latin letters and punctuation and full-width Katakana. + + An input field primarily or solely used for the input of URIs/IRIs + may allow the user to view an IRI as it is mapped to a URI. Places + where the input of IRIs is frequent may provide the possibility for + viewing an IRI as mapped to a URI. This will help users when some of + the software they use does not yet accept IRIs. + + An IRI input component interfacing to components that handle URIs, + but not IRIs, must map the IRI to a URI before passing it to these + components. + + For the input of IRIs with right-to-left characters, please see + section 4.3. + +7.3. 
URI/IRI Transfer between Applications + + Many applications, particularly mail user agents, try to detect URIs + appearing in plain text. For this, they use some heuristics based on + URI syntax. They then allow the user to click on such URIs and + retrieve the corresponding resource in an appropriate (usually + scheme-dependent) application. + + + + + + +Duerst & Suignard Standards Track [Page 33] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Such applications have to be upgraded to use the IRI syntax as a base + for heuristics. In particular, a non-ASCII character should not be + taken as the indication of the end of an IRI. Such applications also + have to make sure that they correctly convert the detected IRI from + the character encoding of the document or application where the IRI + appears to the character encoding used by the system-wide IRI + invocation mechanism, or to a URI (according to section 3.1) if the + system-wide invocation mechanism only accepts URIs. + + The clipboard is another frequently used way to transfer URIs and + IRIs from one application to another. On most platforms, the + clipboard is able to store and transfer text in many languages and + scripts. Correctly used, the clipboard transfers characters, not + bytes, which will do the right thing with IRIs. + +7.4. URI/IRI Generation + + Systems that offer resources through the Internet, where those + resources have logical names, sometimes automatically generate URIs + for the resources they offer. For example, some HTTP servers can + generate a directory listing for a file directory and then respond to + the generated URIs with the files. + + Many legacy character encodings are in use in various file systems. + Many currently deployed systems do not transform the local character + representation of the underlying system before generating URIs. 
+ + For maximum interoperability, systems that generate resource + identifiers should make the appropriate transformations. For + example, if a file system contains a file named + "résumé.html", a server should expose this as + "r%C3%A9sum%C3%A9.html" in a URI, which allows use of + "résumé.html" in an IRI, even if locally the file name is + kept in a character encoding other than UTF-8. + + This recommendation particularly applies to HTTP servers. For FTP + servers, similar considerations apply; see [RFC2640]. + +7.5. URI/IRI Selection + + In some cases, resource owners and publishers have control over the + IRIs used to identify their resources. This control is mostly + executed by controlling the resource names, such as file names, + directly. + + + + + + + +Duerst & Suignard Standards Track [Page 34] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + In these cases, it is recommended to avoid choosing IRIs that are + easily confused. For example, for US-ASCII, the lower-case ell ("l") + is easily confused with the digit one ("1"), and the upper-case oh + ("O") is easily confused with the digit zero ("0"). Publishers + should avoid confusing users with "br0ken" or "1ame" identifiers. + + Outside the US-ASCII repertoire, there are many more opportunities + for confusion; a complete set of guidelines is too lengthy to include + here. As long as names are limited to characters from a single + script, native writers of a given script or language will know best + when ambiguities can appear, and how they can be avoided. What may + look ambiguous to a stranger may be completely obvious to the average + native user. On the other hand, in some cases, the UCS contains + variants for compatibility reasons; for example, for typographic + purposes. These should be avoided wherever possible. Although there + may be exceptions, newly created resource names should generally be + in NFKC [UTR15] (which means that they are also in NFC). 
+ + As an example, the UCS contains the "fi" ligature at U+FB01 for + compatibility reasons. Wherever possible, IRIs should use the two + letters "f" and "i" rather than the "fi" ligature. An example where + the latter may be used is in the query part of an IRI for an explicit + search for a word written containing the "fi" ligature. + + In certain cases, there is a chance that characters from different + scripts look the same. The best known example is the similarity of + the Latin "A", the Greek "Alpha", and the Cyrillic "A". To avoid + such cases, only IRIs should be created where all the characters in a + single component are used together in a given language. This usually + means that all of these characters will be from the same script, but + there are languages that mix characters from different scripts (such + as Japanese). This is similar to the heuristics used to distinguish + between letters and numbers in the examples above. Also, for Latin, + Greek, and Cyrillic, using lowercase letters results in fewer + ambiguities than using uppercase letters would. + +7.6. Display of URIs/IRIs + + In situations where the rendering software is not expected to display + non-ASCII parts of the IRI correctly using the available layout and + font resources, these parts should be percent-encoded before being + displayed. + + For display of Bidi IRIs, please see section 4.1. + + + + + + + +Duerst & Suignard Standards Track [Page 35] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +7.7. Interpretation of URIs and IRIs + + Software that interprets IRIs as the names of local resources should + accept IRIs in multiple forms and convert and match them with the + appropriate local resource names. + + First, multiple representations include both IRIs in the native + character encoding of the protocol and also their URI counterparts. + + Second, it may include URIs constructed based on character encodings + other than UTF-8. 
These URIs may be produced by user agents that do + not conform to this specification and that use legacy character + encodings to convert non-ASCII characters to URIs. Whether this is + necessary, and what character encodings to cover, depends on a number + of factors, such as the legacy character encodings used locally and + the distribution of various versions of user agents. For example, + software for Japanese may accept URIs in Shift_JIS and/or EUC-JP in + addition to UTF-8. + + Third, it may include additional mappings to be more user-friendly + and robust against transmission errors. These would be similar to + how some servers currently treat URIs as case insensitive or perform + additional matching to account for spelling errors. For characters + beyond the US-ASCII repertoire, this may, for example, include + ignoring the accents on received IRIs or resource names. Please note + that such mappings, including case mappings, are language dependent. + + It can be difficult to identify a resource unambiguously if too many + mappings are taken into consideration. However, percent-encoded and + not percent-encoded parts of IRIs can always be clearly + distinguished. Also, the regularity of UTF-8 (see [Duerst97]) makes + the potential for collisions lower than it may seem at first. + +7.8. Upgrading Strategy + + Where this recommendation places further constraints on software for + which many instances are already deployed, it is important to + introduce upgrades carefully and to be aware of the various + interdependencies. + + If IRIs cannot be interpreted correctly, they should not be created, + generated, or transported. This suggests that upgrading URI + interpreting software to accept IRIs should have highest priority. + + On the other hand, a single IRI is interpreted only by a single or + very few interpreters that are known in advance, although it may be + entered and transported very widely. 
+ + + + +Duerst & Suignard Standards Track [Page 36] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Therefore, IRIs benefit most from a broad upgrade of software to be + able to enter and transport IRIs. However, before an individual IRI + is published, care should be taken to upgrade the corresponding + interpreting software in order to cover the forms expected to be + received by various versions of entry and transport software. + + The upgrade of generating software to generate IRIs instead of using + a local character encoding should happen only after the service is + upgraded to accept IRIs. Similarly, IRIs should only be generated + when the service accepts IRIs and the intervening infrastructure and + protocol is known to transport them safely. + + Software converting from URIs to IRIs for display should be upgraded + only after upgraded entry software has been widely deployed to the + population that will see the displayed result. + + Where there is a free choice of character encodings, it is often + possible to reduce the effort and dependencies for upgrading to IRIs + by using UTF-8 rather than another encoding. For example, when a new + file-based Web server is set up, using UTF-8 as the character + encoding for file names will make the transition to IRIs easier. + Likewise, when a new Web form is set up using UTF-8 as the character + encoding of the form page, the returned query URIs will use UTF-8 as + the character encoding (unless the user, for whatever reason, changes + the character encoding) and will therefore be compatible with IRIs. + + These recommendations, when taken together, will allow for the + extension from URIs to IRIs in order to handle characters other than + US-ASCII while minimizing interoperability problems. For + considerations regarding the upgrade of URI scheme definitions, see + section 6.4. + +8. Security Considerations + + The security considerations discussed in [RFC3986] also apply to + IRIs. 
In addition, the following issues require particular care for + IRIs. + + Incorrect encoding or decoding can lead to security problems. In + particular, some UTF-8 decoders do not check against overlong byte + sequences. As an example, a "/" is encoded with the byte 0x2F both + in UTF-8 and in US-ASCII, but some UTF-8 decoders also wrongly + interpret the sequence 0xC0 0xAF as a "/". A sequence such as + + + + + + + + +Duerst & Suignard Standards Track [Page 37] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + "%C0%AF.." may pass some security tests and then be interpreted as + "/.." in a path if UTF-8 decoders are fault-tolerant, if conversion + and checking are not done in the right order, and/or if reserved + characters and unreserved characters are not clearly distinguished. + + There are various ways in which "spoofing" can occur with IRIs. + "Spoofing" means that somebody may add a resource name that looks the + same or similar to the user, but that points to a different resource. + The added resource may pretend to be the real resource by looking + very similar but may contain all kinds of changes that may be + difficult to spot and that can cause all kinds of problems. Most + spoofing possibilities for IRIs are extensions of those for URIs. + + Spoofing can occur for various reasons. First, a user's + normalization expectations or actual normalization when entering an + IRI or transcoding an IRI from a legacy character encoding do not + match the normalization used on the server side. Conceptually, this + is no different from the problems surrounding the use of + case-insensitive web servers. For example, a popular web page with a + mixed-case name ("http://big.example.com/PopularPage.html") might be + "spoofed" by someone who is able to create + "http://big.example.com/popularpage.html". However, the use of + unnormalized character sequences, and of additional mappings for user + convenience, may increase the chance for spoofing. 
Protocols and + servers that allow the creation of resources with names that are not + normalized are particularly vulnerable to such attacks. This is an + inherent security problem of the relevant protocol, server, or + resource and is not specific to IRIs, but it is mentioned here for + completeness. + + Spoofing can occur in various IRI components, such as the domain name + part or a path part. For considerations specific to the domain name + part, see [RFC3491]. For the path part, administrators of sites that + allow independent users to create resources in the same sub area may + have to be careful to check for spoofing. + + Spoofing can occur because in the UCS many characters look very + similar. Details are discussed in Section 7.5. Again, this is very + similar to spoofing possibilities on US-ASCII, e.g., using "br0ken" + or "1ame" URIs. + + Spoofing can occur when URIs with percent-encodings based on various + character encodings are accepted to deal with older user agents. In + some cases, particularly for Latin-based resource names, this is + usually easy to detect because UTF-8-encoded names, when interpreted + and viewed as legacy character encodings, produce mostly garbage. + + + + + +Duerst & Suignard Standards Track [Page 38] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + When concurrently used character encodings have a similar structure + but there are no characters that have exactly the same encoding, + detection is more difficult. + + Spoofing can occur with bidirectional IRIs, if the restrictions in + section 4.2 are not followed. The same visual representation may be + interpreted as different logical representations, and vice versa. It + is also very important that a correct Unicode bidirectional + implementation be used. + +9. Acknowledgements + + We would like to thank Larry Masinter for his work as coauthor of + many earlier versions of this document (draft-masinter-url-i18n-xx). 
+ + The discussion on the issue addressed here started a long time ago. + There was a thread in the HTML working group in August 1995 (under + the topic of "Globalizing URIs") and in the www-international mailing + list in July 1996 (under the topic of "Internationalization and + URLs"), and there were ad-hoc meetings at the Unicode conferences in + September 1995 and September 1997. + + Many thanks go to Francois Yergeau, Matitiahu Allouche, Roy Fielding, + Tim Berners-Lee, Mark Davis, M.T. Carrasco Benitez, James Clark, Tim + Bray, Chris Wendt, Yaron Goland, Andrea Vine, Misha Wolf, Leslie + Daigle, Ted Hardie, Bill Fenner, Margaret Wasserman, Russ Housley, + Makoto MURATA, Steven Atkin, Ryan Stansifer, Tex Texin, Graham Klyne, + Bjoern Hoehrmann, Chris Lilley, Ian Jacobs, Adam Costello, Dan + Oscarson, Elliotte Rusty Harold, Mike J. Brown, Roy Badami, Jonathan + Rosenne, Asmus Freytag, Simon Josefsson, Carlos Viegas Damasio, Chris + Haynes, Walter Underwood, and many others for help with understanding + the issues and possible solutions, and with getting the details + right. + + This document is a product of the Internationalization Working Group + (I18N WG) of the World Wide Web Consortium (W3C). Thanks to the + members of the W3C I18N Working Group and Interest Group for their + contributions and their work on [CharMod]. Thanks also go to the + members of many other W3C Working Groups for adopting IRIs, and to + the members of the Montreal IAB Workshop on Internationalization and + Localization for their review. + + + + + + + + + + +Duerst & Suignard Standards Track [Page 39] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +10. References + +10.1. Normative References + + [ASCII] American National Standards Institute, "Coded + Character Set -- 7-bit American Standard Code for + Information Interchange", ANSI X3.4, 1986. 
+ + [ISO10646] International Organization for Standardization, + "ISO/IEC 10646:2003: Information Technology - + Universal Multiple-Octet Coded Character Set (UCS)", + ISO Standard 10646, December 2003. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [RFC3490] Faltstrom, P., Hoffman, P., and A. Costello, + "Internationalizing Domain Names in Applications + (IDNA)", RFC 3490, March 2003. + + [RFC3491] Hoffman, P. and M. Blanchet, "Nameprep: A Stringprep + Profile for Internationalized Domain Names (IDN)", RFC + 3491, March 2003. + + [RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO + 10646", STD 63, RFC 3629, November 2003. + + [RFC3986] Berners-Lee, T., Fielding, R., and L. Masinter, + "Uniform Resource Identifier (URI): Generic Syntax", + STD 66, RFC 3986, January 2005. + + [UNI9] Davis, M., "The Bidirectional Algorithm", Unicode + Standard Annex #9, March 2004, + . + + [UNIV4] The Unicode Consortium, "The Unicode Standard, Version + 4.0.1, defined by: The Unicode Standard, Version 4.0 + (Reading, MA, Addison-Wesley, 2003. ISBN + 0-321-18578-1), as amended by Unicode 4.0.1 + (http://www.unicode.org/versions/Unicode4.0.1/)", + March 2004. + + + + + + + +Duerst & Suignard Standards Track [Page 40] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [UTR15] Davis, M. and M. Duerst, "Unicode Normalization + Forms", Unicode Standard Annex #15, April 2003, + . + +10.2. Informative References + + [BidiEx] "Examples of bidirectional IRIs", + . + + [CharMod] Duerst, M., Yergeau, F., Ishida, R., Wolf, M., and T. + Texin, "Character Model for the World Wide Web: + Resource Identifiers", World Wide Web Consortium + Candidate Recommendation, November 2004, + . + + [Duerst97] Duerst, M., "The Properties and Promises of UTF-8", + Proc. 
11th International Unicode Conference, San Jose + , September 1997, + . + + [Gettys] Gettys, J., "URI Model Consequences", + . + + [HTML4] Raggett, D., Le Hors, A., and I. Jacobs, "HTML 4.01 + Specification", World Wide Web Consortium + Recommendation, December 1999, + . + + [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet + Mail Extensions (MIME) Part One: Format of Internet + Message Bodies", RFC 2045, November 1996. + + [RFC2130] Weider, C., Preston, C., Simonsen, K., Alvestrand, H., + Atkinson, R., Crispin, M., and P. Svanberg, "The + Report of the IAB Character Set Workshop held 29 + February - 1 March, 1996", RFC 2130, April 1997. + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [RFC2192] Newman, C., "IMAP URL Scheme", RFC 2192, September + 1997. + + [RFC2277] Alvestrand, H., "IETF Policy on Character Sets and + Languages", BCP 18, RFC 2277, January 1998. + + + +Duerst & Suignard Standards Track [Page 41] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [RFC2368] Hoffman, P., Masinter, L., and J. Zawinski, "The + mailto URL scheme", RFC 2368, July 1998. + + [RFC2384] Gellens, R., "POP URL Scheme", RFC 2384, August 1998. + + [RFC2396] Berners-Lee, T., Fielding, R., and L. Masinter, + "Uniform Resource Identifiers (URI): Generic Syntax", + RFC 2396, August 1998. + + [RFC2397] Masinter, L., "The "data" URL scheme", RFC 2397, + August 1998. + + [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., + Masinter, L., Leach, P., and T. Berners-Lee, + "Hypertext Transfer Protocol -- HTTP/1.1", RFC 2616, + June 1999. + + [RFC2640] Curtin, B., "Internationalization of the File Transfer + Protocol", RFC 2640, July 1999. + + [RFC2718] Masinter, L., Alvestrand, H., Zigmond, D., and R. + Petke, "Guidelines for new URL Schemes", RFC 2718, + November 1999. + + [UNIXML] Duerst, M. and A. Freytag, "Unicode in XML and other + Markup Languages", Unicode Technical Report #20, World + Wide Web Consortium Note, June 2003, + . 
+ + [XLink] DeRose, S., Maler, E., and D. Orchard, "XML Linking + Language (XLink) Version 1.0", World Wide Web + Consortium Recommendation, June 2001, + . + + [XML1] Bray, T., Paoli, J., Sperberg-McQueen, C., Maler, E., + and F. Yergeau, "Extensible Markup Language (XML) 1.0 + (Third Edition)", World Wide Web Consortium + Recommendation, February 2004, + . + + [XMLNamespace] Bray, T., Hollander, D., and A. Layman, "Namespaces in + XML", World Wide Web Consortium Recommendation, + January 1999, . + + [XMLSchema] Biron, P. and A. Malhotra, "XML Schema Part 2: + Datatypes", World Wide Web Consortium Recommendation, + May 2001, . + + + + +Duerst & Suignard Standards Track [Page 42] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [XPointer] Grosso, P., Maler, E., Marsh, J. and N. Walsh, + "XPointer Framework", World Wide Web Consortium + Recommendation, March 2003, + . + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Duerst & Suignard Standards Track [Page 43] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +Appendix A. Design Alternatives + + This section shortly summarizes major design alternatives and the + reasons for why they were not chosen. + +Appendix A.1. New Scheme(s) + + Introducing new schemes (for example, httpi:, ftpi:,...) or a new + metascheme (e.g., i:, leading to URI/IRI prefixes such as i:http:, + i:ftp:,...) was proposed to make IRI-to-URI conversion scheme + dependent or to distinguish between percent-encodings resulting from + IRI-to-URI conversion and percent-encodings from legacy character + encodings. + + New schemes are not needed to distinguish URIs from true IRIs (i.e., + IRIs that contain non-ASCII characters). The benefit of being able + to detect the origin of percent-encodings is marginal, as UTF-8 can + be detected with very high reliability. 
Deploying new schemes is + extremely hard, so not requiring new schemes for IRIs makes + deployment of IRIs vastly easier. Making conversion scheme dependent + is highly inadvisable and would be encouraged by separate schemes for + IRIs. Using a uniform convention for conversion from IRIs to URIs + makes IRI implementation orthogonal to the introduction of actual new + schemes. + +Appendix A.2. Character Encodings Other Than UTF-8 + + At an early stage, UTF-7 was considered as an alternative to UTF-8 + when IRIs are converted to URIs. UTF-7 would not have needed + percent-encoding and in most cases would have been shorter than + percent-encoded UTF-8. + + Using UTF-8 avoids a double layering and overloading of the use of + the "+" character. UTF-8 is fully compatible with US-ASCII and has + therefore been recommended by the IETF, and is being used widely. + + UTF-7 has never been used much and is now clearly being discouraged. + Requiring implementations to convert from UTF-8 to UTF-7 and back + would be an additional implementation burden. + +Appendix A.3. New Encoding Convention + + Instead of using the existing percent-encoding convention of URIs, + which is based on octets, the idea was to create a new encoding + convention; for example, to use "%u" to introduce UCS code points. + + + + + + +Duerst & Suignard Standards Track [Page 44] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Using the existing octet-based percent-encoding mechanism does not + need an upgrade of the URI syntax and does not need corresponding + server upgrades. + +Appendix A.4. Indicating Character Encodings in the URI/IRI + + Some proposals suggested indicating the character encodings used in + an URI or IRI with some new syntactic convention in the URI itself, + similar to the "charset" parameter for e-mails and Web pages. 
As an + example, the label in square brackets in + "http://www.example.org/ros[iso-8859-1]&#xE9;" indicated that the + following "&#xE9;" had to be interpreted as iso-8859-1. + + If UTF-8 is used exclusively, an upgrade to the URI syntax is not + needed. It avoids potentially multiple labels that have to be copied + correctly in all cases, even on the side of a bus or on a napkin, + leading to usability problems (and being prohibitively annoying). + Exclusively using UTF-8 also reduces transcoding errors and + confusion. + +Authors' Addresses + + Martin Duerst (Note: Please write "Duerst" with u-umlaut wherever + possible, for example as "Dürst" in XML and + HTML.) + World Wide Web Consortium + 5322 Endo + Fujisawa, Kanagawa 252-8520 + Japan + + Phone: +81 466 49 1170 + Fax: +81 466 49 1171 + EMail: duerst@w3.org + URI: http://www.w3.org/People/D%C3%BCrst/ + (Note: This is the percent-encoded form of an IRI.) + + + Michel Suignard + Microsoft Corporation + One Microsoft Way + Redmond, WA 98052 + U.S.A. + + Phone: +1 425 882-8080 + EMail: michelsu@microsoft.com + URI: http://www.suignard.com + + + + + +Duerst & Suignard Standards Track [Page 45] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the IETF's procedures with respect to rights in IETF Documents can + be found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + +Duerst & Suignard Standards Track [Page 46] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt new file mode 100644 index 0000000..6a4964c --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt @@ -0,0 +1,1011 @@ + + + + + + +Network Working Group D. Black +Request for Comments: 4088 EMC Corporation +Category: Standards Track K. McCloghrie + Cisco Systems + J. 
Schoenwaelder + International University Bremen + June 2005 + + + Uniform Resource Identifier (URI) Scheme for the + Simple Network Management Protocol (SNMP) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + The Simple Network Management Protocol (SNMP) and the Internet + Standard Management Framework are widely used for the management of + communication devices, creating a need to specify SNMP access + (including access to SNMP MIB object instances) from non-SNMP + management environments. For example, when out-of-band IP management + is used via a separate management interface (e.g., for a device that + does not support in-band IP access), a uniform way to indicate how to + contact the device for management is needed. Uniform Resource + Identifiers (URIs) fit this need well, as they allow a single text + string to indicate a management access communication endpoint for a + wide variety of IP-based protocols. + + This document defines a URI scheme so that SNMP can be designated as + the protocol used for management. The scheme also allows a URI to + designate one or more MIB object instances. + + + + + + + + + + +Black, et al. Standards Track [Page 1] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Table of Contents + + 1. Introduction.................................................. 2 + 2. Usage......................................................... 3 + 3. Syntax of an SNMP URI......................................... 4 + 3.1. Relative Reference Considerations........................ 5 + 4. Semantics and Operations...................................... 6 + 4.1. 
SNMP Service URIs........................................ 6 + 4.2. SNMP Object URIs......................................... 7 + 4.2.1. SNMP Object URI Data Access....................... 8 + 4.3. OID Groups in SNMP URIs.................................. 10 + 4.4. Interoperability Considerations.......................... 10 + 5. Examples...................................................... 11 + 6. Security Considerations....................................... 12 + 6.1. SNMP URI to SNMP Gateway Security Considerations......... 13 + 7. IANA Considerations........................................... 14 + 8. Normative References.......................................... 14 + 9. Informative References........................................ 15 + 10. Acknowledgements............................................. 16 + Appendix A. Registration Template................................ 17 + +1. Introduction + + SNMP and the Internet-Standard Management Framework were originally + devised to manage IP devices via in-band means, in which management + access is primarily via the same interface(s) used to send and + receive IP traffic. SNMP's wide adoption has resulted in its use for + managing communication devices that do not support in-band IP access + (e.g., Fibre Channel devices); a separate out-of-band IP interface is + often used for management. URIs provide a convenient way to locate + that interface and specify the protocol to be used for management; + one possible scenario is for an in-band query to return a URI that + indicates how the device is managed. This document specifies a URI + scheme to permit SNMP (including a specific SNMP context) to be + designated as the management protocol by such a URI. This scheme + also allows a URI to refer to specific object instances within an + SNMP MIB. + + For a detailed overview of the documents that describe the current + Internet-Standard Management Framework, please refer to Section 7 of + [RFC3410]. 
+ + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC2119]. + + + + + + +Black, et al. Standards Track [Page 2] + +RFC 4088 URI Scheme for SNMP June 2005 + + +2. Usage + + There are two major classes of SNMP URI usage: configuration and + gateways between SNMP and other protocols that use SNMP URIs. + + An SNMP URI used for configuration indicates the location of + management information as part of the configuration of an application + containing an SNMP manager. The URI can be obtained from a + configuration file or may be provided by a managed device (see + Section 1 for an example). Management information is exchanged + between the SNMP manager and agent, but it does not flow beyond the + manager, as shown in the following diagram: + + *********** SNMP-Request ********* + * *================>* * + URI ---------->* Manager * * Agent * + * *<================* * + *********** SNMP-Response ********* + ^ + | + Other Config Info ------------+ + + Additional configuration information (e.g., a security secret or key) + may be provided via an interface other than that used for the URI. + For example, when a managed device provides an SNMP URI in an + unprotected fashion, that device should not provide a secret or key + required to use the URI. The secret or key should instead be pre- + configured in or pre-authorized to the manager; see Section 6. + + For gateway usage, clients employ SNMP URIs to request management + information via an SNMP URI to SNMP gateway (also called an SNMP + gateway in this document). 
The SNMP manager within the SNMP gateway + accesses the management information and returns it to the requesting + client, as shown in the following diagram: + + SNMP gateway + ********** URI *********** SNMP-Request ********* + * *===========>* *================>* * + * Client * * Manager * * Agent * + * *<===========* *<================* * + ********** Info *********** SNMP-Response ********* + ^ + | + Other Config Info ------------+ + + Additional configuration information (e.g., security secrets or keys) + may be provided via an interface other than that used for the URI. + For example, some types of security information, including secrets + + + +Black, et al. Standards Track [Page 3] + +RFC 4088 URI Scheme for SNMP June 2005 + + + and keys, should be pre-configured in or pre-authorized to the + manager rather than be provided by the client; see Section 6. + +3. Syntax of an SNMP URI + + An SNMP URI has the following ABNF [RFC2234] syntax, based on the + ABNF syntax rules for userinfo, host, port, and (path) segment in + [RFC3986] and the ABNF syntax rule for HEXDIG in [RFC2234]: + + snmp-uri = "snmp://" snmp-authority [ context [ oids ]] + + snmp-authority = [ securityName "@" ] host [ ":" port ] + securityName = userinfo ; SNMP securityName + + context = "/" contextName [ ";" contextEngineID ] + contextName = segment ; SNMP contextName + contextEngineID = 1*(HEXDIG HEXDIG) ; SNMP contextEngineID + + oids = "/" ( oid / oid-group ) [ suffix ] + oid-group = "(" oid *( "," oid ) ")" + oid = < as specified by [RFC 3061] > + suffix = "+" / ".*" + + The userinfo and (path) segment ABNF rules are reused for syntax + only. In contrast, host and port have both the syntax and semantics + specified in [RFC3986]. See [RFC3411] for the semantics of + securityName, contextEngineID, and contextName. 
+ + The snmp-authority syntax matches the URI authority syntax in Section + 3.2 of [RFC3986], with the additional restriction that the userinfo + component of an authority (when present) MUST be an SNMP + securityName. If the securityName is empty or not given, the entity + making use of an SNMP URI is expected to know what SNMP securityName + to use if one is required. Inclusion of authentication information + (e.g., passwords) in URIs has been deprecated (see Section 3.2.1 of + [RFC3986]), so any secret or key required for SNMP access must be + provided via other means that may be out-of-band with respect to + communication of the URI. If the port is empty or not given, port + 161 is assumed. + + If the contextName is empty or not given, the zero-length string ("") + is assumed, as it is the default SNMP context. An SNMP + contextEngineID is a variable-format binary element that is usually + discovered by an SNMP manager. An SNMP URI encodes a contextEngineID + as hexadecimal digits corresponding to a sequence of bytes. If the + contextEngineID is empty or not given, the context engine is to be + discovered by querying the SNMP agent at the specified host and port; + see Section 4.1 below. The contextEngineID component of the URI + + + +Black, et al. Standards Track [Page 4] + +RFC 4088 URI Scheme for SNMP June 2005 + + + SHOULD be present if more than one context engine at the designated + host and port supports the designated context. + + An SNMP URI that designates the default SNMP context ("") MAY end + with the "/" character that introduces the contextName component. An + SNMP URI MUST NOT end with the "/" character that introduces an oid + or oid-group component, as the empty string is not a valid OID for + SNMP. 
+ + The encoding rules specified in [RFC3986] MUST be used for SNMP URIs, + including the use of percent encoding ("%" followed by two hex + digits) as needed to represent characters defined as reserved in + [RFC3986] and any characters not allowed in a URI. SNMP permits any + UTF-8 character to be used in a securityName or contextName; all + multi-byte UTF-8 characters in an SNMP URI MUST be percent encoded as + specified in Sections 2.1 and 2.5 of [RFC3986]. These requirements + are a consequence of reusing the ABNF syntax rules for userinfo and + segment from [RFC3986]. + + SNMP URIs will generally be short enough to avoid implementation + string-length limits (e.g., that may occur at 255 characters). Such + limits may be a concern for large OID groups; relative references to + URIs (see Section 4.2 of [RFC3986]) may provide an alternative in + some circumstances. + + Use of IP addresses in SNMP URIs is acceptable in situations where + dependence on availability of DNS service is undesirable or must be + avoided; otherwise, IP addresses should not be used (see [RFC1900] + for further explanation). + +3.1. Relative Reference Considerations + + Use of the SNMP default context (zero-length string) within an SNMP + URI can result in a second instance of "//" in the URI, such as the + following: + + snmp://<host>//<oid> + + This is allowed by [RFC3986] syntax; if a URI parser does not handle + the second "//" correctly, the parser is broken and needs to be + fixed. This example is important because use of the SNMP default + context in SNMP URIs is expected to be common. + + On the other hand, the second occurrence of "//" in an absolute SNMP + URI affects usage of relative references to that URI (see Section 4.2 + of [RFC3986]) because a "//" at the start of a relative reference + always introduces a URI authority component (host plus optional + userinfo and/or port; see [RFC3986]). Specifically, a relative + + + +Black, et al. 
Standards Track [Page 5] + +RFC 4088 URI Scheme for SNMP June 2005 + + + reference of the form //<oid> will not work, because the "//" will + cause <oid> to be parsed as a URI authority, resulting in a syntax + error when the parser fails to find a host in <oid>. To avoid this + problem, relative references that start with "//" but do not contain + a URI authority component MUST NOT be used. Functionality equivalent + to any such forbidden relative reference can be obtained by prefixing + "." or ".." to the forbidden relative reference (e.g., ..//<oid>). + The prefix to use depends on the base URI. + +4. Semantics and Operations + + An SNMP URI that does not include any OIDs is called an SNMP service + URI because it designates a communication endpoint for access to SNMP + management service. An SNMP URI that includes one or more OIDs is + called an SNMP object URI because it designates one or more object + instances in an SNMP MIB. The expected means of using an SNMP URI is + to employ an SNMP manager to access the SNMP context designated by + the URI via the SNMP agent at the host and port designated by the + URI. + +4.1. SNMP Service URIs + + An SNMP service URI does not designate a data object, but rather an + SNMP context to be accessed by a service; the telnet URI scheme + [RFC1738] is another example of URIs that designate service access. + If the contextName in the URI is empty or not given, "" (the zero- + length string) is assumed, as it is the default SNMP context. + + If a contextEngineID is given in an SNMP service URI, the context + engine that it designates is to be used. If the contextEngineID is + empty or not given in the URI, the context engine is to be + discovered; the context engine to be used is the one that supports + the context designated by the URI. The contextEngineID component of + the URI SHOULD be present if more than one context engine at the + designated host and port supports the designated context. 
+ + Many common uses of SNMP URIs are expected to omit (i.e., default) + the contextEngineID because they do not involve SNMP proxy agents, + which are the most common reason for multiple SNMP context engines to + exist at a single host and port. Specifically, when an SNMP agent is + local to the network interface that it manages, the agent will + usually have only one context engine, in which case it is safe to + omit the contextEngineID component of an SNMP URI. In addition, many + SNMP agents that are local to a network interface support only the + default SNMP context (zero-length string). + + + + + + +Black, et al. Standards Track [Page 6] + +RFC 4088 URI Scheme for SNMP June 2005 + + +4.2. SNMP Object URIs + + An SNMP object URI contains one or more OIDs. The URI is used by + first separating the OID or OID group (including its preceding slash + plus any parentheses and suffix) and then processing the resulting + SNMP service URI as specified in Section 4.1 (above) to determine the + SNMP context to be accessed. The OID or OID group is then used to + generate SNMP operations directed to that SNMP context. + + The semantics of an SNMP object URI depend on whether the OID or OID + group has a suffix and what that suffix is. There are three possible + formats; in each case, the MIB object instances are designated within + the SNMP context specified by the service URI portion of the SNMP + object URI. The semantics of an SNMP object URI that contains a + single OID are as follows: + + (1) An OID without a suffix designates the MIB object instance + named by the OID. + + (2) An OID with a "+" suffix designates the lexically next MIB + object instance following the OID. + + (3) An OID with a ".*" suffix designates the set of MIB object + instances for which the OID is a strict lexical prefix; this + does not include the MIB object instance named by the OID. + + An OID group in an SNMP URI consists of a set of OIDs in parentheses. 
In each case, the OID group semantics are the extension of the single + OID semantics to each OID in the group (e.g., a URI with a "+" suffix + designates the set of MIB object instances consisting of the + lexically next instance for each OID in the OID group). + + When there is a choice among URI formats to designate the same MIB + object instance or instances, the above list is in order of + preference (no suffix is most preferable), as it runs from most + precise to least precise. This is because an OID without a suffix + precisely designates an object instance, whereas a "+" suffix + designates the next object instance, which may change, and the ".*" + suffix could designate multiple object instances. Multiple + syntactically distinct SNMP URIs SHOULD NOT be used to designate the + same MIB object instance or set of instances, as this may cause + unexpected results in URI-based systems that use string comparison to + test URIs for equality. + + SNMP object URIs designate the data to be accessed, as opposed to the + specific SNMP operations to be used for access; Section 4.2.1 + provides examples of how SNMP operations can be used to access data + for SNMP object URIs. Nonetheless, any applicable SNMP operation, + + + +Black, et al. Standards Track [Page 7] + +RFC 4088 URI Scheme for SNMP June 2005 + + + including GetBulk, MAY be used to access data for all or part of one + or more SNMP object URIs (e.g., via use of multiple variable bindings + in a single operation); it is not necessary to use the specific + operations described in Section 4.2.1 as long as the results + (returned variable bindings or error) could have been obtained by + following Section 4.2.1's descriptions. The use of relative + references that do not change the contextName (i.e., ./<oid>) should + be viewed as a hint that optimization of SNMP access across multiple + SNMP URIs may be possible.
+ + An SNMP object URI MAY also be used to specify a MIB object instance + or instances to be written; this causes generation of an SNMP Set + operation instead of a Get. The "+" and ".*" suffixes MUST NOT be + used in this case; any attempt to do so is an error that MUST NOT + generate any SNMP Set operations. Values to be written to the MIB + object instance or instances are not specified within an SNMP object + URI. + + SNMP object URIs designate data in SNMP MIBs and hence do not provide + the means to generate all possible SNMP protocol operations. For + example, data access for an SNMP object URI cannot directly generate + either Snmpv2-Trap or InformRequest notifications, although side + effects of data access could cause such notifications (depending on + the MIB). In addition, whether and how GetBulk is used for an SNMP + object URI with a ".*" suffix is implementation specific. + +4.2.1. SNMP Object URI Data Access + + Data access based on an SNMP object URI returns an SNMP variable + binding for each MIB object instance designated by the URI, or an + SNMP error if the operation fails. An SNMP variable binding binds a + variable name (OID) to a value or an SNMP exception (see [RFC3416]). + The SNMP operation or operations needed to access data designated by + an SNMP object URI depend on the OID or OID group suffix or absence + thereof. The following descriptions are not the only method of + performing data access for an SNMP object URI; any suitable SNMP + operations may be used as long as the results (returned variable + bindings or error) are functionally equivalent. + + (1) For an OID or OID group without a suffix, an SNMP Get + operation is generated using each OID as a variable binding + name. If an SNMP error occurs, that error is the result of + URI data access; otherwise, the returned variable binding or + bindings are the result of URI data access. 
Note that any + returned variable binding may contain an SNMP "noSuchObject" + or "noSuchInstance" exception. + + + + + +Black, et al. Standards Track [Page 8] + +RFC 4088 URI Scheme for SNMP June 2005 + + + (2) For an OID or OID group with a "+" suffix, an SNMP GetNext + operation is generated using each OID as a variable binding + name. If an SNMP error occurs, that error is the result of + URI data access; otherwise, the returned variable binding or + bindings are the result of URI data access. Note that any + returned variable binding may contain an SNMP "endOfMibView" + exception. + + (3) For an OID or OID group with a ".*" suffix, an SNMP GetNext + operation is initially generated using each OID as a variable + binding name. If the result is an SNMP error, that error is + the result of URI data access. If all returned variable + bindings contain either a) an OID for which the corresponding + URI OID is not a lexical prefix or b) an SNMP "endOfMibView" + exception, then the returned variable bindings are the result + of URI data access. + + Otherwise, the results of the GetNext operation are saved, and + another SNMP GetNext operation is generated using the newly + returned OIDs as variable binding names. This is repeated + (save the results and generate a GetNext with newly returned + OIDs as variable binding names) until all the returned + variable bindings from a GetNext contain either a) an OID for + which the corresponding URI OID is not a lexical prefix or b) + an SNMP "endOfMibView" exception. The results from all of the + GetNext operations are combined to become the overall result + of URI data access; this may include variable bindings whose + OID is not a lexical extension of the corresponding URI OID. + If the OID subtrees (set of OIDs for which a specific URI OID + is a lexical prefix) are not the same size for all OIDs in the + OID group, the largest subtree determines when this iteration + ends. 
SNMP GetBulk operations MAY be used to optimize this + iterated access. + + Whenever a returned variable binding contains an OID for which + the corresponding URI OID is not a lexical prefix or an SNMP + "endOfMibView" exception, iteration of that element of the OID + group MAY cease, reducing the number of variable bindings used + in subsequent GetNext operations. In this case, the results + of URI data access for the SNMP URI will not consist entirely + of OID-group-sized sets of variable bindings. Even if this + does not occur, the last variable binding returned for each + member of the OID group will generally contain an SNMP + "endOfMibView" exception or an OID for which the corresponding + URI OID is not a lexical prefix. + + + + + + +Black, et al. Standards Track [Page 9] + +RFC 4088 URI Scheme for SNMP June 2005 + + +4.3. OID Groups in SNMP URIs + + Parenthesized OID groups in SNMP URIs are intended to support MIB + object instances for which access via a single SNMP operation is + required to ensure consistent results. Therefore, the OIDs within an + OID group in an SNMP URI SHOULD be accessed by a single SNMP + operation containing a variable binding corresponding to each OID in + the group. A specific example involves the InetAddress and + InetAddressType textual conventions defined in [RFC4001], for which + the format of an InetAddress instance is specified by an associated + InetAddressType instance. If two such associated instances are read + via separate SNMP operations, the resulting values could be + inconsistent (e.g., due to an intervening Set), causing the + InetAddress value to be interpreted incorrectly. + + This single operation requirement ("SHOULD") also applies to each OID + group resulting from iterated access for an SNMP URI with a ".*" + suffix. 
When members of an SNMP URI OID group differ in the number + of OIDs for which each is a lexical prefix, this iteration may + overrun by returning numerous variable bindings for which the + corresponding OID in the OID group is not a lexical prefix. Such + overrun can be avoided by using relative references within the same + context (i.e., ./<oid>.* ) when it is not important to access + multiple MIB object instances in a single SNMP operation. + +4.4. Interoperability Considerations + + This document defines a transport-independent "snmp" scheme that is + intended to accommodate SNMP transports other than UDP. UDP is the + default transport for access to information specified by an SNMP URI + for backward compatibility with existing usage, but other transports + MAY be used. If more than one transport can be used (e.g., SNMP over + TCP [RFC3430] in addition to SNMP over UDP), the information or SNMP + service access designated by an SNMP URI SHOULD NOT depend on which + transport is used (for SNMP over TCP, this is implied by Section 2 of + [RFC3430]). + + An SNMP URI designates use of SNMPv3 as specified by [RFC3416], + [RFC3417], and related documents, but older versions of SNMP MAY be + used in accordance with [RFC3584] when usage of such older versions + is unavoidable. For SNMPv1 and SNMPv2c, the securityName, + contextName, and contextEngineID elements of an SNMP URI are mapped + to/from the community name, as described in [RFC3584]. When the + community name is kept secret as a weak form of authentication, this + mapping should be configured so that these three elements do not + reveal information about the community name. If this is not done, + then any SNMP URI component that would disclose significant + information about a secret community name SHOULD be omitted. Note + + + +Black, et al.
Standards Track [Page 10] + +RFC 4088 URI Scheme for SNMP June 2005 + + + that some community names contain reserved characters (e.g., "@") + that require percent encoding when they are used in an SNMP URI. + SNMP versions (e.g., v3) have been omitted from the SNMP URI scheme + to permit use of older versions of SNMP, as well as any possible + future successor to SNMPv3. + +5. Examples + + snmp://example.com + + This example designates the default SNMP context at the SNMP agent at + port 161 of host example.com . + + snmp://tester5@example.com:8161 + + This example designates the default SNMP context at the SNMP agent at + port 8161 of host example.com and indicates that the SNMP + securityName "tester5" is to be used to access that agent. A + possible reason to use a non-standard port is for testing a new + version of SNMP agent code. + + snmp://example.com/bridge1 + + This example designates the "bridge1" SNMP context at example.com. + Because the contextEngineID component of the URI is omitted, there + SHOULD be at most one SNMP context engine at example.com that + supports the "bridge1" context. + + snmp://example.com/bridge1;800002b804616263 + + This example designates the "bridge1" context at snmp.example.com via + the SNMP context engine 800002b804616263 (string representation of a + hexadecimal value). This avoids ambiguity if any other context + engine supports a "bridge1" context. The above two examples are + based on the figure in Section 3.3 of [RFC3411]. + + snmp://example.com//1.3.6.1.2.1.1.3.0 + snmp://example.com//1.3.6.1.2.1.1.3+ + snmp://example.com//1.3.6.1.2.1.1.3.* + + These three examples all designate the sysUpTime.0 object instance in + the SNMPv2-MIB or RFC1213-MIB for the default SNMP context ("") at + example.com as sysUpTime.0 is: + + a) designated directly by OID 1.3.6.1.2.1.1.3.0, + + b) the lexically next MIB object instance after the OID + 1.3.6.1.2.1.1.3, and + + + +Black, et al. 
Standards Track [Page 11] + +RFC 4088 URI Scheme for SNMP June 2005 + + + c) the only MIB object instance whose OID has 1.3.6.1.2.1.1.3 as a + lexical prefix. + + These three examples are provided for illustrative purposes only, as + multiple syntactically distinct URIs SHOULD NOT be used to designate + the same MIB object instance, in order to avoid unexpected results in + URI-based systems that use string comparison to test URIs for + equality. + + snmp://example.com/bridge1/1.3.6.1.2.1.2.2.1.8.* + + This example designates the ifOperStatus column of the IF-MIB in the + bridge1 SNMP context at example.com. + + snmp://example.com//(1.3.6.1.2.1.2.2.1.7,1.3.6.1.2.1.2.2.1.8).* + + This example designates all (ifAdminStatus, ifOperStatus) pairs in + the IF-MIB in the default SNMP context at example.com. + +6. Security Considerations + + An intended use of this URI scheme is designation of the location of + management access to communication devices. Such location + information may be considered sensitive in some environments, making + it important to control access to this information and possibly even + to encrypt it when it is sent over the network. All uses of this URI + scheme should provide security mechanisms appropriate to the + environments in which such uses are likely to be deployed. + + The SNMP architecture includes control of access to management + information (see Section 4.3 of [RFC3411]). An SNMP URI does not + contain sufficient security information to obtain access in all + situations, as the SNMP URI syntax is incapable of encoding SNMP + securityModels, SNMP securityLevels, and credential or keying + information for SNMP securityNames. Other means are necessary to + provide such information; one possibility is out-of-band pre- + configuration of the SNMP manager, as shown in the diagrams in + Section 2. + + By itself, the presence of a securityName in an SNMP URI does not + authorize use of that securityName to access management information. 
+ Instead, the SNMP manager SHOULD match the securityName in the URI to + an SNMP securityName and associated security information that have + been pre-configured for use by the manager. If an SNMP URI contains + a securityName that the SNMP manager is not provisioned to use, SNMP + operations for that URI SHOULD NOT be generated. + + + + + +Black, et al. Standards Track [Page 12] + +RFC 4088 URI Scheme for SNMP June 2005 + + + SNMP versions prior to SNMPv3 did not include adequate security. + Even if the network itself is secure (for example, via use of IPsec), + there is no control over who on the secure network is allowed to + access and GET/SET (read/change/create/delete) the objects in MIB + modules. It is RECOMMENDED that implementers consider the security + features provided by the SNMPv3 framework (see [RFC3410], Section 8, + for an overview), including full support for SNMPv3 cryptographic + mechanisms (for authentication and privacy). This is of additional + importance for MIB elements considered sensitive or vulnerable + because GETs have side effects. + + Further, deployment of SNMP versions prior to SNMPv3 is NOT + RECOMMENDED. Instead, it is RECOMMENDED to deploy SNMPv3 and to + enable cryptographic security. It is then a customer/operator + responsibility to ensure that the SNMP entity giving access to a MIB + module instance is properly configured to give access to the objects + only to those principals (users) that have legitimate rights to + indeed GET or SET (read/change/create/delete) them. + +6.1. SNMP URI to SNMP Gateway Security Considerations + + Additional security considerations apply to SNMP gateways that + generate SNMP operations for SNMP URIs and return the results to + clients (see Section 2) because management information is + communicated beyond the SNMP framework. In general, an SNMP gateway + should have some knowledge of the structure and function of the + management information that it accesses via SNMP. 
Among other + benefits, this allows an SNMP gateway to avoid SNMP access control + failures because the gateway can reject an SNMP URI that will cause + such failures before generating any SNMP operations. + + SNMP gateways SHOULD impose authorization or access-control checks on + all clients. If an SNMP gateway does not impose authorization or + access controls, the gateway MUST NOT automatically obtain or use + SNMP authentication material for arbitrary securityNames, as doing so + would defeat SNMP's access controls. Instead, all SNMP gateways + SHOULD authenticate each client and check the client's authorization + to use a securityName in an SNMP URI before using the securityName on + behalf of that client. + + An SNMP gateway is also responsible for ensuring that all of its + communication is appropriately secured. Specifically, an SNMP + gateway SHOULD ensure that communication of management information + with any client is protected to at least the SNMP securityLevel used + for the corresponding SNMP access (see Section 3.4.3 of [RFC3411] for + more information on securityLevel). If the client provides SNMP + security information, the SNMP gateway SHOULD authenticate the client + and SHOULD ensure that an authenticated cryptographic integrity check + + + +Black, et al. Standards Track [Page 13] + +RFC 4088 URI Scheme for SNMP June 2005 + + + is used for that communication to prevent modification of the + security information. In addition, if a client provides any key or + secret, the SNMP gateway SHOULD ensure that encryption is used in + addition to the integrity check for that communication to prevent + disclosure of keys or secrets. + + There are management objects defined in SNMP MIBs whose MAX-ACCESS is + read-write and/or read-create. Such objects may be considered + sensitive or vulnerable in some network environments. 
SNMP gateway + support for SNMP SET operations in a non-secure environment without + proper protection can have a negative effect on network operations. + The individual MIB module specifications, and especially their + security considerations, should be consulted for further information. + + Some readable objects in some MIB modules (i.e., objects with a MAX- + ACCESS other than not-accessible) may be considered sensitive or + vulnerable in some network environments. It is thus important to + control even GET access to these objects via an SNMP gateway and + possibly to even encrypt the values of these objects when they are + sent over the network. The individual MIB module specifications, and + especially their security considerations, should be consulted for + further information. This consideration also applies to objects for + which read operations have side effects. + +7. IANA Considerations + + The IANA has registered the URL registration template found in + Appendix A in accordance with [RFC2717]. + +8. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [RFC3061] Mealling, M., "A URN Namespace of Object Identifiers", RFC + 3061, February 2001. + + [RFC3411] Harrington, D., Presuhn, R., and B. Wijnen, "An + Architecture for Describing Simple Network Management + Protocol (SNMP) Management Frameworks", STD 62, RFC 3411, + December 2002. + + [RFC3416] Presuhn, R., "Version 2 of the Protocol Operations for the + Simple Network Management Protocol (SNMP)", STD 62, RFC + 3416, December 2002. + + + +Black, et al. Standards Track [Page 14] + +RFC 4088 URI Scheme for SNMP June 2005 + + + [RFC3417] Presuhn, R., "Transport Mappings for the Simple Network + Management Protocol (SNMP)", STD 62, RFC 3417, December + 2002. 
+ + [RFC3584] Frye, R., Levi, D., Routhier, S., and B. Wijnen, + "Coexistence between Version 1, Version 2, and Version 3 of + the Internet-standard Network Management Framework", BCP + 74, RFC 3584, August 2003. + + [RFC3986] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", STD 66, RFC + 3986, January 2005. + +9. Informative References + + [RFC1738] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, December 1994. + + [RFC1900] Carpenter, B. and Y. Rekhter, "Renumbering Needs Work", RFC + 1900, February 1996. + + [RFC2717] Petke, R. and I. King, "Registration Procedures for URL + Scheme Names", BCP 35, RFC 2717, November 1999. + + [RFC3410] Case, J., Mundy, R., Partain, D., and B. Stewart, + "Introduction and Applicability Statements for Internet- + Standard Management Framework", RFC 3410, December 2002. + + [RFC3430] Schoenwaelder, J., "Simple Network Management Protocol Over + Transmission Control Protocol Transport Mapping", RFC 3430, + December 2002. + + [RFC3617] Lear, E., "Uniform Resource Identifier (URI) Scheme and + Applicability Statement for the Trivial File Transfer + Protocol (TFTP)", RFC 3617, October 2003. + + [RFC4001] Daniele, M., Haberman, B., Routhier, S., and J. + Schoenwaelder, "Textual Conventions for Internet Network + Addresses", RFC 4001, February 2005. + + + + + + + + + + + + +Black, et al. Standards Track [Page 15] + +RFC 4088 URI Scheme for SNMP June 2005 + + +10. Acknowledgements + + Portions of this document were adapted from Eliot Lear's TFTP URI + scheme specification [RFC3617]. Portions of the security + considerations were adapted from the widely used security + considerations "boilerplate" for MIB modules. 
Comments from Ted + Hardie, Michael Mealing, Larry Masinter, Frank Strauss, Bert Wijnen, + Steve Bellovin, the mreview@ops.ietf.org mailing list and the + uri@w3c.org mailing list on earlier versions of this document have + resulted in significant improvements and are gratefully acknowledged. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Black, et al. Standards Track [Page 16] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Appendix A. Registration Template + + URL scheme name: snmp + URL scheme syntax: Section 3 + Character encoding considerations: Section 3 + Intended usage: Sections 1 and 2 + Applications and/or protocols which use this scheme: SNMP, all + versions, see [RFC3410] and [RFC3584]. Also SNMP over TCP, + see [RFC3430]. + Interoperability considerations: Section 4.4 + Security considerations: Section 6 + Relevant publications: See [RFC3410] for list. Also [RFC3430] + and [RFC3584]. + Contact: David L. Black, see below + Author/Change Controller: IESG + +Authors' Addresses + + David L. Black + EMC Corporation + 176 South Street + Hopkinton, MA 01748 + + Phone: +1 (508) 293-7953 + EMail: black_david@emc.com + + + Keith McCloghrie + Cisco Systems, Inc. + 170 West Tasman Drive + San Jose, CA USA 95134 + + Phone: +1 (408) 526-5260 + EMail: kzm@cisco.com + + + Juergen Schoenwaelder + International University Bremen + P.O. Box 750 561 + 28725 Bremen + Germany + + Phone: +49 421 200 3587 + EMail: j.schoenwaelder@iu-bremen.de + + + + + + + +Black, et al. Standards Track [Page 17] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. 
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + +Black, et al. 
Standards Track [Page 18] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt new file mode 100644 index 0000000..73f4298 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt @@ -0,0 +1,5827 @@ + + + + + + +Network Working Group Y. Rekhter, Ed. +Request for Comments: 4271 T. Li, Ed. +Obsoletes: 1771 S. Hares, Ed. +Category: Standards Track January 2006 + + + A Border Gateway Protocol 4 (BGP-4) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2006). + +Abstract + + This document discusses the Border Gateway Protocol (BGP), which is + an inter-Autonomous System routing protocol. + + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity for this reachability from which routing loops may be + pruned, and, at the AS level, some policy decisions may be enforced. + + BGP-4 provides a set of mechanisms for supporting Classless Inter- + Domain Routing (CIDR). These mechanisms include support for + advertising a set of destinations as an IP prefix, and eliminating + the concept of network "class" within BGP. BGP-4 also introduces + mechanisms that allow aggregation of routes, including aggregation of + AS paths. + + This document obsoletes RFC 1771. + + + + + + + + + + +Rekhter, et al. 
Standards Track [Page 1] + +RFC 4271 BGP-4 January 2006 + + +Table of Contents + + 1. Introduction ....................................................4 + 1.1. Definition of Commonly Used Terms ..........................4 + 1.2. Specification of Requirements ..............................6 + 2. Acknowledgements ................................................6 + 3. Summary of Operation ............................................7 + 3.1. Routes: Advertisement and Storage ..........................9 + 3.2. Routing Information Base ..................................10 + 4. Message Formats ................................................11 + 4.1. Message Header Format .....................................12 + 4.2. OPEN Message Format .......................................13 + 4.3. UPDATE Message Format .....................................14 + 4.4. KEEPALIVE Message Format ..................................21 + 4.5. NOTIFICATION Message Format ...............................21 + 5. Path Attributes ................................................23 + 5.1. Path Attribute Usage ......................................25 + 5.1.1. ORIGIN .............................................25 + 5.1.2. AS_PATH ............................................25 + 5.1.3. NEXT_HOP ...........................................26 + 5.1.4. MULTI_EXIT_DISC ....................................28 + 5.1.5. LOCAL_PREF .........................................29 + 5.1.6. ATOMIC_AGGREGATE ...................................29 + 5.1.7. AGGREGATOR .........................................30 + 6. BGP Error Handling. ............................................30 + 6.1. Message Header Error Handling .............................31 + 6.2. OPEN Message Error Handling ...............................31 + 6.3. UPDATE Message Error Handling .............................32 + 6.4. NOTIFICATION Message Error Handling .......................34 + 6.5. 
Hold Timer Expired Error Handling .........................34 + 6.6. Finite State Machine Error Handling .......................35 + 6.7. Cease .....................................................35 + 6.8. BGP Connection Collision Detection ........................35 + 7. BGP Version Negotiation ........................................36 + 8. BGP Finite State Machine (FSM) .................................37 + 8.1. Events for the BGP FSM ....................................38 + 8.1.1. Optional Events Linked to Optional Session + Attributes .........................................38 + 8.1.2. Administrative Events ..............................42 + 8.1.3. Timer Events .......................................46 + 8.1.4. TCP Connection-Based Events ........................47 + 8.1.5. BGP Message-Based Events ...........................49 + 8.2. Description of FSM ........................................51 + 8.2.1. FSM Definition .....................................51 + 8.2.1.1. Terms "active" and "passive" ..............52 + 8.2.1.2. FSM and Collision Detection ...............52 + 8.2.1.3. FSM and Optional Session Attributes .......52 + 8.2.1.4. FSM Event Numbers .........................53 + + + +Rekhter, et al. Standards Track [Page 2] + +RFC 4271 BGP-4 January 2006 + + + 8.2.1.5. FSM Actions that are Implementation + Dependent .................................53 + 8.2.2. Finite State Machine ...............................53 + 9. UPDATE Message Handling ........................................75 + 9.1. Decision Process ..........................................76 + 9.1.1. Phase 1: Calculation of Degree of Preference .......77 + 9.1.2. Phase 2: Route Selection ...........................77 + 9.1.2.1. Route Resolvability Condition .............79 + 9.1.2.2. Breaking Ties (Phase 2) ...................80 + 9.1.3. Phase 3: Route Dissemination .......................82 + 9.1.4. Overlapping Routes .................................83 + 9.2. 
Update-Send Process .......................................84 + 9.2.1. Controlling Routing Traffic Overhead ...............85 + 9.2.1.1. Frequency of Route Advertisement ..........85 + 9.2.1.2. Frequency of Route Origination ............85 + 9.2.2. Efficient Organization of Routing Information ......86 + 9.2.2.1. Information Reduction .....................86 + 9.2.2.2. Aggregating Routing Information ...........87 + 9.3. Route Selection Criteria ..................................89 + 9.4. Originating BGP routes ....................................89 + 10. BGP Timers ....................................................90 + Appendix A. Comparison with RFC 1771 .............................92 + Appendix B. Comparison with RFC 1267 .............................93 + Appendix C. Comparison with RFC 1163 .............................93 + Appendix D. Comparison with RFC 1105 .............................94 + Appendix E. TCP Options that May Be Used with BGP ................94 + Appendix F. Implementation Recommendations .......................95 + Appendix F.1. Multiple Networks Per Message .........95 + Appendix F.2. Reducing Route Flapping ...............96 + Appendix F.3. Path Attribute Ordering ...............96 + Appendix F.4. AS_SET Sorting ........................96 + Appendix F.5. Control Over Version Negotiation ......96 + Appendix F.6. Complex AS_PATH Aggregation ...........96 + Security Considerations ...........................................97 + IANA Considerations ...............................................99 + Normative References .............................................101 + Informative References ...........................................101 + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 3] + +RFC 4271 BGP-4 January 2006 + + +1. Introduction + + The Border Gateway Protocol (BGP) is an inter-Autonomous System + routing protocol. 
+ + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity for this reachability, from which routing loops may be + pruned and, at the AS level, some policy decisions may be enforced. + + BGP-4 provides a set of mechanisms for supporting Classless Inter- + Domain Routing (CIDR) [RFC1518, RFC1519]. These mechanisms include + support for advertising a set of destinations as an IP prefix and + eliminating the concept of network "class" within BGP. BGP-4 also + introduces mechanisms that allow aggregation of routes, including + aggregation of AS paths. + + Routing information exchanged via BGP supports only the destination- + based forwarding paradigm, which assumes that a router forwards a + packet based solely on the destination address carried in the IP + header of the packet. This, in turn, reflects the set of policy + decisions that can (and cannot) be enforced using BGP. BGP can + support only those policies conforming to the destination-based + forwarding paradigm. + +1.1. Definition of Commonly Used Terms + + This section provides definitions for terms that have a specific + meaning to the BGP protocol and that are used throughout the text. + + Adj-RIB-In + The Adj-RIBs-In contains unprocessed routing information that has + been advertised to the local BGP speaker by its peers. + + Adj-RIB-Out + The Adj-RIBs-Out contains the routes for advertisement to specific + peers by means of the local speaker's UPDATE messages. 
+ + Autonomous System (AS) + The classic definition of an Autonomous System is a set of routers + under a single technical administration, using an interior gateway + protocol (IGP) and common metrics to determine how to route + packets within the AS, and using an inter-AS routing protocol to + determine how to route packets to other ASes. Since this classic + definition was developed, it has become common for a single AS to + + + +Rekhter, et al. Standards Track [Page 4] + +RFC 4271 BGP-4 January 2006 + + + use several IGPs and, sometimes, several sets of metrics within an + AS. The use of the term Autonomous System stresses the fact that, + even when multiple IGPs and metrics are used, the administration + of an AS appears to other ASes to have a single coherent interior + routing plan, and presents a consistent picture of the + destinations that are reachable through it. + + BGP Identifier + A 4-octet unsigned integer that indicates the BGP Identifier of + the sender of BGP messages. A given BGP speaker sets the value of + its BGP Identifier to an IP address assigned to that BGP speaker. + The value of the BGP Identifier is determined upon startup and is + the same for every local interface and BGP peer. + + BGP speaker + A router that implements BGP. + + EBGP + External BGP (BGP connection between external peers). + + External peer + Peer that is in a different Autonomous System than the local + system. + + Feasible route + An advertised route that is available for use by the recipient. + + IBGP + Internal BGP (BGP connection between internal peers). + + Internal peer + Peer that is in the same Autonomous System as the local system. + + IGP + Interior Gateway Protocol - a routing protocol used to exchange + routing information among routers within a single Autonomous + System. + + Loc-RIB + The Loc-RIB contains the routes that have been selected by the + local BGP speaker's Decision Process. + + NLRI + Network Layer Reachability Information. 
+ + Route + A unit of information that pairs a set of destinations with the + attributes of a path to those destinations. The set of + + + +Rekhter, et al. Standards Track [Page 5] + +RFC 4271 BGP-4 January 2006 + + + destinations are systems whose IP addresses are contained in one + IP address prefix carried in the Network Layer Reachability + Information (NLRI) field of an UPDATE message. The path is the + information reported in the path attributes field of the same + UPDATE message. + + RIB + Routing Information Base. + + Unfeasible route + A previously advertised feasible route that is no longer available + for use. + +1.2. Specification of Requirements + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +2. Acknowledgements + + This document was originally published as [RFC1267] in October 1991, + jointly authored by Kirk Lougheed and Yakov Rekhter. + + We would like to express our thanks to Guy Almes, Len Bosack, and + Jeffrey C. Honig for their contributions to the earlier version + (BGP-1) of this document. + + We would like to specially acknowledge numerous contributions by + Dennis Ferguson to the earlier version of this document. + + We would like to explicitly thank Bob Braden for the review of the + earlier version (BGP-2) of this document, and for his constructive + and valuable comments. + + We would also like to thank Bob Hinden, Director for Routing of the + Internet Engineering Steering Group, and the team of reviewers he + assembled to review the earlier version (BGP-2) of this document. + This team, consisting of Deborah Estrin, Milo Medin, John Moy, Radia + Perlman, Martha Steenstrup, Mike St. Johns, and Paul Tsuchiya, acted + with a strong combination of toughness, professionalism, and + courtesy. 
+ + Certain sections of the document borrowed heavily from IDRP + [IS10747], which is the OSI counterpart of BGP. For this, credit + should be given to the ANSI X3S3.3 group chaired by Lyman Chapin and + to Charles Kunzinger, who was the IDRP editor within that group. + + + + +Rekhter, et al. Standards Track [Page 6] + +RFC 4271 BGP-4 January 2006 + + + We would also like to thank Benjamin Abarbanel, Enke Chen, Edward + Crabbe, Mike Craren, Vincent Gillet, Eric Gray, Jeffrey Haas, Dimitry + Haskin, Stephen Kent, John Krawczyk, David LeRoy, Dan Massey, + Jonathan Natale, Dan Pei, Mathew Richardson, John Scudder, John + Stewart III, Dave Thaler, Paul Traina, Russ White, Curtis Villamizar, + and Alex Zinin for their comments. + + We would like to specially acknowledge Andrew Lange for his help in + preparing the final version of this document. + + Finally, we would like to thank all the members of the IDR Working + Group for their ideas and the support they have given to this + document. + +3. Summary of Operation + + The Border Gateway Protocol (BGP) is an inter-Autonomous System + routing protocol. It is built on experience gained with EGP (as + defined in [RFC904]) and EGP usage in the NSFNET Backbone (as + described in [RFC1092] and [RFC1093]). For more BGP-related + information, see [RFC1772], [RFC1930], [RFC1997], and [RFC2858]. + + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity, from which routing loops may be pruned, and, at the AS + level, some policy decisions may be enforced. 
+ + In the context of this document, we assume that a BGP speaker + advertises to its peers only those routes that it uses itself (in + this context, a BGP speaker is said to "use" a BGP route if it is the + most preferred BGP route and is used in forwarding). All other cases + are outside the scope of this document. + + In the context of this document, the term "IP address" refers to an + IP Version 4 address [RFC791]. + + Routing information exchanged via BGP supports only the destination- + based forwarding paradigm, which assumes that a router forwards a + packet based solely on the destination address carried in the IP + header of the packet. This, in turn, reflects the set of policy + decisions that can (and cannot) be enforced using BGP. Note that + some policies cannot be supported by the destination-based forwarding + paradigm, and thus require techniques such as source routing (aka + explicit routing) to be enforced. Such policies cannot be enforced + using BGP either. For example, BGP does not enable one AS to send + + + +Rekhter, et al. Standards Track [Page 7] + +RFC 4271 BGP-4 January 2006 + + + traffic to a neighboring AS for forwarding to some destination + (reachable through but) beyond that neighboring AS, intending that + the traffic take a different route to that taken by the traffic + originating in the neighboring AS (for that same destination). On + the other hand, BGP can support any policy conforming to the + destination-based forwarding paradigm. + + BGP-4 provides a new set of mechanisms for supporting Classless + Inter-Domain Routing (CIDR) [RFC1518, RFC1519]. These mechanisms + include support for advertising a set of destinations as an IP prefix + and eliminating the concept of a network "class" within BGP. BGP-4 + also introduces mechanisms that allow aggregation of routes, + including aggregation of AS paths. + + This document uses the term `Autonomous System' (AS) throughout. 
The + classic definition of an Autonomous System is a set of routers under + a single technical administration, using an interior gateway protocol + (IGP) and common metrics to determine how to route packets within the + AS, and using an inter-AS routing protocol to determine how to route + packets to other ASes. Since this classic definition was developed, + it has become common for a single AS to use several IGPs and, + sometimes, several sets of metrics within an AS. The use of the term + Autonomous System stresses the fact that, even when multiple IGPs and + metrics are used, the administration of an AS appears to other ASes + to have a single coherent interior routing plan and presents a + consistent picture of the destinations that are reachable through it. + + BGP uses TCP [RFC793] as its transport protocol. This eliminates the + need to implement explicit update fragmentation, retransmission, + acknowledgement, and sequencing. BGP listens on TCP port 179. The + error notification mechanism used in BGP assumes that TCP supports a + "graceful" close (i.e., that all outstanding data will be delivered + before the connection is closed). + + A TCP connection is formed between two systems. They exchange + messages to open and confirm the connection parameters. + + The initial data flow is the portion of the BGP routing table that is + allowed by the export policy, called the Adj-Ribs-Out (see 3.2). + Incremental updates are sent as the routing tables change. BGP does + not require a periodic refresh of the routing table. To allow local + policy changes to have the correct effect without resetting any BGP + connections, a BGP speaker SHOULD either (a) retain the current + version of the routes advertised to it by all of its peers for the + duration of the connection, or (b) make use of the Route Refresh + extension [RFC2918]. + + + + + +Rekhter, et al. 
Standards Track [Page 8] + +RFC 4271 BGP-4 January 2006 + + + KEEPALIVE messages may be sent periodically to ensure that the + connection is live. NOTIFICATION messages are sent in response to + errors or special conditions. If a connection encounters an error + condition, a NOTIFICATION message is sent and the connection is + closed. + + A peer in a different AS is referred to as an external peer, while a + peer in the same AS is referred to as an internal peer. Internal BGP + and external BGP are commonly abbreviated as IBGP and EBGP. + + If a particular AS has multiple BGP speakers and is providing transit + service for other ASes, then care must be taken to ensure a + consistent view of routing within the AS. A consistent view of the + interior routes of the AS is provided by the IGP used within the AS. + For the purpose of this document, it is assumed that a consistent + view of the routes exterior to the AS is provided by having all BGP + speakers within the AS maintain IBGP with each other. + + This document specifies the base behavior of the BGP protocol. This + behavior can be, and is, modified by extension specifications. When + the protocol is extended, the new behavior is fully documented in the + extension specifications. + +3.1. Routes: Advertisement and Storage + + For the purpose of this protocol, a route is defined as a unit of + information that pairs a set of destinations with the attributes of a + path to those destinations. The set of destinations are systems + whose IP addresses are contained in one IP address prefix that is + carried in the Network Layer Reachability Information (NLRI) field of + an UPDATE message, and the path is the information reported in the + path attributes field of the same UPDATE message. + + Routes are advertised between BGP speakers in UPDATE messages. 
+ Multiple routes that have the same path attributes can be advertised + in a single UPDATE message by including multiple prefixes in the NLRI + field of the UPDATE message. + + Routes are stored in the Routing Information Bases (RIBs): namely, + the Adj-RIBs-In, the Loc-RIB, and the Adj-RIBs-Out, as described in + Section 3.2. + + If a BGP speaker chooses to advertise a previously received route, it + MAY add to, or modify, the path attributes of the route before + advertising it to a peer. + + + + + + +Rekhter, et al. Standards Track [Page 9] + +RFC 4271 BGP-4 January 2006 + + + BGP provides mechanisms by which a BGP speaker can inform its peers + that a previously advertised route is no longer available for use. + There are three methods by which a given BGP speaker can indicate + that a route has been withdrawn from service: + + a) the IP prefix that expresses the destination for a previously + advertised route can be advertised in the WITHDRAWN ROUTES + field in the UPDATE message, thus marking the associated route + as being no longer available for use, + + b) a replacement route with the same NLRI can be advertised, or + + c) the BGP speaker connection can be closed, which implicitly + removes all routes the pair of speakers had advertised to each + other from service. + + Changing the attribute(s) of a route is accomplished by advertising a + replacement route. The replacement route carries new (changed) + attributes and has the same address prefix as the original route. + +3.2. Routing Information Base + + The Routing Information Base (RIB) within a BGP speaker consists of + three distinct parts: + + a) Adj-RIBs-In: The Adj-RIBs-In stores routing information learned + from inbound UPDATE messages that were received from other BGP + speakers. Their contents represent routes that are available + as input to the Decision Process. 
+ + b) Loc-RIB: The Loc-RIB contains the local routing information the + BGP speaker selected by applying its local policies to the + routing information contained in its Adj-RIBs-In. These are + the routes that will be used by the local BGP speaker. The + next hop for each of these routes MUST be resolvable via the + local BGP speaker's Routing Table. + + c) Adj-RIBs-Out: The Adj-RIBs-Out stores information the local BGP + speaker selected for advertisement to its peers. The routing + information stored in the Adj-RIBs-Out will be carried in the + local BGP speaker's UPDATE messages and advertised to its + peers. + + In summary, the Adj-RIBs-In contains unprocessed routing information + that has been advertised to the local BGP speaker by its peers; the + Loc-RIB contains the routes that have been selected by the local BGP + + + + + +Rekhter, et al. Standards Track [Page 10] + +RFC 4271 BGP-4 January 2006 + + + speaker's Decision Process; and the Adj-RIBs-Out organizes the routes + for advertisement to specific peers (by means of the local speaker's + UPDATE messages). + + Although the conceptual model distinguishes between Adj-RIBs-In, + Loc-RIB, and Adj-RIBs-Out, this neither implies nor requires that an + implementation must maintain three separate copies of the routing + information. The choice of implementation (for example, 3 copies of + the information vs 1 copy with pointers) is not constrained by the + protocol. + + Routing information that the BGP speaker uses to forward packets (or + to construct the forwarding table used for packet forwarding) is + maintained in the Routing Table. The Routing Table accumulates + routes to directly connected networks, static routes, routes learned + from the IGP protocols, and routes learned from BGP. 
Whether a + specific BGP route should be installed in the Routing Table, and + whether a BGP route should override a route to the same destination + installed by another source, is a local policy decision, and is not + specified in this document. In addition to actual packet forwarding, + the Routing Table is used for resolution of the next-hop addresses + specified in BGP updates (see Section 5.1.3). + +4. Message Formats + + This section describes message formats used by BGP. + + BGP messages are sent over TCP connections. A message is processed + only after it is entirely received. The maximum message size is 4096 + octets. All implementations are required to support this maximum + message size. The smallest message that may be sent consists of a + BGP header without a data portion (19 octets). + + All multi-octet fields are in network byte order. + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 11] + +RFC 4271 BGP-4 January 2006 + + +4.1. Message Header Format + + Each message has a fixed-size header. There may or may not be a data + portion following the header, depending on the message type. The + layout of these fields is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + + + + | | + + + + | Marker | + + + + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Length | Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Marker: + + This 16-octet field is included for compatibility; it MUST be + set to all ones. + + Length: + + This 2-octet unsigned integer indicates the total length of the + message, including the header in octets. Thus, it allows one + to locate the (Marker field of the) next message in the TCP + stream. The value of the Length field MUST always be at least + 19 and no greater than 4096, and MAY be further constrained, + depending on the message type. 
"padding" of extra data after + the message is not allowed. Therefore, the Length field MUST + have the smallest value required, given the rest of the + message. + + Type: + + This 1-octet unsigned integer indicates the type code of the + message. This document defines the following type codes: + + 1 - OPEN + 2 - UPDATE + 3 - NOTIFICATION + 4 - KEEPALIVE + + [RFC2918] defines one more type code. + + + +Rekhter, et al. Standards Track [Page 12] + +RFC 4271 BGP-4 January 2006 + + +4.2. OPEN Message Format + + After a TCP connection is established, the first message sent by each + side is an OPEN message. If the OPEN message is acceptable, a + KEEPALIVE message confirming the OPEN is sent back. + + In addition to the fixed-size BGP header, the OPEN message contains + the following fields: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+ + | Version | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | My Autonomous System | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hold Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | BGP Identifier | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Opt Parm Len | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Optional Parameters (variable) | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Version: + + This 1-octet unsigned integer indicates the protocol version + number of the message. The current BGP version number is 4. + + My Autonomous System: + + This 2-octet unsigned integer indicates the Autonomous System + number of the sender. + + Hold Time: + + This 2-octet unsigned integer indicates the number of seconds + the sender proposes for the value of the Hold Timer. Upon + receipt of an OPEN message, a BGP speaker MUST calculate the + value of the Hold Timer by using the smaller of its configured + Hold Time and the Hold Time received in the OPEN message. 
The + Hold Time MUST be either zero or at least three seconds. An + implementation MAY reject connections on the basis of the Hold + + + + + +Rekhter, et al. Standards Track [Page 13] + +RFC 4271 BGP-4 January 2006 + + + Time. The calculated value indicates the maximum number of + seconds that may elapse between the receipt of successive + KEEPALIVE and/or UPDATE messages from the sender. + + BGP Identifier: + + This 4-octet unsigned integer indicates the BGP Identifier of + the sender. A given BGP speaker sets the value of its BGP + Identifier to an IP address that is assigned to that BGP + speaker. The value of the BGP Identifier is determined upon + startup and is the same for every local interface and BGP peer. + + Optional Parameters Length: + + This 1-octet unsigned integer indicates the total length of the + Optional Parameters field in octets. If the value of this + field is zero, no Optional Parameters are present. + + Optional Parameters: + + This field contains a list of optional parameters, in which + each parameter is encoded as a triplet. + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-... + | Parm. Type | Parm. Length | Parameter Value (variable) + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-... + + Parameter Type is a one octet field that unambiguously + identifies individual parameters. Parameter Length is a one + octet field that contains the length of the Parameter Value + field in octets. Parameter Value is a variable length field + that is interpreted according to the value of the Parameter + Type field. + + [RFC3392] defines the Capabilities Optional Parameter. + + The minimum length of the OPEN message is 29 octets (including the + message header). + +4.3. UPDATE Message Format + + UPDATE messages are used to transfer routing information between BGP + peers. The information in the UPDATE message can be used to + construct a graph that describes the relationships of the various + Autonomous Systems. 
By applying rules to be discussed, routing + + + +Rekhter, et al. Standards Track [Page 14] + +RFC 4271 BGP-4 January 2006 + + + information loops and some other anomalies may be detected and + removed from inter-AS routing. + + An UPDATE message is used to advertise feasible routes that share + common path attributes to a peer, or to withdraw multiple unfeasible + routes from service (see 3.1). An UPDATE message MAY simultaneously + advertise a feasible route and withdraw multiple unfeasible routes + from service. The UPDATE message always includes the fixed-size BGP + header, and also includes the other fields, as shown below (note, + some of the shown fields may not be present in every UPDATE message): + + +-----------------------------------------------------+ + | Withdrawn Routes Length (2 octets) | + +-----------------------------------------------------+ + | Withdrawn Routes (variable) | + +-----------------------------------------------------+ + | Total Path Attribute Length (2 octets) | + +-----------------------------------------------------+ + | Path Attributes (variable) | + +-----------------------------------------------------+ + | Network Layer Reachability Information (variable) | + +-----------------------------------------------------+ + + Withdrawn Routes Length: + + This 2-octets unsigned integer indicates the total length of + the Withdrawn Routes field in octets. Its value allows the + length of the Network Layer Reachability Information field to + be determined, as specified below. + + A value of 0 indicates that no routes are being withdrawn from + service, and that the WITHDRAWN ROUTES field is not present in + this UPDATE message. + + Withdrawn Routes: + + This is a variable-length field that contains a list of IP + address prefixes for the routes that are being withdrawn from + service. 
Each IP address prefix is encoded as a 2-tuple of the + form <length, prefix>, whose fields are described below: + + +---------------------------+ + | Length (1 octet) | + +---------------------------+ + | Prefix (variable) | + +---------------------------+ + + + + + +Rekhter, et al. Standards Track [Page 15] + +RFC 4271 BGP-4 January 2006 + + + The use and the meaning of these fields are as follows: + + a) Length: + + The Length field indicates the length in bits of the IP + address prefix. A length of zero indicates a prefix that + matches all IP addresses (with prefix, itself, of zero + octets). + + b) Prefix: + + The Prefix field contains an IP address prefix, followed by + the minimum number of trailing bits needed to make the end + of the field fall on an octet boundary. Note that the value + of trailing bits is irrelevant. + + Total Path Attribute Length: + + This 2-octet unsigned integer indicates the total length of the + Path Attributes field in octets. Its value allows the length + of the Network Layer Reachability field to be determined as + specified below. + + A value of 0 indicates that neither the Network Layer + Reachability Information field nor the Path Attribute field is + present in this UPDATE message. + + Path Attributes: + + A variable-length sequence of path attributes is present in + every UPDATE message, except for an UPDATE message that carries + only the withdrawn routes. Each path attribute is a triple + <attribute type, attribute length, attribute value> of variable + length. + + Attribute Type is a two-octet field that consists of the + Attribute Flags octet, followed by the Attribute Type Code + octet. + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Attr. Flags |Attr. Type Code| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The high-order bit (bit 0) of the Attribute Flags octet is the + Optional bit. It defines whether the attribute is optional (if + set to 1) or well-known (if set to 0). + + + +Rekhter, et al.
Standards Track [Page 16] + +RFC 4271 BGP-4 January 2006 + + + The second high-order bit (bit 1) of the Attribute Flags octet + is the Transitive bit. It defines whether an optional + attribute is transitive (if set to 1) or non-transitive (if set + to 0). + + For well-known attributes, the Transitive bit MUST be set to 1. + (See Section 5 for a discussion of transitive attributes.) + + The third high-order bit (bit 2) of the Attribute Flags octet + is the Partial bit. It defines whether the information + contained in the optional transitive attribute is partial (if + set to 1) or complete (if set to 0). For well-known attributes + and for optional non-transitive attributes, the Partial bit + MUST be set to 0. + + The fourth high-order bit (bit 3) of the Attribute Flags octet + is the Extended Length bit. It defines whether the Attribute + Length is one octet (if set to 0) or two octets (if set to 1). + + The lower-order four bits of the Attribute Flags octet are + unused. They MUST be zero when sent and MUST be ignored when + received. + + The Attribute Type Code octet contains the Attribute Type Code. + Currently defined Attribute Type Codes are discussed in Section + 5. + + If the Extended Length bit of the Attribute Flags octet is set + to 0, the third octet of the Path Attribute contains the length + of the attribute data in octets. + + If the Extended Length bit of the Attribute Flags octet is set + to 1, the third and fourth octets of the path attribute contain + the length of the attribute data in octets. + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 17] + +RFC 4271 BGP-4 January 2006 + + + The remaining octets of the Path Attribute represent the + attribute value and are interpreted according to the Attribute + Flags and the Attribute Type Code. 
The supported Attribute + Type Codes, and their attribute values and uses are as follows: + + a) ORIGIN (Type Code 1): + + ORIGIN is a well-known mandatory attribute that defines the + origin of the path information. The data octet can assume + the following values: + + Value Meaning + + 0 IGP - Network Layer Reachability Information + is interior to the originating AS + + 1 EGP - Network Layer Reachability Information + learned via the EGP protocol [RFC904] + + 2 INCOMPLETE - Network Layer Reachability + Information learned by some other means + + Usage of this attribute is defined in 5.1.1. + + b) AS_PATH (Type Code 2): + + AS_PATH is a well-known mandatory attribute that is composed + of a sequence of AS path segments. Each AS path segment is + represented by a triple <path segment type, path segment length, path segment value>. + + The path segment type is a 1-octet length field with the + following values defined: + + Value Segment Type + + 1 AS_SET: unordered set of ASes a route in the + UPDATE message has traversed + + 2 AS_SEQUENCE: ordered set of ASes a route in + the UPDATE message has traversed + + The path segment length is a 1-octet length field, + containing the number of ASes (not the number of octets) in + the path segment value field. + + The path segment value field contains one or more AS + numbers, each encoded as a 2-octet length field. + + + +Rekhter, et al. Standards Track [Page 18] + +RFC 4271 BGP-4 January 2006 + + + Usage of this attribute is defined in 5.1.2. + + c) NEXT_HOP (Type Code 3): + + This is a well-known mandatory attribute that defines the + (unicast) IP address of the router that SHOULD be used as + the next hop to the destinations listed in the Network Layer + Reachability Information field of the UPDATE message. + + Usage of this attribute is defined in 5.1.3. + + d) MULTI_EXIT_DISC (Type Code 4): + + This is an optional non-transitive attribute that is a + four-octet unsigned integer.
The value of this attribute + MAY be used by a BGP speaker's Decision Process to + discriminate among multiple entry points to a neighboring + autonomous system. + + Usage of this attribute is defined in 5.1.4. + + e) LOCAL_PREF (Type Code 5): + + LOCAL_PREF is a well-known attribute that is a four-octet + unsigned integer. A BGP speaker uses it to inform its other + internal peers of the advertising speaker's degree of + preference for an advertised route. + + Usage of this attribute is defined in 5.1.5. + + f) ATOMIC_AGGREGATE (Type Code 6) + + ATOMIC_AGGREGATE is a well-known discretionary attribute of + length 0. + + Usage of this attribute is defined in 5.1.6. + + g) AGGREGATOR (Type Code 7) + + AGGREGATOR is an optional transitive attribute of length 6. + The attribute contains the last AS number that formed the + aggregate route (encoded as 2 octets), followed by the IP + address of the BGP speaker that formed the aggregate route + (encoded as 4 octets). This SHOULD be the same address as + the one used for the BGP Identifier of the speaker. + + Usage of this attribute is defined in 5.1.7. + + + + +Rekhter, et al. Standards Track [Page 19] + +RFC 4271 BGP-4 January 2006 + + + Network Layer Reachability Information: + + This variable length field contains a list of IP address + prefixes. The length, in octets, of the Network Layer + Reachability Information is not encoded explicitly, but can be + calculated as: + + UPDATE message Length - 23 - Total Path Attributes Length + - Withdrawn Routes Length + + where UPDATE message Length is the value encoded in the fixed- + size BGP header, Total Path Attribute Length, and Withdrawn + Routes Length are the values encoded in the variable part of + the UPDATE message, and 23 is a combined length of the fixed- + size BGP header, the Total Path Attribute Length field, and the + Withdrawn Routes Length field. 
+ + Reachability information is encoded as one or more 2-tuples of + the form <length, prefix>, whose fields are described below: + + +---------------------------+ + | Length (1 octet) | + +---------------------------+ + | Prefix (variable) | + +---------------------------+ + + The use and the meaning of these fields are as follows: + + a) Length: + + The Length field indicates the length in bits of the IP + address prefix. A length of zero indicates a prefix that + matches all IP addresses (with prefix, itself, of zero + octets). + + b) Prefix: + + The Prefix field contains an IP address prefix, followed by + enough trailing bits to make the end of the field fall on an + octet boundary. Note that the value of the trailing bits is + irrelevant. + + The minimum length of the UPDATE message is 23 octets -- 19 octets + for the fixed header + 2 octets for the Withdrawn Routes Length + 2 + octets for the Total Path Attribute Length (the value of Withdrawn + Routes Length is 0 and the value of Total Path Attribute Length is + 0). + + + + +Rekhter, et al. Standards Track [Page 20] + +RFC 4271 BGP-4 January 2006 + + + An UPDATE message can advertise, at most, one set of path attributes, + but multiple destinations, provided that the destinations share these + attributes. All path attributes contained in a given UPDATE message + apply to all destinations carried in the NLRI field of the UPDATE + message. + + + An UPDATE message can list multiple routes that are to be withdrawn + from service. Each such route is identified by its destination + (expressed as an IP prefix), which unambiguously identifies the route + in the context of the BGP speaker - BGP speaker connection to which + it has been previously advertised. + + + An UPDATE message might advertise only routes that are to be + withdrawn from service, in which case the message will not include + path attributes or Network Layer Reachability Information. 
+ Conversely, it may advertise only a feasible route, in which case the + WITHDRAWN ROUTES field need not be present. + + An UPDATE message SHOULD NOT include the same address prefix in the + WITHDRAWN ROUTES and Network Layer Reachability Information fields. + However, a BGP speaker MUST be able to process UPDATE messages in + this form. A BGP speaker SHOULD treat an UPDATE message of this form + as though the WITHDRAWN ROUTES do not contain the address prefix. + +4.4. KEEPALIVE Message Format + + BGP does not use any TCP-based, keep-alive mechanism to determine if + peers are reachable. Instead, KEEPALIVE messages are exchanged + between peers often enough not to cause the Hold Timer to expire. A + reasonable maximum time between KEEPALIVE messages would be one third + of the Hold Time interval. KEEPALIVE messages MUST NOT be sent more + frequently than one per second. An implementation MAY adjust the + rate at which it sends KEEPALIVE messages as a function of the Hold + Time interval. + + If the negotiated Hold Time interval is zero, then periodic KEEPALIVE + messages MUST NOT be sent. + + A KEEPALIVE message consists of only the message header and has a + length of 19 octets. + +4.5. NOTIFICATION Message Format + + A NOTIFICATION message is sent when an error condition is detected. + The BGP connection is closed immediately after it is sent. + + + + +Rekhter, et al. Standards Track [Page 21] + +RFC 4271 BGP-4 January 2006 + + + In addition to the fixed-size BGP header, the NOTIFICATION message + contains the following fields: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Error code | Error subcode | Data (variable) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Error Code: + + This 1-octet unsigned integer indicates the type of + NOTIFICATION. 
The following Error Codes have been defined: + + Error Code Symbolic Name Reference + + 1 Message Header Error Section 6.1 + + 2 OPEN Message Error Section 6.2 + + 3 UPDATE Message Error Section 6.3 + + 4 Hold Timer Expired Section 6.5 + + 5 Finite State Machine Error Section 6.6 + + 6 Cease Section 6.7 + + Error subcode: + + This 1-octet unsigned integer provides more specific + information about the nature of the reported error. Each Error + Code may have one or more Error Subcodes associated with it. + If no appropriate Error Subcode is defined, then a zero + (Unspecific) value is used for the Error Subcode field. + + Message Header Error subcodes: + + 1 - Connection Not Synchronized. + 2 - Bad Message Length. + 3 - Bad Message Type. + + + + + + + + + + +Rekhter, et al. Standards Track [Page 22] + +RFC 4271 BGP-4 January 2006 + + + OPEN Message Error subcodes: + + 1 - Unsupported Version Number. + 2 - Bad Peer AS. + 3 - Bad BGP Identifier. + 4 - Unsupported Optional Parameter. + 5 - [Deprecated - see Appendix A]. + 6 - Unacceptable Hold Time. + + UPDATE Message Error subcodes: + + 1 - Malformed Attribute List. + 2 - Unrecognized Well-known Attribute. + 3 - Missing Well-known Attribute. + 4 - Attribute Flags Error. + 5 - Attribute Length Error. + 6 - Invalid ORIGIN Attribute. + 7 - [Deprecated - see Appendix A]. + 8 - Invalid NEXT_HOP Attribute. + 9 - Optional Attribute Error. + 10 - Invalid Network Field. + 11 - Malformed AS_PATH. + + Data: + + This variable-length field is used to diagnose the reason for + the NOTIFICATION. The contents of the Data field depend upon + the Error Code and Error Subcode. See Section 6 for more + details. + + Note that the length of the Data field can be determined from + the message Length field by the formula: + + Message Length = 21 + Data Length + + The minimum length of the NOTIFICATION message is 21 octets + (including message header). + +5. Path Attributes + + This section discusses the path attributes of the UPDATE message. 
+ + Path attributes fall into four separate categories: + + 1. Well-known mandatory. + 2. Well-known discretionary. + 3. Optional transitive. + 4. Optional non-transitive. + + + +Rekhter, et al. Standards Track [Page 23] + +RFC 4271 BGP-4 January 2006 + + + BGP implementations MUST recognize all well-known attributes. Some + of these attributes are mandatory and MUST be included in every + UPDATE message that contains NLRI. Others are discretionary and MAY + or MAY NOT be sent in a particular UPDATE message. + + Once a BGP peer has updated any well-known attributes, it MUST pass + these attributes to its peers in any updates it transmits. + + In addition to well-known attributes, each path MAY contain one or + more optional attributes. It is not required or expected that all + BGP implementations support all optional attributes. The handling of + an unrecognized optional attribute is determined by the setting of + the Transitive bit in the attribute flags octet. Paths with + unrecognized transitive optional attributes SHOULD be accepted. If a + path with an unrecognized transitive optional attribute is accepted + and passed to other BGP peers, then the unrecognized transitive + optional attribute of that path MUST be passed, along with the path, + to other BGP peers with the Partial bit in the Attribute Flags octet + set to 1. If a path with a recognized, transitive optional attribute + is accepted and passed along to other BGP peers and the Partial bit + in the Attribute Flags octet is set to 1 by some previous AS, it MUST + NOT be set back to 0 by the current AS. Unrecognized non-transitive + optional attributes MUST be quietly ignored and not passed along to + other BGP peers. + + New, transitive optional attributes MAY be attached to the path by + the originator or by any other BGP speaker in the path. If they are + not attached by the originator, the Partial bit in the Attribute + Flags octet is set to 1. 
The rules for attaching new non-transitive + optional attributes will depend on the nature of the specific + attribute. The documentation of each new non-transitive optional + attribute will be expected to include such rules (the description of + the MULTI_EXIT_DISC attribute gives an example). All optional + attributes (both transitive and non-transitive), MAY be updated (if + appropriate) by BGP speakers in the path. + + The sender of an UPDATE message SHOULD order path attributes within + the UPDATE message in ascending order of attribute type. The + receiver of an UPDATE message MUST be prepared to handle path + attributes within UPDATE messages that are out of order. + + The same attribute (attribute with the same type) cannot appear more + than once within the Path Attributes field of a particular UPDATE + message. + + + + + + + +Rekhter, et al. Standards Track [Page 24] + +RFC 4271 BGP-4 January 2006 + + + The mandatory category refers to an attribute that MUST be present in + both IBGP and EBGP exchanges if NLRI are contained in the UPDATE + message. Attributes classified as optional for the purpose of the + protocol extension mechanism may be purely discretionary, + discretionary, required, or disallowed in certain contexts. + + attribute EBGP IBGP + ORIGIN mandatory mandatory + AS_PATH mandatory mandatory + NEXT_HOP mandatory mandatory + MULTI_EXIT_DISC discretionary discretionary + LOCAL_PREF see Section 5.1.5 required + ATOMIC_AGGREGATE see Section 5.1.6 and 9.1.4 + AGGREGATOR discretionary discretionary + +5.1. Path Attribute Usage + + The usage of each BGP path attribute is described in the following + clauses. + +5.1.1. ORIGIN + + ORIGIN is a well-known mandatory attribute. The ORIGIN attribute is + generated by the speaker that originates the associated routing + information. Its value SHOULD NOT be changed by any other speaker. + +5.1.2. AS_PATH + + AS_PATH is a well-known mandatory attribute. 
This attribute + identifies the autonomous systems through which routing information + carried in this UPDATE message has passed. The components of this + list can be AS_SETs or AS_SEQUENCEs. + + When a BGP speaker propagates a route it learned from another BGP + speaker's UPDATE message, it modifies the route's AS_PATH attribute + based on the location of the BGP speaker to which the route will be + sent: + + a) When a given BGP speaker advertises the route to an internal + peer, the advertising speaker SHALL NOT modify the AS_PATH + attribute associated with the route. + + b) When a given BGP speaker advertises the route to an external + peer, the advertising speaker updates the AS_PATH attribute as + follows: + + + + + + +Rekhter, et al. Standards Track [Page 25] + +RFC 4271 BGP-4 January 2006 + + + 1) if the first path segment of the AS_PATH is of type + AS_SEQUENCE, the local system prepends its own AS number as + the last element of the sequence (put it in the leftmost + position with respect to the position of octets in the + protocol message). If the act of prepending will cause an + overflow in the AS_PATH segment (i.e., more than 255 ASes), + it SHOULD prepend a new segment of type AS_SEQUENCE and + prepend its own AS number to this new segment. + + 2) if the first path segment of the AS_PATH is of type AS_SET, + the local system prepends a new path segment of type + AS_SEQUENCE to the AS_PATH, including its own AS number in + that segment. + + 3) if the AS_PATH is empty, the local system creates a path + segment of type AS_SEQUENCE, places its own AS into that + segment, and places that segment into the AS_PATH. + + When a BGP speaker originates a route then: + + a) the originating speaker includes its own AS number in a path + segment, of type AS_SEQUENCE, in the AS_PATH attribute of all + UPDATE messages sent to an external peer. 
In this case, the AS + number of the originating speaker's autonomous system will be + the only entry in the path segment, and this path segment will be + the only segment in the AS_PATH attribute. + + b) the originating speaker includes an empty AS_PATH attribute in + all UPDATE messages sent to internal peers. (An empty AS_PATH + attribute is one whose length field contains the value zero). + + Whenever the modification of the AS_PATH attribute calls for + including or prepending the AS number of the local system, the local + system MAY include/prepend more than one instance of its own AS + number in the AS_PATH attribute. This is controlled via local + configuration. + +5.1.3. NEXT_HOP + + The NEXT_HOP is a well-known mandatory attribute that defines the IP + address of the router that SHOULD be used as the next hop to the + destinations listed in the UPDATE message. The NEXT_HOP attribute is + calculated as follows: + + 1) When sending a message to an internal peer, if the route is not + locally originated, the BGP speaker SHOULD NOT modify the + NEXT_HOP attribute unless it has been explicitly configured to + announce its own IP address as the NEXT_HOP. When announcing a + + + +Rekhter, et al. Standards Track [Page 26] + +RFC 4271 BGP-4 January 2006 + + + locally-originated route to an internal peer, the BGP speaker + SHOULD use the interface address of the router through which + the announced network is reachable for the speaker as the + NEXT_HOP. If the route is directly connected to the speaker, + or if the interface address of the router through which the + announced network is reachable for the speaker is the internal + peer's address, then the BGP speaker SHOULD use its own IP + address for the NEXT_HOP attribute (the address of the + interface that is used to reach the peer). 
+ + 2) When sending a message to an external peer, X, and the peer is + one IP hop away from the speaker: + + - If the route being announced was learned from an internal + peer or is locally originated, the BGP speaker can use an + interface address of the internal peer router (or the + internal router) through which the announced network is + reachable for the speaker for the NEXT_HOP attribute, + provided that peer X shares a common subnet with this + address. This is a form of "third party" NEXT_HOP attribute. + + - Otherwise, if the route being announced was learned from an + external peer, the speaker can use an IP address of any + adjacent router (known from the received NEXT_HOP attribute) + that the speaker itself uses for local route calculation in + the NEXT_HOP attribute, provided that peer X shares a common + subnet with this address. This is a second form of "third + party" NEXT_HOP attribute. + + - Otherwise, if the external peer to which the route is being + advertised shares a common subnet with one of the interfaces + of the announcing BGP speaker, the speaker MAY use the IP + address associated with such an interface in the NEXT_HOP + attribute. This is known as a "first party" NEXT_HOP + attribute. + + - By default (if none of the above conditions apply), the BGP + speaker SHOULD use the IP address of the interface that the + speaker uses to establish the BGP connection to peer X in the + NEXT_HOP attribute. + + 3) When sending a message to an external peer X, and the peer is + multiple IP hops away from the speaker (aka "multihop EBGP"): + + - The speaker MAY be configured to propagate the NEXT_HOP + attribute. In this case, when advertising a route that the + speaker learned from one of its peers, the NEXT_HOP attribute + of the advertised route is exactly the same as the NEXT_HOP + + + +Rekhter, et al. 
Standards Track [Page 27] + +RFC 4271 BGP-4 January 2006 + + + attribute of the learned route (the speaker does not modify + the NEXT_HOP attribute). + + - By default, the BGP speaker SHOULD use the IP address of the + interface that the speaker uses to establish the BGP connection + to peer X in the NEXT_HOP attribute. + + Normally, the NEXT_HOP attribute is chosen such that the shortest + available path will be taken. A BGP speaker MUST be able to support + disabling the advertisement of third party NEXT_HOP attributes in + order to handle imperfectly bridged media. + + A route originated by a BGP speaker SHALL NOT be advertised to a peer + using an address of that peer as NEXT_HOP. A BGP speaker SHALL NOT + install a route with itself as the next hop. + + The NEXT_HOP attribute is used by the BGP speaker to determine the + actual outbound interface and immediate next-hop address that SHOULD + be used to forward transit packets to the associated destinations. + + The immediate next-hop address is determined by performing a + recursive route lookup operation for the IP address in the NEXT_HOP + attribute, using the contents of the Routing Table, selecting one + entry if multiple entries of equal cost exist. The Routing Table + entry that resolves the IP address in the NEXT_HOP attribute will + always specify the outbound interface. If the entry specifies an + attached subnet, but does not specify a next-hop address, then the + address in the NEXT_HOP attribute SHOULD be used as the immediate + next-hop address. If the entry also specifies the next-hop address, + this address SHOULD be used as the immediate next-hop address for + packet forwarding. + +5.1.4. MULTI_EXIT_DISC + + The MULTI_EXIT_DISC is an optional non-transitive attribute that is + intended to be used on external (inter-AS) links to discriminate + among multiple exit or entry points to the same neighboring AS. 
The + value of the MULTI_EXIT_DISC attribute is a four-octet unsigned + number, called a metric. All other factors being equal, the exit + point with the lower metric SHOULD be preferred. If received over + EBGP, the MULTI_EXIT_DISC attribute MAY be propagated over IBGP to + other BGP speakers within the same AS (see also 9.1.2.2). The + MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be + propagated to other neighboring ASes. + + A BGP speaker MUST implement a mechanism (based on local + configuration) that allows the MULTI_EXIT_DISC attribute to be + removed from a route. If a BGP speaker is configured to remove the + + + +Rekhter, et al. Standards Track [Page 28] + +RFC 4271 BGP-4 January 2006 + + + MULTI_EXIT_DISC attribute from a route, then this removal MUST be + done prior to determining the degree of preference of the route and + prior to performing route selection (Decision Process phases 1 and + 2). + + An implementation MAY also (based on local configuration) alter the + value of the MULTI_EXIT_DISC attribute received over EBGP. If a BGP + speaker is configured to alter the value of the MULTI_EXIT_DISC + attribute received over EBGP, then altering the value MUST be done + prior to determining the degree of preference of the route and prior + to performing route selection (Decision Process phases 1 and 2). See + Section 9.1.2.2 for necessary restrictions on this. + +5.1.5. LOCAL_PREF + + LOCAL_PREF is a well-known attribute that SHALL be included in all + UPDATE messages that a given BGP speaker sends to other internal + peers. A BGP speaker SHALL calculate the degree of preference for + each external route based on the locally-configured policy, and + include the degree of preference when advertising a route to its + internal peers. The higher degree of preference MUST be preferred. + A BGP speaker uses the degree of preference learned via LOCAL_PREF in + its Decision Process (see Section 9.1.1). 
+ + A BGP speaker MUST NOT include this attribute in UPDATE messages it + sends to external peers, except in the case of BGP Confederations + [RFC3065]. If it is contained in an UPDATE message that is received + from an external peer, then this attribute MUST be ignored by the + receiving speaker, except in the case of BGP Confederations + [RFC3065]. + +5.1.6. ATOMIC_AGGREGATE + + ATOMIC_AGGREGATE is a well-known discretionary attribute. + + When a BGP speaker aggregates several routes for the purpose of + advertisement to a particular peer, the AS_PATH of the aggregated + route normally includes an AS_SET formed from the set of ASes from + which the aggregate was formed. In many cases, the network + administrator can determine if the aggregate can safely be advertised + without the AS_SET, and without forming route loops. + + If an aggregate excludes at least some of the AS numbers present in + the AS_PATH of the routes that are aggregated as a result of dropping + the AS_SET, the aggregated route, when advertised to the peer, SHOULD + include the ATOMIC_AGGREGATE attribute. + + + + + +Rekhter, et al. Standards Track [Page 29] + +RFC 4271 BGP-4 January 2006 + + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute SHOULD NOT remove the attribute when propagating the route + to other speakers. + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute MUST NOT make any NLRI of that route more specific (as + defined in 9.1.4) when advertising this route to other BGP speakers. + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute needs to be aware of the fact that the actual path to + destinations, as specified in the NLRI of the route, while having the + loop-free property, may not be the path specified in the AS_PATH + attribute of the route. + +5.1.7. AGGREGATOR + + AGGREGATOR is an optional transitive attribute, which MAY be included + in updates that are formed by aggregation (see Section 9.2.2.2). 
A + BGP speaker that performs route aggregation MAY add the AGGREGATOR + attribute, which SHALL contain its own AS number and IP address. The + IP address SHOULD be the same as the BGP Identifier of the speaker. + +6. BGP Error Handling. + + This section describes actions to be taken when errors are detected + while processing BGP messages. + + When any of the conditions described here are detected, a + NOTIFICATION message, with the indicated Error Code, Error Subcode, + and Data fields, is sent, and the BGP connection is closed (unless it + is explicitly stated that no NOTIFICATION message is to be sent and + the BGP connection is not to be closed). If no Error Subcode is + specified, then a zero MUST be used. + + The phrase "the BGP connection is closed" means the TCP connection + has been closed, the associated Adj-RIB-In has been cleared, and all + resources for that BGP connection have been deallocated. Entries in + the Loc-RIB associated with the remote peer are marked as invalid. + The local system recalculates its best routes for the destinations of + the routes marked as invalid. Before the invalid routes are deleted + from the system, it advertises, to its peers, either withdraws for + the routes marked as invalid, or the new best routes before the + invalid routes are deleted from the system. + + Unless specified explicitly, the Data field of the NOTIFICATION + message that is sent to indicate an error is empty. + + + + + +Rekhter, et al. Standards Track [Page 30] + +RFC 4271 BGP-4 January 2006 + + +6.1. Message Header Error Handling + + All errors detected while processing the Message Header MUST be + indicated by sending the NOTIFICATION message with the Error Code + Message Header Error. The Error Subcode elaborates on the specific + nature of the error. + + The expected value of the Marker field of the message header is all + ones. 
If the Marker field of the message header is not as expected, + then a synchronization error has occurred and the Error Subcode MUST + be set to Connection Not Synchronized. + + If at least one of the following is true: + + - if the Length field of the message header is less than 19 or + greater than 4096, or + + - if the Length field of an OPEN message is less than the minimum + length of the OPEN message, or + + - if the Length field of an UPDATE message is less than the + minimum length of the UPDATE message, or + + - if the Length field of a KEEPALIVE message is not equal to 19, + or + + - if the Length field of a NOTIFICATION message is less than the + minimum length of the NOTIFICATION message, + + then the Error Subcode MUST be set to Bad Message Length. The Data + field MUST contain the erroneous Length field. + + If the Type field of the message header is not recognized, then the + Error Subcode MUST be set to Bad Message Type. The Data field MUST + contain the erroneous Type field. + +6.2. OPEN Message Error Handling + + All errors detected while processing the OPEN message MUST be + indicated by sending the NOTIFICATION message with the Error Code + OPEN Message Error. The Error Subcode elaborates on the specific + nature of the error. + + If the version number in the Version field of the received OPEN + message is not supported, then the Error Subcode MUST be set to + Unsupported Version Number. The Data field is a 2-octet unsigned + integer, which indicates the largest, locally-supported version + number less than the version the remote BGP peer bid (as indicated in + + + +Rekhter, et al. Standards Track [Page 31] + +RFC 4271 BGP-4 January 2006 + + + the received OPEN message), or if the smallest, locally-supported + version number is greater than the version the remote BGP peer bid, + then the smallest, locally-supported version number. 
+ + If the Autonomous System field of the OPEN message is unacceptable, + then the Error Subcode MUST be set to Bad Peer AS. The determination + of acceptable Autonomous System numbers is outside the scope of this + protocol. + + If the Hold Time field of the OPEN message is unacceptable, then the + Error Subcode MUST be set to Unacceptable Hold Time. An + implementation MUST reject Hold Time values of one or two seconds. + An implementation MAY reject any proposed Hold Time. An + implementation that accepts a Hold Time MUST use the negotiated value + for the Hold Time. + + If the BGP Identifier field of the OPEN message is syntactically + incorrect, then the Error Subcode MUST be set to Bad BGP Identifier. + Syntactic correctness means that the BGP Identifier field represents + a valid unicast IP host address. + + If one of the Optional Parameters in the OPEN message is not + recognized, then the Error Subcode MUST be set to Unsupported + Optional Parameters. + + If one of the Optional Parameters in the OPEN message is recognized, + but is malformed, then the Error Subcode MUST be set to 0 + (Unspecific). + +6.3. UPDATE Message Error Handling + + All errors detected while processing the UPDATE message MUST be + indicated by sending the NOTIFICATION message with the Error Code + UPDATE Message Error. The error subcode elaborates on the specific + nature of the error. + + Error checking of an UPDATE message begins by examining the path + attributes. If the Withdrawn Routes Length or Total Attribute Length + is too large (i.e., if Withdrawn Routes Length + Total Attribute + Length + 23 exceeds the message Length), then the Error Subcode MUST + be set to Malformed Attribute List. + + If any recognized attribute has Attribute Flags that conflict with + the Attribute Type Code, then the Error Subcode MUST be set to + Attribute Flags Error. The Data field MUST contain the erroneous + attribute (type, length, and value). + + + + + +Rekhter, et al. 
Standards Track [Page 32] + +RFC 4271 BGP-4 January 2006 + + + If any recognized attribute has an Attribute Length that conflicts + with the expected length (based on the attribute type code), then the + Error Subcode MUST be set to Attribute Length Error. The Data field + MUST contain the erroneous attribute (type, length, and value). + + If any of the well-known mandatory attributes are not present, then + the Error Subcode MUST be set to Missing Well-known Attribute. The + Data field MUST contain the Attribute Type Code of the missing, + well-known attribute. + + If any of the well-known mandatory attributes are not recognized, + then the Error Subcode MUST be set to Unrecognized Well-known + Attribute. The Data field MUST contain the unrecognized attribute + (type, length, and value). + + If the ORIGIN attribute has an undefined value, then the Error Sub- + code MUST be set to Invalid Origin Attribute. The Data field MUST + contain the unrecognized attribute (type, length, and value). + + If the NEXT_HOP attribute field is syntactically incorrect, then the + Error Subcode MUST be set to Invalid NEXT_HOP Attribute. The Data + field MUST contain the incorrect attribute (type, length, and value). + Syntactic correctness means that the NEXT_HOP attribute represents a + valid IP host address. + + The IP address in the NEXT_HOP MUST meet the following criteria to be + considered semantically correct: + + a) It MUST NOT be the IP address of the receiving speaker. + + b) In the case of an EBGP, where the sender and receiver are one + IP hop away from each other, either the IP address in the + NEXT_HOP MUST be the sender's IP address that is used to + establish the BGP connection, or the interface associated with + the NEXT_HOP IP address MUST share a common subnet with the + receiving BGP speaker. + + If the NEXT_HOP attribute is semantically incorrect, the error SHOULD + be logged, and the route SHOULD be ignored. 
In this case, a + NOTIFICATION message SHOULD NOT be sent, and the connection SHOULD + NOT be closed. + + The AS_PATH attribute is checked for syntactic correctness. If the + path is syntactically incorrect, then the Error Subcode MUST be set + to Malformed AS_PATH. + + + + + + +Rekhter, et al. Standards Track [Page 33] + +RFC 4271 BGP-4 January 2006 + + + If the UPDATE message is received from an external peer, the local + system MAY check whether the leftmost (with respect to the position + of octets in the protocol message) AS in the AS_PATH attribute is + equal to the autonomous system number of the peer that sent the + message. If the check determines this is not the case, the Error + Subcode MUST be set to Malformed AS_PATH. + + If an optional attribute is recognized, then the value of this + attribute MUST be checked. If an error is detected, the attribute + MUST be discarded, and the Error Subcode MUST be set to Optional + Attribute Error. The Data field MUST contain the attribute (type, + length, and value). + + If any attribute appears more than once in the UPDATE message, then + the Error Subcode MUST be set to Malformed Attribute List. + + The NLRI field in the UPDATE message is checked for syntactic + validity. If the field is syntactically incorrect, then the Error + Subcode MUST be set to Invalid Network Field. + + If a prefix in the NLRI field is semantically incorrect (e.g., an + unexpected multicast IP address), an error SHOULD be logged locally, + and the prefix SHOULD be ignored. + + An UPDATE message that contains correct path attributes, but no NLRI, + SHALL be treated as a valid UPDATE message. + +6.4. NOTIFICATION Message Error Handling + + If a peer sends a NOTIFICATION message, and the receiver of the + message detects an error in that message, the receiver cannot use a + NOTIFICATION message to report this error back to the peer. 
Any such + error (e.g., an unrecognized Error Code or Error Subcode) SHOULD be + noticed, logged locally, and brought to the attention of the + administration of the peer. The means to do this, however, lies + outside the scope of this document. + +6.5. Hold Timer Expired Error Handling + + If a system does not receive successive KEEPALIVE, UPDATE, and/or + NOTIFICATION messages within the period specified in the Hold Time + field of the OPEN message, then the NOTIFICATION message with the + Hold Timer Expired Error Code is sent and the BGP connection is + closed. + + + + + + + +Rekhter, et al. Standards Track [Page 34] + +RFC 4271 BGP-4 January 2006 + + +6.6. Finite State Machine Error Handling + + Any error detected by the BGP Finite State Machine (e.g., receipt of + an unexpected event) is indicated by sending the NOTIFICATION message + with the Error Code Finite State Machine Error. + +6.7. Cease + + In the absence of any fatal errors (that are indicated in this + section), a BGP peer MAY choose, at any given time, to close its BGP + connection by sending the NOTIFICATION message with the Error Code + Cease. However, the Cease NOTIFICATION message MUST NOT be used when + a fatal error indicated by this section does exist. + + A BGP speaker MAY support the ability to impose a locally-configured, + upper bound on the number of address prefixes the speaker is willing + to accept from a neighbor. When the upper bound is reached, the + speaker, under control of local configuration, either (a) discards + new address prefixes from the neighbor (while maintaining the BGP + connection with the neighbor), or (b) terminates the BGP connection + with the neighbor. If the BGP speaker decides to terminate its BGP + connection with a neighbor because the number of address prefixes + received from the neighbor exceeds the locally-configured, upper + bound, then the speaker MUST send the neighbor a NOTIFICATION message + with the Error Code Cease. 
The speaker MAY also log this locally. + +6.8. BGP Connection Collision Detection + + If a pair of BGP speakers try to establish a BGP connection with each + other simultaneously, then two parallel connections will be formed. + If the source IP address used by one of these connections is the same + as the destination IP address used by the other, and the destination + IP address used by the first connection is the same as the source IP + address used by the other, connection collision has occurred. In the + event of connection collision, one of the connections MUST be closed. + + Based on the value of the BGP Identifier, a convention is established + for detecting which BGP connection is to be preserved when a + collision occurs. The convention is to compare the BGP Identifiers + of the peers involved in the collision and to retain only the + connection initiated by the BGP speaker with the higher-valued BGP + Identifier. + + Upon receipt of an OPEN message, the local system MUST examine all of + its connections that are in the OpenConfirm state. A BGP speaker MAY + also examine connections in an OpenSent state if it knows the BGP + Identifier of the peer by means outside of the protocol. If, among + these connections, there is a connection to a remote BGP speaker + + + +Rekhter, et al. Standards Track [Page 35] + +RFC 4271 BGP-4 January 2006 + + + whose BGP Identifier equals the one in the OPEN message, and this + connection collides with the connection over which the OPEN message + is received, then the local system performs the following collision + resolution procedure: + + 1) The BGP Identifier of the local system is compared to the BGP + Identifier of the remote system (as specified in the OPEN + message). Comparing BGP Identifiers is done by converting them + to host byte order and treating them as 4-octet unsigned + integers. 
+ + 2) If the value of the local BGP Identifier is less than the + remote one, the local system closes the BGP connection that + already exists (the one that is already in the OpenConfirm + state), and accepts the BGP connection initiated by the remote + system. + + 3) Otherwise, the local system closes the newly created BGP + connection (the one associated with the newly received OPEN + message), and continues to use the existing one (the one that + is already in the OpenConfirm state). + + Unless allowed via configuration, a connection collision with an + existing BGP connection that is in the Established state causes + closing of the newly created connection. + + Note that a connection collision cannot be detected with connections + that are in Idle, Connect, or Active states. + + Closing the BGP connection (that results from the collision + resolution procedure) is accomplished by sending the NOTIFICATION + message with the Error Code Cease. + +7. BGP Version Negotiation + + BGP speakers MAY negotiate the version of the protocol by making + multiple attempts at opening a BGP connection, starting with the + highest version number each BGP speaker supports. If an open attempt + fails with an Error Code, OPEN Message Error, and an Error Subcode, + Unsupported Version Number, then the BGP speaker has available the + version number it tried, the version number its peer tried, the + version number passed by its peer in the NOTIFICATION message, and + the version numbers it supports. If the two peers do support one or + more common versions, then this will allow them to rapidly determine + the highest common version. In order to support BGP version + negotiation, future versions of BGP MUST retain the format of the + OPEN and NOTIFICATION messages. + + + + +Rekhter, et al. Standards Track [Page 36] + +RFC 4271 BGP-4 January 2006 + + +8. 
BGP Finite State Machine (FSM) + + The data structures and FSM described in this document are conceptual + and do not have to be implemented precisely as described here, as + long as the implementations support the described functionality and + they exhibit the same externally visible behavior. + + This section specifies the BGP operation in terms of a Finite State + Machine (FSM). The section falls into two parts: + + 1) Description of Events for the State machine (Section 8.1) + 2) Description of the FSM (Section 8.2) + + Session attributes required (mandatory) for each connection are: + + 1) State + 2) ConnectRetryCounter + 3) ConnectRetryTimer + 4) ConnectRetryTime + 5) HoldTimer + 6) HoldTime + 7) KeepaliveTimer + 8) KeepaliveTime + + The state session attribute indicates the current state of the BGP + FSM. The ConnectRetryCounter indicates the number of times a BGP + peer has tried to establish a peer session. + + The mandatory attributes related to timers are described in Section + 10. Each timer has a "timer" and a "time" (the initial value). + + The optional Session attributes are listed below. These optional + attributes may be supported, either per connection or per local + system: + + 1) AcceptConnectionsUnconfiguredPeers + 2) AllowAutomaticStart + 3) AllowAutomaticStop + 4) CollisionDetectEstablishedState + 5) DampPeerOscillations + 6) DelayOpen + 7) DelayOpenTime + 8) DelayOpenTimer + 9) IdleHoldTime + 10) IdleHoldTimer + 11) PassiveTcpEstablishment + 12) SendNOTIFICATIONwithoutOPEN + 13) TrackTcpState + + + +Rekhter, et al. Standards Track [Page 37] + +RFC 4271 BGP-4 January 2006 + + + The optional session attributes support different features of the BGP + functionality that have implications for the BGP FSM state + transitions. 
Two groups of the attributes which relate to timers + are: + + group 1: DelayOpen, DelayOpenTime, DelayOpenTimer + group 2: DampPeerOscillations, IdleHoldTime, IdleHoldTimer + + The first parameter (DelayOpen, DampPeerOscillations) is an optional + attribute that indicates that the Timer function is active. The + "Time" value specifies the initial value for the "Timer" + (DelayOpenTime, IdleHoldTime). The "Timer" specifies the actual + timer. + + Please refer to Section 8.1.1 for an explanation of the interaction + between these optional attributes and the events signaled to the + state machine. Section 8.2.1.3 also provides a short overview of the + different types of optional attributes (flags or timers). + +8.1. Events for the BGP FSM + +8.1.1. Optional Events Linked to Optional Session Attributes + + The inputs to the BGP FSM are events. Events can either be mandatory + or optional. Some optional events are linked to optional session + attributes. Optional session attributes enable several groups of FSM + functionality. + + The linkage between FSM functionality, events, and the optional + session attributes is described below. + + Group 1: Automatic Administrative Events (Start/Stop) + + Optional Session Attributes: AllowAutomaticStart, + AllowAutomaticStop, + DampPeerOscillations, + IdleHoldTime, IdleHoldTimer + + Option 1: AllowAutomaticStart + + Description: A BGP peer connection can be started and stopped + by administrative control. This administrative + control can either be manual, based on operator + intervention, or under the control of logic that + is specific to a BGP implementation. The term + "automatic" refers to a start being issued to the + BGP peer connection FSM when such logic determines + that the BGP peer connection should be restarted. + + + +Rekhter, et al. 
Standards Track [Page 38] + +RFC 4271 BGP-4 January 2006 + + + The AllowAutomaticStart attribute specifies that + this BGP connection supports automatic starting of + the BGP connection. + + If the BGP implementation supports + AllowAutomaticStart, the peer may be repeatedly + restarted. Three other options control the rate + at which the automatic restart occurs: + DampPeerOscillations, IdleHoldTime, and the + IdleHoldTimer. + + The DampPeerOscillations option specifies that the + implementation engages additional logic to damp + the oscillations of BGP peers in the face of + sequences of automatic start and automatic stop. + IdleHoldTime specifies the length of time the BGP + peer is held in the Idle state prior to allowing + the next automatic restart. The IdleHoldTimer is + the timer that holds the peer in Idle state. + + An example of DampPeerOscillations logic is an + increase of the IdleHoldTime value if a BGP peer + oscillates connectivity (connected/disconnected) + repeatedly within a time period. To engage this + logic, a peer could connect and disconnect 10 + times within 5 minutes. The IdleHoldTime value + would be reset from 0 to 120 seconds. + + Values: TRUE or FALSE + + Option 2: AllowAutomaticStop + + Description: This BGP peer session optional attribute indicates + that the BGP connection allows "automatic" + stopping of the BGP connection. An "automatic" + stop is defined as a stop under the control of + implementation-specific logic. The + implementation-specific logic is outside the scope + of this specification. + + Values: TRUE or FALSE + + Option 3: DampPeerOscillations + + Description: The DampPeerOscillations optional session + attribute indicates that the BGP connection is + using logic that damps BGP peer oscillations in + the Idle State. + + + +Rekhter, et al. 
Standards Track [Page 39] + +RFC 4271 BGP-4 January 2006 + + + Value: TRUE or FALSE + + Option 4: IdleHoldTime + + Description: The IdleHoldTime is the value that is set in the + IdleHoldTimer. + + Values: Time in seconds + + Option 5: IdleHoldTimer + + Description: The IdleHoldTimer aids in controlling BGP peer + oscillation. The IdleHoldTimer is used to keep + the BGP peer in Idle for a particular duration. + The IdleHoldTimer_Expires event is described in + Section 8.1.3. + + Values: Time in seconds + + Group 2: Unconfigured Peers + + Optional Session Attributes: AcceptConnectionsUnconfiguredPeers + + Option 1: AcceptConnectionsUnconfiguredPeers + + Description: The BGP FSM optionally allows the acceptance of + BGP peer connections from neighbors that are not + pre-configured. The + "AcceptConnectionsUnconfiguredPeers" optional + session attribute allows the FSM to support the + state transitions that allow the implementation to + accept or reject these unconfigured peers. + + The AcceptConnectionsUnconfiguredPeers has + security implications. Please refer to the BGP + Vulnerabilities document [RFC4272] for details. + + Value: True or False + + Group 3: TCP processing + + Optional Session Attributes: PassiveTcpEstablishment, + TrackTcpState + + Option 1: PassiveTcpEstablishment + + + + + + +Rekhter, et al. Standards Track [Page 40] + +RFC 4271 BGP-4 January 2006 + + + Description: This option indicates that the BGP FSM will + passively wait for the remote BGP peer to + establish the BGP TCP connection. + + value: TRUE or FALSE + + Option 2: TrackTcpState + + Description: The BGP FSM normally tracks the end result of a + TCP connection attempt rather than individual TCP + messages. Optionally, the BGP FSM can support + additional interaction with the TCP connection + negotiation. The interaction with the TCP events + may increase the amount of logging the BGP peer + connection requires and the number of BGP FSM + changes. 
+ + Value: TRUE or FALSE + + Group 4: BGP Message Processing + + Optional Session Attributes: DelayOpen, DelayOpenTime, + DelayOpenTimer, + SendNOTIFICATIONwithoutOPEN, + CollisionDetectEstablishedState + + Option 1: DelayOpen + + Description: The DelayOpen optional session attribute allows + implementations to be configured to delay sending + an OPEN message for a specific time period + (DelayOpenTime). The delay allows the remote BGP + Peer time to send the first OPEN message. + + Value: TRUE or FALSE + + Option 2: DelayOpenTime + + Description: The DelayOpenTime is the initial value set in the + DelayOpenTimer. + + Value: Time in seconds + + Option 3: DelayOpenTimer + + Description: The DelayOpenTimer optional session attribute is + used to delay the sending of an OPEN message on a + + + + +Rekhter, et al. Standards Track [Page 41] + +RFC 4271 BGP-4 January 2006 + + + connection. The DelayOpenTimer_Expires event + (Event 12) is described in Section 8.1.3. + + Value: Time in seconds + + Option 4: SendNOTIFICATIONwithoutOPEN + + Description: The SendNOTIFICATIONwithoutOPEN allows a peer to + send a NOTIFICATION without first sending an OPEN + message. Without this optional session attribute, + the BGP connection assumes that an OPEN message + must be sent by a peer prior to the peer sending a + NOTIFICATION message. + + Value: True or False + + Option 5: CollisionDetectEstablishedState + + Description: Normally, a Detect Collision (see Section 6.8) + will be ignored in the Established state. This + optional session attribute indicates that this BGP + connection processes collisions in the Established + state. + + Value: True or False + + Note: The optional session attributes clarify the BGP FSM + description for existing features of BGP implementations. + The optional session attributes may be pre-defined for an + implementation and not readable via management interfaces + for existing correct implementations. 
As newer BGP MIBs + (version 2 and beyond) are supported, these fields will be + accessible via a management interface. + +8.1.2. Administrative Events + + An administrative event is an event in which the operator interface + and BGP Policy engine signal the BGP-finite state machine to start or + stop the BGP state machine. The basic start and stop indications are + augmented by optional connection attributes that signal a certain + type of start or stop mechanism to the BGP FSM. An example of this + combination is Event 5, AutomaticStart_with_PassiveTcpEstablishment. + With this event, the BGP implementation signals to the BGP FSM that + the implementation is using an Automatic Start with the option to use + a Passive TCP Establishment. The Passive TCP establishment signals + that this BGP FSM will wait for the remote side to start the TCP + establishment. + + + + +Rekhter, et al. Standards Track [Page 42] + +RFC 4271 BGP-4 January 2006 + + + Note that only Event 1 (ManualStart) and Event 2 (ManualStop) are + mandatory administrative events. All other administrative events are + optional (Events 3-8). Each event below has a name, definition, + status (mandatory or optional), and the optional session attributes + that SHOULD be set at each stage. When generating Event 1 through + Event 8 for the BGP FSM, the conditions specified in the "Optional + Attribute Status" section are verified. If any of these conditions + are not satisfied, then the local system should log an FSM error. + + The settings of optional session attributes may be implicit in some + implementations, and therefore may not be set explicitly by an + external operator action. Section 8.2.1.5 describes these implicit + settings of the optional session attributes. The administrative + states described below may also be implicit in some implementations + and not directly configurable by an external operator. 
+ + Event 1: ManualStart + + Definition: Local system administrator manually starts the peer + connection. + + Status: Mandatory + + Optional + Attribute + Status: The PassiveTcpEstablishment attribute SHOULD be set + to FALSE. + + Event 2: ManualStop + + Definition: Local system administrator manually stops the peer + connection. + + Status: Mandatory + + Optional + Attribute + Status: No interaction with any optional attributes. + + Event 3: AutomaticStart + + Definition: Local system automatically starts the BGP + connection. + + Status: Optional, depending on local system + + + + + + +Rekhter, et al. Standards Track [Page 43] + +RFC 4271 BGP-4 January 2006 + + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE if this event occurs. + 2) If the PassiveTcpEstablishment optional session + attribute is supported, it SHOULD be set to + FALSE. + 3) If the DampPeerOscillations is supported, it + SHOULD be set to FALSE when this event occurs. + + Event 4: ManualStart_with_PassiveTcpEstablishment + + Definition: Local system administrator manually starts the peer + connection, but has PassiveTcpEstablishment + enabled. The PassiveTcpEstablishment optional + attribute indicates that the peer will listen prior + to establishing the connection. + + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE if this event occurs. + 2) The DampPeerOscillations attribute SHOULD be set + to FALSE when this event occurs. + + Event 5: AutomaticStart_with_PassiveTcpEstablishment + + Definition: Local system automatically starts the BGP + connection with the PassiveTcpEstablishment + enabled. The PassiveTcpEstablishment optional + attribute indicates that the peer will listen prior + to establishing a connection. + + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. 
+ 2) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE. + 3) If the DampPeerOscillations attribute is + supported, the DampPeerOscillations SHOULD be + set to FALSE. + + + + + +Rekhter, et al. Standards Track [Page 44] + +RFC 4271 BGP-4 January 2006 + + + Event 6: AutomaticStart_with_DampPeerOscillations + + Definition: Local system automatically starts the BGP peer + connection with peer oscillation damping enabled. + The exact method of damping persistent peer + oscillations is determined by the implementation + and is outside the scope of this document. + + Status: Optional, depending on local system. + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. + 2) The DampPeerOscillations attribute SHOULD be set + to TRUE. + 3) The PassiveTcpEstablishment attribute SHOULD be + set to FALSE. + + Event 7: AutomaticStart_with_DampPeerOscillations_and_ + PassiveTcpEstablishment + + Definition: Local system automatically starts the BGP peer + connection with peer oscillation damping enabled + and PassiveTcpEstablishment enabled. The exact + method of damping persistent peer oscillations is + determined by the implementation and is outside the + scope of this document. + + Status: Optional, depending on local system + + Optional + Attributes + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. + 2) The DampPeerOscillations attribute SHOULD be set + to TRUE. + 3) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE. + + Event 8: AutomaticStop + + Definition: Local system automatically stops the BGP + connection. + + An example of an automatic stop event is exceeding + the number of prefixes for a given peer and the + local system automatically disconnecting the peer. + + + +Rekhter, et al. Standards Track [Page 45] + +RFC 4271 BGP-4 January 2006 + + + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The AllowAutomaticStop attribute SHOULD be TRUE. + +8.1.3. 
Timer Events + + Event 9: ConnectRetryTimer_Expires + + Definition: An event generated when the ConnectRetryTimer + expires. + + Status: Mandatory + + Event 10: HoldTimer_Expires + + Definition: An event generated when the HoldTimer expires. + + Status: Mandatory + + Event 11: KeepaliveTimer_Expires + + Definition: An event generated when the KeepaliveTimer expires. + + Status: Mandatory + + Event 12: DelayOpenTimer_Expires + + Definition: An event generated when the DelayOpenTimer expires. + + Status: Optional + + Optional + Attribute + Status: If this event occurs, + 1) DelayOpen attribute SHOULD be set to TRUE, + 2) DelayOpenTime attribute SHOULD be supported, + 3) DelayOpenTimer SHOULD be supported. + + Event 13: IdleHoldTimer_Expires + + Definition: An event generated when the IdleHoldTimer expires, + indicating that the BGP connection has completed + waiting for the back-off period to prevent BGP peer + oscillation. + + + + + +Rekhter, et al. Standards Track [Page 46] + +RFC 4271 BGP-4 January 2006 + + + The IdleHoldTimer is only used when the persistent + peer oscillation damping function is enabled by + setting the DampPeerOscillations optional attribute + to TRUE. + + Implementations not implementing the persistent + peer oscillation damping function may not have the + IdleHoldTimer. + + Status: Optional + + Optional + Attribute + Status: If this event occurs: + 1) DampPeerOscillations attribute SHOULD be set to + TRUE. + 2) IdleHoldTimer SHOULD have just expired. + +8.1.4. TCP Connection-Based Events + + Event 14: TcpConnection_Valid + + Definition: Event indicating the local system reception of a + TCP connection request with a valid source IP + address, TCP port, destination IP address, and TCP + Port. The definition of invalid source and invalid + destination IP address is determined by the + implementation. + + BGP's destination port SHOULD be port 179, as + defined by IANA. 
+ + A TCP connection request is denoted by the local + system receiving a TCP SYN. + + Status: Optional + + Optional + Attribute + Status: 1) The TrackTcpState attribute SHOULD be set to + TRUE if this event occurs. + + Event 15: Tcp_CR_Invalid + + Definition: Event indicating the local system reception of a + TCP connection request with either an invalid + source address or port number, or an invalid + destination address or port number. + + + +Rekhter, et al. Standards Track [Page 47] + +RFC 4271 BGP-4 January 2006 + + + BGP destination port number SHOULD be 179, as + defined by IANA. + + A TCP connection request occurs when the local + system receives a TCP SYN. + + Status: Optional + + Optional + Attribute + Status: 1) The TrackTcpState attribute SHOULD be set to + TRUE if this event occurs. + + Event 16: Tcp_CR_Acked + + Definition: Event indicating the local system's request to + establish a TCP connection to the remote peer. + + The local system's TCP connection sent a TCP SYN, + received a TCP SYN/ACK message, and sent a TCP ACK. + + Status: Mandatory + + Event 17: TcpConnectionConfirmed + + Definition: Event indicating that the local system has received + a confirmation that the TCP connection has been + established by the remote site. + + The remote peer's TCP engine sent a TCP SYN. The + local peer's TCP engine sent a SYN/ACK message and + now has received a final ACK. + + Status: Mandatory + + Event 18: TcpConnectionFails + + Definition: Event indicating that the local system has received + a TCP connection failure notice. + + The remote BGP peer's TCP machine could have sent a + FIN. The local peer would respond with a FIN-ACK. + Another possibility is that the local peer + indicated a timeout in the TCP connection and + downed the connection. + + Status: Mandatory + + + + +Rekhter, et al. Standards Track [Page 48] + +RFC 4271 BGP-4 January 2006 + + +8.1.5. 
BGP Message-Based Events + + Event 19: BGPOpen + + Definition: An event is generated when a valid OPEN message has + been received. + + Status: Mandatory + + Optional + Attribute + Status: 1) The DelayOpen optional attribute SHOULD be set + to FALSE. + 2) The DelayOpenTimer SHOULD NOT be running. + + Event 20: BGPOpen with DelayOpenTimer running + + Definition: An event is generated when a valid OPEN message has + been received for a peer that has a successfully + established transport connection and is currently + delaying the sending of a BGP OPEN message. + + Status: Optional + + Optional + Attribute + Status: 1) The DelayOpen attribute SHOULD be set to TRUE. + 2) The DelayOpenTimer SHOULD be running. + + Event 21: BGPHeaderErr + + Definition: An event is generated when a received BGP message + header is not valid. + + Status: Mandatory + + Event 22: BGPOpenMsgErr + + Definition: An event is generated when an OPEN message has been + received with errors. + + Status: Mandatory + + Event 23: OpenCollisionDump + + Definition: An event generated administratively when a + connection collision has been detected while + processing an incoming OPEN message and this + + + +Rekhter, et al. Standards Track [Page 49] + +RFC 4271 BGP-4 January 2006 + + + connection has been selected to be disconnected. + See Section 6.8 for more information on collision + detection. + + Event 23 is an administrative action generated by + implementation logic that determines whether this + connection needs to be dropped per the rules in + Section 6.8. This event may occur if the FSM is + implemented as two linked state machines. + + Status: Optional + + Optional + Attribute + Status: If the state machine is to process this event in + the Established state, + 1) CollisionDetectEstablishedState optional + attribute SHOULD be set to TRUE. + + Please note: The OpenCollisionDump event can occur + in Idle, Connect, Active, OpenSent, and OpenConfirm + without any optional attributes being set. 
+ + Event 24: NotifMsgVerErr + + Definition: An event is generated when a NOTIFICATION message + with "version error" is received. + + Status: Mandatory + + Event 25: NotifMsg + + Definition: An event is generated when a NOTIFICATION message + is received and the error code is anything but + "version error". + + Status: Mandatory + + Event 26: KeepAliveMsg + + Definition: An event is generated when a KEEPALIVE message is + received. + + Status: Mandatory + + + + + + + +Rekhter, et al. Standards Track [Page 50] + +RFC 4271 BGP-4 January 2006 + + + Event 27: UpdateMsg + + Definition: An event is generated when a valid UPDATE message + is received. + + Status: Mandatory + + Event 28: UpdateMsgErr + + Definition: An event is generated when an invalid UPDATE + message is received. + + Status: Mandatory + +8.2. Description of FSM + +8.2.1. FSM Definition + + BGP MUST maintain a separate FSM for each configured peer. Each BGP + peer paired in a potential connection will attempt to connect to the + other, unless configured to remain in the idle state, or configured + to remain passive. For the purpose of this discussion, the active or + connecting side of the TCP connection (the side of a TCP connection + sending the first TCP SYN packet) is called outgoing. The passive or + listening side (the sender of the first SYN/ACK) is called an + incoming connection. (See Section 8.2.1.1 for information on the + terms active and passive used below.) + + A BGP implementation MUST connect to and listen on TCP port 179 for + incoming connections in addition to trying to connect to peers. For + each incoming connection, a state machine MUST be instantiated. + There exists a period in which the identity of the peer on the other + end of an incoming connection is known, but the BGP identifier is not + known. During this time, both an incoming and outgoing connection + may exist for the same configured peering. This is referred to as a + connection collision (see Section 6.8). 
+ + A BGP implementation will have, at most, one FSM for each configured + peering, plus one FSM for each incoming TCP connection for which the + peer has not yet been identified. Each FSM corresponds to exactly + one TCP connection. + + There may be more than one connection between a pair of peers if the + connections are configured to use a different pair of IP addresses. + This is referred to as multiple "configured peerings" to the same + peer. + + + + + +Rekhter, et al. Standards Track [Page 51] + +RFC 4271 BGP-4 January 2006 + + +8.2.1.1. Terms "active" and "passive" + + The terms active and passive have been in the Internet operator's + vocabulary for almost a decade and have proven useful. The words + active and passive have slightly different meanings when applied to a + TCP connection or a peer. There is only one active side and one + passive side to any one TCP connection, per the definition above and + the state machine below. When a BGP speaker is configured as active, + it may end up on either the active or passive side of the connection + that eventually gets established. Once the TCP connection is + completed, it doesn't matter which end was active and which was + passive. The only difference is in which side of the TCP connection + has port number 179. + +8.2.1.2. FSM and Collision Detection + + There is one FSM per BGP connection. When the connection collision + occurs prior to determining what peer a connection is associated + with, there may be two connections for one peer. After the + connection collision is resolved (see Section 6.8), the FSM for the + connection that is closed SHOULD be disposed. + +8.2.1.3. FSM and Optional Session Attributes + + Optional Session Attributes specify either attributes that act as + flags (TRUE or FALSE) or optional timers. For optional attributes + that act as flags, if the optional session attribute can be set to + TRUE on the system, the corresponding BGP FSM actions must be + supported. 
For example, if the following options can be set in a BGP + implementation: AllowAutomaticStart and PassiveTcpEstablishment, then Events 3, + 4, and 5 must be supported. If an Optional Session attribute cannot + be set to TRUE, the events supporting that set of options do not have + to be supported. + + Each of the optional timers (DelayOpenTimer and IdleHoldTimer) has a + group of attributes that are: + + - flag indicating support, + - Time set in Timer, + - Timer. + + The two optional timers show this format: + + DelayOpenTimer: DelayOpen, DelayOpenTime, DelayOpenTimer + IdleHoldTimer: DampPeerOscillations, IdleHoldTime, + IdleHoldTimer + + + + + +Rekhter, et al. Standards Track [Page 52] + +RFC 4271 BGP-4 January 2006 + + + If the flag indicating support for an optional timer (DelayOpen or + DampPeerOscillations) cannot be set to TRUE, the timers and events + supporting that option do not have to be supported. + +8.2.1.4. FSM Event Numbers + + The Event numbers (1-28) utilized in this state machine description + aid in specifying the behavior of the BGP state machine. + Implementations MAY use these numbers to provide network management + information. The exact form of an FSM or the FSM events is specific + to each implementation. + +8.2.1.5. FSM Actions that are Implementation Dependent + + At certain points, the BGP FSM specifies that BGP initialization will + occur or that BGP resources will be deleted. The initialization of + the BGP FSM and the associated resources depend on the policy portion + of the BGP implementation. The details of these actions are outside + the scope of the FSM document. + +8.2.2. Finite State Machine + + Idle state: + + Initially, the BGP peer FSM is in the Idle state. Hereafter, the + BGP peer FSM will be shortened to BGP FSM. + + In this state, BGP FSM refuses all incoming BGP connections for + this peer. No resources are allocated to the peer. 
In response + to a ManualStart event (Event 1) or an AutomaticStart event (Event + 3), the local system: + + - initializes all BGP resources for the peer connection, + + - sets ConnectRetryCounter to zero, + + - starts the ConnectRetryTimer with the initial value, + + - initiates a TCP connection to the other BGP peer, + + - listens for a connection that may be initiated by the remote + BGP peer, and + + - changes its state to Connect. + + The ManualStop event (Event 2) and AutomaticStop (Event 8) event + are ignored in the Idle state. + + + + +Rekhter, et al. Standards Track [Page 53] + +RFC 4271 BGP-4 January 2006 + + + In response to a ManualStart_with_PassiveTcpEstablishment event + (Event 4) or AutomaticStart_with_PassiveTcpEstablishment event + (Event 5), the local system: + + - initializes all BGP resources, + + - sets the ConnectRetryCounter to zero, + + - starts the ConnectRetryTimer with the initial value, + + - listens for a connection that may be initiated by the remote + peer, and + + - changes its state to Active. + + The exact value of the ConnectRetryTimer is a local matter, but it + SHOULD be sufficiently large to allow TCP initialization. + + If the DampPeerOscillations attribute is set to TRUE, the + following three additional events may occur within the Idle state: + + - AutomaticStart_with_DampPeerOscillations (Event 6), + + - AutomaticStart_with_DampPeerOscillations_and_ + PassiveTcpEstablishment (Event 7), + + - IdleHoldTimer_Expires (Event 13). + + Upon receiving these 3 events, the local system will use these + events to prevent peer oscillations. The method of preventing + persistent peer oscillation is outside the scope of this document. + + Any other event (Events 9-12, 15-28) received in the Idle state + does not cause change in the state of the local system. + + Connect State: + + In this state, BGP FSM is waiting for the TCP connection to be + completed. + + The start events (Events 1, 3-7) are ignored in the Connect state. 
+ + In response to a ManualStop event (Event 2), the local system: + + - drops the TCP connection, + + - releases all BGP resources, + + + + +Rekhter, et al. Standards Track [Page 54] + +RFC 4271 BGP-4 January 2006 + + + - sets ConnectRetryCounter to zero, + + - stops the ConnectRetryTimer and sets ConnectRetryTimer to + zero, and + + - changes its state to Idle. + + In response to the ConnectRetryTimer_Expires event (Event 9), the + local system: + + - drops the TCP connection, + + - restarts the ConnectRetryTimer, + + - stops the DelayOpenTimer and resets the timer to zero, + + - initiates a TCP connection to the other BGP peer, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - stays in the Connect state. + + If the DelayOpenTimer_Expires event (Event 12) occurs in the + Connect state, the local system: + + - sends an OPEN message to its peer, + + - sets the HoldTimer to a large value, and + + - changes its state to OpenSent. + + If the BGP FSM receives a TcpConnection_Valid event (Event 14), + the TCP connection is processed, and the connection remains in the + Connect state. + + If the BGP FSM receives a Tcp_CR_Invalid event (Event 15), the + local system rejects the TCP connection, and the connection + remains in the Connect state. + + If the TCP connection succeeds (Event 16 or Event 17), the local + system checks the DelayOpen attribute prior to processing. If the + DelayOpen attribute is set to TRUE, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - sets the DelayOpenTimer to the initial value, and + + + +Rekhter, et al. Standards Track [Page 55] + +RFC 4271 BGP-4 January 2006 + + + - stays in the Connect state. 
+ + If the DelayOpen attribute is set to FALSE, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - completes BGP initialization + + - sends an OPEN message to its peer, + + - sets the HoldTimer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is suggested. + + If the TCP connection fails (Event 18), the local system checks + the DelayOpenTimer. If the DelayOpenTimer is running, the local + system: + + - restarts the ConnectRetryTimer with the initial value, + + - stops the DelayOpenTimer and resets its value to zero, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - changes its state to Active. + + If the DelayOpenTimer is not running, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to zero, + + - drops the TCP connection, + + - releases all BGP resources, and + + - changes its state to Idle. + + If an OPEN message is received while the DelayOpenTimer is running + (Event 20), the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - completes the BGP initialization, + + + + +Rekhter, et al. Standards Track [Page 56] + +RFC 4271 BGP-4 January 2006 + + + - stops and clears the DelayOpenTimer (sets the value to zero), + + - sends an OPEN message, + + - sends a KEEPALIVE message, + + - if the HoldTimer initial value is non-zero, + + - starts the KeepaliveTimer with the initial value and + + - resets the HoldTimer to the negotiated value, + + else, if the HoldTimer initial value is zero, + + - resets the KeepaliveTimer and + + - resets the HoldTimer value to zero, + + - and changes its state to OpenConfirm. + + If the value of the autonomous system field is the same as the + local Autonomous System number, set the connection status to an + internal connection; otherwise it will be "external". 
+ + If BGP message header checking (Event 21) or OPEN message checking + detects an error (Event 22) (see Section 6.2), the local system: + + - (optionally) If the SendNOTIFICATIONwithoutOPEN attribute is + set to TRUE, then the local system first sends a NOTIFICATION + message with the appropriate error code, and then + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system checks the DelayOpenTimer. If the + DelayOpenTimer is running, the local system: + + + +Rekhter, et al. Standards Track [Page 57] + +RFC 4271 BGP-4 January 2006 + + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and resets the DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. + + If the DelayOpenTimer is not running, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to + zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - performs peer oscillation damping if the DampPeerOscillations + attribute is set to True, and + + - changes its state to Idle. 
+ + In response to any other events (Events 8, 10-11, 13, 19, 23, + 25-28), the local system: + + - if the ConnectRetryTimer is running, stops and resets the + ConnectRetryTimer (sets to zero), + + - if the DelayOpenTimer is running, stops and resets the + DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - performs peer oscillation damping if the DampPeerOscillations + attribute is set to True, and + + - changes its state to Idle. + + + + + +Rekhter, et al. Standards Track [Page 58] + +RFC 4271 BGP-4 January 2006 + + + Active State: + + In this state, BGP FSM is trying to acquire a peer by listening + for, and accepting, a TCP connection. + + The start events (Events 1, 3-7) are ignored in the Active state. + + In response to a ManualStop event (Event 2), the local system: + + - If the DelayOpenTimer is running and the + SendNOTIFICATIONwithoutOPEN session attribute is set, the + local system sends a NOTIFICATION with a Cease, + + - releases all BGP resources including stopping the + DelayOpenTimer + + - drops the TCP connection, + + - sets ConnectRetryCounter to zero, + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to + zero, and + + - changes its state to Idle. + + In response to a ConnectRetryTimer_Expires event (Event 9), the + local system: + + - restarts the ConnectRetryTimer (with initial value), + + - initiates a TCP connection to the other BGP peer, + + - continues to listen for a TCP connection that may be initiated + by a remote BGP peer, and + + - changes its state to Connect. + + If the local system receives a DelayOpenTimer_Expires event (Event + 12), the local system: + + - sets the ConnectRetryTimer to zero, + + - stops and clears the DelayOpenTimer (set to zero), + + - completes the BGP initialization, + + - sends the OPEN message to its remote peer, + + + + +Rekhter, et al. 
Standards Track [Page 59] + +RFC 4271 BGP-4 January 2006 + + + - sets its hold timer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is also suggested for this state + transition. + + If the local system receives a TcpConnection_Valid event (Event + 14), the local system processes the TCP connection flags and stays + in the Active state. + + If the local system receives a Tcp_CR_Invalid event (Event 15), + the local system rejects the TCP connection and stays in the + Active State. + + In response to the success of a TCP connection (Event 16 or Event + 17), the local system checks the DelayOpen optional attribute + prior to processing. + + If the DelayOpen attribute is set to TRUE, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer + to zero, + + - sets the DelayOpenTimer to the initial value + (DelayOpenTime), and + + - stays in the Active state. + + If the DelayOpen attribute is set to FALSE, the local system: + + - sets the ConnectRetryTimer to zero, + + - completes the BGP initialization, + + - sends the OPEN message to its peer, + + - sets its HoldTimer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is suggested as a "large value" for + the HoldTimer. + + If the local system receives a TcpConnectionFails event (Event + 18), the local system: + + - restarts the ConnectRetryTimer (with the initial value), + + + +Rekhter, et al. Standards Track [Page 60] + +RFC 4271 BGP-4 January 2006 + + + - stops and clears the DelayOpenTimer (sets the value to zero), + + - releases all BGP resources, + + - increments the ConnectRetryCounter by 1, + + - optionally performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. 
+ + If an OPEN message is received and the DelayOpenTimer is running + (Event 20), the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and clears the DelayOpenTimer (sets to zero), + + - completes the BGP initialization, + + - sends an OPEN message, + + - sends a KEEPALIVE message, + + - if the HoldTimer value is non-zero, + + - starts the KeepaliveTimer to initial value, + + - resets the HoldTimer to the negotiated value, + + else if the HoldTimer is zero + + - resets the KeepaliveTimer (set to zero), + + - resets the HoldTimer to zero, and + + - changes its state to OpenConfirm. + + If the value of the autonomous system field is the same as the + local Autonomous System number, set the connection status to an + internal connection; otherwise it will be external. + + If BGP message header checking (Event 21) or OPEN message checking + detects an error (Event 22) (see Section 6.2), the local system: + + + + + + +Rekhter, et al. Standards Track [Page 61] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) sends a NOTIFICATION message with the appropriate + error code if the SendNOTIFICATIONwithoutOPEN attribute is set + to TRUE, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system checks the DelayOpenTimer. If the + DelayOpenTimer is running, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and resets the DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. 
+ + If the DelayOpenTimer is not running, the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + + + + +Rekhter, et al. Standards Track [Page 62] + +RFC 4271 BGP-4 January 2006 + + + In response to any other event (Events 8, 10-11, 13, 19, 23, + 25-28), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by one, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + OpenSent: + + In this state, BGP FSM waits for an OPEN message from its peer. + + The start events (Events 1, 3-7) are ignored in the OpenSent + state. + + If a ManualStop event (Event 2) is issued in the OpenSent state, + the local system: + + - sends the NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, and + + - changes its state to Idle. + + If an AutomaticStop event (Event 8) is issued in the OpenSent + state, the local system: + + - sends the NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all the BGP resources, + + - drops the TCP connection, + + + +Rekhter, et al. Standards Track [Page 63] + +RFC 4271 BGP-4 January 2006 + + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. 
+ + If the HoldTimer_Expires (Event 10), the local system: + + - sends a NOTIFICATION message with the error code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a TcpConnection_Valid (Event 14), Tcp_CR_Acked (Event 16), or a + TcpConnectionConfirmed event (Event 17) is received, a second TCP + connection may be in progress. This second TCP connection is + tracked per Connection Collision processing (Section 6.8) until an + OPEN message is received. + + A TCP Connection Request for an Invalid port (Tcp_CR_Invalid + (Event 15)) is ignored. + + If a TcpConnectionFails event (Event 18) is received, the local + system: + + - closes the BGP connection, + + - restarts the ConnectRetryTimer, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - changes its state to Active. + + + + + + +Rekhter, et al. Standards Track [Page 64] + +RFC 4271 BGP-4 January 2006 + + + When an OPEN message is received, all fields are checked for + correctness. If there are no errors in the OPEN message (Event + 19), the local system: + + - resets the DelayOpenTimer to zero, + + - sets the BGP ConnectRetryTimer to zero, + + - sends a KEEPALIVE message, and + + - sets a KeepaliveTimer (via the text below) + + - sets the HoldTimer according to the negotiated value (see + Section 4.2), + + - changes its state to OpenConfirm. + + If the negotiated hold time value is zero, then the HoldTimer and + KeepaliveTimer are not started. If the value of the Autonomous + System field is the same as the local Autonomous System number, + then the connection is an "internal" connection; otherwise, it is + an "external" connection. (This will impact UPDATE processing as + described below.) 
+ + If the BGP message header checking (Event 21) or OPEN message + checking detects an error (Event 22)(see Section 6.2), the local + system: + + - sends a NOTIFICATION message with the appropriate error code, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is TRUE, and + + - changes its state to Idle. + + Collision detection mechanisms (Section 6.8) need to be applied + when a valid BGP OPEN message is received (Event 19 or Event 20). + Please refer to Section 6.8 for the details of the comparison. A + + + + + +Rekhter, et al. Standards Track [Page 65] + +RFC 4271 BGP-4 January 2006 + + + CollisionDetectDump event occurs when the BGP implementation + determines, by means outside the scope of this document, that a + connection collision has occurred. + + If a connection in the OpenSent state is determined to be the + connection that must be closed, an OpenCollisionDump (Event 23) is + signaled to the state machine. If such an event is received in + the OpenSent state, the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. 
+ + In response to any other event (Events 9, 11-13, 20, 25-28), the + local system: + + - sends the NOTIFICATION with the Error Code Finite State + Machine Error, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + +Rekhter, et al. Standards Track [Page 66] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + OpenConfirm State: + + In this state, BGP waits for a KEEPALIVE or NOTIFICATION message. + + Any start event (Events 1, 3-7) is ignored in the OpenConfirm + state. + + In response to a ManualStop event (Event 2) initiated by the + operator, the local system: + + - sends the NOTIFICATION message with a Cease, + + - releases all BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, + + - sets the ConnectRetryTimer to zero, and + + - changes its state to Idle. + + In response to the AutomaticStop event initiated by the system + (Event 8), the local system: + + - sends the NOTIFICATION message with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the HoldTimer_Expires event (Event 10) occurs before a + KEEPALIVE message is received, the local system: + + + + +Rekhter, et al. 
Standards Track [Page 67] + +RFC 4271 BGP-4 January 2006 + + + - sends the NOTIFICATION message with the Error Code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the local system receives a KeepaliveTimer_Expires event (Event + 11), the local system: + + - sends a KEEPALIVE message, + + - restarts the KeepaliveTimer, and + + - remains in the OpenConfirm state. + + In the event of a TcpConnection_Valid event (Event 14), or the + success of a TCP connection (Event 16 or Event 17) while in + OpenConfirm, the local system needs to track the second + connection. + + If a TCP connection is attempted with an invalid port (Event 15), + the local system will ignore the second connection attempt. + + If the local system receives a TcpConnectionFails event (Event 18) + from the underlying TCP or a NOTIFICATION message (Event 25), the + local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + + + +Rekhter, et al. Standards Track [Page 68] + +RFC 4271 BGP-4 January 2006 + + + - changes its state to Idle. + + If the local system receives a NOTIFICATION message with a version + error (NotifMsgVerErr (Event 24)), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. + + If the local system receives a valid OPEN message (BGPOpen (Event + 19)), the collision detect function is processed per Section 6.8. 
+ If this connection is to be dropped due to connection collision, + the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection (send TCP FIN), + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If an OPEN message is received, all fields are checked for + correctness. If the BGP message header checking (BGPHeaderErr + (Event 21)) or OPEN message checking detects an error (see Section + 6.2) (BGPOpenMsgErr (Event 22)), the local system: + + - sends a NOTIFICATION message with the appropriate error code, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + + +Rekhter, et al. Standards Track [Page 69] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If, during the processing of another OPEN message, the BGP + implementation determines, by a means outside the scope of this + document, that a connection collision has occurred and this + connection is to be closed, the local system will issue an + OpenCollisionDump event (Event 23). When the local system + receives an OpenCollisionDump event (Event 23), the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. 
+ + If the local system receives a KEEPALIVE message (KeepAliveMsg + (Event 26)), the local system: + + - restarts the HoldTimer and + + - changes its state to Established. + + In response to any other event (Events 9, 12-13, 20, 27-28), the + local system: + + - sends a NOTIFICATION with a code of Finite State Machine + Error, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + + +Rekhter, et al. Standards Track [Page 70] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + Established State: + + In the Established state, the BGP FSM can exchange UPDATE, + NOTIFICATION, and KEEPALIVE messages with its peer. + + Any Start event (Events 1, 3-7) is ignored in the Established + state. + + In response to a ManualStop event (initiated by an operator) + (Event 2), the local system: + + - sends the NOTIFICATION message with a Cease, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, and + + - changes its state to Idle. + + In response to an AutomaticStop event (Event 8), the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + + +Rekhter, et al. 
Standards Track [Page 71] + +RFC 4271 BGP-4 January 2006 + + + One reason for an AutomaticStop event is: A BGP speaker receives an UPDATE + message with a number of prefixes for a given peer such that the + total prefixes received exceeds the maximum number of prefixes + configured. The local system automatically disconnects the peer. + + If the HoldTimer_Expires event occurs (Event 10), the local + system: + + - sends a NOTIFICATION message with the Error Code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the KeepaliveTimer_Expires event occurs (Event 11), the local + system: + + - sends a KEEPALIVE message, and + + - restarts its KeepaliveTimer, unless the negotiated HoldTime + value is zero. + + Each time the local system sends a KEEPALIVE or UPDATE message, it + restarts its KeepaliveTimer, unless the negotiated HoldTime value + is zero. + + A TcpConnection_Valid (Event 14), received for a valid port, will + cause the second connection to be tracked. + + An invalid TCP connection (Tcp_CR_Invalid event (Event 15)) will + be ignored. + + In response to an indication that the TCP connection is + successfully established (Event 16 or Event 17), the second + connection SHALL be tracked until it sends an OPEN message. + + + + + + +Rekhter, et al. Standards Track [Page 72] + +RFC 4271 BGP-4 January 2006 + + + If a valid OPEN message (BGPOpen (Event 19)) is received, and if + the CollisionDetectEstablishedState optional attribute is TRUE, + the OPEN message will be checked to see if it collides (Section + 6.8) with any other connection. If the BGP implementation + determines that this connection needs to be terminated, it will + process an OpenCollisionDump event (Event 23). 
If this connection + needs to be terminated, the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations is set to TRUE, and + + - changes its state to Idle. + + If the local system receives a NOTIFICATION message (Event 24 or + Event 25) or a TcpConnectionFails (Event 18) from the underlying + TCP, the local system: + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all the BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - changes its state to Idle. + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 73] + +RFC 4271 BGP-4 January 2006 + + + If the local system receives a KEEPALIVE message (Event 26), the + local system: + + - restarts its HoldTimer, if the negotiated HoldTime value is + non-zero, and + + - remains in the Established state. + + If the local system receives an UPDATE message (Event 27), the + local system: + + - processes the message, + + - restarts its HoldTimer, if the negotiated HoldTime value is + non-zero, and + + - remains in the Established state. + + If the local system receives an UPDATE message, and the UPDATE + message error handling procedure (see Section 6.3) detects an + error (Event 28), the local system: + + - sends a NOTIFICATION message with an Update error, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. 
+ + In response to any other event (Events 9, 12-13, 20-22), the local + system: + + - sends a NOTIFICATION message with the Error Code Finite State + Machine Error, + + - deletes all routes associated with this connection, + + - sets the ConnectRetryTimer to zero, + + + +Rekhter, et al. Standards Track [Page 74] + +RFC 4271 BGP-4 January 2006 + + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + +9. UPDATE Message Handling + + An UPDATE message may be received only in the Established state. + Receiving an UPDATE message in any other state is an error. When an + UPDATE message is received, each field is checked for validity, as + specified in Section 6.3. + + If an optional non-transitive attribute is unrecognized, it is + quietly ignored. If an optional transitive attribute is + unrecognized, the Partial bit (the third high-order bit) in the + attribute flags octet is set to 1, and the attribute is retained for + propagation to other BGP speakers. + + If an optional attribute is recognized and has a valid value, then, + depending on the type of the optional attribute, it is processed + locally, retained, and updated, if necessary, for possible + propagation to other BGP speakers. + + If the UPDATE message contains a non-empty WITHDRAWN ROUTES field, + the previously advertised routes, whose destinations (expressed as IP + prefixes) are contained in this field, SHALL be removed from the + Adj-RIB-In. This BGP speaker SHALL run its Decision Process because + the previously advertised route is no longer available for use. 
+ + If the UPDATE message contains a feasible route, the Adj-RIB-In will + be updated with this route as follows: if the NLRI of the new route + is identical to the one the route currently has stored in the Adj- + RIB-In, then the new route SHALL replace the older route in the Adj- + RIB-In, thus implicitly withdrawing the older route from service. + Otherwise, if the Adj-RIB-In has no route with NLRI identical to the + new route, the new route SHALL be placed in the Adj-RIB-In. + + Once the BGP speaker updates the Adj-RIB-In, the speaker SHALL run + its Decision Process. + + + + + + +Rekhter, et al. Standards Track [Page 75] + +RFC 4271 BGP-4 January 2006 + + +9.1. Decision Process + + The Decision Process selects routes for subsequent advertisement by + applying the policies in the local Policy Information Base (PIB) to + the routes stored in its Adj-RIBs-In. The output of the Decision + Process is the set of routes that will be advertised to peers; the + selected routes will be stored in the local speaker's Adj-RIBs-Out, + according to policy. + + The BGP Decision Process described here is conceptual, and does not + have to be implemented precisely as described, as long as the + implementations support the described functionality and they exhibit + the same externally visible behavior. + + The selection process is formalized by defining a function that takes + the attribute of a given route as an argument and returns either (a) + a non-negative integer denoting the degree of preference for the + route, or (b) a value denoting that this route is ineligible to be + installed in Loc-RIB and will be excluded from the next phase of + route selection. + + The function that calculates the degree of preference for a given + route SHALL NOT use any of the following as its inputs: the existence + of other routes, the non-existence of other routes, or the path + attributes of other routes. 
Route selection then consists of the + individual application of the degree of preference function to each + feasible route, followed by the choice of the one with the highest + degree of preference. + + The Decision Process operates on routes contained in the Adj-RIBs-In, + and is responsible for: + + - selection of routes to be used locally by the speaker + + - selection of routes to be advertised to other BGP peers + + - route aggregation and route information reduction + + The Decision Process takes place in three distinct phases, each + triggered by a different event: + + a) Phase 1 is responsible for calculating the degree of preference + for each route received from a peer. + + b) Phase 2 is invoked on completion of phase 1. It is responsible + for choosing the best route out of all those available for each + distinct destination, and for installing each chosen route into + the Loc-RIB. + + + +Rekhter, et al. Standards Track [Page 76] + +RFC 4271 BGP-4 January 2006 + + + c) Phase 3 is invoked after the Loc-RIB has been modified. It is + responsible for disseminating routes in the Loc-RIB to each + peer, according to the policies contained in the PIB. Route + aggregation and information reduction can optionally be + performed within this phase. + +9.1.1. Phase 1: Calculation of Degree of Preference + + The Phase 1 decision function is invoked whenever the local BGP + speaker receives, from a peer, an UPDATE message that advertises a + new route, a replacement route, or withdrawn routes. + + The Phase 1 decision function is a separate process, which completes + when it has no further work to do. + + The Phase 1 decision function locks an Adj-RIB-In prior to operating + on any route contained within it, and unlocks it after operating on + all new or unfeasible routes contained within it. 
+ + For each newly received or replacement feasible route, the local BGP + speaker determines a degree of preference as follows: + + If the route is learned from an internal peer, either the value of + the LOCAL_PREF attribute is taken as the degree of preference, or + the local system computes the degree of preference of the route + based on preconfigured policy information. Note that the latter + may result in formation of persistent routing loops. + + If the route is learned from an external peer, then the local BGP + speaker computes the degree of preference based on preconfigured + policy information. If the return value indicates the route is + ineligible, the route MAY NOT serve as an input to the next phase + of route selection; otherwise, the return value MUST be used as + the LOCAL_PREF value in any IBGP readvertisement. + + The exact nature of this policy information, and the computation + involved, is a local matter. + +9.1.2. Phase 2: Route Selection + + The Phase 2 decision function is invoked on completion of Phase 1. + The Phase 2 function is a separate process, which completes when it + has no further work to do. The Phase 2 process considers all routes + that are eligible in the Adj-RIBs-In. + + + + + + + +Rekhter, et al. Standards Track [Page 77] + +RFC 4271 BGP-4 January 2006 + + + The Phase 2 decision function is blocked from running while the Phase + 3 decision function is in process. The Phase 2 function locks all + Adj-RIBs-In prior to commencing its function, and unlocks them on + completion. + + If the NEXT_HOP attribute of a BGP route depicts an address that is + not resolvable, or if it would become unresolvable if the route was + installed in the routing table, the BGP route MUST be excluded from + the Phase 2 decision function. + + If the AS_PATH attribute of a BGP route contains an AS loop, the BGP + route should be excluded from the Phase 2 decision function. 
AS loop + detection is done by scanning the full AS path (as specified in the + AS_PATH attribute), and checking that the autonomous system number of + the local system does not appear in the AS path. Operations of a BGP + speaker that is configured to accept routes with its own autonomous + system number in the AS path are outside the scope of this document. + + It is critical that BGP speakers within an AS do not make conflicting + decisions regarding route selection that would cause forwarding loops + to occur. + + For each set of destinations for which a feasible route exists in the + Adj-RIBs-In, the local BGP speaker identifies the route that has: + + a) the highest degree of preference of any route to the same set + of destinations, or + + b) is the only route to that destination, or + + c) is selected as a result of the Phase 2 tie breaking rules + specified in Section 9.1.2.2. + + The local speaker SHALL then install that route in the Loc-RIB, + replacing any route to the same destination that is currently being + held in the Loc-RIB. When the new BGP route is installed in the + Routing Table, care must be taken to ensure that existing routes to + the same destination that are now considered invalid are removed from + the Routing Table. Whether the new BGP route replaces an existing + non-BGP route in the Routing Table depends on the policy configured + on the BGP speaker. + + The local speaker MUST determine the immediate next-hop address from + the NEXT_HOP attribute of the selected route (see Section 5.1.3). If + either the immediate next-hop or the IGP cost to the NEXT_HOP (where + the NEXT_HOP is resolved through an IGP route) changes, Phase 2 Route + Selection MUST be performed again. + + + + +Rekhter, et al. 
Standards Track [Page 78] + +RFC 4271 BGP-4 January 2006 + + + Notice that even though BGP routes do not have to be installed in the + Routing Table with the immediate next-hop(s), implementations MUST + take care that, before any packets are forwarded along a BGP route, + its associated NEXT_HOP address is resolved to the immediate + (directly connected) next-hop address, and that this address (or + multiple addresses) is finally used for actual packet forwarding. + + Unresolvable routes SHALL be removed from the Loc-RIB and the routing + table. However, corresponding unresolvable routes SHOULD be kept in + the Adj-RIBs-In (in case they become resolvable). + +9.1.2.1. Route Resolvability Condition + + As indicated in Section 9.1.2, BGP speakers SHOULD exclude + unresolvable routes from the Phase 2 decision. This ensures that + only valid routes are installed in Loc-RIB and the Routing Table. + + The route resolvability condition is defined as follows: + + 1) A route Rte1, referencing only the intermediate network + address, is considered resolvable if the Routing Table contains + at least one resolvable route Rte2 that matches Rte1's + intermediate network address and is not recursively resolved + (directly or indirectly) through Rte1. If multiple matching + routes are available, only the longest matching route SHOULD be + considered. + + 2) Routes referencing interfaces (with or without intermediate + addresses) are considered resolvable if the state of the + referenced interface is up and if IP processing is enabled on + this interface. + + BGP routes do not refer to interfaces, but can be resolved through + the routes in the Routing Table that can be of both types (those that + specify interfaces or those that do not). IGP routes and routes to + directly connected networks are expected to specify the outbound + interface. Static routes can specify the outbound interface, the + intermediate address, or both. 
+ + Note that a BGP route is considered unresolvable in a situation where + the BGP speaker's Routing Table contains no route matching the BGP + route's NEXT_HOP. Mutually recursive routes (routes resolving each + other or themselves) also fail the resolvability check. + + It is also important that implementations do not consider feasible + routes that would become unresolvable if they were installed in the + Routing Table, even if their NEXT_HOPs are resolvable using the + current contents of the Routing Table (an example of such routes + + + +Rekhter, et al. Standards Track [Page 79] + +RFC 4271 BGP-4 January 2006 + + + would be mutually recursive routes). This check ensures that a BGP + speaker does not install routes in the Routing Table that will be + removed and not used by the speaker. Therefore, in addition to local + Routing Table stability, this check also improves behavior of the + protocol in the network. + + Whenever a BGP speaker identifies a route that fails the + resolvability check because of mutual recursion, an error message + SHOULD be logged. + +9.1.2.2. Breaking Ties (Phase 2) + + In its Adj-RIBs-In, a BGP speaker may have several routes to the same + destination that have the same degree of preference. The local + speaker can select only one of these routes for inclusion in the + associated Loc-RIB. The local speaker considers all routes with the + same degrees of preference, both those received from internal peers, + and those received from external peers. + + The following tie-breaking procedure assumes that, for each candidate + route, all the BGP speakers within an autonomous system can ascertain + the cost of a path (interior distance) to the address depicted by the + NEXT_HOP attribute of the route, and follow the same route selection + algorithm. + + The tie-breaking algorithm begins by considering all equally + preferable routes to the same destination, and then selects routes to + be removed from consideration. 
The algorithm terminates as soon as + only one route remains in consideration. The criteria MUST be + applied in the order specified. + + Several of the criteria are described using pseudo-code. Note that + the pseudo-code shown was chosen for clarity, not efficiency. It is + not intended to specify any particular implementation. BGP + implementations MAY use any algorithm that produces the same results + as those described here. + + a) Remove from consideration all routes that are not tied for + having the smallest number of AS numbers present in their + AS_PATH attributes. Note that when counting this number, an + AS_SET counts as 1, no matter how many ASes are in the set. + + b) Remove from consideration all routes that are not tied for + having the lowest Origin number in their Origin attribute. + + + + + + + +Rekhter, et al. Standards Track [Page 80] + +RFC 4271 BGP-4 January 2006 + + + c) Remove from consideration routes with less-preferred + MULTI_EXIT_DISC attributes. MULTI_EXIT_DISC is only comparable + between routes learned from the same neighboring AS (the + neighboring AS is determined from the AS_PATH attribute). + Routes that do not have the MULTI_EXIT_DISC attribute are + considered to have the lowest possible MULTI_EXIT_DISC value. + + This is also described in the following procedure: + + for m = all routes still under consideration + for n = all routes still under consideration + if (neighborAS(m) == neighborAS(n)) and (MED(n) < MED(m)) + remove route m from consideration + + In the pseudo-code above, MED(n) is a function that returns the + value of route n's MULTI_EXIT_DISC attribute. If route n has + no MULTI_EXIT_DISC attribute, the function returns the lowest + possible MULTI_EXIT_DISC value (i.e., 0). + + Similarly, neighborAS(n) is a function that returns the + neighbor AS from which the route was received. 
If the route is + learned via IBGP, and the other IBGP speaker didn't originate + the route, it is the neighbor AS from which the other IBGP + speaker learned the route. If the route is learned via IBGP, + and the other IBGP speaker either (a) originated the route, or + (b) created the route by aggregation and the AS_PATH attribute + of the aggregate route is either empty or begins with an + AS_SET, it is the local AS. + + If a MULTI_EXIT_DISC attribute is removed before re-advertising + a route into IBGP, then comparison based on the received EBGP + MULTI_EXIT_DISC attribute MAY still be performed. If an + implementation chooses to remove MULTI_EXIT_DISC, then the + optional comparison on MULTI_EXIT_DISC, if performed, MUST be + performed only among EBGP-learned routes. The best EBGP- + learned route may then be compared with IBGP-learned routes + after the removal of the MULTI_EXIT_DISC attribute. If + MULTI_EXIT_DISC is removed from a subset of EBGP-learned + routes, and the selected "best" EBGP-learned route will not + have MULTI_EXIT_DISC removed, then the MULTI_EXIT_DISC must be + used in the comparison with IBGP-learned routes. For IBGP- + learned routes, the MULTI_EXIT_DISC MUST be used in route + comparisons that reach this step in the Decision Process. + Including the MULTI_EXIT_DISC of an EBGP-learned route in the + comparison with an IBGP-learned route, then removing the + MULTI_EXIT_DISC attribute, and advertising the route has been + proven to cause route loops. + + + + +Rekhter, et al. Standards Track [Page 81] + +RFC 4271 BGP-4 January 2006 + + + d) If at least one of the candidate routes was received via EBGP, + remove from consideration all routes that were received via + IBGP. + + e) Remove from consideration any routes with less-preferred + interior cost. The interior cost of a route is determined by + calculating the metric to the NEXT_HOP for the route using the + Routing Table. 
If the NEXT_HOP for a route is reachable, + but no cost can be determined, then this step should be skipped + (equivalently, consider all routes to have equal costs). + + This is also described in the following procedure. + + for m = all routes still under consideration + for n = all routes still under consideration + if (cost(n) is lower than cost(m)) + remove m from consideration + + In the pseudo-code above, cost(n) is a function that returns + the cost of the path (interior distance) to the address given + in the NEXT_HOP attribute of the route. + + f) Remove from consideration all routes other than the route that + was advertised by the BGP speaker with the lowest BGP + Identifier value. + + g) Prefer the route received from the lowest peer address. + +9.1.3. Phase 3: Route Dissemination + + The Phase 3 decision function is invoked on completion of Phase 2, or + when any of the following events occur: + + a) when routes in the Loc-RIB to local destinations have changed + + b) when locally generated routes learned by means outside of BGP + have changed + + c) when a new BGP speaker connection has been established + + The Phase 3 function is a separate process that completes when it has + no further work to do. The Phase 3 Routing Decision function is + blocked from running while the Phase 2 decision function is in + process. + + All routes in the Loc-RIB are processed into Adj-RIBs-Out according + to configured policy. This policy MAY exclude a route in the Loc-RIB + from being installed in a particular Adj-RIB-Out. A route SHALL NOT + + + +Rekhter, et al. Standards Track [Page 82] + +RFC 4271 BGP-4 January 2006 + + + be installed in the Adj-RIB-Out unless the destination, and NEXT_HOP + described by this route, may be forwarded appropriately by the + Routing Table.
If a route in Loc-RIB is excluded from a particular + Adj-RIB-Out, the previously advertised route in that Adj-RIB-Out MUST + be withdrawn from service by means of an UPDATE message (see 9.2). + + Route aggregation and information reduction techniques (see Section + 9.2.2.1) may optionally be applied. + + Any local policy that results in routes being added to an Adj-RIB-Out + without also being added to the local BGP speaker's forwarding table + is outside the scope of this document. + + When the updating of the Adj-RIBs-Out and the Routing Table is + complete, the local BGP speaker runs the Update-Send process of 9.2. + +9.1.4. Overlapping Routes + + A BGP speaker may transmit routes with overlapping Network Layer + Reachability Information (NLRI) to another BGP speaker. NLRI overlap + occurs when a set of destinations are identified in non-matching + multiple routes. Because BGP encodes NLRI using IP prefixes, overlap + will always exhibit subset relationships. A route describing a + smaller set of destinations (a longer prefix) is said to be more + specific than a route describing a larger set of destinations (a + shorter prefix); similarly, a route describing a larger set of + destinations is said to be less specific than a route describing a + smaller set of destinations. + + The precedence relationship effectively decomposes less specific + routes into two parts: + + - a set of destinations described only by the less specific route, + and + + - a set of destinations described by the overlap of the less + specific and the more specific routes + + The set of destinations described by the overlap represents a portion + of the less specific route that is feasible, but is not currently in + use. If a more specific route is later withdrawn, the set of + destinations described by the overlap will still be reachable using + the less specific route. 
+ + If a BGP speaker receives overlapping routes, the Decision Process + MUST consider both routes based on the configured acceptance policy. + If both a less and a more specific route are accepted, then the + Decision Process MUST install, in Loc-RIB, either both the less and + + + +Rekhter, et al. Standards Track [Page 83] + +RFC 4271 BGP-4 January 2006 + + + the more specific routes or aggregate the two routes and install, in + Loc-RIB, the aggregated route, provided that both routes have the + same value of the NEXT_HOP attribute. + + If a BGP speaker chooses to aggregate, then it SHOULD either include + all ASes used to form the aggregate in an AS_SET, or add the + ATOMIC_AGGREGATE attribute to the route. This attribute is now + primarily informational. With the elimination of IP routing + protocols that do not support classless routing, and the elimination + of router and host implementations that do not support classless + routing, there is no longer a need to de-aggregate. Routes SHOULD + NOT be de-aggregated. In particular, a route that carries the + ATOMIC_AGGREGATE attribute MUST NOT be de-aggregated. That is, the + NLRI of this route cannot be more specific. Forwarding along such a + route does not guarantee that IP packets will actually traverse only + ASes listed in the AS_PATH attribute of the route. + +9.2. Update-Send Process + + The Update-Send process is responsible for advertising UPDATE + messages to all peers. For example, it distributes the routes chosen + by the Decision Process to other BGP speakers, which may be located + in either the same autonomous system or a neighboring autonomous + system. + + When a BGP speaker receives an UPDATE message from an internal peer, + the receiving BGP speaker SHALL NOT re-distribute the routing + information contained in that UPDATE message to other internal peers + (unless the speaker acts as a BGP Route Reflector [RFC2796]). 
+ + As part of Phase 3 of the route selection process, the BGP speaker + has updated its Adj-RIBs-Out. All newly installed routes and all + newly unfeasible routes for which there is no replacement route SHALL + be advertised to its peers by means of an UPDATE message. + + A BGP speaker SHOULD NOT advertise a given feasible BGP route from + its Adj-RIB-Out if it would produce an UPDATE message containing the + same BGP route as was previously advertised. + + Any routes in the Loc-RIB marked as unfeasible SHALL be removed. + Changes to the reachable destinations within its own autonomous + system SHALL also be advertised in an UPDATE message. + + If, due to the limits on the maximum size of an UPDATE message (see + Section 4), a single route doesn't fit into the message, the BGP + speaker MUST NOT advertise the route to its peers and MAY choose to + log an error locally. + + + + +Rekhter, et al. Standards Track [Page 84] + +RFC 4271 BGP-4 January 2006 + + +9.2.1. Controlling Routing Traffic Overhead + + The BGP protocol constrains the amount of routing traffic (that is, + UPDATE messages), in order to limit both the link bandwidth needed to + advertise UPDATE messages and the processing power needed by the + Decision Process to digest the information contained in the UPDATE + messages. + +9.2.1.1. Frequency of Route Advertisement + + The parameter MinRouteAdvertisementIntervalTimer determines the + minimum amount of time that must elapse between an advertisement + and/or withdrawal of routes to a particular destination by a BGP + speaker to a peer. This rate limiting procedure applies on a per- + destination basis, although the value of + MinRouteAdvertisementIntervalTimer is set on a per BGP peer basis. + + Two UPDATE messages sent by a BGP speaker to a peer that advertise + feasible routes and/or withdrawal of unfeasible routes to some common + set of destinations MUST be separated by at least + MinRouteAdvertisementIntervalTimer.
This can only be achieved by + keeping a separate timer for each common set of destinations. This + would be unwarranted overhead. Any technique that ensures that the + interval between two UPDATE messages sent from a BGP speaker to a + peer that advertise feasible routes and/or withdrawal of unfeasible + routes to some common set of destinations will be at least + MinRouteAdvertisementIntervalTimer, and that also ensures a + constant upper bound on the interval, is acceptable. + + Since fast convergence is needed within an autonomous system, either + (a) the MinRouteAdvertisementIntervalTimer used for internal peers + SHOULD be shorter than the MinRouteAdvertisementIntervalTimer used + for external peers, or (b) the procedure described in this section + SHOULD NOT apply to routes sent to internal peers. + + This procedure does not limit the rate of route selection, but only + the rate of route advertisement. If new routes are selected multiple + times while awaiting the expiration of + MinRouteAdvertisementIntervalTimer, the last route selected SHALL be + advertised at the end of MinRouteAdvertisementIntervalTimer. + +9.2.1.2. Frequency of Route Origination + + The parameter MinASOriginationIntervalTimer determines the minimum + amount of time that must elapse between successive advertisements of + UPDATE messages that report changes within the advertising BGP + speaker's own autonomous systems. + + + + +Rekhter, et al. Standards Track [Page 85] + +RFC 4271 BGP-4 January 2006 + + +9.2.2. Efficient Organization of Routing Information + + Having selected the routing information it will advertise, a BGP + speaker may avail itself of several methods to organize this + information in an efficient manner. + +9.2.2.1. Information Reduction + + Information reduction may imply a reduction in granularity of policy + control - after information is collapsed, the same policies will + apply to all destinations and paths in the equivalence class.
+ + The Decision Process may optionally reduce the amount of information + that it will place in the Adj-RIBs-Out by any of the following + methods: + + a) Network Layer Reachability Information (NLRI): + + Destination IP addresses can be represented as IP address + prefixes. In cases where there is a correspondence between the + address structure and the systems under control of an + autonomous system administrator, it will be possible to reduce + the size of the NLRI carried in the UPDATE messages. + + b) AS_PATHs: + + AS path information can be represented as ordered AS_SEQUENCEs + or unordered AS_SETs. AS_SETs are used in the route + aggregation algorithm described in Section 9.2.2.2. They + reduce the size of the AS_PATH information by listing each AS + number only once, regardless of how many times it may have + appeared in multiple AS_PATHs that were aggregated. + + An AS_SET implies that the destinations listed in the NLRI can + be reached through paths that traverse at least some of the + constituent autonomous systems. AS_SETs provide sufficient + information to avoid routing information looping; however, + their use may prune potentially feasible paths because such + paths are no longer listed individually in the form of + AS_SEQUENCEs. In practice, this is not likely to be a problem + because once an IP packet arrives at the edge of a group of + autonomous systems, the BGP speaker is likely to have more + detailed path information and can distinguish individual paths + from destinations. + + + + + + + +Rekhter, et al. Standards Track [Page 86] + +RFC 4271 BGP-4 January 2006 + + +9.2.2.2. Aggregating Routing Information + + Aggregation is the process of combining the characteristics of + several different routes in such a way that a single route can be + advertised. Aggregation can occur as part of the Decision Process to + reduce the amount of routing information that will be placed in the + Adj-RIBs-Out. 
+ + Aggregation reduces the amount of information that a BGP speaker must + store and exchange with other BGP speakers. Routes can be aggregated + by applying the following procedure, separately, to path attributes + of the same type and to the Network Layer Reachability Information. + + Routes that have different MULTI_EXIT_DISC attributes SHALL NOT be + aggregated. + + If the aggregated route has an AS_SET as the first element in its + AS_PATH attribute, then the router that originates the route SHOULD + NOT advertise the MULTI_EXIT_DISC attribute with this route. + + Path attributes that have different type codes cannot be aggregated + together. Path attributes of the same type code may be aggregated, + according to the following rules: + + NEXT_HOP: + When aggregating routes that have different NEXT_HOP + attributes, the NEXT_HOP attribute of the aggregated route + SHALL identify an interface on the BGP speaker that performs + the aggregation. + + ORIGIN attribute: + If at least one route among routes that are aggregated has + ORIGIN with the value INCOMPLETE, then the aggregated route + MUST have the ORIGIN attribute with the value INCOMPLETE. + Otherwise, if at least one route among routes that are + aggregated has ORIGIN with the value EGP, then the aggregated + route MUST have the ORIGIN attribute with the value EGP. In + all other cases, the value of the ORIGIN attribute of the + aggregated route is IGP. + + AS_PATH attribute: + If routes to be aggregated have identical AS_PATH attributes, + then the aggregated route has the same AS_PATH attribute as + each individual route. + + For the purpose of aggregating AS_PATH attributes, we model + each AS within the AS_PATH attribute as a tuple <type, value>, + where "type" identifies a type of the path segment the AS + + + +Rekhter, et al. Standards Track [Page 87] + +RFC 4271 BGP-4 January 2006 + + + belongs to (e.g., AS_SEQUENCE, AS_SET), and "value" identifies + the AS number.
If the routes to be aggregated have different + AS_PATH attributes, then the aggregated AS_PATH attribute SHALL + satisfy all of the following conditions: + + - all tuples of type AS_SEQUENCE in the aggregated AS_PATH + SHALL appear in all of the AS_PATHs in the initial set of + routes to be aggregated. + + - all tuples of type AS_SET in the aggregated AS_PATH SHALL + appear in at least one of the AS_PATHs in the initial set + (they may appear as either AS_SET or AS_SEQUENCE types). + + - for any tuple X of type AS_SEQUENCE in the aggregated + AS_PATH, which precedes tuple Y in the aggregated AS_PATH, + X precedes Y in each AS_PATH in the initial set, which + contains Y, regardless of the type of Y. + + - No tuple of type AS_SET with the same value SHALL appear + more than once in the aggregated AS_PATH. + + - Multiple tuples of type AS_SEQUENCE with the same value may + appear in the aggregated AS_PATH only when adjacent to + another tuple of the same type and value. + + An implementation may choose any algorithm that conforms to + these rules. At a minimum, a conformant implementation SHALL + be able to perform the following algorithm that meets all of + the above conditions: + + - determine the longest leading sequence of tuples (as + defined above) common to all the AS_PATH attributes of the + routes to be aggregated. Make this sequence the leading + sequence of the aggregated AS_PATH attribute. + + - set the type of the rest of the tuples from the AS_PATH + attributes of the routes to be aggregated to AS_SET, and + append them to the aggregated AS_PATH attribute. + + - if the aggregated AS_PATH has more than one tuple with the + same value (regardless of tuple's type), eliminate all but + one such tuple by deleting tuples of the type AS_SET from + the aggregated AS_PATH attribute. 
+ + - for each pair of adjacent tuples in the aggregated AS_PATH, + if both tuples have the same type, merge them together, as + long as doing so will not cause a segment with a length + greater than 255 to be generated. + + + +Rekhter, et al. Standards Track [Page 88] + +RFC 4271 BGP-4 January 2006 + + + Appendix F, Section F.6 presents another algorithm that + satisfies the conditions and allows for more complex policy + configurations. + + ATOMIC_AGGREGATE: + If at least one of the routes to be aggregated has + ATOMIC_AGGREGATE path attribute, then the aggregated route + SHALL have this attribute as well. + + AGGREGATOR: + Any AGGREGATOR attributes from the routes to be aggregated MUST + NOT be included in the aggregated route. The BGP speaker + performing the route aggregation MAY attach a new AGGREGATOR + attribute (see Section 5.1.7). + +9.3. Route Selection Criteria + + Generally, additional rules for comparing routes among several + alternatives are outside the scope of this document. There are two + exceptions: + + - If the local AS appears in the AS path of the new route being + considered, then that new route cannot be viewed as better than + any other route (provided that the speaker is configured to + accept such routes). If such a route were ever used, a routing + loop could result. + + - In order to achieve a successful distributed operation, only + routes with a likelihood of stability can be chosen. Thus, an + AS SHOULD avoid using unstable routes, and it SHOULD NOT make + rapid, spontaneous changes to its choice of route. Quantifying + the terms "unstable" and "rapid" (from the previous sentence) + will require experience, but the principle is clear. Routes + that are unstable can be "penalized" (e.g., by using the + procedures described in [RFC2439]). + +9.4. Originating BGP routes + + A BGP speaker may originate BGP routes by injecting routing + information acquired by some other means (e.g., via an IGP) into BGP. 
+ A BGP speaker that originates BGP routes assigns the degree of + preference (e.g., according to local configuration) to these routes + by passing them through the Decision Process (see Section 9.1). + These routes MAY also be distributed to other BGP speakers within the + local AS as part of the update process (see Section 9.2). The + decision of whether to distribute non-BGP acquired routes within an + AS via BGP depends on the environment within the AS (e.g., type of + IGP) and SHOULD be controlled via configuration. + + + +Rekhter, et al. Standards Track [Page 89] + +RFC 4271 BGP-4 January 2006 + + +10. BGP Timers + + BGP employs five timers: ConnectRetryTimer (see Section 8), HoldTimer + (see Section 4.2), KeepaliveTimer (see Section 8), + MinASOriginationIntervalTimer (see Section 9.2.1.2), and + MinRouteAdvertisementIntervalTimer (see Section 9.2.1.1). + + Two optional timers MAY be supported by BGP: DelayOpenTimer and + IdleHoldTimer (see Section 8). Section 8 describes their use. The full + operation of these optional timers is outside the scope of this + document. + + ConnectRetryTime is a mandatory FSM attribute that stores the initial + value for the ConnectRetryTimer. The suggested default value for the + ConnectRetryTime is 120 seconds. + + HoldTime is a mandatory FSM attribute that stores the initial value + for the HoldTimer. The suggested default value for the HoldTime is + 90 seconds. + + During some portions of the state machine (see Section 8), the + HoldTimer is set to a large value. The suggested default for this + large value is 4 minutes. + + The KeepaliveTime is a mandatory FSM attribute that stores the + initial value for the KeepaliveTimer. The suggested default value + for the KeepaliveTime is 1/3 of the HoldTime. + + The suggested default value for the MinASOriginationIntervalTimer is + 15 seconds. + + The suggested default value for the + MinRouteAdvertisementIntervalTimer on EBGP connections is 30 seconds.
+ + The suggested default value for the + MinRouteAdvertisementIntervalTimer on IBGP connections is 5 seconds. + + An implementation of BGP MUST allow the HoldTimer to be configurable + on a per-peer basis, and MAY allow the other timers to be + configurable. + + To minimize the likelihood that the distribution of BGP messages by a + given BGP speaker will contain peaks, jitter SHOULD be applied to the + timers associated with MinASOriginationIntervalTimer, KeepaliveTimer, + MinRouteAdvertisementIntervalTimer, and ConnectRetryTimer. A given + BGP speaker MAY apply the same jitter to each of these quantities, + regardless of the destinations to which the updates are being sent; + that is, jitter need not be configured on a per-peer basis. + + + +Rekhter, et al. Standards Track [Page 90] + +RFC 4271 BGP-4 January 2006 + + + The suggested default amount of jitter SHALL be determined by + multiplying the base value of the appropriate timer by a random + factor, which is uniformly distributed in the range from 0.75 to 1.0. + A new random value SHOULD be picked each time the timer is set. The + range of the jitter's random value MAY be configurable. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 91] + +RFC 4271 BGP-4 January 2006 + + +Appendix A. Comparison with RFC 1771 + + There are numerous editorial changes in comparison to [RFC1771] (too + many to list here). + + The following list the technical changes: + + Changes to reflect the usage of features such as TCP MD5 + [RFC2385], BGP Route Reflectors [RFC2796], BGP Confederations + [RFC3065], and BGP Route Refresh [RFC2918]. + + Clarification of the use of the BGP Identifier in the AGGREGATOR + attribute. + + Procedures for imposing an upper bound on the number of prefixes + that a BGP speaker would accept from a peer. 
+ + The ability of a BGP speaker to include more than one instance of + its own AS in the AS_PATH attribute for the purpose of inter-AS + traffic engineering. + + Clarification of the various types of NEXT_HOPs. + + Clarification of the use of the ATOMIC_AGGREGATE attribute. + + The relationship between the immediate next hop, and the next hop + as specified in the NEXT_HOP path attribute. + + Clarification of the tie-breaking procedures. + + Clarification of the frequency of route advertisements. + + Optional Parameter Type 1 (Authentication Information) has been + deprecated. + + UPDATE Message Error subcode 7 (AS Routing Loop) has been + deprecated. + + OPEN Message Error subcode 5 (Authentication Failure) has been + deprecated. + + Use of the Marker field for authentication has been deprecated. + + Implementations MUST support TCP MD5 [RFC2385] for authentication. + + Clarification of BGP FSM. + + + + + +Rekhter, et al. Standards Track [Page 92] + +RFC 4271 BGP-4 January 2006 + + +Appendix B. Comparison with RFC 1267 + + All the changes listed in Appendix A, plus the following. + + BGP-4 is capable of operating in an environment where a set of + reachable destinations may be expressed via a single IP prefix. The + concept of network classes, or subnetting, is foreign to BGP-4. To + accommodate these capabilities, BGP-4 changes the semantics and + encoding associated with the AS_PATH attribute. New text has been + added to define semantics associated with IP prefixes. These + abilities allow BGP-4 to support the proposed supernetting scheme + [RFC1518, RFC1519]. + + To simplify configuration, this version introduces a new attribute, + LOCAL_PREF, that facilitates route selection procedures. + + The INTER_AS_METRIC attribute has been renamed MULTI_EXIT_DISC. + + A new attribute, ATOMIC_AGGREGATE, has been introduced to ensure that + certain aggregates are not de-aggregated.
Another new attribute, + AGGREGATOR, can be added to aggregate routes to advertise which AS + and which BGP speaker within that AS caused the aggregation. + + To ensure that Hold Timers are symmetric, the Hold Timer is now + negotiated on a per-connection basis. Hold Timers of zero are now + supported. + +Appendix C. Comparison with RFC 1163 + + All of the changes listed in Appendices A and B, plus the following. + + To detect and recover from BGP connection collision, a new field (BGP + Identifier) has been added to the OPEN message. New text (Section + 6.8) has been added to specify the procedure for detecting and + recovering from collision. + + The new document no longer restricts the router that is passed in the + NEXT_HOP path attribute to be part of the same Autonomous System as + the BGP Speaker. + + The new document optimizes and simplifies the exchange of information + about previously reachable routes. + + + + + + + + + +Rekhter, et al. Standards Track [Page 93] + +RFC 4271 BGP-4 January 2006 + + +Appendix D. Comparison with RFC 1105 + + All of the changes listed in Appendices A, B, and C, plus the + following. + + Minor changes to the [RFC1105] Finite State Machine were necessary to + accommodate the TCP user interface provided by BSD version 4.3. + + The notion of Up/Down/Horizontal relations presented in RFC 1105 has + been removed from the protocol. + + The changes in the message format from RFC 1105 are as follows: + + 1. The Hold Time field has been removed from the BGP header and + added to the OPEN message. + + 2. The version field has been removed from the BGP header and + added to the OPEN message. + + 3. The Link Type field has been removed from the OPEN message. + + 4. The OPEN CONFIRM message has been eliminated and replaced with + implicit confirmation, provided by the KEEPALIVE message. + + 5. The format of the UPDATE message has been changed + significantly. 
New fields were added to the UPDATE message to + support multiple path attributes. + + 6. The Marker field has been expanded and its role broadened to + support authentication. + + Note that quite often BGP, as specified in RFC 1105, is referred to + as BGP-1; BGP, as specified in [RFC1163], is referred to as BGP-2; + BGP, as specified in RFC 1267 is referred to as BGP-3; and BGP, as + specified in this document is referred to as BGP-4. + +Appendix E. TCP Options that May Be Used with BGP + + If a local system TCP user interface supports the TCP PUSH function, + then each BGP message SHOULD be transmitted with PUSH flag set. + Setting PUSH flag forces BGP messages to be transmitted to the + receiver promptly. + + If a local system TCP user interface supports setting the DSCP field + [RFC2474] for TCP connections, then the TCP connection used by BGP + SHOULD be opened with bits 0-2 of the DSCP field set to 110 (binary). + + An implementation MUST support the TCP MD5 option [RFC2385]. + + + +Rekhter, et al. Standards Track [Page 94] + +RFC 4271 BGP-4 January 2006 + + +Appendix F. Implementation Recommendations + + This section presents some implementation recommendations. + +Appendix F.1. Multiple Networks Per Message + + The BGP protocol allows for multiple address prefixes with the same + path attributes to be specified in one message. Using this + capability is highly recommended. With one address prefix per + message there is a substantial increase in overhead in the receiver. + Not only does the system overhead increase due to the reception of + multiple messages, but the overhead of scanning the routing table for + updates to BGP peers and other routing protocols (and sending the + associated messages) is incurred multiple times as well. 
+ + One method of building messages that contain many address prefixes + per path attribute set from a routing table that is not organized on + a per path attribute set basis is to build many messages as the + routing table is scanned. As each address prefix is processed, a + message for the associated set of path attributes is allocated, if it + does not exist, and the new address prefix is added to it. If such a + message exists, the new address prefix is appended to it. If the + message lacks the space to hold the new address prefix, it is + transmitted, a new message is allocated, and the new address prefix + is inserted into the new message. When the entire routing table has + been scanned, all allocated messages are sent and their resources are + released. Maximum compression is achieved when all destinations + covered by the address prefixes share a common set of path + attributes, making it possible to send many address prefixes in one + 4096-byte message. + + When peering with a BGP implementation that does not compress + multiple address prefixes into one message, it may be necessary to + take steps to reduce the overhead from the flood of data received + when a peer is acquired or when a significant network topology change + occurs. One method of doing this is to limit the rate of updates. + This will eliminate the redundant scanning of the routing table to + provide flash updates for BGP peers and other routing protocols. A + disadvantage of this approach is that it increases the propagation + latency of routing information. By choosing a minimum flash update + interval that is not much greater than the time it takes to process + the multiple messages, this latency should be minimized. A better + method would be to read all received messages before sending updates. + + + + + + + + +Rekhter, et al. Standards Track [Page 95] + +RFC 4271 BGP-4 January 2006 + + +Appendix F.2. 
Reducing Route Flapping + + To avoid excessive route flapping, a BGP speaker that needs to + withdraw a destination and send an update about a more specific or + less specific route should combine them into the same UPDATE message. + +Appendix F.3. Path Attribute Ordering + + Implementations that combine update messages (as described above in + Section 6.1) may prefer to see all path attributes presented in a + known order. This permits them to quickly identify sets of + attributes from different update messages that are semantically + identical. To facilitate this, it is a useful optimization to order + the path attributes according to type code. This optimization is + entirely optional. + +Appendix F.4. AS_SET Sorting + + Another useful optimization that can be done to simplify this + situation is to sort the AS numbers found in an AS_SET. This + optimization is entirely optional. + +Appendix F.5. Control Over Version Negotiation + + Because BGP-4 is capable of carrying aggregated routes that cannot be + properly represented in BGP-3, an implementation that supports BGP-4 + and another BGP version should provide the capability to only speak + BGP-4 on a per-peer basis. + +Appendix F.6. Complex AS_PATH Aggregation + + An implementation that chooses to provide a path aggregation + algorithm retaining significant amounts of path information may wish + to use the following procedure: + + For the purpose of aggregating AS_PATH attributes of two routes, + we model each AS as a tuple , where "type" identifies + a type of the path segment the AS belongs to (e.g., AS_SEQUENCE, + AS_SET), and "value" is the AS number. Two ASes are said to be + the same if their corresponding tuples are the same. + + The algorithm to aggregate two AS_PATH attributes works as + follows: + + a) Identify the same ASes (as defined above) within each + AS_PATH attribute that are in the same relative order within + both AS_PATH attributes. 
Two ASes, X and Y, are said to be + in the same order if either: + + + +Rekhter, et al. Standards Track [Page 96] + +RFC 4271 BGP-4 January 2006 + + + - X precedes Y in both AS_PATH attributes, or + - Y precedes X in both AS_PATH attributes. + + b) The aggregated AS_PATH attribute consists of ASes identified + in (a), in exactly the same order as they appear in the + AS_PATH attributes to be aggregated. If two consecutive + ASes identified in (a) do not immediately follow each other + in both of the AS_PATH attributes to be aggregated, then the + intervening ASes (ASes that are between the two consecutive + ASes that are the same) in both attributes are combined into + an AS_SET path segment that consists of the intervening ASes + from both AS_PATH attributes. This segment is then placed + between the two consecutive ASes identified in (a) of the + aggregated attribute. If two consecutive ASes identified in + (a) immediately follow each other in one attribute, but do + not follow in another, then the intervening ASes of the + latter are combined into an AS_SET path segment. This + segment is then placed between the two consecutive ASes + identified in (a) of the aggregated attribute. + + c) For each pair of adjacent tuples in the aggregated AS_PATH, + if both tuples have the same type, merge them together if + doing so will not cause a segment of a length greater than + 255 to be generated. + + If, as a result of the above procedure, a given AS number appears + more than once within the aggregated AS_PATH attribute, all but + the last instance (rightmost occurrence) of that AS number should + be removed from the aggregated AS_PATH attribute. + +Security Considerations + + A BGP implementation MUST support the authentication mechanism + specified in RFC 2385 [RFC2385]. The authentication provided by this + mechanism could be done on a per-peer basis. + + BGP makes use of TCP for reliable transport of its traffic between + peer routers. 
To provide connection-oriented integrity and data + origin authentication on a point-to-point basis, BGP specifies use of + the mechanism defined in RFC 2385. These services are intended to + detect and reject active wiretapping attacks against the inter-router + TCP connections. Absent the use of mechanisms that effect these + security services, attackers can disrupt these TCP connections and/or + masquerade as a legitimate peer router. Because the mechanism + defined in the RFC does not provide peer-entity authentication, these + connections may be subject to some forms of replay attacks that will + not be detected at the TCP layer. Such attacks might result in + delivery (from TCP) of "broken" or "spoofed" BGP messages. + + + +Rekhter, et al. Standards Track [Page 97] + +RFC 4271 BGP-4 January 2006 + + + The mechanism defined in RFC 2385 augments the normal TCP checksum + with a 16-byte message authentication code (MAC) that is computed + over the same data as the TCP checksum. This MAC is based on a one- + way hash function (MD5) and use of a secret key. The key is shared + between peer routers and is used to generate MAC values that are not + readily computed by an attacker who does not have access to the key. + A compliant implementation must support this mechanism, and must + allow a network administrator to activate it on a per-peer basis. + + RFC 2385 does not specify a means of managing (e.g., generating, + distributing, and replacing) the keys used to compute the MAC. RFC + 3562 [RFC3562] (an informational document) provides some guidance in + this area, and provides rationale to support this guidance. It notes + that a distinct key should be used for communication with each + protected peer. If the same key is used for multiple peers, the + offered security services may be degraded, e.g., due to an increased + risk of compromise at one router that adversely affects other + routers. 
+ + The keys used for MAC computation should be changed periodically, to + minimize the impact of a key compromise or successful cryptanalytic + attack. RFC 3562 suggests a crypto period (the interval during which + a key is employed) of, at most, 90 days. More frequent key changes + reduce the likelihood that replay attacks (as described above) will + be feasible. However, absent a standard mechanism for effecting such + changes in a coordinated fashion between peers, one cannot assume + that BGP-4 implementations complying with this RFC will support + frequent key changes. + + Obviously, each key should also be chosen to be difficult for an + attacker to guess. The techniques specified in RFC 1750 for random + number generation provide a guide for generation of values that could + be used as keys. RFC 2385 calls for implementations to support keys + "composed of a string of printable ASCII of 80 bytes or less." RFC + 3562 suggests keys used in this context be 12 to 24 bytes of random + (pseudo-random) bits. This is fairly consistent with suggestions for + analogous MAC algorithms, which typically employ keys in the range of + 16 to 20 bytes. To provide enough random bits at the low end of this + range, RFC 3562 also observes that a typical ASCII text string would + have to be close to the upper bound for the key length specified in + RFC 2385. + + BGP vulnerabilities analysis is discussed in [RFC4272]. + + + + + + + + +Rekhter, et al. Standards Track [Page 98] + +RFC 4271 BGP-4 January 2006 + + +IANA Considerations + + All the BGP messages contain an 8-bit message type, for which IANA + has created and is maintaining a registry entitled "BGP Message + Types".
This document defines the following message types: + + Name Value Definition + ---- ----- ---------- + OPEN 1 See Section 4.2 + UPDATE 2 See Section 4.3 + NOTIFICATION 3 See Section 4.5 + KEEPALIVE 4 See Section 4.4 + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. + + The BGP UPDATE messages may carry one or more Path Attributes, where + each Attribute contains an 8-bit Attribute Type Code. IANA is + already maintaining such a registry, entitled "BGP Path Attributes". + This document defines the following Path Attributes Type Codes: + + Name Value Definition + ---- ----- ---------- + ORIGIN 1 See Section 5.1.1 + AS_PATH 2 See Section 5.1.2 + NEXT_HOP 3 See Section 5.1.3 + MULTI_EXIT_DISC 4 See Section 5.1.4 + LOCAL_PREF 5 See Section 5.1.5 + ATOMIC_AGGREGATE 6 See Section 5.1.6 + AGGREGATOR 7 See Section 5.1.7 + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. + + The BGP NOTIFICATION message carries an 8-bit Error Code, for which + IANA has created and is maintaining a registry entitled "BGP Error + Codes". This document defines the following Error Codes: + + Name Value Definition + ------------ ----- ---------- + Message Header Error 1 Section 6.1 + OPEN Message Error 2 Section 6.2 + UPDATE Message Error 3 Section 6.3 + Hold Timer Expired 4 Section 6.5 + Finite State Machine Error 5 Section 6.6 + Cease 6 Section 6.7 + + + +Rekhter, et al. Standards Track [Page 99] + +RFC 4271 BGP-4 January 2006 + + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. 
+ + The BGP NOTIFICATION message carries an 8-bit Error Subcode, where + each Subcode has to be defined within the context of a particular + Error Code, and thus has to be unique only within that context. + + IANA has created and is maintaining a set of registries, "Error + Subcodes", with a separate registry for each BGP Error Code. Future + assignments are to be made using either the Standards Action process + defined in [RFC2434], or the Early IANA Allocation process defined in + [RFC4020]. Assignments consist of a name and the value. + + This document defines the following Message Header Error subcodes: + + Name Value Definition + -------------------- ----- ---------- + Connection Not Synchronized 1 See Section 6.1 + Bad Message Length 2 See Section 6.1 + Bad Message Type 3 See Section 6.1 + + This document defines the following OPEN Message Error subcodes: + + Name Value Definition + -------------------- ----- ---------- + Unsupported Version Number 1 See Section 6.2 + Bad Peer AS 2 See Section 6.2 + Bad BGP Identifier 3 See Section 6.2 + Unsupported Optional Parameter 4 See Section 6.2 + [Deprecated] 5 See Appendix A + Unacceptable Hold Time 6 See Section 6.2 + + This document defines the following UPDATE Message Error subcodes: + + Name Value Definition + -------------------- --- ---------- + Malformed Attribute List 1 See Section 6.3 + Unrecognized Well-known Attribute 2 See Section 6.3 + Missing Well-known Attribute 3 See Section 6.3 + Attribute Flags Error 4 See Section 6.3 + Attribute Length Error 5 See Section 6.3 + Invalid ORIGIN Attribute 6 See Section 6.3 + [Deprecated] 7 See Appendix A + Invalid NEXT_HOP Attribute 8 See Section 6.3 + Optional Attribute Error 9 See Section 6.3 + Invalid Network Field 10 See Section 6.3 + Malformed AS_PATH 11 See Section 6.3 + + + +Rekhter, et al. Standards Track [Page 100] + +RFC 4271 BGP-4 January 2006 + + +Normative References + + [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, September + 1981. 
+ + [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC + 793, September 1981. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2385] Heffernan, A., "Protection of BGP Sessions via the TCP MD5 + Signature Option", RFC 2385, August 1998. + + [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October 1998. + +Informative References + + [RFC904] Mills, D., "Exterior Gateway Protocol formal + specification", RFC 904, April 1984. + + [RFC1092] Rekhter, J., "EGP and policy based routing in the new + NSFNET backbone", RFC 1092, February 1989. + + [RFC1093] Braun, H., "NSFNET routing architecture", RFC 1093, + February 1989. + + [RFC1105] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol + (BGP)", RFC 1105, June 1989. + + [RFC1163] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol + (BGP)", RFC 1163, June 1990. + + [RFC1267] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol 3 + (BGP-3)", RFC 1267, October 1991. + + [RFC1771] Rekhter, Y. and T. Li, "A Border Gateway Protocol 4 (BGP- + 4)", RFC 1771, March 1995. + + [RFC1772] Rekhter, Y. and P. Gross, "Application of the Border + Gateway Protocol in the Internet", RFC 1772, March 1995. + + [RFC1518] Rekhter, Y. and T. Li, "An Architecture for IP Address + Allocation with CIDR", RFC 1518, September 1993. + + + + + +Rekhter, et al. Standards Track [Page 101] + +RFC 4271 BGP-4 January 2006 + + + [RFC1519] Fuller, V., Li, T., Yu, J., and K. Varadhan, "Classless + Inter-Domain Routing (CIDR): an Address Assignment and + Aggregation Strategy", RFC 1519, September 1993. + + [RFC1930] Hawkinson, J. and T. Bates, "Guidelines for creation, + selection, and registration of an Autonomous System (AS)", + BCP 6, RFC 1930, March 1996. + + [RFC1997] Chandra, R., Traina, P., and T. Li, "BGP Communities + Attribute", RFC 1997, August 1996. 
+ + [RFC2439] Villamizar, C., Chandra, R., and R. Govindan, "BGP Route + Flap Damping", RFC 2439, November 1998. + + [RFC2474] Nichols, K., Blake, S., Baker, F., and D. Black, + "Definition of the Differentiated Services Field (DS Field) + in the IPv4 and IPv6 Headers", RFC 2474, December 1998. + + [RFC2796] Bates, T., Chandra, R., and E. Chen, "BGP Route Reflection + - An Alternative to Full Mesh IBGP", RFC 2796, April 2000. + + [RFC2858] Bates, T., Rekhter, Y., Chandra, R., and D. Katz, + "Multiprotocol Extensions for BGP-4", RFC 2858, June 2000. + + [RFC3392] Chandra, R. and J. Scudder, "Capabilities Advertisement + with BGP-4", RFC 3392, November 2002. + + [RFC2918] Chen, E., "Route Refresh Capability for BGP-4", RFC 2918, + September 2000. + + [RFC3065] Traina, P., McPherson, D., and J. Scudder, "Autonomous + System Confederations for BGP", RFC 3065, February 2001. + + [RFC3562] Leech, M., "Key Management Considerations for the TCP MD5 + Signature Option", RFC 3562, July 2003. + + [IS10747] "Information Processing Systems - Telecommunications and + Information Exchange between Systems - Protocol for + Exchange of Inter-domain Routeing Information among + Intermediate Systems to Support Forwarding of ISO 8473 + PDUs", ISO/IEC IS10747, 1993. + + [RFC4272] Murphy, S., "BGP Security Vulnerabilities Analysis", RFC + 4272, January 2006 + + [RFC4020] Kompella, K. and A. Zinin, "Early IANA Allocation of + Standards Track Code Points", BCP 100, RFC 4020, February + 2005. + + + +Rekhter, et al. Standards Track [Page 102] + +RFC 4271 BGP-4 January 2006 + + +Editors' Addresses + + Yakov Rekhter + Juniper Networks + + EMail: yakov@juniper.net + + + Tony Li + + EMail: tony.li@tony.li + + + Susan Hares + NextHop Technologies, Inc. + 825 Victors Way + Ann Arbor, MI 48108 + + Phone: (734)222-1610 + EMail: skh@nexthop.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Rekhter, et al. 
Standards Track [Page 103] + +RFC 4271 BGP-4 January 2006 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2006). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. 
+ +Acknowledgement + + Funding for the RFC Editor function is provided by the IETF + Administrative Support Activity (IASA). + + + + + + + +Rekhter, et al. Standards Track [Page 104] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt new file mode 100644 index 0000000..d4ac8a7 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt @@ -0,0 +1,1963 @@ + + + + + + +Network Working Group V. Cerf +Request for Comments: 4838 Google/Jet Propulsion Laboratory +Category: Informational S. Burleigh + A. Hooke + L. Torgerson + NASA/Jet Propulsion Laboratory + R. Durst + K. Scott + The MITRE Corporation + K. Fall + Intel Corporation + H. Weiss + SPARTA, Inc. + April 2007 + + + Delay-Tolerant Networking Architecture + +Status of This Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The IETF Trust (2007). + +IESG Note + + This RFC is a product of the Internet Research Task Force and is not + a candidate for any level of Internet Standard. The IRTF publishes + the results of Internet-related research and development activities. + These results might not be suitable for deployment on the public + Internet. + +Abstract + + This document describes an architecture for delay-tolerant and + disruption-tolerant networks, and is an evolution of the architecture + originally designed for the Interplanetary Internet, a communication + system envisioned to provide Internet-like services across + interplanetary distances in support of deep space exploration. This + document describes an architecture that addresses a variety of + problems with internetworks having operational and performance + characteristics that make conventional (Internet-like) networking + approaches either unworkable or impractical. 
We define a message- + oriented overlay that exists above the transport (or other) layers of + + + +Cerf, et al. Informational [Page 1] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + the networks it interconnects. The document presents a motivation + for the architecture, an architectural overview, review of state + management required for its operation, and a discussion of + application design issues. This document represents the consensus of + the IRTF DTN research group and has been widely reviewed by that + group. + +Table of Contents + + 1. Introduction ....................................................3 + 2. Why an Architecture for Delay-Tolerant Networking? ..............4 + 3. DTN Architectural Description ...................................5 + 3.1. Virtual Message Switching Using Store-and-Forward + Operation ..................................................5 + 3.2. Nodes and Endpoints ........................................7 + 3.3. Endpoint Identifiers (EIDs) and Registrations ..............8 + 3.4. Anycast and Multicast .....................................10 + 3.5. Priority Classes ..........................................10 + 3.6. Postal-Style Delivery Options and Administrative Records ..11 + 3.7. Primary Bundle Fields .....................................15 + 3.8. Routing and Forwarding ....................................16 + 3.9. Fragmentation and Reassembly ..............................18 + 3.10. Reliability and Custody Transfer .........................19 + 3.11. DTN Support for Proxies and Application Layer Gateways ...21 + 3.12. Timestamps and Time Synchronization ......................22 + 3.13. Congestion and Flow Control at the Bundle Layer ..........22 + 3.14. Security .................................................23 + 4. State Management Considerations ................................25 + 4.1. Application Registration State ............................25 + 4.2. 
Custody Transfer State ....................................26 + 4.3. Bundle Routing and Forwarding State .......................26 + 4.4. Security-Related State ....................................27 + 4.5. Policy and Configuration State ............................27 + 5. Application Structuring Issues .................................28 + 6. Convergence Layer Considerations for Use of Underlying + Protocols ......................................................28 + 7. Summary ........................................................29 + 8. Security Considerations ........................................29 + 9. IANA Considerations ............................................30 + 10. Normative References ..........................................30 + 11. Informative References ........................................30 + 12. Acknowledgments ...............................................32 + + + + + + + + + +Cerf, et al. Informational [Page 2] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +1. Introduction + + This document describes an architecture for delay and disruption- + tolerant interoperable networking (DTN). The architecture embraces + the concepts of occasionally-connected networks that may suffer from + frequent partitions and that may be comprised of more than one + divergent set of protocols or protocol families. The basis for this + architecture lies with that of the Interplanetary Internet, which + focused primarily on the issue of deep space communication in high- + delay environments. We expect the DTN architecture described here to + be utilized in various operational environments, including those + subject to disruption and disconnection and those with high-delay; + the case of deep space is one specialized example of these, and is + being pursued as a specialization of this architecture (See [IPN01] + and [SB03] for more details). 
+ + Other networks to which we believe this architecture applies include + sensor-based networks using scheduled intermittent connectivity, + terrestrial wireless networks that cannot ordinarily maintain end-to- + end connectivity, satellite networks with moderate delays and + periodic connectivity, and underwater acoustic networks with moderate + delays and frequent interruptions due to environmental factors. A + DTN tutorial [FW03], aimed at introducing DTN and the types of + networks for which it is designed, is available to introduce new + readers to the fundamental concepts and motivation. More technical + descriptions may be found in [KF03], [JFP04], [JDPF05], and [WJMF05]. + + We define an end-to-end message-oriented overlay called the "bundle + layer" that exists at a layer above the transport (or other) layers + of the networks on which it is hosted and below applications. + Devices implementing the bundle layer are called DTN nodes. The + bundle layer forms an overlay that employs persistent storage to help + combat network interruption. It includes a hop-by-hop transfer of + reliable delivery responsibility and optional end-to-end + acknowledgement. It also includes a number of diagnostic and + management features. For interoperability, it uses a flexible naming + scheme (based on Uniform Resource Identifiers [RFC3986]) capable of + encapsulating different naming and addressing schemes in the same + overall naming syntax. It also has a basic security model, + optionally enabled, aimed at protecting infrastructure from + unauthorized use. + + The bundle layer provides functionality similar to the internet layer + of gateways described in the original ARPANET/Internet designs + [CK74]. It differs from ARPANET gateways, however, because it is + layer-agnostic and is focused on virtual message forwarding rather + than packet switching. However, both generally provide + interoperability between underlying protocols specific to one + + + +Cerf, et al. 
Informational [Page 3] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + environment and those protocols specific to another, and both provide + a store-and-forward forwarding service (with the bundle layer + employing persistent storage for its store and forward function). + + In a sense, the DTN architecture provides a common method for + interconnecting heterogeneous gateways or proxies that employ store- + and-forward message routing to overcome communication disruptions. + It provides services similar to electronic mail, but with enhanced + naming, routing, and security capabilities. Nodes unable to support + the full capabilities required by this architecture may be supported + by application-layer proxies acting as DTN applications. + +2. Why an Architecture for Delay-Tolerant Networking? + + Our motivation for pursuing an architecture for delay tolerant + networking stems from several factors. These factors are summarized + below; much more detail on their rationale can be explored in [SB03], + [KF03], and [DFS02]. 
+ + The existing Internet protocols do not work well for some + environments, due to some fundamental assumptions built into the + Internet architecture: + + - that an end-to-end path between source and destination exists for + the duration of a communication session + + - (for reliable communication) that retransmission based on timely + and stable feedback from data receivers is an effective means for + repairing errors + + - that end-to-end loss is relatively small + + - that all routers and end stations support the TCP/IP protocols + + - that applications need not worry about communication performance + + - that endpoint-based security mechanisms are sufficient for meeting + most security concerns + + - that packet switching is the most appropriate abstraction for + interoperability and performance + + - that selecting a single route between sender and receiver is + sufficient for achieving acceptable communication performance + + The DTN architecture is conceived to relax most of these assumptions, + based on a number of design principles that are summarized here (and + further discussed in [KF03]): + + + +Cerf, et al. Informational [Page 4] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + - Use variable-length (possibly long) messages (not streams or + limited-sized packets) as the communication abstraction to help + enhance the ability of the network to make good scheduling/path + selection decisions when possible. + + - Use a naming syntax that supports a wide range of naming and + addressing conventions to enhance interoperability. + + - Use storage within the network to support store-and-forward + operation over multiple paths, and over potentially long timescales + (i.e., to support operation in environments where many and/or no + end-to-end paths may ever exist); do not require end-to-end + reliability.
+ + - Provide security mechanisms that protect the infrastructure from + unauthorized use by discarding traffic as quickly as possible. + + - Provide coarse-grained classes of service, delivery options, and a + way to express the useful lifetime of data to allow the network to + better deliver data in serving the needs of applications. + + The use of the bundle layer is guided not only by its own design + principles, but also by a few application design principles: + + - Applications should minimize the number of round-trip exchanges. + + - Applications should cope with restarts after failure while network + transactions remain pending. + + - Applications should inform the network of the useful life and + relative importance of data to be delivered. + + These issues are discussed in further detail in Section 5. + +3. DTN Architectural Description + + The previous section summarized the design principles that guide the + definition of the DTN architecture. This section presents a + description of the major features of the architecture resulting from + design decisions guided by the aforementioned design principles. + +3.1. Virtual Message Switching Using Store-and-Forward Operation + + A DTN-enabled application sends messages of arbitrary length, also + called Application Data Units or ADUs [CT90], which are subject to + any implementation limitations. The relative order of ADUs might not + be preserved. ADUs are typically sent by and delivered to + + + + +Cerf, et al. Informational [Page 5] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + applications in complete units, although a system interface that + behaves differently is not precluded. + + ADUs are transformed by the bundle layer into one or more protocol + data units called "bundles", which are forwarded by DTN nodes. + Bundles have a defined format containing two or more "blocks" of + data. 
Each block may contain either application data or other + information used to deliver the containing bundle to its + destination(s). Blocks serve the purpose of holding information + typically found in the header or payload portion of protocol data + units in other protocol architectures. The term "block" is used + instead of "header" because blocks may not appear at the beginning of + a bundle due to particular processing requirements (e.g., digital + signatures). + + Bundles may be split up ("fragmented") into multiple constituent + bundles (also called "fragments" or "bundle fragments") during + transmission. Fragments are themselves bundles, and may be further + fragmented. Two or more fragments may be reassembled anywhere in the + network, forming a new bundle. + + Bundle sources and destinations are identified by (variable-length) + Endpoint Identifiers (EIDs, described below), which identify the + original sender and final destination(s) of bundles, respectively. + Bundles also contain a "report-to" EID used when special operations + are requested to direct diagnostic output to an arbitrary entity + (e.g., other than the source). An EID may refer to one or more DTN + nodes (i.e., for multicast destinations or "report-to" destinations). + + While IP networks are based on "store-and-forward" operation, there + is an assumption that the "storing" will not persist for more than a + modest amount of time, on the order of the queuing and transmission + delay. In contrast, the DTN architecture does not expect that + network links are always available or reliable, and instead expects + that nodes may choose to store bundles for some time. We anticipate + that most DTN nodes will use some form of persistent storage for this + -- disk, flash memory, etc. -- and that stored bundles will survive + system restarts. + + Bundles contain an originating timestamp, useful life indicator, a + class of service designator, and a length. 
This information provides + bundle-layer routing with a priori knowledge of the size and + performance requirements of requested data transfers. When there is + a significant amount of queuing that can occur in the network (as is + the case in the DTN version of store-and-forward), the advantage + provided by knowing this information may be significant for making + scheduling and path selection decisions [JFP04]. An alternative + abstraction (i.e., of stream-based delivery based on packets) would + + + +Cerf, et al. Informational [Page 6] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + make such scheduling much more difficult. Although packets provide + some of the same benefits as bundles, larger aggregates provide a way + for the network to apply scheduling and buffer management to units of + data that are more useful to applications. + + An essential element of the bundle-based style of forwarding is that + bundles have a place to wait in a queue until a communication + opportunity ("contact") is available. This highlights the following + assumptions: + + 1. that storage is available and well-distributed throughout the + network, + + 2. that storage is sufficiently persistent and robust to store + bundles until forwarding can occur, and + + 3. (implicitly) that this "store-and-forward" model is a better + choice than attempting to effect continuous connectivity or other + alternatives. + + For a network to effectively support the DTN architecture, these + assumptions must be considered and must be found to hold. Even so, + the inclusion of long-term storage as a fundamental aspect of the DTN + architecture poses new problems, especially with respect to + congestion management and denial-of-service mitigation. Node storage + in essence represents a new resource that must be managed and + protected. Much of the research in DTN revolves around exploring + these issues. 
Congestion is discussed in Section 3.13, and security + mechanisms, including methods for DTN nodes to protect themselves + from handling unauthorized traffic from other nodes, are discussed in + [DTNSEC] and [DTNSOV]. + +3.2. Nodes and Endpoints + + A DTN node (or simply "node" in this document) is an engine for + sending and receiving bundles -- an implementation of the bundle + layer. Applications utilize DTN nodes to send or receive ADUs + carried in bundles (applications also use DTN nodes when acting as + report-to destinations for diagnostic information carried in + bundles). Nodes may be members of groups called "DTN endpoints". A + DTN endpoint is therefore a set of DTN nodes. A bundle is considered + to have been successfully delivered to a DTN endpoint when some + minimum subset of the nodes in the endpoint has received the bundle + without error. This subset is called the "minimum reception group" + (MRG) of the endpoint. The MRG of an endpoint may refer to one node + (unicast), one of a group of nodes (anycast), or all of a group of + nodes (multicast and broadcast). A single node may be in the MRG of + multiple endpoints. + + + +Cerf, et al. Informational [Page 7] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.3. Endpoint Identifiers (EIDs) and Registrations + + An Endpoint Identifier (EID) is a name, expressed using the general + syntax of URIs (see below), that identifies a DTN endpoint. Using an + EID, a node is able to determine the MRG of the DTN endpoint named by + the EID. Each node is also required to have at least one EID that + uniquely identifies it. + + Applications send ADUs destined for an EID, and may arrange for ADUs + sent to a particular EID to be delivered to them. Depending on the + construction of the EID being used (see below), there may be a + provision for wildcarding some portion of an EID, which is often + useful for diagnostic and routing purposes. 
+ + An application's desire to receive ADUs destined for a particular EID + is called a "registration", and in general is maintained persistently + by a DTN node. This allows application registration information to + survive application and operating system restarts. + + An application's attempt to establish a registration is not + guaranteed to succeed. For example, an application could request to + register itself to receive ADUs by specifying an Endpoint ID that is + uninterpretable or unavailable to the DTN node servicing the request. + Such requests are likely to fail. + +3.3.1. URI Schemes + + Each Endpoint ID is expressed syntactically as a Uniform Resource + Identifier (URI) [RFC3986]. The URI syntax has been designed as a + way to express names or addresses for a wide range of purposes, and + is therefore useful for constructing names for DTN endpoints. + + In URI terminology, each URI begins with a scheme name. The scheme + name is an element of the set of globally-managed scheme names + maintained by IANA [ISCHEMES]. Lexically following the scheme name + in a URI is a series of characters constrained by the syntax defined + by the scheme. This portion of the URI is called the scheme-specific + part (SSP), and can be quite general. (See, as one example, the URI + scheme for SNMP [RFC4088]). Note that scheme-specific syntactical + and semantic restrictions may be more constraining than the basic + rules of RFC 3986. Section 3.1 of RFC 3986 provides guidance on the + syntax of scheme names. + + URI schemes are a key concept in the DTN architecture, and evolved + from an earlier concept called regions, which were tied more closely + to assumptions of the network topology. Using URIs, significant + flexibility is attained in the structuring of EIDs. They might, for + example, be constructed based on DNS names, or might look like + + + +Cerf, et al. 
Informational [Page 8] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + "expressions of interest" or forms of database-like queries as in a + directed diffusion-routed network [IGE00] or in intentional naming + [WSBL99]. As names, EIDs are not required to be related to routing + or topological organization. Such a relationship is not prohibited, + however, and in some environments using EIDs this way may be + advantageous. + + A single EID may refer to an endpoint containing more than one DTN + node, as suggested above. It is the responsibility of a scheme + designer to define how to interpret the SSP of an EID so as to + determine whether it refers to a unicast, multicast, or anycast set + of nodes. See Section 3.4 for more details. + + URIs are constructed based on rules specified in RFC 3986, using the + US-ASCII character set. However, note this excerpt from RFC 3986, + Section 1.2.1, on dealing with characters that cannot be represented + by US-ASCII: "Percent-encoded octets (Section 2.1) may be used + within a URI to represent characters outside the range of the US- + ASCII coded character set if this representation is allowed by the + scheme or by the protocol element in which the URI is referenced. + Such a definition should specify the character encoding used to map + those characters to octets prior to being percent-encoded for the + URI". + +3.3.2. Late Binding + + Binding means interpreting the SSP of an EID for the purpose of + carrying an associated message towards a destination. For example, + binding might require mapping an EID to a next-hop EID or to a lower- + layer address for transmission. "Late binding" means that the + binding of a bundle's destination to a particular set of destination + identifiers or addresses does not necessarily happen at the bundle + source. Because the destination EID is potentially re-interpreted at + each hop, the binding may occur at the source, during transit, or + possibly at the destination(s). 
This contrasts with the name-to- + address binding of Internet communications where a DNS lookup at the + source fixes the IP address of the destination node before data is + sent. Such a circumstance would be considered "early binding" + because the name-to-address translation is performed prior to data + being sent into the network. + + In a frequently-disconnected network, late binding may be + advantageous because the transit time of a message may exceed the + validity time of a binding, making binding at the source impossible + or invalid. Furthermore, use of name-based routing with late binding + may reduce the amount of administrative (mapping) information that + + + + + +Cerf, et al. Informational [Page 9] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + must propagate through the network, and may also limit the scope of + mapping synchronization requirements to a local topological + neighborhood where changes are made. + +3.4. Anycast and Multicast + + As mentioned above, an EID may refer to an endpoint containing one or + more DTN nodes. When referring to a group of size greater than one, + the delivery semantics may be of either the anycast or multicast + variety (broadcast is considered to be of the multicast variety). + For anycast group delivery, a bundle is delivered to one node among a + group of potentially many nodes, and for multicast delivery it is + intended to be delivered to all of them, subject to the normal DTN + class of service and maximum useful lifetime semantics. + + Multicast group delivery in a DTN presents an unfamiliar issue with + respect to group membership. In relatively low-delay networks, such + as the Internet, nodes may be considered to be part of the group if + they have expressed interest to join it "recently". In a DTN, + however, nodes may wish to receive data sent to a group during an + interval of time earlier than when they are actually able to receive + it [ZAZ05]. 
More precisely, an application expresses its desire to + receive data sent to EID e at time t. Prior to this, during the + interval [t0, t1], t > t1, data may have been generated for group e. + For the application to receive any of this data, the data must be + available a potentially long time after senders have ceased sending + to the group. Thus, the data may need to be stored within the + network in order to support temporal group semantics of this kind. + How to design and implement this remains a research issue, as it is + likely to be at least as hard as problems related to reliable + multicast. + +3.5. Priority Classes + + The DTN architecture offers *relative* measures of priority (low, + medium, high) for delivering ADUs. These priorities differentiate + traffic based upon an application's desire to affect the delivery + urgency for ADUs, and are carried in bundle blocks generated by the + bundle layer based on information specified by the application. + + The (U.S. or similar) Postal Service provides a strong metaphor for + the priority classes offered by the forwarding abstraction offered by + the DTN architecture. Traffic is generally not interactive and is + often one-way. There are generally no strong guarantees of timely + delivery, yet there are some forms of class of service, reliability, + and security. + + + + + +Cerf, et al. Informational [Page 10] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + We have defined three relative priority classes to date. These + priority classes typically imply some relative scheduling + prioritization among bundles in queue at a sender: + + - Bulk - Bulk bundles are shipped on a "least effort" basis. No + bundles of this class will be shipped until all bundles of other + classes bound for the same destination and originating from the + same source have been shipped. + + - Normal - Normal-class bundles are shipped prior to any bulk-class + bundles and are otherwise the same as bulk bundles. 
+ + - Expedited - Expedited bundles, in general, are shipped prior to + bundles of other classes and are otherwise the same. + + Applications specify their requested priority class and data lifetime + (see below) for each ADU they send. This information, coupled with + policy applied at DTN nodes that select how messages are forwarded + and which routing algorithms are in use, affects the overall + likelihood and timeliness of ADU delivery. + + The priority class of a bundle is only required to relate to other + bundles from the same source. This means that a high priority bundle + from one source may not be delivered faster (or with some other + superior quality of service) than a medium priority bundle from a + different source. It does mean that a high priority bundle from one + source will be handled preferentially to a lower priority bundle sent + from the same source. + + Depending on a particular DTN node's forwarding/scheduling policy, + priority may or may not be enforced across different sources. That + is, in some DTN nodes, expedited bundles might always be sent prior + to any bulk bundles, irrespective of source. Many variations are + possible. + +3.6. Postal-Style Delivery Options and Administrative Records + + Continuing with the postal analogy, the DTN architecture supports + several delivery options that may be selected by an application when + it requests the transmission of an ADU. In addition, the + architecture defines two types of administrative records: "status + reports" and "signals". These records are bundles that provide + information about the delivery of other bundles, and are used in + conjunction with the delivery options. + + + + + + + +Cerf, et al. Informational [Page 11] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.6.1. Delivery Options + + We have defined eight delivery options. 
Applications sending an ADU + (the "subject ADU") may request any combination of the following, + which are carried in each of the bundles produced ("sent bundles") by + the bundle layer resulting from the application's request to send the + subject ADU: + + - Custody Transfer Requested - requests sent bundles be delivered + with enhanced reliability using custody transfer procedures. Sent + bundles will be transmitted by the bundle layer using reliable + transfer protocols (if available), and the responsibility for + reliable delivery of the bundle to its destination(s) may move + among one or more "custodians" in the network. This capability is + described in more detail in Section 3.10. + + - Source Node Custody Acceptance Required - requires the source DTN + node to provide custody transfer for the sent bundles. If custody + transfer is not available at the source when this delivery option + is requested, the requested transmission fails. This provides a + means for applications to insist that the source DTN node take + custody of the sent bundles (e.g., by storing them in persistent + storage). + + - Report When Bundle Delivered - requests a (single) Bundle Delivery + Status Report be generated when the subject ADU is delivered to its + intended recipient(s). This request is also known as "return- + receipt". + + - Report When Bundle Acknowledged by Application - requests an + Acknowledgement Status Report be generated when the subject ADU is + acknowledged by a receiving application. This only happens by + action of the receiving application, and differs from the Bundle + Delivery Status Report. It is intended for cases where the + application may be acting as a form of application layer gateway + and wishes to indicate the status of a protocol operation external + to DTN back to the requesting source. See Section 11 for more + details. 
+ + - Report When Bundle Received - requests a Bundle Reception Status + Report be generated when each sent bundle arrives at a DTN node. + This is designed primarily for diagnostic purposes. + + - Report When Bundle Custody Accepted - requests a Custody + Acceptance Status Report be generated when each sent bundle has + been accepted using custody transfer. This is designed primarily + for diagnostic purposes. + + + + +Cerf, et al. Informational [Page 12] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + - Report When Bundle Forwarded - requests a Bundle Forwarding Status + Report be generated when each sent bundle departs a DTN node after + forwarding. This is designed primarily for diagnostic purposes. + + - Report When Bundle Deleted - requests a Bundle Deletion Status + Report be generated when each sent bundle is deleted at a DTN node. + This is designed primarily for diagnostic purposes. + + The first four delivery options are designed for ordinary use by + applications. The last four are designed primarily for diagnostic + purposes and their use may be restricted or limited in environments + subject to congestion or attack. + + If the security procedures defined in [DTNSEC] are also enabled, then + three additional delivery options become available: + + - Confidentiality Required - requires the subject ADU be made secret + from parties other than the source and the members of the + destination EID. + + - Authentication Required - requires all non-mutable fields in the + bundle blocks of the sent bundles (i.e., those which do not change + as the bundle is forwarded) be made strongly verifiable (i.e., + cryptographically strong). This protects several fields, including + the source and destination EIDs and the bundle's data. See Section + 3.7 and [BSPEC] for more details. + + - Error Detection Required - requires modifications to the non- + mutable fields of each sent bundle be made detectable with high + probability at each destination. 
+ +3.6.2. Administrative Records: Bundle Status Reports and Custody + Signals + + Administrative records are used to report status information or error + conditions related to the bundle layer. There are two types of + administrative records defined: bundle status reports (BSRs) and + custody signals. Administrative records correspond (approximately) + to messages in the ICMP protocol in IP [RFC792]. In ICMP, however, + messages are returned to the source. In DTN, they are instead + directed to the report-to EID for BSRs and the EID of the current + custodian for custody signals, which might differ from the source's + EID. Administrative records are sent as bundles with a source EID + set to one of the EIDs associated with the DTN node generating the + administrative record. In some cases, arrival of a single bundle or + bundle fragment may elicit multiple administrative records (e.g., in + the case where a bundle is replicated for multicast forwarding). + + + + +Cerf, et al. Informational [Page 13] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + The following BSRs are currently defined (also see [BSPEC] for more + details): + + - Bundle Reception - sent when a bundle arrives at a DTN node. + Generation of this message may be limited by local policy. + + - Custody Acceptance - sent when a node has accepted custody of a + bundle with the Custody Transfer Requested option set. Generation + of this message may be limited by local policy. + + - Bundle Forwarded - sent when a bundle containing a Report When + Bundle Forwarded option departs from a DTN node after having been + forwarded. Generation of this message may be limited by local + policy. + + - Bundle Deletion - sent from a DTN node when a bundle containing a + Report When Bundle Deleted option is discarded. This can happen + for several reasons, such as expiration. 
Generation of this + message may be limited by local policy but is required in cases + where the deletion is performed by a bundle's current custodian. + + - Bundle Delivery - sent from a final recipient's (destination) node + when a complete ADU comprising sent bundles containing Report When + Bundle Delivered options is consumed by an application. + + - Acknowledged by application - sent from a final recipient's + (destination) node when a complete ADU comprising sent bundles + containing Application Acknowledgment options has been processed by + an application. This generally involves specific action on the + receiving application's part. + + In addition to the status reports, the custody signal is currently + defined to indicate the status of a custody transfer. These are sent + to the current-custodian EID contained in an arriving bundle: + + - Custody Signal - indicates that custody has been successfully + transferred. This signal appears as a Boolean indicator, and may + therefore indicate either a successful or a failed custody transfer + attempt. + + Administrative records must reference a received bundle. This is + accomplished by a method for uniquely identifying bundles based on a + transmission timestamp and sequence number discussed in Section 3.12. + + + + + + + + +Cerf, et al. Informational [Page 14] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.7. Primary Bundle Fields + + The bundles carried between and among DTN nodes obey a standard + bundle protocol specified in [BSPEC]. Here we provide an overview of + most of the fields carried with every bundle. The protocol is + designed with a mandatory primary block, an optional payload block + (which contains the ADU data itself), and a set of optional extension + blocks. Blocks may be cascaded in a way similar to extension headers + in IPv6. 
The following selected fields are all present in the + primary block, and therefore are present for every bundle and + fragment: + + - Creation Timestamp - a concatenation of the bundle's creation time + and a monotonically increasing sequence number such that the + creation timestamp is guaranteed to be unique for each ADU + originating from the same source. The creation timestamp is based + on the time-of-day an application requested an ADU to be sent (not + when the corresponding bundle(s) are sent into the network). DTN + nodes are assumed to have a basic time synchronization capability + (see Section 3.12). + + - Lifespan - the time-of-day at which the message is no longer + useful. If a bundle is stored in the network (including the + source's DTN node) when its lifespan is reached, it may be + discarded. The lifespan of a bundle is expressed as an offset + relative to its creation time. + + - Class of Service Flags - indicates the delivery options and + priority class for the bundle. Priority classes may be one of + bulk, normal, or expedited. See Section 3.6.1. + + - Source EID - EID of the source (the first sender). + + - Destination EID - EID of the destination (the final intended + recipient(s)). + + - Report-To Endpoint ID - an EID identifying where reports (return- + receipt, route-tracing functions) should be sent. This may or may + not identify the same endpoint as the Source EID. + + - Custodian EID - EID of the current custodian of a bundle (if any). + + The payload block indicates information about the contained payload + (e.g., its length) and the payload itself. In addition to the fields + found in the primary and payload blocks, each bundle may have fields + in additional blocks carried with each bundle. See [BSPEC] for + additional details. + + + + +Cerf, et al. Informational [Page 15] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.8. 
Routing and Forwarding + + The DTN architecture provides a framework for routing and forwarding + at the bundle layer for unicast, anycast, and multicast messages. + Because nodes in a DTN network might be interconnected using more + than one type of underlying network technology, a DTN network is best + described abstractly using a *multigraph* (a graph where vertices may + be interconnected with more than one edge). Edges in this graph are, + in general, time-varying with respect to their delay and capacity and + directional because of the possibility of one-way connectivity. When + an edge has zero capacity, it is considered to not be connected. + + Because edges in a DTN graph may have significant delay, it is + important to distinguish where time is measured when expressing an + edge's capacity or delay. We adopt the convention of expressing + capacity and delay as functions of time where time is measured at the + point where data is inserted into a network edge. For example, + consider an edge having capacity C(t) and delay D(t) at time t. If B + bits are placed in this edge at time t, they completely arrive by + time t + D(t) + (1/C(t))*B. We assume C(t) and D(t) do not change + significantly during the interval [t, t+D(t)+(1/C(t))*B]. + + Because edges may vary between positive and zero capacity, it is + possible to describe a period of time (interval) during which the + capacity is strictly positive, and the delay and capacity can be + considered to be constant [AF03]. This period of time is called a + "contact". In addition, the product of the capacity and the interval + is known as a contact's "volume". If contacts and their volumes are + known ahead of time, intelligent routing and forwarding decisions can + be made (optimally for small networks) [JFP04]. Optimally using a + contact's volume, however, requires the ability to divide large ADUs + and bundles into smaller routable units. This is provided by DTN + fragmentation (see Section 3.9). 
+ + When delivery paths through a DTN graph are lossy or contact + intervals and volumes are not known precisely ahead of time, routing + computations become especially challenging. How to handle these + situations is an active area of work in the (emerging) research area + of delay tolerant networking. + +3.8.1. Types of Contacts + + Contacts typically fall into one of several categories, based largely + on the predictability of their performance characteristics and + whether some action is required to bring them into existence. To + date, the following major types of contacts have been defined: + + + + + +Cerf, et al. Informational [Page 16] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + Persistent Contacts + + Persistent contacts are always available (i.e., no connection- + initiation action is required to instantiate a persistent + contact). An 'always-on' Internet connection such as a DSL or + Cable Modem connection would be a representative of this class. + + On-Demand Contacts + + On-Demand contacts require some action in order to instantiate, + but then function as persistent contacts until terminated. A + dial-up connection is an example of an On-Demand contact (at + least, from the viewpoint of the dialer; it may be viewed as an + Opportunistic Contact, below, from the viewpoint of the dial-up + service provider). + + Intermittent - Scheduled Contacts + + A scheduled contact is an agreement to establish a contact at a + particular time, for a particular duration. An example of a + scheduled contact is a link with a low-earth orbiting satellite. + A node's list of contacts with the satellite can be constructed + from the satellite's schedule of view times, capacities, and + latencies. Note that for networks with substantial delays, the + notion of the "particular time" is delay-dependent. 
For example, + a single scheduled contact between Earth and Mars would not be at + the same instant in each location, but would instead be offset by + the (non-negligible) propagation delay. + + Intermittent - Opportunistic Contacts + + Opportunistic contacts are not scheduled, but rather present + themselves unexpectedly. For example, an unscheduled aircraft + flying overhead and beaconing, advertising its availability for + communication, would present an opportunistic contact. Another + type of opportunistic contact might be via an infrared or + Bluetooth communication link between a personal digital assistant + (PDA) and a kiosk in an airport concourse. The opportunistic + contact begins as the PDA is brought near the kiosk, lasting an + undetermined amount of time (i.e., until the link is lost or + terminated). + + Intermittent - Predicted Contacts + + Predicted contacts are based on no fixed schedule, but rather are + predictions of likely contact times and durations based on a + history of previously observed contacts or some other information. + Given a great enough confidence in a predicted contact, routes may + + + +Cerf, et al. Informational [Page 17] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + be chosen based on this information. This is an active research + area, and a few approaches have been proposed [LFC05]. + +3.9. Fragmentation and Reassembly + + DTN fragmentation and reassembly are designed to improve the + efficiency of bundle transfers by ensuring that contact volumes are + fully utilized and by avoiding retransmission of partially-forwarded + bundles. There are two forms of DTN fragmentation/reassembly: + + Proactive Fragmentation + + A DTN node may divide a block of application data into multiple + smaller blocks and transmit each such block as an independent + bundle. 
In this case, the *final destination(s)* are responsible + for extracting the smaller blocks from incoming bundles and + reassembling them into the original larger bundle and, ultimately, + ADU. This approach is called proactive fragmentation because it + is used primarily when contact volumes are known (or predicted) in + advance. + + Reactive Fragmentation + + DTN nodes sharing an edge in the DTN graph may fragment a bundle + cooperatively when a bundle is only partially transferred. In + this case, the receiving bundle layer modifies the incoming bundle + to indicate it is a fragment, and forwards it normally. The + previous-hop sender may learn (via convergence-layer protocols, + see Section 6) that only a portion of the bundle was delivered to + the next hop, and send the remaining portion(s) when subsequent + contacts become available (possibly to different next-hops if + routing changes). This is called reactive fragmentation because + the fragmentation process occurs after an attempted transmission + has taken place. + + As an example, consider a ground station G, and two store-and- + forward satellites S1 and S2, in opposite low-earth orbit. While + G is transmitting a large bundle to S1, a reliable transport layer + protocol below the bundle layer at each indicates the transmission + has terminated, but that half the transfer has completed + successfully. In this case, G can form a smaller bundle fragment + consisting of the second half of the original bundle and forward + it to S2 when available. In addition, S1 (now out of range of G) + can form a new bundle consisting of the first half of the original + bundle and forward it to whatever next hop(s) it deems + appropriate. + + + + + +Cerf, et al. 
Informational [Page 18] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + The reactive fragmentation capability is not required to be available + in every DTN implementation, as it requires a certain level of + support from underlying protocols that may not be present, and + presents significant challenges with respect to handling digital + signatures and authentication codes on messages. When a signed + message is only partially received, most message authentication codes + will fail. When DTN security is present and enabled, it may + therefore be necessary to proactively fragment large bundles into + smaller units that are more convenient for digital signatures. + + Even if reactive fragmentation is not present in an implementation, + the ability to reassemble fragments at a destination is required in + order to support DTN fragmentation. Furthermore, for contacts with + volumes that are small compared to typical bundle sizes, some + incremental delivery approach must be used (e.g., checkpoint/restart) + to prevent data delivery livelock. Reactive fragmentation is one + such approach, but other protocol layers could potentially handle + this issue as well. + +3.10. Reliability and Custody Transfer + + The most basic service provided by the bundle layer is + unacknowledged, prioritized (but not guaranteed) unicast message + delivery. It also provides two options for enhancing delivery + reliability: end-to-end acknowledgments and custody transfer. + Applications wishing to implement their own end-to-end message + reliability mechanisms are free to utilize the acknowledgment. The + custody transfer feature of the DTN architecture only specifies a + coarse-grained retransmission capability, described next. + + Transmission of bundles with the Custody Transfer Requested option + specified generally involves moving the responsibility for reliable + delivery of an ADU's bundles among different DTN nodes in the + network. 
For unicast delivery, this will typically involve moving + bundles "closer" (in terms of some routing metric) to their ultimate + destination(s), and retransmitting when necessary. The nodes + receiving these bundles along the way (and agreeing to accept the + reliable delivery responsibility) are called "custodians". The + movement of a bundle (and its delivery responsibility) from one node + to another is called a "custody transfer". It is analogous to a + database commit transaction [FHM03]. The exact meaning and design of + custody transfer for multicast and anycast delivery remains to be + fully explored. + + Custody transfer allows the source to delegate retransmission + responsibility and recover its retransmission-related resources + relatively soon after sending a bundle (on the order of the minimum + round-trip time to the first bundle hop(s)). Not all nodes in a DTN + + + +Cerf, et al. Informational [Page 19] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + are required by the DTN architecture to accept custody transfers, so + it is not a true 'hop-by-hop' mechanism. For example, some nodes may + have sufficient storage resources to sometimes act as custodians, but + may elect to not offer such services when congested or running low on + power. + + The existence of custodians can alter the way DTN routing is + performed. In some circumstances, it may be beneficial to move a + bundle to a custodian as quickly as possible even if the custodian is + further away (in terms of distance, time or some routing metric) from + the bundle's final destination(s) than some other reachable node. + Designing a system with this capability involves constructing more + than one routing graph, and is an area of continued research. 
+ + Custody transfer in DTN not only provides a method for tracking + bundles that require special handling and identifying DTN nodes that + participate in custody transfer, it also provides a (weak) mechanism + for enhancing the reliability of message delivery. Generally + speaking, custody transfer relies on underlying reliable delivery + protocols of the networks that it operates over to provide the + primary means of reliable transfer from one bundle node to the next + (set). However, when custody transfer is requested, the bundle layer + provides an additional coarse-grained timeout and retransmission + mechanism and an accompanying (bundle-layer) custodian-to-custodian + acknowledgment signaling mechanism. When an application does *not* + request custody transfer, this bundle layer timeout and + retransmission mechanism is typically not employed, and successful + bundle layer delivery depends solely on the reliability mechanisms of + the underlying protocols. + + When a node accepts custody for a bundle that contains the Custody + Transfer Requested option, a Custody Transfer Accepted Signal is sent + by the bundle layer to the Current Custodian EID contained in the + primary bundle block. In addition, the Current Custodian EID is + updated to contain one of the forwarding node's (unicast) EIDs before + the bundle is forwarded. + + When an application requests an ADU to be delivered with custody + transfer, the request is advisory. In some circumstances, a source + of a bundle for which custody transfer has been requested may not be + able to provide this service. In such circumstances, the subject + bundle may traverse multiple DTN nodes before it obtains a custodian. + Bundles in this condition are specially marked with their Current + Custodian EID field set to a null endpoint. In cases where + applications wish to require the source to take custody of the + bundle, they may supply the Source Node Custody Acceptance Required + + + + + +Cerf, et al. 
Informational [Page 20] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + delivery option. This may be useful to applications that desire a + continuous "chain" of custody or that wish to exit after being + ensured their data is safely held in a custodian. + + In a DTN network where one or more custodian-to-custodian hops are + strictly one directional (and cannot be reversed), the DTN custody + transfer mechanism will be affected over such hops due to the lack of + any way to receive a custody signal (or any other information) back + across the path, resulting in the expiration of the bundle at the + ingress to the one-way hop. This situation does not necessarily mean + the bundle has been lost; nodes on the other side of the hop may + continue to transfer custody, and the bundle may be delivered + successfully to its destination(s). However, in this circumstance a + source that has requested to receive expiration BSRs for this bundle + will receive an expiration report for the bundle, and possibly + conclude (incorrectly) that the bundle has been discarded and not + delivered. Although this problem cannot be fully solved in this + situation, a mechanism is provided to help ameliorate the seemingly + incorrect information that may be reported when the bundle expires + after having been transferred over a one-way hop. This is + accomplished by the node at the ingress to the one-way hop reporting + the existence of a known one-way path using a variant of a bundle + status report. These types of reports are provided if the subject + bundle requests the report using the 'Report When Bundle Forwarded' + delivery option. + +3.11. DTN Support for Proxies and Application Layer Gateways + + One of the aims of DTN is to provide a common method for + interconnecting application layer gateways and proxies. 
In cases + where existing Internet applications can be made to tolerate delays, + local proxies can be constructed to benefit from the existing + communication capabilities provided by DTN [S05, T02]. Making such + proxies compatible with DTN reduces the burden on the proxy author + from being concerned with how to implement routing and reliability + management and allows existing TCP/IP-based applications to operate + unmodified over a DTN-based network. + + When DTN is used to provide a form of tunnel encapsulation for other + protocols, it can be used in constructing overlay networks comprised + of application layer gateways. The application acknowledgment + capability is designed for such circumstances. This provides a + common way for remote application layer gateways to signal the + success or failure of non-DTN protocol operations initiated as a + result of receiving DTN ADUs. Without this capability, such + indicators would have to be implemented by applications themselves in + non-standard ways. + + + + +Cerf, et al. Informational [Page 21] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.12. Timestamps and Time Synchronization + + The DTN architecture depends on time synchronization among DTN nodes + (supported by external, non-DTN protocols) for four primary purposes: + bundle and fragment identification, routing with scheduled or + predicted contacts, bundle expiration time computations, and + application registration expiration. + + Bundle identification and expiration are supported by placing a + creation timestamp and an explicit expiration field (expressed in + seconds after the source timestamp) in each bundle. The origination + timestamps on arriving bundles are made available to consuming + applications in ADUs they receive by some system interface function. + Each set of bundles corresponding to an ADU is required to contain a + timestamp unique to the sender's EID. 
The EID, timestamp, and data + offset/length information together uniquely identify a bundle. + Unique bundle identification is used for a number of purposes, + including custody transfer and reassembly of bundle fragments. + + Time is also used in conjunction with application registrations. + When an application expresses its desire to receive ADUs destined for + a particular EID, this registration is only maintained for a finite + period of time, and may be specified by the application. For + multicast registrations, an application may also specify a time range + or "interest interval" for its registration. In this case, traffic + sent to the specified EID any time during the specified interval will + eventually be delivered to the application (unless such traffic has + expired due to the expiration time provided by the application at the + source or some other reason prevents such delivery). + +3.13. Congestion and Flow Control at the Bundle Layer + + The subject of congestion control and flow control at the bundle + layer is one on which the authors of this document have not yet + reached complete consensus. We have unresolved concerns about the + efficiency and efficacy of congestion and flow control schemes + implemented across long and/or highly variable delay environments, + especially with the custody transfer mechanism that may require nodes + to retain bundles for long periods of time. + + For the purposes of this document, we define "flow control" as a + means of assuring that the average rate at which a sending node + transmits data to a receiving node does not exceed the average rate + at which the receiving node is prepared to receive data from that + sender. (Note that this is a generalized notion of flow control, + rather than one that applies only to end-to-end communication.) We + define "congestion control" as a means of assuring that the aggregate + rate at which all traffic sources inject data into a network does not + + + +Cerf, et al. 
Informational [Page 22] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + exceed the maximum aggregate rate at which the network can deliver + data to destination nodes over time. If flow control is propagated + backward from congested nodes toward traffic sources, then the flow + control mechanism can be used as at least a partial solution to the + problem of congestion as well. + + DTN flow control decisions must be made within the bundle layer + itself based on information about resources (in this case, primarily + persistent storage) available within the bundle node. When storage + resources become scarce, a DTN node has only a certain degree of + freedom in handling the situation. It can always discard bundles + which have expired -- an activity DTN nodes should perform regularly + in any case. If it ordinarily is willing to accept custody for + bundles, it can cease doing so. If storage resources are available + elsewhere in the network, it may be able to make use of them in some + way for bundle storage. It can also discard bundles which have not + expired but for which it has not accepted custody. A node must avoid + discarding bundles for which it has accepted custody, and do so only + as a last resort. Determining when a node should engage in or cease + to engage in custody transfers is a resource allocation and + scheduling problem of current research interest. + + In addition to the bundle layer mechanisms described above, a DTN + node may be able to avail itself of support from lower-layer + protocols in affecting its own resource utilization. For example, a + DTN node receiving a bundle using TCP/IP might intentionally slow + down its receiving rate by performing read operations less frequently + in order to reduce its offered load. This is possible because TCP + provides its own flow control, so reducing the application data + consumption rate could effectively implement a form of hop-by-hop + flow control. 
Unfortunately, it may also lead to head-of-line + blocking issues, depending on the nature of bundle multiplexing + within a TCP connection. A protocol with more relaxed ordering + constraints (e.g. SCTP [RFC2960]) might be preferable in such + circumstances. + + Congestion control is an ongoing research topic. + +3.14. Security + + The possibility of severe resource scarcity in some delay-tolerant + networks dictates that some form of authentication and access control + to the network itself is required in many circumstances. It is not + acceptable for an unauthorized user to flood the network with traffic + easily, possibly denying service to authorized users. In many cases + it is also not acceptable for unauthorized traffic to be forwarded + over certain network links at all. This is especially true for + exotic, mission-critical links. In light of these considerations, + + + +Cerf, et al. Informational [Page 23] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + several goals are established for the security component of the DTN + architecture: + + - Promptly prevent unauthorized applications from having their data + carried through or stored in the DTN. + + - Prevent unauthorized applications from asserting control over the + DTN infrastructure. + + - Prevent otherwise authorized applications from sending bundles at a + rate or class of service for which they lack permission. + + - Promptly discard bundles that are damaged or improperly modified in + transit. + + - Promptly detect and de-authorize compromised entities. + + Many existing authentication and access control protocols designed + for operation in low-delay, connected environments may not perform + well in DTNs. In particular, updating access control lists and + revoking ("blacklisting") credentials may be especially difficult. + Also, approaches that require frequent access to centralized servers + to complete an authentication or authorization transaction are not + attractive. 
The consequences of these difficulties include delays in + the onset of communication, delays in detecting and recovering from + system compromise, and delays in completing transactions due to + inappropriate access control or authentication settings. + + To help satisfy these security requirements in light of the + challenges, the DTN architecture adopts a standard but optionally + deployed security architecture [DTNSEC] that utilizes hop-by-hop and + end-to-end authentication and integrity mechanisms. The purpose of + using both approaches is to be able to handle access control for data + forwarding and storage separately from application-layer data + integrity. While the end-to-end mechanism provides authentication + for a principal such as a user (of which there may be many), the hop- + by-hop mechanism is intended to authenticate DTN nodes as legitimate + transceivers of bundles to each other. Note that it is conceivable + to construct a DTN in which only a subset of the nodes participate in + the security mechanisms, resulting in a secure DTN overlay existing + atop an insecure DTN overlay. This idea is relatively new and is + still being explored. + + In accordance with the goals listed above, DTN nodes discard traffic + as early as possible if authentication or access control checks fail. + This approach meets the goals of removing unwanted traffic from being + forwarded over specific high-value links, but also has the associated + benefit of making denial-of-service attacks considerably harder to + + + +Cerf, et al. Informational [Page 24] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + mount more generally, as compared with conventional Internet routers. + However, the obvious cost for this capability is potentially larger + computation and credential storage overhead required at DTN nodes. + + For more detailed information on DTN security provisions, refer to + [DTNSEC] and [DTNSOV]. + +4. 
State Management Considerations + + An important aspect of any networking architecture is its management + of state. This section describes the state managed at the bundle + layer and discusses how it is established and removed. + +4.1. Application Registration State + + In long/variable delay environments, an asynchronous application + interface seems most appropriate. Such interfaces typically include + methods for applications to register callback actions when certain + triggering events occur (e.g., when ADUs arrive). These + registrations create state information called application + registration state. + + Application registration state is typically created by explicit + request of the application, and is removed by a separate explicit + request, but may also be removed by an application-specified timer + (it is thus "firm" state). In most cases, there must be a provision + for retaining this state across application and operating system + termination/restart conditions because a client/server bundle round- + trip time may exceed the requesting application's execution time (or + hosting system's uptime). In cases where applications are not + automatically restarted but application registration state remains + persistent, a method must be provided to indicate to the system what + action to perform when the triggering event occurs (e.g., restarting + some application, ignoring the event, etc.). + + To initiate a registration and thereby establish application + registration state, an application specifies an Endpoint ID for which + it wishes to receive ADUs, along with an optional time value + indicating how long the registration should remain active. This + operation is somewhat analogous to the bind() operation in the common + sockets API. + + For registrations to groups (i.e., joins), a time interval may also + be specified. The time interval refers to the range of origination + times of ADUs sent to the specified EID. 
See Section 3.4 above for + more details. + + + + + +Cerf, et al. Informational [Page 25] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +4.2. Custody Transfer State + + Custody transfer state includes information required to keep account + of bundles for which a node has taken custody, as well as the + protocol state related to transferring custody for one or more of + them. The accounting-related state is created when a bundle is + received. Custody transfer retransmission state is created when a + transfer of custody is initiated by forwarding a bundle with the + custody transfer requested delivery option specified. Retransmission + state and accounting state may be released upon receipt of one or + more Custody Transfer Succeeded signals, indicating custody has been + moved. In addition, the bundle's expiration time (possibly mitigated + by local policy) provides an upper bound on the time when this state + is purged from the system in the event that it is not purged + explicitly due to receipt of a signal. + +4.3. Bundle Routing and Forwarding State + + As with the Internet architecture, we distinguish between routing and + forwarding. Routing refers to the execution of a (possibly + distributed) algorithm for computing routing paths according to some + objective function (see [JFP04], for example). Forwarding refers to + the act of moving a bundle from one DTN node to another. Routing + makes use of routing state (the RIB, or routing information base), + while forwarding makes use of state derived from routing, and is + maintained as forwarding state (the FIB, or forwarding information + base). The structure of the FIB and the rules for maintaining it are + implementation choices. In some DTNs, exchange of information used + to update state in the RIB may take place on network paths distinct + from those where exchange of application data takes place. 
+ + The maintenance of state in the RIB is dependent on the type of + routing algorithm being used. A routing algorithm may consider + requested class of service and the location of potential custodians + (for custody transfer, see section 3.10), and this information will + tend to increase the size of the RIB. The separation between FIB and + RIB is not required by this document, as these are implementation + details to be decided by system implementers. The choice of routing + algorithms is still under study. + + Bundles may occupy queues in nodes for a considerable amount of time. + For unicast or anycast delivery, the amount of time is likely to be + the interval between when a bundle arrives at a node and when it can + be forwarded to its next hop. For multicast delivery of bundles, + this could be significantly longer, up to a bundle's expiration time. + This situation occurs when multicast delivery is utilized in such a + way that nodes joining a group can obtain information previously sent + to the group. In such cases, some nodes may act as "archivers" that + + + +Cerf, et al. Informational [Page 26] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + provide copies of bundles to new participants that have already been + delivered to other participants. + +4.4. Security-Related State + + The DTN security approach described in [DTNSEC], when used, requires + maintenance of state in all DTN nodes that use it. All such nodes + are required to store their own private information (including their + own policy and authentication material) and a block of information + used to verify credentials. Furthermore, in most cases, DTN nodes + will cache some public information (and possibly the credentials) of + their next-hop (bundle) neighbors. 
All cached information has + expiration times, and nodes are responsible for acquiring and + distributing updates of public information and credentials prior to + the expiration of the old set (in order to avoid a disruption in + network service). + + In addition to basic end-to-end and hop-by-hop authentication, access + control may be used in a DTN by one or more mechanisms such as + capabilities or access control lists (ACLs). ACLs would represent + another block of state present in any node that wishes to enforce + security policy. ACLs are typically initialized at node + configuration time and may be updated dynamically by DTN bundles or + by some out of band technique. Capabilities or credentials may be + revoked, requiring the maintenance of a revocation list ("black + list", another form of state) to check for invalid authentication + material that has already been distributed. + + Some DTNs may implement security boundaries enforced by selected + nodes in the network, where end-to-end credentials may be checked in + addition to checking the hop-by-hop credentials. (Doing so may + require routing to be adjusted to ensure all bundles comprising each + ADU pass through these points.) Public information used to verify + end-to-end authentication will typically be cached at these points. + +4.5. Policy and Configuration State + + DTN nodes will contain some amount of configuration and policy + information. Such information may alter the behavior of bundle + forwarding. Examples of policy state include the types of + cryptographic algorithms and access control procedures to use if DTN + security is employed, whether nodes may become custodians, what types + of convergence layer (see Section 6) and routing protocols are in + use, how bundles of differing priorities should be scheduled, where + and for how long bundles and other data are stored, what status + reports may be generated or at what rate, etc. + + + + + +Cerf, et al. 
Informational [Page 27] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +5. Application Structuring Issues + + DTN bundle delivery is intended to operate in a delay-tolerant + fashion over a broad range of network types. This does not mean + there *must* be large delays in the network; it means there *may* be + very significant delays (including extended periods of disconnection + between sender and intended recipient(s)). The DTN protocols are + delay tolerant, so applications using them must also be delay + tolerant in order to operate effectively in environments subject to + significant delay or disruption. + + The communication primitives provided by the DTN architecture are + based on asynchronous, message-oriented communication which differs + from conversational request/response communication. In general, + applications should attempt to include enough information in an ADU + so that it may be treated as an independent unit of work by the + network and receiver(s). The goal is to minimize synchronous + interchanges between applications that are separated by a network + characterized by long and possibly highly variable delays. A single + file transfer request message, for example, might include + authentication information, file location information, and requested + file operation (thus "bundling" this information together). + Comparing this style of operation to a classic FTP transfer, one sees + that the bundled model can complete in one round trip, whereas an FTP + file "put" operation can take as many as eight round trips to get to + a point where file data can flow [DFS02]. + + Delay-tolerant applications must consider additional factors beyond + the conversational implications of long delay paths. For example, an + application may terminate (voluntarily or not) between the time it + sends a message and the time it expects a response. 
If this + possibility has been anticipated, the application can be "re- + instantiated" with state information saved in persistent storage. + This is an implementation issue, but also an application design + consideration. + + Some consideration of delay-tolerant application design can result in + applications that work reasonably well in low-delay environments, and + that do not suffer extraordinarily in high or highly-variable delay + environments. + +6. Convergence Layer Considerations for Use of Underlying Protocols + + Implementation experience with the DTN architecture has revealed an + important architectural construct and interface for DTN nodes + [DBFJHP04]. Not all underlying protocols in different protocol + families provide the same exact functionality, so some additional + adaptation or augmentation on a per-protocol or per-protocol-family + + + +Cerf, et al. Informational [Page 28] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + basis may be required. This adaptation is accomplished by a set of + convergence layers placed between the bundle layer and underlying + protocols. The convergence layers manage the protocol-specific + details of interfacing with particular underlying protocols and + present a consistent interface to the bundle layer. + + The complexity of one convergence layer may vary substantially from + another, depending on the type of underlying protocol it adapts. For + example, a TCP/IP convergence layer for use in the Internet might + only have to add message boundaries to TCP streams, whereas a + convergence layer for some network where no reliable transport + protocol exists might be considerably more complex (e.g., it might + have to implement reliability, fragmentation, flow-control, etc.) if + reliable delivery is to be offered to the bundle layer. 
+ + As convergence layers implement protocols above and beyond the basic + bundle protocol specified in [BSPEC], they will be defined in their + own documents (in a fashion similar to the way encapsulations for IP + datagrams are specified on a per-underlying-protocol basis, such as + in RFC 894 [RFC894]). + +7. Summary + + The DTN architecture addresses many of the problems of heterogeneous + networks that must operate in environments subject to long delays and + discontinuous end-to-end connectivity. It is based on asynchronous + messaging and uses postal mail as a model of service classes and + delivery semantics. It accommodates many different forms of + connectivity, including scheduled, predicted, and opportunistically + connected delivery paths. It introduces a novel approach to end-to- + end reliability across frequently partitioned and unreliable + networks. It also proposes a model for securing the network + infrastructure against unauthorized access. + + It is our belief that this architecture is applicable to many + different types of challenged environments. + +8. Security Considerations + + Security is an integral concern for the design of the Delay Tolerant + Network Architecture, but its use is optional. Sections 3.6.1, 3.14, + and 4.4 of this document present some factors to consider for + securing the DTN architecture, but separate documents [DTNSOV] and + [DTNSEC] define the security architecture in much more detail. + + + + + + + +Cerf, et al. Informational [Page 29] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +9. IANA Considerations + + This document specifies the architecture for Delay Tolerant + Networking, which uses Internet-standard URIs for its Endpoint + Identifiers. URIs intended for use with DTN should be compliant with + the guidelines given in [RFC3986]. + +10. Normative References + + [RFC3986] Berners-Lee, T., Fielding, R., and L. 
Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", STD 66, RFC + 3986, January 2005. + +11. Informative References + + [IPN01] InterPlaNetary Internet Project, Internet Society IPN + Special Interest Group, http://www.ipnsig.org. + + [SB03] S. Burleigh, et al., "Delay-Tolerant Networking - An + Approach to Interplanetary Internet", IEEE Communications + Magazine, July 2003. + + [FW03] F. Warthman, "Delay-Tolerant Networks (DTNs): A Tutorial + v1.1", Warthman Associates, 2003. Available from + http://www.dtnrg.org. + + [KF03] K. Fall, "A Delay-Tolerant Network Architecture for + Challenged Internets", Proceedings SIGCOMM, Aug 2003. + + [JFP04] S. Jain, K. Fall, R. Patra, "Routing in a Delay Tolerant + Network", Proceedings SIGCOMM, Aug/Sep 2004. + + [DFS02] R. Durst, P. Feighery, K. Scott, "Why not use the + Standard Internet Suite for the Interplanetary + Internet?", MITRE White Paper, 2002. Available from + http://www.ipnsig.org/reports/TCP_IP.pdf. + + [CK74] V. Cerf, R. Kahn, "A Protocol for Packet Network + Intercommunication", IEEE Trans. on Comm., COM-22(5), May + 1974. + + [IGE00] C. Intanagonwiwat, R. Govindan, D. Estrin, "Directed + Diffusion: A Scalable and Robust Communication Paradigm + for Sensor Networks", Proceedings MobiCOM, Aug 2000. + + + + + + + +Cerf, et al. Informational [Page 30] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + [WSBL99] W. Adjie-Winoto, E. Schwartz, H. Balakrishnan, J. Lilley, + "The Design and Implementation of an Intentional Naming + System", Proc. 17th ACM SOSP, Kiawah Island, SC, Dec. + 1999. + + [CT90] D. Clark, D. Tennenhouse, "Architectural Considerations + for a New Generation of Protocols", Proceedings SIGCOMM, + 1990. + + [ISCHEMES] IANA, Uniform Resource Identifier (URI) Schemes, + http://www.iana.org/assignments/uri-schemes.html. + + [JDPF05] S. Jain, M. Demmer, R. Patra, K. Fall, "Using Redundancy + to Cope with Failures in a Delay Tolerant Network", + Proceedings SIGCOMM, 2005.
+ + [WJMF05] Y. Wang, S. Jain, M. Martonosi, K. Fall, "Erasure Coding + Based Routing in Opportunistic Networks", Proceedings + SIGCOMM Workshop on Delay Tolerant Networks, 2005. + + [ZAZ05] W. Zhao, M. Ammar, E. Zegura, "Multicast in Delay + Tolerant Networks", Proceedings SIGCOMM Workshop on Delay + Tolerant Networks, 2005. + + [LFC05] J. Leguay, T. Friedman, V. Conan, "DTN Routing in a + Mobility Pattern Space", Proceedings SIGCOMM Workshop on + Delay Tolerant Networks, 2005. + + [AF03] J. Alonso, K. Fall, "A Linear Programming Formulation of + Flows over Time with Piecewise Constant Capacity and + Transit Times", Intel Research Technical Report IRB-TR- + 03-007, June 2003. + + [FHM03] K. Fall, W. Hong, S. Madden, "Custody Transfer for + Reliable Delivery in Delay Tolerant Networks", Intel + Research Technical Report IRB-TR-03-030, July 2003. + + [BSPEC] K. Scott, S. Burleigh, "Bundle Protocol Specification", + Work in Progress, December 2006. + + [DTNSEC] S. Symington, S. Farrell, H. Weiss, "Bundle Security + Protocol Specification", Work in Progress, October 2006. + + [DTNSOV] S. Farrell, S. Symington, H. Weiss, "Delay-Tolerant + Networking Security Overview", Work in Progress, October + 2006. + + + + + +Cerf, et al. Informational [Page 31] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + [DBFJHP04] M. Demmer, E. Brewer, K. Fall, S. Jain, M. Ho, R. Patra, + "Implementing Delay Tolerant Networking", Intel Research + Technical Report IRB-TR-04-020, Dec. 2004. + + [RFC792] Postel, J., "Internet Control Message Protocol", STD 5, + RFC 792, September 1981. + + [RFC894] Hornig, C., "A Standard for the Transmission of IP + Datagrams over Ethernet Networks", STD 41, RFC 894, April + 1 1984. + + [RFC2960] Stewart, R., Xie, Q., Morneault, K., Sharp, C., + Schwarzbauer, H., Taylor, T., Rytina, I., Kalla, M., + Zhang, L., and V. Paxson, "Stream Control Transmission + Protocol", RFC 2960, October 2000. + + [RFC4088] Black, D., McCloghrie, K., and J. 
Schoenwaelder, "Uniform + Resource Identifier (URI) Scheme for the Simple Network + Management Protocol (SNMP)", RFC 4088, June 2005. + + [S05] K. Scott, "Disruption Tolerant Networking Proxies for + On-the-Move Tactical Networks", Proc. MILCOM 2005 + (unclassified track), Oct. 2005. + + [T02] W. Thies, et al., "Searching the World Wide Web in Low- + Connectivity Communities", Proc. WWW Conference (Global + Community track), May 2002. + +12. Acknowledgments + + John Wroclawski, David Mills, Greg Miller, James P. G. Sterbenz, Joe + Touch, Steven Low, Lloyd Wood, Robert Braden, Deborah Estrin, Stephen + Farrell, Melissa Ho, Ting Liu, Mike Demmer, Jakob Ericsson, Susan + Symington, Andrei Gurtov, Avri Doria, Tom Henderson, Mark Allman, + Michael Welzl, and Craig Partridge all contributed useful thoughts + and criticisms to versions of this document. We are grateful for + their time and participation. + + This work was performed in part under DOD Contract DAA-B07-00-CC201, + DARPA AO H912; JPL Task Plan No. 80-5045, DARPA AO H870; and NASA + Contract NAS7-1407. + + + + + + + + + + +Cerf, et al. Informational [Page 32] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +Authors' Addresses + + Dr. Vinton G. Cerf + Google Corporation + Suite 384 + 13800 Coppermine Rd. + Herndon, VA 20171 + Phone: +1 (703) 234-1823 + Fax: +1 (703) 848-0727 + EMail: vint@google.com + + Scott C. Burleigh + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 179-206 + Pasadena, CA 91109-8099 + Phone: +1 (818) 393-3353 + Fax: +1 (818) 354-1075 + EMail: Scott.Burleigh@jpl.nasa.gov + + Robert C. Durst + The MITRE Corporation + 7515 Colshire Blvd., M/S H440 + McLean, VA 22102 + Phone: +1 (703) 983-7535 + Fax: +1 (703) 983-7142 + EMail: durst@mitre.org + + Dr. Kevin Fall + Intel Research, Berkeley + 2150 Shattuck Ave., #1300 + Berkeley, CA 94704 + Phone: +1 (510) 495-3014 + Fax: +1 (510) 495-3049 + EMail: kfall@intel.com + + Adrian J. 
Hooke + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 303-400 + Pasadena, CA 91109-8099 + Phone: +1 (818) 354-3063 + Fax: +1 (818) 393-3575 + EMail: Adrian.Hooke@jpl.nasa.gov + + + + + + + +Cerf, et al. Informational [Page 33] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + Dr. Keith L. Scott + The MITRE Corporation + 7515 Colshire Blvd., M/S H440 + McLean, VA 22102 + Phone: +1 (703) 983-6547 + Fax: +1 (703) 983-7142 + EMail: kscott@mitre.org + + Leigh Torgerson + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 238-412 + Pasadena, CA 91109-8099 + Phone: +1 (818) 393-0695 + Fax: +1 (818) 354-6825 + EMail: ltorgerson@jpl.nasa.gov + + Howard S. Weiss + SPARTA, Inc. + 7075 Samuel Morse Drive + Columbia, MD 21046 + Phone: +1 (410) 872-1515 x201 + Fax: +1 (410) 872-8079 + EMail: howard.weiss@sparta.com + + Please refer comments to dtn-interest@mailman.dtnrg.org. The Delay + Tolerant Networking Research Group (DTNRG) web site is located at + http://www.dtnrg.org. + + + + + + + + + + + + + + + + + + + + + + + +Cerf, et al. Informational [Page 34] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +Full Copyright Statement + + Copyright (C) The IETF Trust (2007). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND + THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF + THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + +Cerf, et al. Informational [Page 35] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt new file mode 100644 index 0000000..2a77197 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt @@ -0,0 +1,2803 @@ + + + + + + +Network Working Group K. Scott +Request for Comments: 5050 The MITRE Corporation +Category: Experimental S. Burleigh + NASA Jet Propulsion Laboratory + November 2007 + + + Bundle Protocol Specification + +Status of This Memo + + This memo defines an Experimental Protocol for the Internet + community. It does not specify an Internet standard of any kind. 
+ Discussion and suggestions for improvement are requested. + Distribution of this memo is unlimited. + +IESG Note + + This RFC is not a candidate for any level of Internet Standard. The + IETF disclaims any knowledge of the fitness of this RFC for any + purpose and in particular notes that the decision to publish is not + based on IETF review for such things as security, congestion control, + or inappropriate interaction with deployed protocols. The RFC Editor + has chosen to publish this document at its discretion. Readers of + this document should exercise caution in evaluating its value for + implementation and deployment. See RFC 3932 for more information. + +Abstract + + This document describes the end-to-end protocol, block formats, and + abstract service description for the exchange of messages (bundles) + in Delay Tolerant Networking (DTN). + + This document was produced within the IRTF's Delay Tolerant + Networking Research Group (DTNRG) and represents the consensus of all + of the active contributors to this group. See http://www.dtnrg.org + for more information. + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 1] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2. Requirements Notation . . . . . . . . . . . . . . . . . . . . 4 + 3. Service Description . . . . . . . . . . . . . . . . . . . . . 5 + 3.1. Definitions . . . . . . . . . . . . . . . . . . . . . . . 5 + 3.2. Implementation Architectures . . . . . . . . . . . . . . . 9 + 3.3. Services Offered by Bundle Protocol Agents . . . . . . . . 11 + 4. Bundle Format . . . . . . . . . . . . . . . . . . . . . . . . 11 + 4.1. Self-Delimiting Numeric Values (SDNVs) . . . . . . . . . . 12 + 4.2. Bundle Processing Control Flags . . . . . . . . . . . . . 13 + 4.3. Block Processing Control Flags . . . . . . . . . . . . . . 15 + 4.4. Endpoint IDs . . . . . . . . . . . . . . . . . . . . . . . 
16 + 4.5. Formats of Bundle Blocks . . . . . . . . . . . . . . . . . 17 + 4.5.1. Primary Bundle Block . . . . . . . . . . . . . . . . . 19 + 4.5.2. Canonical Bundle Block Format . . . . . . . . . . . . 22 + 4.5.3. Bundle Payload Block . . . . . . . . . . . . . . . . . 23 + 4.6. Extension Blocks . . . . . . . . . . . . . . . . . . . . . 24 + 4.7. Dictionary Revision . . . . . . . . . . . . . . . . . . . 24 + 5. Bundle Processing . . . . . . . . . . . . . . . . . . . . . . 24 + 5.1. Generation of Administrative Records . . . . . . . . . . . 25 + 5.2. Bundle Transmission . . . . . . . . . . . . . . . . . . . 26 + 5.3. Bundle Dispatching . . . . . . . . . . . . . . . . . . . . 26 + 5.4. Bundle Forwarding . . . . . . . . . . . . . . . . . . . . 27 + 5.4.1. Forwarding Contraindicated . . . . . . . . . . . . . . 28 + 5.4.2. Forwarding Failed . . . . . . . . . . . . . . . . . . 29 + 5.5. Bundle Expiration . . . . . . . . . . . . . . . . . . . . 29 + 5.6. Bundle Reception . . . . . . . . . . . . . . . . . . . . . 30 + 5.7. Local Bundle Delivery . . . . . . . . . . . . . . . . . . 31 + 5.8. Bundle Fragmentation . . . . . . . . . . . . . . . . . . . 32 + 5.9. Application Data Unit Reassembly . . . . . . . . . . . . . 33 + 5.10. Custody Transfer . . . . . . . . . . . . . . . . . . . . . 34 + 5.10.1. Custody Acceptance . . . . . . . . . . . . . . . . . . 34 + 5.10.2. Custody Release . . . . . . . . . . . . . . . . . . . 35 + 5.11. Custody Transfer Success . . . . . . . . . . . . . . . . . 35 + 5.12. Custody Transfer Failure . . . . . . . . . . . . . . . . . 35 + 5.13. Bundle Deletion . . . . . . . . . . . . . . . . . . . . . 36 + 5.14. Discarding a Bundle . . . . . . . . . . . . . . . . . . . 36 + 5.15. Canceling a Transmission . . . . . . . . . . . . . . . . . 36 + 5.16. Polling . . . . . . . . . . . . . . . . . . . . . . . . . 36 + 6. Administrative Record Processing . . . . . . . . . . . . . . . 37 + 6.1. Administrative Records . . . . . . . . . . . . . . . . . . 37 + 6.1.1. 
Bundle Status Reports . . . . . . . . . . . . . . . . 38 + 6.1.2. Custody Signals . . . . . . . . . . . . . . . . . . . 41 + 6.2. Generation of Administrative Records . . . . . . . . . . . 44 + 6.3. Reception of Custody Signals . . . . . . . . . . . . . . . 44 + + + + + +Scott & Burleigh Experimental [Page 2] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 7. Services Required of the Convergence Layer . . . . . . . . . . 44 + 7.1. The Convergence Layer . . . . . . . . . . . . . . . . . . 44 + 7.2. Summary of Convergence Layer Services . . . . . . . . . . 45 + 8. Security Considerations . . . . . . . . . . . . . . . . . . . 45 + 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 47 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 47 + 10.1. Normative References . . . . . . . . . . . . . . . . . . . 47 + 10.2. Informative References . . . . . . . . . . . . . . . . . . 47 + Appendix A. Contributors . . . . . . . . . . . . . . . . . . . . 49 + Appendix B. Comments . . . . . . . . . . . . . . . . . . . . . . 49 + +1. Introduction + + This document describes version 6 of the Delay Tolerant Networking + (DTN) "bundle" protocol (BP). Delay Tolerant Networking is an end- + to-end architecture providing communications in and/or through highly + stressed environments. Stressed networking environments include + those with intermittent connectivity, large and/or variable delays, + and high bit error rates. To provide its services, BP sits at the + application layer of some number of constituent internets, forming a + store-and-forward overlay network. 
Key capabilities of BP include: + + o Custody-based retransmission + + o Ability to cope with intermittent connectivity + + o Ability to take advantage of scheduled, predicted, and + opportunistic connectivity (in addition to continuous + connectivity) + + o Late binding of overlay network endpoint identifiers to + constituent internet addresses + + For descriptions of these capabilities and the rationale for the DTN + architecture, see [ARCH] and [SIGC]. [TUT] contains a tutorial-level + overview of DTN concepts. + + This is an experimental protocol, produced within the IRTF's Delay + Tolerant Networking Research Group (DTNRG) and represents the + consensus of all of the active contributors to this group. If this + protocol is used on the Internet, IETF standard protocols for + security and congestion control should be used. + + BP's location within the standard protocol stack is as shown in + Figure 1. BP uses the "native" internet protocols for communications + within a given internet. Note that "internet" in the preceding is + used in a general sense and does not necessarily refer to TCP/IP. + The interface between the common bundle protocol and a specific + + + +Scott & Burleigh Experimental [Page 3] + +RFC 5050 Bundle Protocol Specification November 2007 + + + internetwork protocol suite is termed a "convergence layer adapter". + Figure 1 shows three distinct transport and network protocols + (denoted T1/N1, T2/N2, and T3/N3). 
+ + +-----------+ +-----------+ + | BP app | | BP app | + +---------v-| +->>>>>>>>>>v-+ +->>>>>>>>>>v-+ +-^---------+ + | BP v | | ^ BP v | | ^ BP v | | ^ BP | + +---------v-+ +-^---------v-+ +-^---------v-+ +-^---------+ + | Trans1 v | + ^ T1/T2 v | + ^ T2/T3 v | | ^ Trans3 | + +---------v-+ +-^---------v-+ +-^---------v + +-^---------+ + | Net1 v | | ^ N1/N2 v | | ^ N2/N3 v | | ^ Net3 | + +---------v-+ +-^---------v + +-^---------v-+ +-^---------+ + | >>>>>>>>^ >>>>>>>>>>^ >>>>>>>>^ | + +-----------+ +-------------+ +-------------+ +-----------+ + | | | | + |<--- An internet --->| |<--- An internet --->| + | | | | + + Figure 1: The Bundle Protocol Sits at + the Application Layer of the Internet Model + + This document describes the format of the protocol data units (called + bundles) passed between entities participating in BP communications. + The entities are referred to as "bundle nodes". This document does + not address: + + o Operations in the convergence layer adapters that bundle nodes use + to transport data through specific types of internets. (However, + the document does discuss the services that must be provided by + each adapter at the convergence layer.) + + o The bundle routing algorithm. + + o Mechanisms for populating the routing or forwarding information + bases of bundle nodes. + +2. Requirements Notation + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC2119]. + + + + + + + + + +Scott & Burleigh Experimental [Page 4] + +RFC 5050 Bundle Protocol Specification November 2007 + + +3. Service Description + +3.1. Definitions + + Bundle - A bundle is a protocol data unit of the DTN bundle + protocol. Each bundle comprises a sequence of two or more + "blocks" of protocol data, which serve various purposes. 
Multiple + instances of the same bundle (the same unit of DTN protocol data) + might exist concurrently in different parts of a network -- + possibly in different representations -- in the memory local to + one or more bundle nodes and/or in transit between nodes. In the + context of the operation of a bundle node, a bundle is an instance + of some bundle in the network that is in that node's local memory. + + Bundle payload - A bundle payload (or simply "payload") is the + application data whose conveyance to the bundle's destination is + the purpose for the transmission of a given bundle. The terms + "bundle content", "bundle payload", and "payload" are used + interchangeably in this document. The "nominal" payload for a + bundle forwarded in response to a bundle transmission request is + the application data unit whose location is provided as a + parameter to that request. The nominal payload for a bundle + forwarded in response to reception of that bundle is the payload + of the received bundle. + + Fragment - A fragment is a bundle whose payload block contains a + fragmentary payload. A fragmentary payload is either the first N + bytes or the last N bytes of some other payload -- either a + nominal payload or a fragmentary payload -- of length M, such that + 0 < N < M. + + Bundle node - A bundle node (or, in the context of this document, + simply a "node") is any entity that can send and/or receive + bundles. In the most familiar case, a bundle node is instantiated + as a single process running on a general-purpose computer, but in + general the definition is meant to be broader: a bundle node might + alternatively be a thread, an object in an object-oriented + operating system, a special-purpose hardware device, etc. Each + bundle node has three conceptual components, defined below: a + "bundle protocol agent", a set of zero or more "convergence layer + adapters", and an "application agent". 
+ + Bundle protocol agent - The bundle protocol agent (BPA) of a node is + the node component that offers the BP services and executes the + procedures of the bundle protocol. The manner in which it does so + is wholly an implementation matter. For example, BPA + functionality might be coded into each node individually; it might + be implemented as a shared library that is used in common by any + + + +Scott & Burleigh Experimental [Page 5] + +RFC 5050 Bundle Protocol Specification November 2007 + + + number of bundle nodes on a single computer; it might be + implemented as a daemon whose services are invoked via inter- + process or network communication by any number of bundle nodes on + one or more computers; it might be implemented in hardware. + + Convergence layer adapters - A convergence layer adapter (CLA) sends + and receives bundles on behalf of the BPA, utilizing the services + of some 'native' internet protocol that is supported in one of the + internets within which the node is functionally located. The + manner in which a CLA sends and receives bundles is wholly an + implementation matter, exactly as described for the BPA. + + Application agent - The application agent (AA) of a node is the node + component that utilizes the BP services to effect communication + for some purpose. The application agent in turn has two elements, + an administrative element and an application-specific element. + The application-specific element of an AA constructs, requests + transmission of, accepts delivery of, and processes application- + specific application data units; the only interface between the + BPA and the application-specific element of the AA is the BP + service interface. The administrative element of an AA constructs + and requests transmission of administrative records (status + reports and custody signals), and it accepts delivery of and + processes any custody signals that the node receives. 
In addition + to the BP service interface, there is a (conceptual) private + control interface between the BPA and the administrative element + of the AA that enables each to direct the other to take action + under specific circumstances. In the case of a node that serves + simply as a "router" in the overlay network, the AA may have no + application-specific element at all. The application-specific + elements of other nodes' AAs may perform arbitrarily complex + application functions, perhaps even offering multiplexed DTN + communication services to a number of other applications. As with + the BPA, the manner in which the AA performs its functions is + wholly an implementation matter; in particular, the administrative + element of an AA might be built into the library or daemon or + hardware that implements the BPA, and the application-specific + element of an AA might be implemented either in software or in + hardware. + + Bundle endpoint - A bundle endpoint (or simply "endpoint") is a set + of zero or more bundle nodes that all identify themselves for BP + purposes by some single text string, called a "bundle endpoint ID" + (or, in this document, simply "endpoint ID"; endpoint IDs are + described in detail in Section 4.4 below). The special case of an + endpoint that never contains more than one node is termed a + "singleton" endpoint; every bundle node must be a member of at + least one singleton endpoint. Singletons are the most familiar + + + +Scott & Burleigh Experimental [Page 6] + +RFC 5050 Bundle Protocol Specification November 2007 + + + sort of endpoint, but in general the endpoint notion is meant to + be broader. For example, the nodes in a sensor network might + constitute a set of bundle nodes that identify themselves by a + single common endpoint ID and thus form a single bundle endpoint. + *Note* too that a given bundle node might identify itself by + multiple endpoint IDs and thus be a member of multiple bundle + endpoints. 
+ + Forwarding - When the bundle protocol agent of a node determines + that a bundle must be "forwarded" to an endpoint, it causes the + bundle to be sent to all of the nodes that the bundle protocol + agent currently believes are in the "minimum reception group" of + that endpoint. The minimum reception group of an endpoint may be + any one of the following: (a) ALL of the nodes registered in an + endpoint that is permitted to contain multiple nodes (in which + case forwarding to the endpoint is functionally similar to + "multicast" operations in the Internet, though possibly very + different in implementation); (b) ANY N of the nodes registered in + an endpoint that is permitted to contain multiple nodes, where N + is in the range from zero to the cardinality of the endpoint (in + which case forwarding to the endpoint is functionally similar to + "anycast" operations in the Internet); or (c) THE SOLE NODE + registered in a singleton endpoint (in which case forwarding to + the endpoint is functionally similar to "unicast" operations in + the Internet). The nature of the minimum reception group for a + given endpoint can be determined from the endpoint's ID (again, + see Section 4.4 below): for some endpoint ID "schemes", the nature + of the minimum reception group is fixed - in a manner that is + defined by the scheme - for all endpoints identified under the + scheme; for other schemes, the nature of the minimum reception + group is indicated by some lexical feature of the "scheme-specific + part" of the endpoint ID, in a manner that is defined by the + scheme. + + Registration - A registration is the state machine characterizing a + given node's membership in a given endpoint. Any number of + registrations may be concurrently associated with a given + endpoint, and any number of registrations may be concurrently + associated with a given node. Any single registration must at any + time be in one of two states: Active or Passive. 
A registration + always has an associated "delivery failure action", the action + that is to be taken when a bundle that is "deliverable" (see + below) subject to that registration is received at a time when the + registration is in the Passive state. Delivery failure action + must be one of the following: + + * defer "delivery" (see below) of the bundle subject to this + registration until (a) this bundle is the least recently + + + +Scott & Burleigh Experimental [Page 7] + +RFC 5050 Bundle Protocol Specification November 2007 + + + received of all bundles currently deliverable subject to this + registration and (b) either the registration is polled or else + the registration is in the Active state; or + + * "abandon" (see below) delivery of the bundle subject to this + registration. + + An additional implementation-specific delivery deferral procedure + may optionally be associated with the registration. While the + state of a registration is Active, reception of a bundle that is + deliverable subject to this registration must cause the bundle to + be delivered automatically as soon as it is the least recently + received bundle that is currently deliverable subject to the + registration. While the state of a registration is Passive, + reception of a bundle that is deliverable subject to this + registration must cause delivery of the bundle to be abandoned or + deferred as mandated by the registration's current delivery + failure action; in the latter case, any additional delivery + deferral procedure associated with the registration must also be + performed. + + Delivery - Upon reception, the processing of a bundle that has been + sent to a given node depends on whether or not the receiving node + is registered in the bundle's destination endpoint. 
If it is, and + if the payload of the bundle is non-fragmentary (possibly as a + result of successful payload reassembly from fragmentary payloads, + including the original payload of the received bundle), then the + bundle is normally "delivered" to the node's application agent + subject to the registration characterizing the node's membership + in the destination endpoint. A bundle is considered to have been + delivered at a node subject to a registration as soon as the + application data unit that is the payload of the bundle, together + with the value of the bundle's "Acknowledgement by application is + requested" flag and any other relevant metadata (an implementation + matter), has been presented to the node's application agent in a + manner consistent with the state of that registration and, as + applicable, the registration's delivery failure action. + + Deliverability, Abandonment - A bundle is considered "deliverable" + subject to a registration if and only if (a) the bundle's + destination endpoint is the endpoint with which the registration + is associated, (b) the bundle has not yet been delivered subject + to this registration, and (c) delivery of the bundle subject to + this registration has not been abandoned. To "abandon" delivery + of a bundle subject to a registration is simply to declare it no + longer deliverable subject to that registration; normally only + registrations' registered delivery failure actions cause + deliveries to be abandoned. + + + +Scott & Burleigh Experimental [Page 8] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Deletion, Discarding - A bundle protocol agent "discards" a bundle + by simply ceasing all operations on the bundle and functionally + erasing all references to it; the specific procedures by which + this is accomplished are an implementation matter. Bundles are + discarded silently; i.e., the discarding of a bundle does not + result in generation of an administrative record. 
"Retention + constraints" are elements of the bundle state that prevent a + bundle from being discarded; a bundle cannot be discarded while it + has any retention constraints. A bundle protocol agent "deletes" + a bundle in response to some anomalous condition by notifying the + bundle's report-to endpoint of the deletion (provided such + notification is warranted; see Section 5.13 for details) and then + arbitrarily removing all of the bundle's retention constraints, + enabling the bundle to be discarded. + + Transmission - A transmission is a sustained effort by a node's + bundle protocol agent to cause a bundle to be sent to all nodes in + the minimum reception group of some endpoint (which may be the + bundle's destination or may be some intermediate forwarding + endpoint) in response to a transmission request issued by the + node's application agent. Any number of transmissions may be + concurrently undertaken by the bundle protocol agent of a given + node. + + Custody - To "accept custody" upon forwarding a bundle is to commit + to retaining a copy of the bundle -- possibly re-forwarding the + bundle when necessary -- until custody of that bundle is + "released". Custody of a bundle whose destination is a singleton + endpoint is released when either (a) notification is received that + some other node has accepted custody of the same bundle; (b) + notification is received that the bundle has been delivered at the + (sole) node registered in the bundle's destination endpoint; or + (c) the bundle is explicitly deleted for some reason, such as + lifetime expiration. The condition(s) under which custody of a + bundle whose destination is not a singleton endpoint may be + released are not defined in this specification. To "refuse + custody" of a bundle is to decide not to accept custody of the + bundle. A "custodial node" of a bundle is a node that has + accepted custody of the bundle and has not yet released that + custody. 
A "custodian" of a bundle is a singleton endpoint whose + sole member is one of the bundle's custodial nodes. + +3.2. Implementation Architectures + + The above definitions are intended to enable the bundle protocol's + operations to be specified in a manner that minimizes bias toward any + particular implementation architecture. To illustrate the range of + interoperable implementation models that might conform to this + + + +Scott & Burleigh Experimental [Page 9] + +RFC 5050 Bundle Protocol Specification November 2007 + + + specification, four example architectures are briefly described + below. + + 1. Bundle protocol application server + + A single bundle protocol application server, constituting a + single bundle node, runs as a daemon process on each computer. + The daemon's functionality includes all functions of the bundle + protocol agent, all convergence layer adapters, and both the + administrative and application-specific elements of the + application agent. The application-specific element of the + application agent functions as a server, offering bundle protocol + service over a local area network: it responds to remote + procedure calls from application processes (on the same computer + and/or remote computers) that need to communicate via the bundle + protocol. The server supports its clients by creating a new + (conceptual) node for each one and registering each such node in + a client-specified endpoint. The conceptual nodes managed by the + server function as clients' bundle protocol service access + points. + + 2. Peer application nodes + + Any number of bundle protocol application processes, each one + constituting a single bundle node, run in ad-hoc fashion on each + computer. The functionality of the bundle protocol agent, all + convergence layer adapters, and the administrative element of the + application agent is provided by a library to which each node + process is dynamically linked at run time. 
The application- + specific element of each node's application agent is node- + specific application code. + + 3. Sensor network nodes + + Each node of the sensor network is the self-contained + implementation of a single bundle node. All functions of the + bundle protocol agent, all convergence layer adapters, and the + administrative element of the application agent are implemented + in simplified form in Application-Specific Integrated Circuits + (ASICs), while the application-specific element of each node's + application agent is implemented in a programmable + microcontroller. Forwarding is rudimentary: all bundles are + forwarded on a hard-coded default route. + + + + + + + + +Scott & Burleigh Experimental [Page 10] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 4. Dedicated bundle router + + Each computer constitutes a single bundle node that functions + solely as a high-performance bundle forwarder. Many standard + functions of the bundle protocol agent, the convergence layer + adapters, and the administrative element of the application agent + are implemented in ASICs, but some functions are implemented in a + high-speed processor to enable reprogramming as necessary. The + node's application agent has no application-specific element. + Substantial non-volatile storage resources are provided, and + arbitrarily complex forwarding algorithms are supported. + +3.3. Services Offered by Bundle Protocol Agents + + The bundle protocol agent of each node is expected to provide the + following services to the node's application agent: + + o commencing a registration (registering a node in an endpoint); + + o terminating a registration; + + o switching a registration between Active and Passive states; + + o transmitting a bundle to an identified bundle endpoint; + + o canceling a transmission; + + o polling a registration that is in the Passive state; + + o delivering a received bundle. + +4. 
Bundle Format + + Each bundle shall be a concatenated sequence of at least two block + structures. The first block in the sequence must be a primary bundle + block, and no bundle may have more than one primary bundle block. + Additional bundle protocol blocks of other types may follow the + primary block to support extensions to the bundle protocol, such as + the Bundle Security Protocol [BSP]. At most one of the blocks in the + sequence may be a payload block. The last block in the sequence must + have the "last block" flag (in its block processing control flags) + set to 1; for every other block in the bundle after the primary + block, this flag must be set to zero. + + + + + + + + +Scott & Burleigh Experimental [Page 11] + +RFC 5050 Bundle Protocol Specification November 2007 + + +4.1. Self-Delimiting Numeric Values (SDNVs) + + The design of the bundle protocol attempts to reconcile minimal + consumption of transmission bandwidth with: + + o extensibility to address requirements not yet identified, and + + o scalability across a wide range of network scales and payload + sizes. + + A key strategic element in the design is the use of self-delimiting + numeric values (SDNVs). The SDNV encoding scheme is closely adapted + from the Abstract Syntax Notation One Basic Encoding Rules for + subidentifiers within an object identifier value [ASN1]. An SDNV is + a numeric value encoded in N octets, the last of which has its most + significant bit (MSB) set to zero; the MSB of every other octet in + the SDNV must be set to 1. The value encoded in an SDNV is the + unsigned binary number obtained by concatenating into a single bit + string the 7 least significant bits of each octet of the SDNV. + + The following examples illustrate the encoding scheme for various + hexadecimal values. 
+ + 0xABC : 1010 1011 1100 + is encoded as + {1 00 10101} {0 0111100} + = 10010101 00111100 + + 0x1234 : 0001 0010 0011 0100 + = 1 0010 0011 0100 + is encoded as + {1 0 100100} {0 0110100} + = 10100100 00110100 + + 0x4234 : 0100 0010 0011 0100 + = 100 0010 0011 0100 + is encoded as + {1 000000 1} {1 0000100} {0 0110100} + = 10000001 10000100 00110100 + + 0x7F : 0111 1111 + = 111 1111 + is encoded as + {0 1111111} + = 01111111 + + Figure 2: SDNV Example + + + + +Scott & Burleigh Experimental [Page 12] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Note: Care must be taken to make sure that the value to be encoded is + (in concept) padded with high-order zero bits to make its bitwise + length a multiple of 7 before encoding. Also note that, while there + is no theoretical limit on the size of an SDNV field, the overhead of + the SDNV scheme is 1:7, i.e., one bit of overhead for every 7 bits of + actual data to be encoded. Thus, a 7-octet value (a 56-bit quantity + with no leading zeroes) would be encoded in an 8-octet SDNV; an + 8-octet value (a 64-bit quantity with no leading zeroes) would be + encoded in a 10-octet SDNV (one octet containing the high-order bit + of the value padded with six leading zero bits, followed by nine + octets containing the remaining 63 bits of the value). 152 bits of + overhead would be consumed in encoding a 1024-bit RSA encryption key + directly in a 147-octet SDNV. In general, an N-bit quantity with no leading + zeroes is encoded in an SDNV occupying ceil(N/7) octets, where ceil + is the integer ceiling function. + + Implementations of the bundle protocol may handle as an invalid + numeric value any SDNV that encodes an integer that is larger than + (2^64 - 1). + + An SDNV can be used to represent both very large and very small + integer values. However, SDNV is clearly not the best way to + represent every numeric value. 
For example, an SDNV is a poor way to + represent an integer whose value typically falls in the range 128 to + 255. In general, though, we believe that SDNV representation of + numeric values in bundle blocks yields the smallest block sizes + without sacrificing scalability. + +4.2. Bundle Processing Control Flags + + The bundle processing control flags field in the primary bundle block + of each bundle is an SDNV; the value encoded in this SDNV is a string + of bits used to invoke selected bundle processing control features. + The significance of the value in each currently defined position of + this bit string is described here. Note that in the figure and + descriptions, the bit label numbers denote position (from least + significant ('0') to most significant) within the decoded bit string, + and not within the representation of the bits on the wire. This is + why the descriptions in this section and the next do not follow + standard RFC conventions with bit 0 on the left; if fields are added + in the future, the SDNV will grow to the left, and using this + representation allows the references here to remain valid. + + + + + + + + + +Scott & Burleigh Experimental [Page 13] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 2 1 0 + 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Status Report|Class of Svc.| General | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 3: Bundle Processing Control Flags Bit Layout + + The bits in positions 0 through 6 are flags that characterize the + bundle as follows: + + 0 -- Bundle is a fragment. + + 1 -- Application data unit is an administrative record. + + 2 -- Bundle must not be fragmented. + + 3 -- Custody transfer is requested. + + 4 -- Destination endpoint is a singleton. + + 5 -- Acknowledgement by application is requested. + + 6 -- Reserved for future use. + + The bits in positions 7 through 13 are used to indicate the bundle's + class of service. 
The bits in positions 8 and 7 constitute a two-bit + priority field indicating the bundle's priority, with higher values + being of higher priority: 00 = bulk, 01 = normal, 10 = expedited, 11 + is reserved for future use. Within this field, bit 8 is the most + significant bit. The bits in positions 9 through 13 are reserved for + future use. + + The bits in positions 14 through 20 are status report request flags. + These flags are used to request status reports as follows: + + 14 -- Request reporting of bundle reception. + + 15 -- Request reporting of custody acceptance. + + 16 -- Request reporting of bundle forwarding. + + 17 -- Request reporting of bundle delivery. + + 18 -- Request reporting of bundle deletion. + + 19 -- Reserved for future use. + + + + +Scott & Burleigh Experimental [Page 14] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 20 -- Reserved for future use. + + If the bundle processing control flags indicate that the bundle's + application data unit is an administrative record, then the custody + transfer requested flag must be zero and all status report request + flags must be zero. If the custody transfer requested flag is 1, + then the sending node requests that the receiving node accept custody + of the bundle. If the bundle's source endpoint ID is "dtn:none" (see + below), then the bundle is not uniquely identifiable and all bundle + protocol features that rely on bundle identity must therefore be + disabled: the bundle's custody transfer requested flag must be zero, + the "Bundle must not be fragmented" flag must be 1, and all status + report request flags must be zero. + +4.3. Block Processing Control Flags + + The block processing control flags field in every block other than + the primary bundle block is an SDNV; the value encoded in this SDNV + is a string of bits used to invoke selected block processing control + features. 
The significance of the values in all currently defined + positions of this bit string, in order from least significant + position in the decoded bit string (labeled '0') to most significant + (labeled '6'), is described here. + + 0 + 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+ + | Flags | + +-+-+-+-+-+-+-+ + + Figure 4: Block Processing Control Flags Bit Layout + + 0 - Block must be replicated in every fragment. + + 1 - Transmit status report if block can't be processed. + + 2 - Delete bundle if block can't be processed. + + 3 - Last block. + + 4 - Discard block if it can't be processed. + + 5 - Block was forwarded without being processed. + + 6 - Block contains an EID-reference field. + + + + + + +Scott & Burleigh Experimental [Page 15] + +RFC 5050 Bundle Protocol Specification November 2007 + + + For each bundle whose primary block's bundle processing control flags + (see above) indicate that the bundle's application data unit is an + administrative record, the "Transmit status report if block can't be + processed" flag in the block processing flags field of every other + block in the bundle must be zero. + + The 'Block must be replicated in every fragment' bit in the block + processing flags must be set to zero on all blocks that follow the + payload block. + +4.4. Endpoint IDs + + The destinations of bundles are bundle endpoints, identified by text + strings termed "endpoint IDs" (see Section 3.1). Each endpoint ID + conveyed in any bundle block takes the form of a Uniform Resource + Identifier (URI; [URI]). As such, each endpoint ID can be + characterized as having this general structure: + + < scheme name > : < scheme-specific part, or "SSP" > + + As used for the purposes of the bundle protocol, neither the length + of a scheme name nor the length of an SSP may exceed 1023 bytes. + + Bundle blocks cite a number of endpoint IDs for various purposes of + the bundle protocol. 
Many, though not necessarily all, of the + endpoint IDs referred to in the blocks of a given bundle are conveyed + in the "dictionary" byte array in the bundle's primary block. This + array is simply the concatenation of any number of null-terminated + scheme names and SSPs. + + "Endpoint ID references" are used to cite endpoint IDs that are + contained in the dictionary; all endpoint ID citations in the primary + bundle block are endpoint ID references, and other bundle blocks may + contain endpoint ID references as well. Each endpoint ID reference + is an ordered pair of SDNVs: + + o The first SDNV contains the offset within the dictionary of the + first character of the referenced endpoint ID's scheme name. + + o The second SDNV contains the offset within the dictionary of the + first character of the referenced endpoint ID's SSP. + + This encoding enables a degree of block compression: when the source + and report-to of a bundle are the same endpoint, for example, the + text of that endpoint's ID may be cited twice yet appear only once in + the dictionary. + + + + + +Scott & Burleigh Experimental [Page 16] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The scheme identified by the < scheme name > in an endpoint ID is a + set of syntactic and semantic rules that fully explain how to parse + and interpret the SSP. The set of allowable schemes is effectively + unlimited. Any scheme conforming to [URIREG] may be used in a bundle + protocol endpoint ID. In addition, a single additional scheme is + defined by the present document: + + o The "dtn" scheme, which is used at minimum in the representation + of the null endpoint ID "dtn:none". The forwarding of a bundle to + the null endpoint is never contraindicated, and the minimum + reception group for the null endpoint is the empty set. 
+ + Note that, although the endpoint IDs conveyed in bundle blocks are + expressed as URIs, implementations of the BP service interface may + support expression of endpoint IDs in some internationalized manner + (e.g., Internationalized Resource Identifiers (IRIs); see [RFC3987]). + +4.5. Formats of Bundle Blocks + + This section describes the formats of the primary block and payload + block. Rules for processing these blocks appear in Section 5 of this + document. + + Note that supplementary DTN protocol specifications (including, but + not restricted to, the Bundle Security Protocol [BSP]) may require + that BP implementations conforming to those protocols construct and + process additional blocks. + + The format of the two basic BP blocks is shown in Figure 5 below. + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 17] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Primary Bundle Block + +----------------+----------------+----------------+----------------+ + | Version | Proc. 
Flags (*) | + +----------------+----------------+----------------+----------------+ + | Block length (*) | + +----------------+----------------+---------------------------------+ + | Destination scheme offset (*) | Destination SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Source scheme offset (*) | Source SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Report-to scheme offset (*) | Report-to SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Custodian scheme offset (*) | Custodian SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Creation Timestamp time (*) | + +---------------------------------+---------------------------------+ + | Creation Timestamp sequence number (*) | + +---------------------------------+---------------------------------+ + | Lifetime (*) | + +----------------+----------------+----------------+----------------+ + | Dictionary length (*) | + +----------------+----------------+----------------+----------------+ + | Dictionary byte array (variable) | + +----------------+----------------+---------------------------------+ + | [Fragment offset (*)] | + +----------------+----------------+---------------------------------+ + | [Total application data unit length (*)] | + +----------------+----------------+---------------------------------+ + + + Bundle Payload Block + +----------------+----------------+----------------+----------------+ + | Block type | Proc. Flags (*)| Block length(*) | + +----------------+----------------+----------------+----------------+ + / Bundle Payload (variable) / + +-------------------------------------------------------------------+ + + Figure 5: Bundle Block Formats + + (*) Notes: + + The bundle processing control ("Proc.") flags field in the Primary + Bundle Block is an SDNV and is therefore variable length. 
A three- + octet SDNV is shown here for convenience in representation. + + The block length field of the Primary Bundle Block is an SDNV and is + therefore variable length. A four-octet SDNV is shown here for + convenience in representation. + + + +Scott & Burleigh Experimental [Page 18] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Each of the eight offset fields in the Primary Bundle Block is an + SDNV and is therefore variable length. Two-octet SDNVs are shown + here for convenience in representation. + + The Creation Timestamp time field in the Primary Bundle Block is an + SDNV and is therefore variable length. A four-octet SDNV is shown + here for convenience in representation. + + The Creation Timestamp sequence number field in the Primary Bundle + Block is an SDNV and is therefore variable length. A four-octet SDNV + is shown here for convenience in representation. + + The Lifetime field in the Primary Bundle Block is an SDNV and is + therefore variable length. A four-octet SDNV is shown here for + convenience in representation. + + The dictionary length field of the Primary Bundle Block is an SDNV + and is therefore variable length. A four-octet SDNV is shown here + for convenience in representation. + + The fragment offset field of the Primary Bundle Block is present only + if the Fragment flag in the block's processing flags byte is set to + 1. It is an SDNV and is therefore variable length; a four-octet SDNV + is shown here for convenience in representation. + + The total application data unit length field of the Primary Bundle + Block is present only if the Fragment flag in the block's processing + flags byte is set to 1. It is an SDNV and is therefore variable + length; a four-octet SDNV is shown here for convenience in + representation. + + The block processing control ("Proc.") flags field of the Payload + Block is an SDNV and is therefore variable length. A one-octet SDNV + is shown here for convenience in representation. 
+ + The block length field of the Payload Block is an SDNV and is + therefore variable length. A two-octet SDNV is shown here for + convenience in representation. + +4.5.1. Primary Bundle Block + + The primary bundle block contains the basic information needed to + route bundles to their destinations. The fields of the primary + bundle block are: + + + + + + + +Scott & Burleigh Experimental [Page 19] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Version: A 1-byte field indicating the version of the bundle + protocol that constructed this block. The present document + describes version 0x06 of the bundle protocol. + + Bundle Processing Control Flags: The Bundle Processing Control + Flags field is an SDNV that contains the bundle processing control + flags discussed in Section 4.2 above. + + Block Length: The Block Length field is an SDNV that contains the + aggregate length of all remaining fields of the block. + + Destination Scheme Offset: The Destination Scheme Offset field + contains the offset within the dictionary byte array of the scheme + name of the endpoint ID of the bundle's destination, i.e., the + endpoint containing the node(s) at which the bundle is to be + delivered. + + Destination SSP Offset: The Destination SSP Offset field contains + the offset within the dictionary byte array of the scheme-specific + part of the endpoint ID of the bundle's destination. + + Source Scheme Offset: The Source Scheme Offset field contains the + offset within the dictionary byte array of the scheme name of the + endpoint ID of the bundle's nominal source, i.e., the endpoint + nominally containing the node from which the bundle was initially + transmitted. + + Source SSP Offset: The Source SSP Offset field contains the offset + within the dictionary byte array of the scheme-specific part of + the endpoint ID of the bundle's nominal source. 
+ + Report-to Scheme Offset: The Report-to Scheme Offset field contains + the offset within the dictionary byte array of the scheme name of + the ID of the endpoint to which status reports pertaining to the + forwarding and delivery of this bundle are to be transmitted. + + Report-to SSP Offset: The Report-to SSP Offset field contains the + offset within the dictionary byte array of the scheme-specific + part of the ID of the endpoint to which status reports pertaining + to the forwarding and delivery of this bundle are to be + transmitted. + + Custodian Scheme Offset: The "current custodian endpoint ID" of a + primary bundle block identifies an endpoint whose membership + includes the node that most recently accepted custody of the + bundle upon forwarding this bundle. The Custodian Scheme Offset + field contains the offset within the dictionary byte array of the + scheme name of the current custodian endpoint ID. + + + +Scott & Burleigh Experimental [Page 20] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Custodian SSP Offset: The Custodian SSP Offset field contains the + offset within the dictionary byte array of the scheme-specific + part of the current custodian endpoint ID. + + Creation Timestamp: The creation timestamp is a pair of SDNVs that, + together with the source endpoint ID and (if the bundle is a + fragment) the fragment offset and payload length, serve to + identify the bundle. The first SDNV of the timestamp is the + bundle's creation time, while the second is the bundle's creation + timestamp sequence number. Bundle creation time is the time -- + expressed in seconds since the start of the year 2000, on the + Coordinated Universal Time (UTC) scale [UTC] -- at which the + transmission request was received that resulted in the creation of + the bundle. 
Sequence count is the latest value (as of the time at + which that transmission request was received) of a monotonically + increasing positive integer counter managed by the source node's + bundle protocol agent that may be reset to zero whenever the + current time advances by one second. A source Bundle Protocol + Agent must never create two distinct bundles with the same source + endpoint ID and bundle creation timestamp. The combination of + source endpoint ID and bundle creation timestamp therefore serves + to identify a single transmission request, enabling it to be + acknowledged by the receiving application (provided the source + endpoint ID is not "dtn:none"). + + Lifetime: The lifetime field is an SDNV that indicates the time at + which the bundle's payload will no longer be useful, encoded as a + number of seconds past the creation time. When the current time + is greater than the creation time plus the lifetime, bundle nodes + need no longer retain or forward the bundle; the bundle may be + deleted from the network. + + Dictionary Length: The Dictionary Length field is an SDNV that + contains the length of the dictionary byte array. + + Dictionary: The Dictionary field is an array of bytes formed by + concatenating the null-terminated scheme names and SSPs of all + endpoint IDs referenced by any fields in this Primary Block + together with, potentially, other endpoint IDs referenced by + fields in other TBD DTN protocol blocks. Its length is given by + the value of the Dictionary Length field. + + Fragment Offset: If the Bundle Processing Control Flags of this + Primary block indicate that the bundle is a fragment, then the + Fragment Offset field is an SDNV indicating the offset from the + start of the original application data unit at which the bytes + comprising the payload of this bundle were located. If not, then + the Fragment Offset field is omitted from the block. 
+ + + +Scott & Burleigh Experimental [Page 21] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Total Application Data Unit Length: If the Bundle Processing + Control Flags of this Primary block indicate that the bundle is a + fragment, then the Total Application Data Unit Length field is an + SDNV indicating the total length of the original application data + unit of which this bundle's payload is a part. If not, then the + Total Application Data Unit Length field is omitted from the + block. + +4.5.2. Canonical Bundle Block Format + + Every bundle block of every type other than the primary bundle block + comprises the following fields, in this order: + + o Block type code, expressed as an 8-bit unsigned binary integer. + Bundle block type code 1 indicates that the block is a bundle + payload block. Block type codes 192 through 255 are not defined + in this specification and are available for private and/or + experimental use. All other values of the block type code are + reserved for future use. + + o Block processing control flags, an unsigned integer expressed as + an SDNV. The individual bits of this integer are used to invoke + selected block processing control features. + + o Block EID reference count and EID references (optional). If and + only if the block references EID elements in the primary block's + dictionary, the 'block contains an EID-reference field' flag in + the block processing control flags is set to 1 and the block + includes an EID reference field consisting of a count of EID + references expressed as an SDNV followed by the EID references + themselves. Each EID reference is a pair of SDNVs. The first + SDNV of each EID reference contains the offset of a scheme name in + the primary block's dictionary, and the second SDNV of each + reference contains the offset of a scheme-specific part in the + dictionary. + + o Block data length, an unsigned integer expressed as an SDNV. 
The + Block data length field contains the aggregate length of all + remaining fields of the block, i.e., the block-type-specific data + fields. + + o Block-type-specific data fields, whose format and order are type- + specific and whose aggregate length in octets is the value of the + block data length field. All multi-byte block-type-specific data + fields are represented in network byte order. + + + + + + +Scott & Burleigh Experimental [Page 22] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +-----------+-----------+-----------+-----------+ + |Block type | Block processing ctrl flags (SDNV)| + +-----------+-----------+-----------+-----------+ + | Block length (SDNV) | + +-----------+-----------+-----------+-----------+ + / Block body data (variable) / + +-----------+-----------+-----------+-----------+ + + Figure 6: Block Layout without EID Reference List + + + +-----------+-----------+-----------+-----------+ + |Block Type | Block processing ctrl flags (SDNV)| + +-----------+-----------+-----------+-----------+ + | EID Reference Count (SDNV) | + +-----------+-----------+-----------+-----------+ + | Ref_scheme_1 (SDNV) | Ref_ssp_1 (SDNV) | + +-----------+-----------+-----------+-----------+ + | Ref_scheme_2 (SDNV) | Ref_ssp_2 (SDNV) | + +-----------+-----------+-----------+-----------+ + | Block length (SDNV) | + +-----------+-----------+-----------+-----------+ + / Block body data (variable) / + +-----------+-----------+-----------+-----------+ + + Figure 7: Block Layout Showing Two EID References + +4.5.3. Bundle Payload Block + + The fields of the bundle payload block are: + + Block Type: The Block Type field is a 1-byte field that indicates + the type of the block. For the bundle payload block, this field + contains the value 1. + + Block Processing Control Flags: The Block Processing Control Flags + field is an SDNV that contains the block processing control flags + discussed in Section 4.3 above. 
+ + Block Length: The Block Length field is an SDNV that contains the + aggregate length of all remaining fields of the block - which is + to say, the length of the bundle's payload. + + Payload: The Payload field contains the application data carried by + this bundle. + + That is, bundle payload blocks follow the canonical format of the + previous section with the restriction that the 'block contains an + + + +Scott & Burleigh Experimental [Page 23] + +RFC 5050 Bundle Protocol Specification November 2007 + + + EID-reference field' bit of the block processing control flags is + never set. The block body data for payload blocks is the application + data carried by the bundle. + +4.6. Extension Blocks + + "Extension blocks" are all blocks other than the primary and payload + blocks. Because extension blocks are not defined in the Bundle + Protocol specification (the present document), not all nodes + conforming to this specification will necessarily instantiate Bundle + Protocol implementations that include procedures for processing (that + is, recognizing, parsing, acting on, and/or producing) all extension + blocks. It is therefore possible for a node to receive a bundle that + includes extension blocks that the node cannot process. + + Whenever a bundle is forwarded that contains one or more extension + blocks that could not be processed, the "Block was forwarded without + being processed" flag must be set to 1 within the block processing + flags of each such block. For each block flagged in this way, the + flag may optionally be cleared (i.e., set to zero) by another node + that subsequently receives the bundle and is able to process that + block; the specifications defining the various extension blocks are + expected to define the circumstances under which this flag may be + cleared, if any. + +4.7. 
Dictionary Revision + + Any strings (scheme names and SSPs) in a bundle's dictionary that are + referenced neither from the bundle's primary block nor from the block + EID reference field of any extension block may be removed from the + dictionary at the time the bundle is forwarded. + + Whenever removal of a string from the dictionary causes the offsets + (within the dictionary byte array) of any other strings to change, + all endpoint ID references that refer to those strings must be + adjusted at the same time. Note that these references may be in the + primary block and/or in the block EID reference fields of extension + blocks. + +5. Bundle Processing + + The bundle processing procedures mandated in this section and in + Section 6 govern the operation of the Bundle Protocol Agent and the + Application Agent administrative element of each bundle node. They + are neither exhaustive nor exclusive. That is, supplementary DTN + protocol specifications (including, but not restricted to, the Bundle + Security Protocol [BSP]) may require that additional measures be + taken at specified junctures in these procedures. Such additional + + + +Scott & Burleigh Experimental [Page 24] + +RFC 5050 Bundle Protocol Specification November 2007 + + + measures shall not override or supersede the mandated bundle protocol + procedures, except that they may in some cases make these procedures + moot by requiring, for example, that implementations conforming to + the supplementary protocol terminate the processing of a given + incoming or outgoing bundle due to a fault condition recognized by + that protocol. + +5.1. Generation of Administrative Records + + All initial transmission of bundles is in response to bundle + transmission requests presented by nodes' application agents. 
When + required to "generate" an administrative record (a bundle status + report or a custody signal), the bundle protocol agent itself is + responsible for causing a new bundle to be transmitted, conveying + that record. In concept, the bundle protocol agent discharges this + responsibility by directing the administrative element of the node's + application agent to construct the record and request its + transmission as detailed in Section 6 below. In practice, the manner + in which administrative record generation is accomplished is an + implementation matter, provided the constraints noted in Section 6 + are observed. + + Under some circumstances, the requesting of status reports could + result in an unacceptable increase in the bundle traffic in the + network. For this reason, the generation of status reports is + mandatory only in one case, the deletion of a bundle for which + custody transfer is requested. In all other cases, the decision on + whether or not to generate a requested status report is left to the + discretion of the bundle protocol agent. Mechanisms that could + assist in making such decisions, such as pre-placed agreements + authorizing the generation of status reports under specified + circumstances, are beyond the scope of this specification. + + Notes on administrative record terminology: + + o A "bundle reception status report" is a bundle status report with + the "reporting node received bundle" flag set to 1. + + o A "custody acceptance status report" is a bundle status report + with the "reporting node accepted custody of bundle" flag set to + 1. + + o A "bundle forwarding status report" is a bundle status report with + the "reporting node forwarded the bundle" flag set to 1. + + o A "bundle delivery status report" is a bundle status report with + the "reporting node delivered the bundle" flag set to 1. 
+ + + + +Scott & Burleigh Experimental [Page 25] + +RFC 5050 Bundle Protocol Specification November 2007 + + + o A "bundle deletion status report" is a bundle status report with + the "reporting node deleted the bundle" flag set to 1. + + o A "Succeeded" custody signal is a custody signal with the "custody + transfer succeeded" flag set to 1. + + o A "Failed" custody signal is a custody signal with the "custody + transfer succeeded" flag set to zero. + + o The "current custodian" of a bundle is the endpoint identified by + the current custodian endpoint ID in the bundle's primary block. + +5.2. Bundle Transmission + + The steps in processing a bundle transmission request are: + + Step 1: If custody transfer is requested for this bundle + transmission and, moreover, custody acceptance by the source node + is required, then either the bundle protocol agent must commit to + accepting custody of the bundle -- in which case processing + proceeds from Step 2 -- or the request cannot be honored and all + remaining steps of this procedure must be skipped. The bundle + protocol agent must not commit to accepting custody of a bundle if + the conditions under which custody of the bundle may be accepted + are not satisfied. The conditions under which a node may accept + custody of a bundle whose destination is not a singleton endpoint + are not defined in this specification. + + Step 2: Transmission of the bundle is initiated. An outbound + bundle must be created per the parameters of the bundle + transmission request, with current custodian endpoint ID set to + the null endpoint ID "dtn:none" and with the retention constraint + "Dispatch pending". The source endpoint ID of the bundle must be + either the ID of an endpoint of which the node is a member or the + null endpoint ID "dtn:none". + + Step 3: Processing proceeds from Step 1 of Section 5.4. + +5.3. 
Bundle Dispatching + + The steps in dispatching a bundle are: + + Step 1: If the bundle's destination endpoint is an endpoint of + which the node is a member, the bundle delivery procedure defined + in Section 5.7 must be followed. + + Step 2: Processing proceeds from Step 1 of Section 5.4. + + + + +Scott & Burleigh Experimental [Page 26] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.4. Bundle Forwarding + + The steps in forwarding a bundle are: + + Step 1: The retention constraint "Forward pending" must be added to + the bundle, and the bundle's "Dispatch pending" retention + constraint must be removed. + + Step 2: The bundle protocol agent must determine whether or not + forwarding is contraindicated for any of the reasons listed in + Figure 12. In particular: + + * The bundle protocol agent must determine which endpoint(s) to + forward the bundle to. The bundle protocol agent may choose + either to forward the bundle directly to its destination + endpoint (if possible) or to forward the bundle to some other + endpoint(s) for further forwarding. The manner in which this + decision is made may depend on the scheme name in the + destination endpoint ID but in any case is beyond the scope of + this document. If the agent finds it impossible to select any + endpoint(s) to forward the bundle to, then forwarding is + contraindicated. + + * Provided the bundle protocol agent succeeded in selecting the + endpoint(s) to forward the bundle to, the bundle protocol agent + must select the convergence layer adapter(s) whose services + will enable the node to send the bundle to the nodes of the + minimum reception group of each selected endpoint. The manner + in which the appropriate convergence layer adapters are + selected may depend on the scheme name in the destination + endpoint ID but in any case is beyond the scope of this + document. 
If the agent finds it impossible to select + convergence layer adapters to use in forwarding this bundle, + then forwarding is contraindicated. + + Step 3: If forwarding of the bundle is determined to be + contraindicated for any of the reasons listed in Figure 12, then + the Forwarding Contraindicated procedure defined in Section 5.4.1 + must be followed; the remaining steps of Section 5.4 are skipped at + this time. + + Step 4: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1, then the custody + transfer procedure defined in Section 5.10 must be followed. + + + + + + + +Scott & Burleigh Experimental [Page 27] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Step 5: For each endpoint selected for forwarding, the bundle + protocol agent must invoke the services of the selected + convergence layer adapter(s) in order to effect the sending of the + bundle to the nodes constituting the minimum reception group of + that endpoint. Determining the time at which the bundle is to be + sent by each convergence layer adapter is an implementation + matter. + + To keep from possibly invalidating bundle security, the sequencing + of the blocks in a forwarded bundle must not be changed as it + transits a node; received blocks must be transmitted in the same + relative order as that in which they were received. While blocks + may be added to bundles as they transit intermediate nodes, + removal of blocks that do not have their 'Discard block if it + can't be processed' flag in the block processing control flags set + to 1 may cause security to fail.
+ + Step 6: When all selected convergence layer adapters have informed + the bundle protocol agent that they have concluded their data + sending procedures with regard to this bundle: + + * If the "request reporting of bundle forwarding" flag in the + bundle's status report request field is set to 1, then a bundle + forwarding status report should be generated, destined for the + bundle's report-to endpoint ID. If the bundle has the + retention constraint "custody accepted" and all of the nodes in + the minimum reception group of the endpoint selected for + forwarding are known to be unable to send bundles back to this + node, then the reason code on this bundle forwarding status + report must be "forwarded over unidirectional link"; otherwise, + the reason code must be "no additional information". + + * The bundle's "Forward pending" retention constraint must be + removed. + +5.4.1. Forwarding Contraindicated + + The steps in responding to contraindication of forwarding for some + reason are: + + Step 1: The bundle protocol agent must determine whether or not to + declare failure in forwarding the bundle for this reason. Note: + this decision is likely to be influenced by the reason for which + forwarding is contraindicated. + + + + + + + +Scott & Burleigh Experimental [Page 28] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Step 2: If forwarding failure is declared, then the Forwarding + Failed procedure defined in Section 5.4.2 must be followed. + Otherwise, (a) if the bundle's custody transfer requested flag (in + the bundle processing flags field) is set to 1, then the custody + transfer procedure defined in Section 5.10 must be followed; (b) + when -- at some future time -- the forwarding of this bundle ceases + to be contraindicated, processing proceeds from Step 5 of + Section 5.4. + +5.4.2.
Forwarding Failed + + The steps in responding to a declaration of forwarding failure for + some reason are: + + Step 1: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1, custody transfer + failure must be handled. Procedures for handling failure of + custody transfer for a bundle whose destination is not a singleton + endpoint are not defined in this specification. For a bundle + whose destination is a singleton endpoint, the bundle protocol + agent must handle the custody transfer failure by generating a + "Failed" custody signal for the bundle, destined for the bundle's + current custodian; the custody signal must contain a reason code + corresponding to the reason for which forwarding was determined to + be contraindicated. (Note that discarding the bundle will not + delete it from the network, since the current custodian still has + a copy.) + + Step 2: If the bundle's destination endpoint is an endpoint of + which the node is a member, then the bundle's "Forward pending" + retention constraint must be removed. Otherwise, the bundle must + be deleted: the bundle deletion procedure defined in Section 5.13 + must be followed, citing the reason for which forwarding was + determined to be contraindicated. + +5.5. Bundle Expiration + + A bundle expires when the current time is greater than the bundle's + creation time plus its lifetime as specified in the primary bundle + block. Bundle expiration may occur at any point in the processing of + a bundle. When a bundle expires, the bundle protocol agent must + delete the bundle for the reason "lifetime expired": the bundle + deletion procedure defined in Section 5.13 must be followed. + + + + + + + + +Scott & Burleigh Experimental [Page 29] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.6. 
Bundle Reception + + The steps in processing a bundle received from another node are: + + Step 1: The retention constraint "Dispatch pending" must be added + to the bundle. + + Step 2: If the "request reporting of bundle reception" flag in the + bundle's status report request field is set to 1, then a bundle + reception status report with reason code "No additional + information" should be generated, destined for the bundle's + report-to endpoint ID. + + Step 3: For each block in the bundle that is an extension block + that the bundle protocol agent cannot process: + + * If the block processing flags in that block indicate that a + status report is requested in this event, then a bundle + reception status report with reason code "Block unintelligible" + should be generated, destined for the bundle's report-to + endpoint ID. + + * If the block processing flags in that block indicate that the + bundle must be deleted in this event, then the bundle protocol + agent must delete the bundle for the reason "Block + unintelligible"; the bundle deletion procedure defined in + Section 5.13 must be followed and all remaining steps of the + bundle reception procedure must be skipped. + + * If the block processing flags in that block do NOT indicate + that the bundle must be deleted in this event but do indicate + that the block must be discarded, then the bundle protocol + agent must remove this block from the bundle. + + * If the block processing flags in that block indicate NEITHER + that the bundle must be deleted NOR that the block must be + discarded, then the bundle protocol agent must set to 1 the + "Block was forwarded without being processed" flag in the block + processing flags of the block. 
+ + Step 4: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1 and the bundle has the + same source endpoint ID, creation timestamp, and (if the bundle is + a fragment) fragment offset and payload length as another bundle + that (a) has not been discarded and (b) currently has the + retention constraint "Custody accepted", custody transfer + redundancy must be handled. Otherwise, processing proceeds from + Step 5. Procedures for handling redundancy in custody transfer + + + +Scott & Burleigh Experimental [Page 30] + +RFC 5050 Bundle Protocol Specification November 2007 + + + for a bundle whose destination is not a singleton endpoint are not + defined in this specification. For a bundle whose destination is + a singleton endpoint, the bundle protocol agent must handle + custody transfer redundancy by generating a "Failed" custody + signal for this bundle with reason code "Redundant reception", + destined for this bundle's current custodian, and removing this + bundle's "Dispatch pending" retention constraint. + + Step 5: Processing proceeds from Step 1 of Section 5.3. + +5.7. Local Bundle Delivery + + The steps in processing a bundle that is destined for an endpoint of + which this node is a member are: + + Step 1: If the received bundle is a fragment, the application data + unit reassembly procedure described in Section 5.9 must be + followed. If this procedure results in reassembly of the entire + original application data unit, processing of this bundle (whose + fragmentary payload has been replaced by the reassembled + application data unit) proceeds from Step 2; otherwise, the + retention constraint "Reassembly pending" must be added to the + bundle and all remaining steps of this procedure are skipped. 
+ + Step 2: Delivery depends on the state of the registration whose + endpoint ID matches that of the destination of the bundle: + + * If the registration is in the Active state, then the bundle + must be delivered subject to this registration (see Section 3.1 + above) as soon as all previously received bundles that are + deliverable subject to this registration have been delivered. + + * If the registration is in the Passive state, then the + registration's delivery failure action must be taken (see + Section 3.1 above). + + Step 3: As soon as the bundle has been delivered: + + * If the "request reporting of bundle delivery" flag in the + bundle's status report request field is set to 1, then a bundle + delivery status report should be generated, destined for the + bundle's report-to endpoint ID. Note that this status report + only states that the payload has been delivered to the + application agent, not that the application agent has processed + that payload. + + + + + + +Scott & Burleigh Experimental [Page 31] + +RFC 5050 Bundle Protocol Specification November 2007 + + + * If the bundle's custody transfer requested flag (in the bundle + processing flags field) is set to 1, custodial delivery must be + reported. Procedures for reporting custodial delivery for a + bundle whose destination is not a singleton endpoint are not + defined in this specification. For a bundle whose destination + is a singleton endpoint, the bundle protocol agent must report + custodial delivery by generating a "Succeeded" custody signal + for the bundle, destined for the bundle's current custodian. + +5.8. Bundle Fragmentation + + It may at times be necessary for bundle protocol agents to reduce the + sizes of bundles in order to forward them. This might be the case, + for example, if the endpoint to which a bundle is to be forwarded is + accessible only via intermittent contacts and no upcoming contact is + long enough to enable the forwarding of the entire bundle. 
+ + The size of a bundle can be reduced by "fragmenting" the bundle. To + fragment a bundle whose payload is of size M is to replace it with + two "fragments" -- new bundles with the same source endpoint ID and + creation timestamp as the original bundle -- whose payloads are the + first N and the last (M - N) bytes of the original bundle's payload, + where 0 < N < M. Note that fragments may themselves be fragmented, + so fragmentation may in effect replace the original bundle with more + than two fragments. (However, there is only one 'level' of + fragmentation, as in IP fragmentation.) + + Any bundle whose primary block's bundle processing flags do NOT + indicate that it must not be fragmented may be fragmented at any + time, for any purpose, at the discretion of the bundle protocol + agent. + + Fragmentation shall be constrained as follows: + + o The concatenation of the payloads of all fragments produced by + fragmentation must always be identical to the payload of the + bundle that was fragmented. Note that the payloads of fragments + resulting from different fragmentation episodes, in different + parts of the network, may be overlapping subsets of the original + bundle's payload. + + o The bundle processing flags in the primary block of each fragment + must be modified to indicate that the bundle is a fragment, and + both fragment offset and total application data unit length must + be provided at the end of each fragment's primary bundle block. + + o The primary blocks of the fragments will differ from that of the + fragmented bundle as noted above. + + + +Scott & Burleigh Experimental [Page 32] + +RFC 5050 Bundle Protocol Specification November 2007 + + + o The payload blocks of fragments will differ from that of the + fragmented bundle as noted above. + + o All blocks that precede the payload block at the time of + fragmentation must be replicated in the fragment with the lowest + offset. 
+ + o All blocks that follow the payload block at the time of + fragmentation must be replicated in the fragment with the highest + offset. + + o If the 'Block must be replicated in every fragment' bit is set to + 1, then the block must be replicated in every fragment. + + o If the 'Block must be replicated in every fragment' bit is set to + zero, the block should be replicated in only one fragment. + + o The relative order of all blocks that are present in a fragment + must be the same as in the bundle prior to fragmentation. + +5.9. Application Data Unit Reassembly + + If the concatenation -- as informed by fragment offsets and payload + lengths -- of the payloads of all previously received fragments with + the same source endpoint ID and creation timestamp as this fragment, + together with the payload of this fragment, forms a byte array whose + length is equal to the total application data unit length in the + fragment's primary block, then: + + o This byte array -- the reassembled application data unit -- must + replace the payload of this fragment. + + o The "Reassembly pending" retention constraint must be removed from + every other fragment whose payload is a subset of the reassembled + application data unit. + + Note: reassembly of application data units from fragments occurs at + destination endpoints as necessary; an application data unit may also + be reassembled at some other endpoint on the route to the + destination. + + + + + + + + + + + +Scott & Burleigh Experimental [Page 33] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.10. Custody Transfer + + The conditions under which a node may accept custody of a bundle + whose destination is not a singleton endpoint are not defined in this + specification. 
+ + The decision as to whether or not to accept custody of a bundle whose + destination is a singleton endpoint is an implementation matter that + may involve both resource and policy considerations; however, if the + bundle protocol agent has committed to accepting custody of the + bundle (as described in Step 1 of Section 5.2), then custody must be + accepted. + + If the bundle protocol agent elects to accept custody of the bundle, + then it must follow the custody acceptance procedure defined in + Section 5.10.1. + +5.10.1. Custody Acceptance + + Procedures for acceptance of custody of a bundle whose destination is + not a singleton endpoint are not defined in this specification. + + Procedures for acceptance of custody of a bundle whose destination is + a singleton endpoint are defined as follows. + + The retention constraint "Custody accepted" must be added to the + bundle. + + If the "request reporting of custody acceptance" flag in the bundle's + status report request field is set to 1, a custody acceptance status + report should be generated, destined for the report-to endpoint ID of + the bundle. However, if a bundle reception status report was + generated for this bundle (Step 2 of Section 5.6), then this report + should be generated by simply turning on the "Reporting node accepted + custody of bundle" flag in that earlier report's status flags byte. + + The bundle protocol agent must generate a "Succeeded" custody signal + for the bundle, destined for the bundle's current custodian. + + The bundle protocol agent must assert the new current custodian for + the bundle. It does so by changing the current custodian endpoint ID + in the bundle's primary block to the endpoint ID of one of the + singleton endpoints in which the node is registered.
This may entail + appending that endpoint ID's null-terminated scheme name and SSP to + the dictionary byte array in the bundle's primary block, and in some + cases it may also enable the (optional) removal of the current + custodian endpoint ID's scheme name and/or SSP from the dictionary. + + + + +Scott & Burleigh Experimental [Page 34] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The bundle protocol agent may set a custody transfer countdown timer + for this bundle; upon expiration of this timer prior to expiration of + the bundle itself and prior to custody transfer success for this + bundle, the custody transfer failure procedure detailed in + Section 5.12 must be followed. The manner in which the countdown + interval for such a timer is determined is an implementation matter. + + The bundle should be retained in persistent storage if possible. + +5.10.2. Custody Release + + Procedures for release of custody of a bundle whose destination is + not a singleton endpoint are not defined in this specification. + + When custody of a bundle is released, where the destination of the + bundle is a singleton endpoint, the "Custody accepted" retention + constraint must be removed from the bundle and any custody transfer + timer that has been established for this bundle must be destroyed. + +5.11. Custody Transfer Success + + Procedures for determining custody transfer success for a bundle + whose destination is not a singleton endpoint are not defined in this + specification. + + Upon receipt of a "Succeeded" custody signal at a node that is a + custodial node of the bundle identified in the custody signal, where + the destination of the bundle is a singleton endpoint, custody of the + bundle must be released as described in Section 5.10.2. + +5.12. Custody Transfer Failure + + Procedures for determining custody transfer failure for a bundle + whose destination is not a singleton endpoint are not defined in this + specification.
Custody transfer for a bundle whose destination is a + singleton endpoint is determined to have failed at a custodial node + for that bundle when either (a) that node's custody transfer timer + for that bundle (if any) expires or (b) a "Failed" custody signal for + that bundle is received at that node. + + Upon determination of custody transfer failure, the action taken by + the bundle protocol agent is implementation-specific and may depend + on the nature of the failure. For example, if custody transfer + failure was inferred from expiration of a custody transfer timer or + was asserted by a "Failed" custody signal with the "Depleted storage" + reason code, the bundle protocol agent might choose to re-forward the + bundle, possibly on a different route (Section 5.4). Receipt of a + "Failed" custody signal with the "Redundant reception" reason code, + + + +Scott & Burleigh Experimental [Page 35] + +RFC 5050 Bundle Protocol Specification November 2007 + + + on the other hand, might cause the bundle protocol agent to release + custody of the bundle and to revise its algorithm for computing + countdown intervals for custody transfer timers. + +5.13. Bundle Deletion + + The steps in deleting a bundle are: + + Step 1: If the retention constraint "Custody accepted" currently + prevents this bundle from being discarded, and the destination of + the bundle is a singleton endpoint, then: + + * Custody of the bundle is released as described in Section 5.10.2. + + * A bundle deletion status report citing the reason for deletion + must be generated, destined for the bundle's report-to endpoint + ID. + + Otherwise, if the "request reporting of bundle deletion" flag in + the bundle's status report request field is set to 1, then a + bundle deletion status report citing the reason for deletion + should be generated, destined for the bundle's report-to endpoint + ID. + + Step 2: All of the bundle's retention constraints must be removed. + +5.14.
Discarding a Bundle + + As soon as a bundle has no remaining retention constraints it may be + discarded. + +5.15. Canceling a Transmission + + When requested to cancel a specified transmission, where the bundle + created upon initiation of the indicated transmission has not yet + been discarded, the bundle protocol agent must delete that bundle for + the reason "transmission cancelled". For this purpose, the procedure + defined in Section 5.13 must be followed. + +5.16. Polling + + When requested to poll a specified registration that is in the + Passive state, the bundle protocol agent must immediately deliver the + least recently received bundle that is deliverable subject to the + indicated registration, if any. + + + + + + +Scott & Burleigh Experimental [Page 36] + +RFC 5050 Bundle Protocol Specification November 2007 + + +6. Administrative Record Processing + +6.1. Administrative Records + + Administrative records are standard application data units that are + used in providing some of the features of the Bundle Protocol. Two + types of administrative records have been defined to date: bundle + status reports and custody signals. + + Every administrative record consists of a four-bit record type code + followed by four bits of administrative record flags, followed by + record content in type-specific format. Record type codes are + defined as follows: + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0001 | Bundle status report. | + +---------+--------------------------------------------+ + | 0010 | Custody signal. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. 
| + +---------+--------------------------------------------+ + + Figure 8: Administrative Record Type Codes + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0001 | Record is for a fragment; fragment | + | | offset and length fields are present. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 9: Administrative Record Flags + + All time values in administrative records are UTC times expressed in + "DTN time" representation. A DTN time consists of an SDNV indicating + the number of seconds since the start of the year 2000, followed by + an SDNV indicating the number of nanoseconds since the start of the + indicated second. + + The contents of the various types of administrative records are + described below. + + + + + +Scott & Burleigh Experimental [Page 37] + +RFC 5050 Bundle Protocol Specification November 2007 + + +6.1.1. Bundle Status Reports + + The transmission of 'bundle status reports' under specified + conditions is an option that can be invoked when transmission of a + bundle is requested. These reports are intended to provide + information about how bundles are progressing through the system, + including notices of receipt, custody transfer, forwarding, final + delivery, and deletion. They are transmitted to the Report-to + endpoints of bundles. 
+ + +----------------+----------------+----------------+----------------+ + | Status Flags | Reason code | Fragment offset (*) (if + +----------------+----------------+----------------+----------------+ + present) | Fragment length (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Time of receipt of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of custody acceptance of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of forwarding of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of delivery of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of deletion of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp time (*) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp sequence number (*) | + +----------------+----------------+----------------+----------------+ + | Length of X's source endpoint ID (*) | Source + +----------------+---------------------------------+ + + endpoint ID of bundle X (variable) | + +----------------+----------------+----------------+----------------+ + + Figure 10: Bundle Status Report Format + + (*) Notes: + + The Fragment Offset field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The Fragment Length field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. 
+ + + + + + +Scott & Burleigh Experimental [Page 38] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The Creation Timestamp fields replicate the Creation Timestamp fields + in the primary block of the subject bundle. As such they are SDNVs + (see Section 4.5.1 above) and are therefore variable length. Four- + octet SDNVs are shown here for convenience in representation. + + The source endpoint ID length field is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The fields in a bundle status report are: + + Status Flags: A 1-byte field containing the following flags: + + +----------+--------------------------------------------+ + | Value | Meaning | + +==========+============================================+ + | 00000001 | Reporting node received bundle. | + +----------+--------------------------------------------+ + | 00000010 | Reporting node accepted custody of bundle.| + +----------+--------------------------------------------+ + | 00000100 | Reporting node forwarded the bundle. | + +----------+--------------------------------------------+ + | 00001000 | Reporting node delivered the bundle. | + +----------+--------------------------------------------+ + | 00010000 | Reporting node deleted the bundle. | + +----------+--------------------------------------------+ + | 00100000 | Unused. | + +----------+--------------------------------------------+ + | 01000000 | Unused. | + +----------+--------------------------------------------+ + | 10000000 | Unused. | + +----------+--------------------------------------------+ + + Figure 11: Status Flags for Bundle Status Reports + + Reason Code: A 1-byte field explaining the value of the flags in + the status flags byte. 
The list of status report reason codes + provided here is neither exhaustive nor exclusive; supplementary + DTN protocol specifications (including, but not restricted to, the + Bundle Security Protocol [BSP]) may define additional reason + codes. Status report reason codes are defined as follows: + + + + + + + + + + +Scott & Burleigh Experimental [Page 39] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0x00 | No additional information. | + +---------+--------------------------------------------+ + | 0x01 | Lifetime expired. | + +---------+--------------------------------------------+ + | 0x02 | Forwarded over unidirectional link. | + +---------+--------------------------------------------+ + | 0x03 | Transmission canceled. | + +---------+--------------------------------------------+ + | 0x04 | Depleted storage. | + +---------+--------------------------------------------+ + | 0x05 | Destination endpoint ID unintelligible. | + +---------+--------------------------------------------+ + | 0x06 | No known route to destination from here. | + +---------+--------------------------------------------+ + | 0x07 | No timely contact with next node on route.| + +---------+--------------------------------------------+ + | 0x08 | Block unintelligible. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 12: Status Report Reason Codes + + Fragment Offset: If the bundle fragment bit is set in the status + flags, then the offset (within the original application data unit) + of the payload of the bundle that caused the status report to be + generated is included here. 
+ + Fragment length: If the bundle fragment bit is set in the status + flags, then the length of the payload of the subject bundle is + included here. + + Time of Receipt (if present): If the bundle-received bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was received at the reporting node is included here. + + Time of Custody Acceptance (if present): If the custody-accepted + bit is set in the status flags, then a DTN time indicating the + time at which custody was accepted at the reporting node is + included here. + + Time of Forward (if present): If the bundle-forwarded bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was first forwarded at the reporting node is included here. + + + + +Scott & Burleigh Experimental [Page 40] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Time of Delivery (if present): If the bundle-delivered bit is set + in the status flags, then a DTN time indicating the time at which + the bundle was delivered at the reporting node is included here. + + Time of Deletion (if present): If the bundle-deleted bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was deleted at the reporting node is included here. + + Creation Timestamp of Subject Bundle: A copy of the creation + timestamp of the bundle that caused the status report to be + generated. + + Length of Source Endpoint ID: The length in bytes of the source + endpoint ID of the bundle that caused the status report to be + generated. + + Source Endpoint ID text: The text of the source endpoint ID of the + bundle that caused the status report to be generated. + +6.1.2. Custody Signals + + Custody signals are administrative records that effect custody + transfer operations. They are transmitted to the endpoints that are + the current custodians of bundles. + + Custody signals have the following format. 
+ + Custody signal regarding bundle 'X': + + +----------------+----------------+----------------+----------------+ + | Status | Fragment offset (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Fragment length (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Time of signal (a DTN time) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp time (*) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp sequence number (*) | + +----------------+----------------+----------------+----------------+ + | Length of X's source endpoint ID (*) | Source + +----------------+---------------------------------+ + + endpoint ID of bundle X (variable) | + +----------------+----------------+----------------+----------------+ + + Figure 13: Custody Signal Format + + + + + +Scott & Burleigh Experimental [Page 41] + +RFC 5050 Bundle Protocol Specification November 2007 + + + (*) Notes: + + The Fragment Offset field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The Fragment Length field, if present, is an SDNV and is therefore + variable length. A four-octet SDNV is shown here for convenience in + representation. + + The Creation Timestamp fields replicate the Creation Timestamp fields + in the primary block of the subject bundle. As such they are SDNVs + (see Section 4.5.1 above) and are therefore variable length. Four- + octet SDNVs are shown here for convenience in representation. + + The source endpoint ID length field is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. 
+ + The fields in a custody signal are: + + Status: A 1-byte field containing a 1-bit "custody transfer + succeeded" flag followed by a 7-bit reason code explaining the + value of that flag. Custody signal reason codes are defined as + follows: + + + + + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 42] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0x00 | No additional information. | + +---------+--------------------------------------------+ + | 0x01 | Reserved for future use. | + +---------+--------------------------------------------+ + | 0x02 | Reserved for future use. | + +---------+--------------------------------------------+ + | 0x03 | Redundant reception (reception by a node | + | | that is a custodial node for this bundle).| + +---------+--------------------------------------------+ + | 0x04 | Depleted storage. | + +---------+--------------------------------------------+ + | 0x05 | Destination endpoint ID unintelligible. | + +---------+--------------------------------------------+ + | 0x06 | No known route to destination from here. | + +---------+--------------------------------------------+ + | 0x07 | No timely contact with next node on route.| + +---------+--------------------------------------------+ + | 0x08 | Block unintelligible. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 14: Custody Signal Reason Codes + + Fragment offset: If the bundle fragment bit is set in the status + flags, then the offset (within the original application data unit) + of the payload of the bundle that caused the status report to be + generated is included here. 
+ + Fragment length: If the bundle fragment bit is set in the status + flags, then the length of the payload of the subject bundle is + included here. + + Time of Signal: A DTN time indicating the time at which the signal + was generated. + + Creation Timestamp of Subject Bundle: A copy of the creation + timestamp of the bundle to which the signal applies. + + Length of Source Endpoint ID: The length in bytes of the source + endpoint ID of the bundle to which the signal applied. + + + + + + + +Scott & Burleigh Experimental [Page 43] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Source Endpoint ID text: The text of the source endpoint ID of the + bundle to which the signal applies. + +6.2. Generation of Administrative Records + + Whenever the application agent's administrative element is directed + by the bundle protocol agent to generate an administrative record + with reference to some bundle, the following procedure must be + followed: + + Step 1: The administrative record must be constructed. If the + referenced bundle is a fragment, the administrative record must + have the Fragment flag set and must contain the fragment offset + and fragment length fields. The value of the fragment offset + field must be the value of the referenced bundle's fragment + offset, and the value of the fragment length field must be the + length of the referenced bundle's payload. + + Step 2: A request for transmission of a bundle whose payload is + this administrative record must be presented to the bundle + protocol agent. + +6.3. Reception of Custody Signals + + For each received custody signal that has the "custody transfer + succeeded" flag set to 1, the administrative element of the + application agent must direct the bundle protocol agent to follow the + custody transfer success procedure in Section 5.11. 
+ + For each received custody signal that has the "custody transfer + succeeded" flag set to 0, the administrative element of the + application agent must direct the bundle protocol agent to follow the + custody transfer failure procedure in Section 5.12. + +7. Services Required of the Convergence Layer + +7.1. The Convergence Layer + + The successful operation of the end-to-end bundle protocol depends on + the operation of underlying protocols at what is termed the + "convergence layer"; these protocols accomplish communication between + nodes. A wide variety of protocols may serve this purpose, so long + as each convergence layer protocol adapter provides a defined minimal + set of services to the bundle protocol agent. This convergence layer + service specification enumerates those services. + + + + + + +Scott & Burleigh Experimental [Page 44] + +RFC 5050 Bundle Protocol Specification November 2007 + + +7.2. Summary of Convergence Layer Services + + Each convergence layer protocol adapter is expected to provide the + following services to the bundle protocol agent: + + o sending a bundle to all bundle nodes in the minimum reception + group of the endpoint identified by a specified endpoint ID that + are reachable via the convergence layer protocol; and + + o delivering to the bundle protocol agent a bundle that was sent by + a remote bundle node via the convergence layer protocol. + + The convergence layer service interface specified here is neither + exhaustive nor exclusive. That is, supplementary DTN protocol + specifications (including, but not restricted to, the Bundle Security + Protocol [BSP]) may expect convergence layer adapters that serve BP + implementations conforming to those protocols to provide additional + services. + +8. Security Considerations + + The bundle protocol has taken security into concern from the outset + of its design. It was always assumed that security services would be + needed in the use of the bundle protocol. 
As a result, the bundle + protocol security architecture and the available security services + are specified in an accompanying document, the Bundle Security + Protocol specification [BSP]; an informative overview of this + architecture is provided in [SECO]. + + The bundle protocol has been designed with the notion that it will be + run over networks with scarce resources. For example, the networks + might have limited bandwidth, limited connectivity, constrained + storage in relay nodes, etc. Therefore, the bundle protocol must + ensure that only those entities authorized to send bundles over such + constrained environments are actually allowed to do so. All + unauthorized entities should be prevented from consuming valuable + resources. + + Likewise, because of the potentially long latencies and delays + involved in the networks that make use of the bundle protocol, data + sources should be concerned with the integrity of the data received + at the intended destination(s) and may also be concerned with + ensuring confidentiality of the data as it traverses the network. + Without integrity, the bundle payload data might be corrupted while + in transit without the destination able to detect it. Similarly, the + data source can be concerned with ensuring that the data can only be + used by those authorized, hence the need for confidentiality. + + + + +Scott & Burleigh Experimental [Page 45] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Internal to the bundle-aware overlay network, the bundle nodes should + be concerned with the authenticity of other bundle nodes as well as + the preservation of bundle payload data integrity as it is forwarded + between bundle nodes. + + As a result, bundle security is concerned with the authenticity, + integrity, and confidentiality of bundles conveyed among bundle + nodes. 
This is accomplished via the use of three independent + security-specific bundle blocks, which may be used together to + provide multiple bundle security services or independently of one + another, depending on perceived security threats, mandated security + requirements, and security policies that must be enforced. + + The Bundle Authentication Block (BAB) ensures the authenticity and + integrity of bundles on a hop-by-hop basis between bundle nodes. The + BAB allows each bundle node to verify a bundle's authenticity before + processing or forwarding the bundle. In this way, entities that are + not authorized to send bundles will have unauthorized transmissions + blocked by security-aware bundle nodes. + + Additionally, to provide "security-source" to "security-destination" + bundle authenticity and integrity, the Payload Security Block (PSB) + is used. A "security-source" may not actually be the origination + point of the bundle but instead may be the first point along the path + that is security-aware and is able to apply security services. For + example, an enclave of networked systems may generate bundles but + only their gateway may be required and/or able to apply security + services. The PSB allows any security-enabled entity along the + delivery path, in addition to the "security-destination" (the + recipient counterpart to the "security-source"), to ensure the + bundle's authenticity. + + Finally, to provide payload confidentiality, the use of the + Confidentiality Block (CB) is available. The bundle payload may be + encrypted to provide "security-source" to "security-destination" + payload confidentiality/privacy. The CB indicates the cryptographic + algorithm and key IDs that were used to encrypt the payload. 
+ + Note that removal of strings from the dictionary at a given point in + a bundle's end-to-end path, and attendant adjustment of endpoint ID + references in the blocks of that bundle, may make it necessary to re- + compute values in one or more of the bundle's security blocks. + + Bundle security must not be invalidated by forwarding nodes even + though they themselves might not use the Bundle Security Protocol. + In particular, the sequencing of the blocks in a forwarded bundle + must not be changed as it transits a node; received blocks must be + transmitted in the same relative order as that in which they were + + + +Scott & Burleigh Experimental [Page 46] + +RFC 5050 Bundle Protocol Specification November 2007 + + + received. While blocks may be added to bundles as they transit + intermediate nodes, removal of blocks that do not have their 'Discard + block if it can't be processed' flag in the block processing control + flags set to 1 may cause security to fail. + + Inclusion of the Bundle Security Protocol in any Bundle Protocol + implementation is RECOMMENDED. Use of the Bundle Security Protocol + in Bundle Protocol operations is OPTIONAL. + +9. IANA Considerations + + The "dtn:" URI scheme has been provisionally registered by IANA. See + http://www.iana.org/assignments/uri-schemes.html for the latest + details. + +10. References + +10.1. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [URI] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", RFC 3986, + STD 66, January 2005. + + [URIREG] Hansen, T., Hardie, T., and L. Masinter, "Guidelines and + Registration Procedures for New URI Schemes", RFC 4395, + BCP 115, February 2006. + +10.2. Informative References + + [ARCH] V. Cerf et. al., "Delay-Tolerant Network Architecture", + RFC 4838, April 2007. 
+ + [ASN1] "Abstract Syntax Notation One (ASN.1), "ASN.1 Encoding + Rules: Specification of Basic Encoding Rules (BER), + Canonical Encoding Rules (CER) and Distinguished Encoding + Rules (DER)," ITU-T Rec. X.690 (2002) | ISO/IEC 8825- + 1:2002", 2003. + + [BSP] Symington, S., "Bundle Security Protocol Specification", + Work in Progress, October 2007. + + [RFC3987] Duerst, M. and M. Suignard, "Internationalized Resource + Identifiers (IRIs)", RFC 3987, January 2005. + + + + + +Scott & Burleigh Experimental [Page 47] + +RFC 5050 Bundle Protocol Specification November 2007 + + + [SECO] Farrell, S., Symington, S., Weiss, H., and P. Lovell, + "Delay-Tolerant Networking Security Overview", + Work in Progress, July 2007. + + [SIGC] Fall, K., "A Delay-Tolerant Network Architecture for + Challenged Internets", SIGCOMM 2003 . + + [TUT] Warthman, F., "Delay-Tolerant Networks (DTNs): A + Tutorial", . + + [UTC] Arias, E. and B. Guinot, ""Coordinated universal time UTC: + historical background and perspectives" in Journees + systemes de reference spatio-temporels", 2004. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 48] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Appendix A. Contributors + + This was an effort of the Delay Tolerant Networking Research Group. + The following DTNRG participants contributed significant technical + material and/or inputs: Dr. Vinton Cerf of Google, Scott Burleigh, + Adrian Hooke, and Leigh Torgerson of the Jet Propulsion Laboratory, + Michael Demmer of the University of California at Berkeley, Robert + Durst, Keith Scott, and Susan Symington of The MITRE Corporation, + Kevin Fall of Intel Research, Stephen Farrell of Trinity College + Dublin, Peter Lovell of SPARTA, Inc., Manikantan Ramadas of Ohio + University (most of Section 4.1), and Howard Weiss of SPARTA, Inc. + (text of Section 8). + +Appendix B. Comments + + Please refer comments to dtn-interest@mailman.dtnrg.org.
The Delay + Tolerant Networking Research Group (DTNRG) Web site is located at + http://www.dtnrg.org. + +Authors' Addresses + + Keith L. Scott + The MITRE Corporation + 7515 Colshire Drive + McLean, VA 21102 + US + + Phone: +1 703 983 6547 + Fax: +1 703 983 7142 + EMail: kscott@mitre.org + + + Scott Burleigh + NASA Jet Propulsion Laboratory + 4800 Oak Grove Dr. + Pasadena, CA 91109-8099 + US + + Phone: +1 818 393 3353 + Fax: +1 818 354 1075 + EMail: Scott.Burleigh@jpl.nasa.gov + + + + + + + + + + +Scott & Burleigh Experimental [Page 49] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Full Copyright Statement + + Copyright (C) The IETF Trust (2007). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78 and at www.rfc-editor.org/copyright.html, and + except as set forth therein, the authors retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND + THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF + THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. 
+ + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 50] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt new file mode 100644 index 0000000..9b48f34 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt @@ -0,0 +1,731 @@ + + + + + + +Internet Engineering Task Force (IETF) B. Carpenter +Request for Comments: 7098 Univ. of Auckland +Category: Informational S. Jiang +ISSN: 2070-1721 Huawei Technologies Co., Ltd + W. Tarreau + HAProxy Technologies, Inc. + January 2014 + + + Using the IPv6 Flow Label for Load Balancing in Server Farms + +Abstract + + This document describes how the currently specified IPv6 flow label + can be used to enhance layer 3/4 (L3/4) load distribution and + balancing for large server farms. + +Status of This Memo + + This document is not an Internet Standards Track specification; it is + published for informational purposes. + + This document is a product of the Internet Engineering Task Force + (IETF). It represents the consensus of the IETF community. It has + received public review and has been approved for publication by the + Internet Engineering Steering Group (IESG). Not all documents + approved by the IESG are a candidate for any level of Internet + Standard; see Section 2 of RFC 5741. 
+ + Information about the current status of this document, any errata, + and how to provide feedback on it may be obtained at + http://www.rfc-editor.org/info/rfc7098. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + +Carpenter, et al. Informational [Page 1] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2 + 2. Summary of Flow Label Specification . . . . . . . . . . . . . 2 + 3. Summary of Server Farm Load-Balancing Techniques . . . . . . 4 + 4. Applying the Flow Label to Layer 3/4 Load Balancing . . . . . 8 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 10 + 6. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 11 + 7. References . . . . . . . . . . . . . . . . . . . . . . . . . 12 + 7.1. Normative References . . . . . . . . . . . . . . . . . . 12 + 7.2. Informative References . . . . . . . . . . . . . . . . . 12 + +1. Introduction + + The IPv6 flow label has been redefined [RFC6437] and is now a + recommended IPv6 node requirement [RFC6434]. Its use for load + sharing in multipath routing has been specified [RFC6438]. Another + scenario in which the flow label could be used is in load + distribution for large server farms. 
Load distribution is a slightly + more general term than load balancing, but the latter is more + commonly used. In the context of a server farm, both terms refer to + mechanisms that distribute the workload of a server farm among + different servers in order to optimize performance. Server load + balancing commonly applies to HTTP traffic, but most of the + techniques described would apply to other upper-layer applications as + well. This document starts with brief introductions to the flow + label and to server load-balancing techniques, and then describes how + the flow label can be used to enhance load balancers operating on IP + packets and TCP sessions, commonly known as layer 3/4 load balancers. + + The motivation for this approach is to improve the performance of + most types of layer 3/4 load balancers, especially for traffic + including multiple IPv6 extension headers and in particular for + fragmented packets. Fragmented packets, often the result of + customers reaching the load balancer via a VPN with a limited MTU, + are a common performance problem. + +2. Summary of Flow Label Specification + + The IPv6 flow label [RFC6437] is a 20-bit field included in every + IPv6 header [RFC2460]. It is recommended to be supported in all IPv6 + nodes by [RFC6434]. There is additional background material in + [RFC6436] and [RFC6294]. According to its definition, the flow label + should be set to a constant value for a given traffic flow (such as + an HTTP connection), and that value will belong to a uniform + statistical distribution, making it potentially valuable for load- + balancing purposes. + + + + +Carpenter, et al. Informational [Page 2] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + Any device that has access to the IPv6 header has access to the flow + label, and it is at a fixed position in every IPv6 packet. 
In + contrast, transport-layer information, such as the port numbers, is + not always in a fixed position, since it follows any IPv6 extension + headers that may be present. In fact, the logic of finding the + transport header is always more complex for IPv6 than for IPv4, due + to the absence of an Internet Header Length field in IPv6. + Additionally, if packets are fragmented, the flow label will be + present in all fragments, but the transport header will only be in + one packet. Therefore, within the lifetime of a given transport- + layer connection, the flow label can be a more convenient "handle" + than the port number for identifying that particular connection. + + According to RFC 6437, source hosts should set the flow label; + however, if they do not (i.e., its value is zero), forwarding nodes + (such as the first-hop router) may set it instead. In both cases, + the flow label value must be constant for a given transport session, + normally identified by the IPv6 and Transport header 5-tuple. By + default, the flow label value should be calculated by a stateless + algorithm. The resulting value should form part of a statistically + uniform distribution, regardless of which node sets it. + + It is recognized that at the time of writing, very few traffic flows + include a non-zero flow label value. The mechanism described below + is one that can be added to existing load-balancing mechanisms, so + that it will become effective as more and more flows contain a non- + zero label. Even if the flow label is chosen from an imperfectly + uniform distribution, it will nevertheless increase the information + entropy of the IPv6 header as a whole. This allows for progressive + introduction of load balancing based on the flow label. 
+ + If the recommendations in Section 3 of RFC 6437 are followed for + traffic from a given source accessing a well-known TCP port at a + given destination, the flow label can act as a substitute for the + port numbers as far as a load balancer is concerned, and it can be + found at a fixed position in the layer 3 header even if extension + headers are present. + + The flow label is defined as an end-to-end component of the IPv6 + header, but there are three qualifications to this: + + 1. Until the IPv6 flow label specification in RFC 6437 is widely + implemented as recommended by RFC 6434, the flow label will often + be set to the default value of zero. + + + + + + + +Carpenter, et al. Informational [Page 3] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + 2. Because of the recommendation to use a stateless algorithm to + calculate the label, there is a low (but non-zero) probability + that two simultaneous flows from the same source to the same + destination have the same flow label value despite having + different transport-protocol port numbers. + + 3. The Flow Label field is in an unprotected part of the IPv6 + header, which means that intentional or unintentional changes to + its value cannot be easily detected by a receiver. + + The first two points are addressed below in Section 4 and the third + in Section 5. + +3. Summary of Server Farm Load-Balancing Techniques + + Load balancing for server farms is achieved by a variety of methods, + often used in combination [Tarreau]. This section gives a general + overview of common methods, although the flow label is not relevant + to all of them. The actual load-balancing algorithm (the choice of + which server to use for a new client session) is irrelevant to this + discussion. We give examples for HTTP, but analogous techniques may + be used for other application protocols. 
+ + o The simplest method is using the DNS to return different server + addresses for a single name such as www.example.com to different + users. This is typically done by rotating the order in which + different addresses within the server site are listed by the + relevant authoritative DNS server, on the assumption that the + client will pick the first one. Routing may be configured such + that the different addresses are handled by different ingress + routers. Several variants of this load-balancing mechanism exist, + such as expecting some clients to use all the advertised addresses + when multiple connections are involved, or directing the traffic + to multiple sites, also known as global load balancing. None of + these mechanisms are in the scope of this document, and the + proposal in this document does not affect their usability nor aim + to replace them, so they will not be discussed further. + + o Another method, for HTTP servers, is to operate a layer 7 reverse + proxy in front of the server farm. The reverse proxy will present + a single IP address to the world, communicated to clients by a + single AAAA record. For each new client session (an incoming TCP + connection and HTTP request), it will pick a particular server and + proxy the session to it. The act of proxying should be more + efficient and less resource-intensive than the act of serving the + required content. The proxy must retain TCP state and proxy state + for the duration of the session. This TCP state could, + potentially, include the incoming flow label value. + + + +Carpenter, et al. Informational [Page 4] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + o A component of some load-balancing systems is an SSL reverse proxy + farm. The individual SSL proxies handle all cryptographic aspects + and exchange unencrypted HTTP with the actual servers. 
Thus, from + the load-balancing point of view, this really looks just like a + server farm, except that it's specialized for HTTPS. Each proxy + will retain SSL and TCP and maybe HTTP state for the duration of + the session, and the TCP state could potentially include the flow + label. + + o Finally the "front end" of many load-balancing systems is a layer + 3/4 load balancer. While it can be a dedicated device, it is also + a standard function of some network switches or routers (e.g. + using Equal-Cost Multipath Routing (ECMP) [RFC2991]). In this + case, it is the layer 3/4 load balancer whose IP address is + published as the primary AAAA record for the service. All client + sessions will pass through this device. Depending on the specific + scenario, the balancer will assign new sessions among the actual + application servers, across an SSL proxy farm, or among a set of + layer 7 proxies. In all cases, the layer 3/4 load balancer has to + classify incoming packets very quickly and choose the target + server or proxy so as to ensure persistence. 'Persistence' is + defined as the guarantee that a given client session will run to + completion on a single server. The layer 3/4 load balancer + therefore needs to inspect each incoming packet to classify it. + There are two common types of layer 3/4 load balancers, the + totally stateless ones which only act on single packets, generally + involving a per-packet hashing of easy-to-find information such as + the source address and/or port into a server number, and the + stateful ones that take the routing decision on the very first + packets of a session and maintain the same direction for all + packets belonging to the same session. Clearly, both types of + layer 3/4 balancers could inspect and make use of the flow label + value. + + Our focus is on how the balancer identifies a particular flow. 
+ For clarity, note that two aspects of layer 3/4 load balancers are + not affected by use of the flow label to identify sessions: + + 1. Balancers use various techniques to redirect traffic to a + specific target server. + + + All servers are configured with the same IP address, they + are all on the same LAN, and the load balancer sends + directly to their individual MAC addresses. In this case, + return packets from the server to the client are sent back + without passing through the balancer, a technique known as + direct server return, but we are not concerned here with + the return packets. + + + +Carpenter, et al. Informational [Page 5] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + + All servers are configured with the same IP address, + treated locally as an anycast address by layer 3 ECMP + routing. + + + Each server has its own IP address, and the balancer uses + an IP-in-IP tunnel to reach it. + + + Each server has its own IP address, and the balancer + performs NAPT (Network Address and Port Translation) to + deliver the client's packets to that address. + + + The choice between these methods is not affected by use of + the flow label. + + 2. A layer 3/4 balancer must correctly handle Path MTU Discovery + by forwarding relevant ICMPv6 packets in both directions. + This too is not directly affected by use of the flow label. + It should be noted that there may be difficulty correlating an + ICMPv6 "Packet too big" response with the session it refers + to, but that is out of the scope of the present document. + + The following diagram, inspired by [Tarreau], shows a layout with + various methods in use together. (Below, "ASIC" stands for + "Application-Specific Integrated Circuit".) + + + + + + + + + + + + + + + + + + + + + + + + + + + +Carpenter, et al. 
Informational [Page 6] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + ___________________________________________ + ( ) + ( Clients in the Internet ) + (___________________________________________) + | | + ------------ DNS-based ------------ + | Ingress | load splitting | Ingress | + | router | affects | router | + ------------ routing ------------ + ___|____________________________|___ + | | + | | + | | + ------------ ------------ + | L3/4 ASIC| | L3/4 ASIC| + | balancer | | balancer | + ------------ ------------ + | load | + | spreading | + __________|________________________|___________ + | | | | + ------------ ------------ -------- -------- + |HTTP proxy|...|HTTP proxy| | SSL |...| SSL | + | balancer | | balancer | | proxy| | proxy| + ------------ ------------ -------- -------- + ____|_____________|_____________|_________|_____ + | | | | | + -------- -------- -------- -------- -------- + |HTTP | |HTTP | |HTTP | |HTTP | |HTTP | + |server| |server| |server| |server| |server| + -------- -------- -------- -------- -------- + + From the previous paragraphs, we can identify several points in this + diagram where the flow label might be relevant: + + 1. Layer 3/4 load balancers. + + 2. SSL proxies. + + 3. HTTP proxies. + + However, usage by the proxies seems unlikely to affect performance, + because they must in any case process the application-layer header, + so in this document we focus only on layer 3/4 balancers. + + + + + + + +Carpenter, et al. Informational [Page 7] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +4. Applying the Flow Label to Layer 3/4 Load Balancing + + The suggested model for using the flow label to enhance a layer 3/4 + load-balancing mechanism is as follows: + + o We are only concerned with IPv6 traffic in which the flow label + value has been set according to [RFC6437]. 
If the flow label of + an incoming packet is zero, load balancers will continue to use + the transport header in the traditional way. As the use of the + flow label becomes more prevalent according to RFC 6434, load + balancers, and therefore users, will reap a growing performance + benefit. + + o If the flow label of an incoming packet is non-zero, layer 3/4 + load balancers can use the 2-tuple {source address, flow label} as + the session key for whatever load distribution algorithm they + support. Alternatively, they might use the 3-tuple {dest address, + source address, flow label}, especially if the server farm + supports multiple server IP addresses. Locating either tuple in + the fixed IPv6 header will be significantly quicker than searching + for the transport port numbers later in the packet. Moreover, the + transport-layer information such as the source port is not + repeated in fragments, which generally prevents stateless load + balancers from supporting fragmented traffic since they generally + cannot reassemble fragments. + + A stateless layer 3/4 load balancer would simply apply a hash + algorithm to the 2-tuple or 3-tuple on all packets in order to + select the same target server consistently for a given flow. + Needless to say, the hash algorithm has to be well chosen for its + purpose, but this problem is common to several forms of stateless + load balancing. The discussion in [RFC6438] applies. + + A stateful layer 3/4 load balancer would apply its usual load + distribution algorithm to the first packet of a session, and store + the {tuple, server} association in a table so that subsequent + packets belonging to the same session are forwarded to the same + server. Thus, for all subsequent packets of the session, it can + ignore all IPv6 extension headers, which should lead to a + performance benefit. Whether this benefit is valuable will depend + on engineering details of the specific load balancer. 
+ + Note that such a balancer will not identify new transport sessions + from the same source that use the same flow label; they will be + delivered to the same server. This is like the behavior of + existing hash-based layer 4 balancers that always send similarly + hashed packets to the same destination. However, a global state + table in a flow label balancer cannot be shared between multiple + + + +Carpenter, et al. Informational [Page 8] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + services if these services rely on transport-layer information, + since the goal of using the flow label is to avoid looking up that + information. + + A related issue is that the balancer will not detect FIN/ACK + sequences at the end of sessions. Therefore, it will rely on + inactivity timers to delete session state. However, all existing + balancers must maintain such timers to deal with hung sessions, + and the practical impact on memory utilization is unlikely to be + significant. + + o Layer 3/4 balancers that redirect the incoming packets by NAPT are + not expected to obtain any saving of time by using the flow label, + because they have no choice but to follow the extension header + chain in order to locate and modify the port number and transport + checksum. The same would apply to balancers that perform TCP + state tracking for any reason. + + o Note that correct handling of ICMPv6 for Path MTU Discovery + requires the layer 3/4 balancer to keep state for the client + source address, independently of either the port numbers or the + flow label. + + o SSL and HTTP proxies, if present, should forward the flow label + value towards the server. This usually has no performance + benefit, but it is consistent with the general model for the flow + label described in RFC 6437. + + It should be noted that the performance benefit, if any, depends + entirely on engineering trade-offs in the design of the layer 3/4 + balancer. 
An extra test is needed to check if the label is non-zero, + but if there is a non-zero label, all logic for handling extension + headers can be skipped except for the first packet of a new flow. + Since the identifying state to be stored is only the tuple and the + server identifier, storage requirements will be reduced. + Additionally, the method will work for fragmented traffic and for + flows where the transport information is missing (unknown transport + protocol) or obfuscated (e.g., IPsec). Traffic reaching the load + balancer via a VPN is particularly prone to the fragmentation issue, + due to MTU size issues. For some load-balancer designs, these are + very significant advantages. + + In the unlikely event of two simultaneous flows from the same source + address having the same flow label value, the two flows would end up + assigned to the same server, where they would be distinguished as + normal by their port numbers. There are approximately one million + possible flow label values, and if the rules for flow label + generation [RFC6437] are followed, this would be a statistically rare + + + +Carpenter, et al. Informational [Page 9] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + event, and would not damage the overall load-balancing effect. + Moreover, with a million possible label values, it is very likely + that there will be many more flow label values than servers at most + sites, so it is already expected that multiple flow label values will + end up on the same server for a given client IP address. + + In the case that many thousands of clients are hidden behind the same + large-scale NAPT with a single shared IP address, the assumption of + low probability of conflicts might become incorrect, unless flow + label values are random enough to avoid following similar sequences + for all clients. 
This is not expected to be a factor for IPv6 + anyway, since there is no need to implement large-scale NAPT with + address sharing [RFC4864]. The probability of conflicts is low for + sites that implement network prefix translation [RFC6296], since this + technique provides a different address for each client. + +5. Security Considerations + + Security aspects of the flow label are discussed in [RFC6437]. As + noted there, a malicious source or man-in-the-middle could disturb + load balancing by manipulating flow labels. This risk already exists + today where the source address and port are used as a hashing key in + layer 3/4 load balancers, as well as where a persistence cookie is + used in HTTP to designate a server. It even exists on layer 3 + components that only rely on the source address to select a + destination, making them more DDoS-prone. Nevertheless, all these + methods are currently used because the benefits for load balancing + and persistence hugely outweigh the risks. The flow label does not + significantly alter this situation. + + Specifically, the IPv6 flow label specification [RFC6437] states that + "stateless classifiers should not use the flow label alone to control + load distribution, and stateful classifiers should include explicit + methods to detect and ignore suspect flow label values." The former + point is answered by also using the source address. The latter point + is more complex. If the risk is considered serious, the site ingress + router or the layer 3/4 balancer should use a suitable heuristic to + verify incoming flows with non-zero flow label values. If a flow + from a given source address and port number does not have a constant + flow label value, it is suspect and should be dropped. This would + deal with both intentional and accidental changes to the flow label. 
+ + A malicious source or man-in-the-middle could generate a flow in + which the flow label is constant but the transport port numbers in + some packets are invalid. Such packets, if load-balanced only on the + basis of the flow label, could reach the target server and create a + single-source DoS attack on its TCP engine. + + + + +Carpenter, et al. Informational [Page 10] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + RFC 6437 notes in its Security Considerations that if the covert + channel risk is considered significant, a firewall might rewrite non- + zero flow labels. As long as this is done as described in RFC 6437, + it will not invalidate the mechanisms described above. + + The flow label may be of use in protecting against DDoS attacks + against servers. As noted in RFC 6437, a source should generate flow + label values that are hard to predict, most likely by including a + secret nonce in the hash used to generate each label. The attacker + does not know the nonce and therefore has no way to invent flow + labels that will all target the same server, even with knowledge of + both the hash algorithm and the load-balancing algorithm. Still, it + is important to understand that it is always trivial to force a load + balancer to stick to the same server during an attack, so the + security of the whole solution must not rely on the unpredictability + of the flow label values alone, but should include defensive measures + like most load balancers already have against abnormal use of source + addresses or session cookies. + + New flows are assigned to a server according to any of the usual + algorithms available on the load balancer (e.g., least connections, + round robin, etc.). The association between the 2-tuple {source + address, flow label} and the server is stored in a table (often + called stick table) so that future traffic from the same source using + the same flow label can be sent to the same server. 
This method is + more robust against a loss of server and also makes it harder for an + attacker to target a specific server, because the association between + a flow label value and a server is not known externally. + + In the case that a stateless hash function is used to assign client + packets to specific servers, it may be advisable to use a + cryptographic hash function of some kind, to ensure that an attacker + cannot predict the behavior of the load balancer. + +6. Acknowledgements + + Valuable comments and contributions were made by Fred Baker, Olivier + Bonaventure, Ben Campbell, Lorenzo Colitti, Linda Dunbar, Donald + Eastlake, Joel Jaeggli, Gurudeep Kamat, Warren Kumari, Julia + Renouard, Julius Volz, and others. + + + + + + + + + + + +Carpenter, et al. Informational [Page 11] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +7. References + +7.1. Normative References + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + + [RFC6434] Jankiewicz, E., Loughney, J., and T. Narten, "IPv6 Node + Requirements", RFC 6434, December 2011. + + [RFC6437] Amante, S., Carpenter, B., Jiang, S., and J. Rajahalme, + "IPv6 Flow Label Specification", RFC 6437, November 2011. + +7.2. Informative References + + [RFC2991] Thaler, D. and C. Hopps, "Multipath Issues in Unicast and + Multicast Next-Hop Selection", RFC 2991, November 2000. + + [RFC4864] Van de Velde, G., Hain, T., Droms, R., Carpenter, B., and + E. Klein, "Local Network Protection for IPv6", RFC 4864, + May 2007. + + [RFC6294] Hu, Q. and B. Carpenter, "Survey of Proposed Use Cases for + the IPv6 Flow Label", RFC 6294, June 2011. + + [RFC6296] Wasserman, M. and F. Baker, "IPv6-to-IPv6 Network Prefix + Translation", RFC 6296, June 2011. + + [RFC6436] Amante, S., Carpenter, B., and S. Jiang, "Rationale for + Update to the IPv6 Flow Label Specification", RFC 6436, + November 2011. + + [RFC6438] Carpenter, B. and S. 
Amante, "Using the IPv6 Flow Label + for Equal Cost Multipath Routing and Link Aggregation in + Tunnels", RFC 6438, November 2011. + + [Tarreau] Tarreau, W., "Making applications scalable with load + balancing", 2006, <http://1wt.eu/articles/2006_lb/>. + + + + + + + + + + + + + +Carpenter, et al. Informational [Page 12] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +Authors' Addresses + + Brian Carpenter + Department of Computer Science + University of Auckland + PB 92019 + Auckland 1142 + New Zealand + + EMail: brian.e.carpenter@gmail.com + + + Sheng Jiang + Huawei Technologies Co., Ltd + Q14, Huawei Campus + No.156 Beiqing Road + Hai-Dian District, Beijing 100095 + P.R. China + + EMail: jiangsheng@huawei.com + + + Willy Tarreau + HAProxy Technologies, Inc. + R&D Network Products + 3 rue du petit Robinson + 78350 Jouy-en-Josas + France + + EMail: willy@haproxy.com + + + + + + + + + + + + + + + + + + + + + +Carpenter, et al. Informational [Page 13] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt new file mode 100644 index 0000000..6e02a8d --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt @@ -0,0 +1,5185 @@ + + +RFC: 761 +IEN: 129 + + + + + + + + DOD STANDARD + + TRANSMISSION CONTROL PROTOCOL + + + + January 1980 + + + + + + + + + + + + + + + + prepared for + + Defense Advanced Research Projects Agency + Information Processing Techniques Office + 1400 Wilson Boulevard + Arlington, Virginia 22209 + + + + + + + + by + + Information Sciences Institute + University of Southern California + 4676 Admiralty Way + Marina del Rey, California 90291 + +January 1980 + Transmission Control Protocol + + + + TABLE OF CONTENTS + + PREFACE ........................................................ iii + +1. INTRODUCTION ..................................................... 1 + + 1.1 Motivation .................................................... 1 + 1.2 Scope ......................................................... 
2 + 1.3 About This Document ........................................... 2 + 1.4 Interfaces .................................................... 3 + 1.5 Operation ..................................................... 3 + +2. PHILOSOPHY ....................................................... 7 + + 2.1 Elements of the Internetwork System ........................... 7 + 2.2 Model of Operation ............................................ 7 + 2.3 The Host Environment .......................................... 8 + 2.4 Interfaces .................................................... 9 + 2.5 Relation to Other Protocols ................................... 9 + 2.6 Reliable Communication ....................................... 10 + 2.7 Connection Establishment and Clearing ........................ 10 + 2.8 Data Communication ........................................... 12 + 2.9 Precedence and Security ...................................... 13 + 2.10 Robustness Principle ......................................... 13 + +3. FUNCTIONAL SPECIFICATION ........................................ 15 + + 3.1 Header Format ................................................ 15 + 3.2 Terminology .................................................. 19 + 3.3 Sequence Numbers ............................................. 24 + 3.4 Establishing a connection .................................... 29 + 3.5 Closing a Connection ......................................... 35 + 3.6 Precedence and Security ...................................... 38 + 3.7 Data Communication ........................................... 38 + 3.8 Interfaces ................................................... 42 + 3.9 Event Processing ............................................. 52 + +GLOSSARY ............................................................ 75 + +REFERENCES .......................................................... 
83 + + + + + + + + + + + + [Page i] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page ii] + + +January 1980 + Transmission Control Protocol + + + + PREFACE + + + +This document describes the DoD Standard Transmission Control Protocol +(TCP). There have been eight earlier editions of the ARPA TCP +specification on which this standard is based, and the present text +draws heavily from them. There have been many contributors to this work +both in terms of concepts and in terms of text. This edition +incorporates the addition of security, compartmentation, and precedence +concepts into the TCP specification. + + Jon Postel + + Editor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page iii] + + +January 1980 +RFC:761 +IEN:129 +Replaces: IENs 124, 112, +81, 55, 44, 40, 27, 21, 5 + + DOD STANDARD + + TRANSMISSION CONTROL PROTOCOL + + + + 1. INTRODUCTION + +The Transmission Control Protocol (TCP) is intended for use as a highly +reliable host-to-host protocol between hosts in packet-switched computer +communication networks, and especially in interconnected systems of such +networks. + +This document describes the functions to be performed by the +Transmission Control Protocol, the program that implements it, and its +interface to programs or users that require its services. + +1.1. Motivation + + Computer communication systems are playing an increasingly important + role in military, government, and civilian environments. This + document primarily focuses its attention on military computer + communication requirements, especially robustness in the presence of + communication unreliability and availability in the presence of + congestion, but many of these problems are found in the civilian and + government sector as well. 
+ + As strategic and tactical computer communication networks are + developed and deployed, it is essential to provide means of + interconnecting them and to provide standard interprocess + communication protocols which can support a broad range of + applications. In anticipation of the need for such standards, the + Deputy Undersecretary of Defense for Research and Engineering has + declared the Transmission Control Protocol (TCP) described herein to + be a basis for DoD-wide inter-process communication protocol + standardization. + + TCP is a connection-oriented, end-to-end reliable protocol designed to + fit into a layered hierarchy of protocols which support multi-network + applications. The TCP provides for reliable inter-process + communication between pairs of processes in host computers attached to + distinct but interconnected computer communication networks. Very few + assumptions are made as to the reliability of the communication + protocols below the TCP layer. TCP assumes it can obtain a simple, + potentially unreliable datagram service from the lower level + protocols. In principle, the TCP should be able to operate above a + wide spectrum of communication systems ranging from hard-wired + connections to packet-switched or circuit-switched networks. + + + [Page 1] + + + January 1980 +Transmission Control Protocol +Introduction + + + + TCP is based on concepts first described by Cerf and Kahn in [1]. The + TCP fits into a layered protocol architecture just above a basic + Internet Protocol [2] which provides a way for the TCP to send and + receive variable-length segments of information enclosed in internet + datagram "envelopes". The internet datagram provides a means for + addressing source and destination TCPs in different networks. The + internet protocol also deals with any fragmentation or reassembly of + the TCP segments required to achieve transport and delivery through + multiple networks and interconnecting gateways. 
The internet protocol + also carries information on the precedence, security classification + and compartmentation of the TCP segments, so this information can be + communicated end-to-end across multiple networks. + + Protocol Layering + + +---------------------+ + | higher-level | + +---------------------+ + | TCP | + +---------------------+ + | internet protocol | + +---------------------+ + |communication network| + +---------------------+ + + Figure 1 + + Much of this document is written in the context of TCP implementations + which are co-resident with higher level protocols in the host + computer. As a practical matter, many computer systems will be + connected to networks via front-end computers which house the TCP and + internet protocol layers, as well as network specific software. The + TCP specification describes an interface to the higher level protocols + which appears to be implementable even for the front-end case, as long + as a suitable host-to-front end protocol is implemented. + +1.2. Scope + + The TCP is intended to provide a reliable process-to-process + communication service in a multinetwork environment. The TCP is + intended to be a host-to-host protocol in common use in multiple + networks. + +1.3. About this Document + + This document represents a specification of the behavior required of + any TCP implementation, both in its interactions with higher level + protocols and in its interactions with other TCPs. The rest of this + + +[Page 2] + + +January 1980 + Transmission Control Protocol + Introduction + + + + section offers a very brief view of the protocol interfaces and + operation. Section 2 summarizes the philosophical basis for the TCP + design. Section 3 offers both a detailed description of the actions + required of TCP when various events occur (arrival of new segments, + user calls, errors, etc.) and the details of the formats of TCP + segments. + +1.4. 
Interfaces + + The TCP interfaces on one side to user or application processes and on + the other side to a lower level protocol such as Internet Protocol. + + The interface between an application process and the TCP is + illustrated in reasonable detail. This interface consists of a set of + calls much like the calls an operating system provides to an + application process for manipulating files. For example, there are + calls to open and close connections and to send and receive letters on + established connections. It is also expected that the TCP can + asynchronously communicate with application programs. Although + considerable freedom is permitted to TCP implementors to design + interfaces which are appropriate to a particular operating system + environment, a minimum functionality is required at the TCP/user + interface for any valid implementation. + + The interface between TCP and lower level protocol is essentially + unspecified except that it is assumed there is a mechanism whereby the + two levels can asynchronously pass information to each other. + Typically, one expects the lower level protocol to specify this + interface. TCP is designed to work in a very general environment of + interconnected networks. The lower level protocol which is assumed + throughout this document is the Internet Protocol [2]. + +1.5. Operation + + As noted above, the primary purpose of the TCP is to provide reliable, + securable logical circuit or connection service between pairs of + processes. To provide this service on top of a less reliable internet + communication system requires facilities in the following areas: + + Basic Data Transfer + Reliability + Flow Control + Multiplexing + Connections + Precedence and Security + + The basic operation of the TCP in each of these areas is described in + the following paragraphs. 
+ + + [Page 3] + + + January 1980 +Transmission Control Protocol +Introduction + + + + Basic Data Transfer: + + The TCP is able to transfer a continuous stream of octets in each + direction between its users by packaging some number of octets into + segments for transmission through the internet system. In this + stream mode, the TCPs decide when to block and forward data at their + own convenience. + + For users who desire a record-oriented service, the TCP also permits + the user to submit records, called letters, for transmission. When + the sending user indicates a record boundary (end-of-letter), this + causes the TCPs to promptly forward and deliver data up to that + point to the receiver. + + Reliability: + + The TCP must recover from data that is damaged, lost, duplicated, or + delivered out of order by the internet communication system. This + is achieved by assigning a sequence number to each octet + transmitted, and requiring a positive acknowledgment (ACK) from the + receiving TCP. If the ACK is not received within a timeout + interval, the data is retransmitted. At the receiver, the sequence + numbers are used to correctly order segments that may be received + out of order and to eliminate duplicates. Damage is handled by + adding a checksum to each segment transmitted, checking it at the + receiver, and discarding damaged segments. + + As long as the TCPs continue to function properly and the internet + system does not become completely partitioned, no transmission + errors will affect the users. TCP recovers from internet + communication system errors. + + Flow Control: + + TCP provides a means for the receiver to govern the amount of data + sent by the sender. This is achieved by returning a "window" with + every ACK indicating a range of acceptable sequence numbers beyond + the last segment successfully received. For stream mode, the window + indicates an allowed number of octets that the sender may transmit + before receiving further permission. 
For record mode, the window + indicates an allowed amount of buffer space the sender may consume; + this may be more than the number of data octets transmitted if there + is a mismatch between letter size and buffer size. + + + + + + + +[Page 4] + + +January 1980 + Transmission Control Protocol + Introduction + + + + Multiplexing: + + To allow for many processes within a single Host to use TCP + communication facilities simultaneously, the TCP provides a set of + addresses or ports within each host. Concatenated with the network + and host addresses from the internet communication layer, this forms + a socket. A pair of sockets uniquely identifies each connection. + That is, a socket may be simultaneously used in multiple + connections. + + The binding of ports to processes is handled independently by each + Host. However, it proves useful to attach frequently used processes + (e.g., a "logger" or timesharing service) to fixed sockets which are + made known to the public. These services can then be accessed + through the known addresses. Establishing and learning the port + addresses of other processes may involve more dynamic mechanisms. + + Connections: + + The reliability and flow control mechanisms described above require + that TCPs initialize and maintain certain status information for + each data stream. The combination of this information, including + sockets, sequence numbers, and window sizes, is called a connection. + Each connection is uniquely specified by a pair of sockets + identifying its two sides. + + When two processes wish to communicate, their TCPs must first + establish a connection (initialize the status information on each + side). When their communication is complete, the connection is + terminated or closed to free the resources for other uses.
+ + Since connections must be established between unreliable hosts and + over the unreliable internet communication system, a handshake + mechanism with clock-based sequence numbers is used to avoid + erroneous initialization of connections. + + Precedence and Security: + + The users of TCP may indicate the security and precedence of their + communication. Provision is made for default values to be used when + these features are not needed. + + + + + + + + + + [Page 5] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 6] + + +January 1980 + Transmission Control Protocol + + + + 2. PHILOSOPHY + +2.1. Elements of the Internetwork System + + The internetwork environment consists of hosts connected to networks + which are in turn interconnected via gateways. It is assumed here + that the networks may be either local networks (e.g., the ETHERNET) or + large networks (e.g., the ARPANET), but in any case are based on + packet switching technology. The active agents that produce and + consume messages are processes. Various levels of protocols in the + networks, the gateways, and the hosts support an interprocess + communication system that provides two-way data flow on logical + connections between process ports. + + We specifically assume that data is transmitted from host to host + through means of a set of networks. When we say network, we have in + mind a packet switched network (PSN). This assumption is probably + unnecessary, since a circuit switched network or a hybrid combination + of the two could also be used; but for concreteness, we explicitly + assume that the hosts are connected to one or more packet switches of + a PSN. + + The term packet is used generically here to mean the data of one + transaction between a host and a packet switch. 
The format of data + blocks exchanged between the packet switches in a network will + generally not be of concern to us. + + Hosts are computers attached to a network, and from the communication + network's point of view, are the sources and destinations of packets. + Processes are viewed as the active elements in host computers (in + accordance with the fairly common definition of a process as a program + in execution). Even terminals and files or other I/O devices are + viewed as communicating with each other through the use of processes. + Thus, all communication is viewed as inter-process communication. + + Since a process may need to distinguish among several communication + streams between itself and another process (or processes), we imagine + that each process may have a number of ports through which it + communicates with the ports of other processes. + +2.2. Model of Operation + + Processes transmit data by calling on the TCP and passing buffers of + data as arguments. The TCP packages the data from these buffers into + segments and calls on the internet module to transmit each segment to + the destination TCP. The receiving TCP places the data from a segment + into the receiving user's buffer and notifies the receiving user. The + TCPs include control information in the segments which they use to + ensure reliable ordered data transmission. + + + [Page 7] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + The model of internet communication is that there is an internet + protocol module associated with each TCP which provides an interface + to the local network. This internet module packages TCP segments + inside internet datagrams and routes these datagrams to a destination + internet module or intermediate gateway. To transmit the datagram + through the local network, it is embedded in a local network packet. 
+ + The packet switches may perform further packaging, fragmentation, or + other operations to achieve the delivery of the local packet to the + destination internet module. + + At a gateway between networks, the internet datagram is "unwrapped" + from its local packet and examined to determine through which network + the internet datagram should travel next. The internet datagram is + then "wrapped" in a local packet suitable to the next network and + routed to the next gateway, or to the final destination. + + A gateway is permitted to break up an internet datagram into smaller + internet datagram fragments if this is necessary for transmission + through the next network. To do this, the gateway produces a set of + internet datagrams; each carrying a fragment. Fragments may be broken + into smaller ones at intermediate gateways. The internet datagram + fragment format is designed so that the destination internet module + can reassemble fragments into internet datagrams. + + A destination internet module unwraps the segment from the datagram + (after reassembling the datagram, if necessary) and passes it to the + destination TCP. + + This simple model of the operation glosses over many details. One + important feature is the type of service. This provides information + to the gateway (or internet module) to guide it in selecting the + service parameters to be used in traversing the next network. + Included in the type of service information is the precedence of the + datagram. Datagrams may also carry security information to permit + host and gateways that operate in multilevel secure environments to + properly segregate datagrams for security considerations. + +2.3. The Host Environment + + The TCP is assumed to be a module in a time sharing operating system. + The users access the TCP much like they would access the file system. + The TCP may call on other operating system functions, for example, to + manage data structures. 
The actual interface to the network is + assumed to be controlled by a device driver module. The TCP does not + call on the network device driver directly, but rather calls on the + internet datagram protocol module which may in turn call on the device + driver. + + +[Page 8] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + Though it is assumed here that processes are supported by the host + operating system, the mechanisms of TCP do not preclude implementation + of the TCP in a front-end processor. However, in such an + implementation, a host-to-front-end protocol must provide the + functionality to support the type of TCP-user interface described + above. + +2.4. Interfaces + + The TCP/user interface provides for calls made by the user on the TCP + to OPEN or CLOSE a connection, to SEND or RECEIVE data, or to obtain + STATUS about a connection. These calls are like other calls from user + programs on the operating system, for example, the calls to open, read + from, and close a file. + + The TCP/internet interface provides calls to send and receive + datagrams addressed to TCP modules in hosts anywhere in the internet + system. These calls have parameters for passing the address, type of + service, precedence, security, and other control information. + +2.5. Relation to Other Protocols + + The following diagram illustrates the place of the TCP in the protocol + hierarchy: + + + +------+ +-----+ +-----+ +-----+ + |Telnet| | FTP | |Voice| ... | | Application Level + +------+ +-----+ +-----+ +-----+ + | | | | + +-----+ +-----+ +-----+ + | TCP | | RTP | ... | | Host Level + +-----+ +-----+ +-----+ + | | | + +-------------------------------+ + | Internet Protocol | Gateway Level + +-------------------------------+ + | + +---------------------------+ + | Local Network Protocol | Network Level + +---------------------------+ + | + + + + Protocol Relationships + + Figure 2. 
+ + + [Page 9] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + It is expected that the TCP will be able to support higher level + protocols efficiently. It should be easy to interface higher level + protocols like the ARPANET Telnet [3] or AUTODIN II THP to the TCP. + +2.6. Reliable Communication + + A stream of data sent on a TCP connection is delivered reliably and in + order at the destination. + + Transmission is made reliable via the use of sequence numbers and + acknowledgments. Conceptually, each octet of data is assigned a + sequence number. The sequence number of the first octet of data in a + segment is the sequence number transmitted with that segment and is + called the segment sequence number. Segments also carry an + acknowledgment number which is the sequence number of the next + expected data octet of transmissions in the reverse direction. When + the TCP transmits a segment, it puts a copy on a retransmission queue + and starts a timer; when the acknowledgment for that data is received, + the segment is deleted from the queue. If the acknowledgment is not + received before the timer runs out, the segment is retransmitted. + + An acknowledgment by TCP does not guarantee that the data has been + delivered to the end user, but only that the receiving TCP has taken + the responsibility to do so. + + To govern the flow of data into a TCP, a flow control mechanism is + employed. The data receiving TCP reports a window to the sending + TCP. This window specifies the number of octets, starting with the + acknowledgment number, that the data receiving TCP is currently + prepared to receive. + +2.7. Connection Establishment and Clearing + + To identify the separate data streams that a TCP may handle, the TCP + provides a port identifier. Since port identifiers are selected + independently by each operating system, TCP, or user, they might not + be unique.
To provide for unique addresses at each TCP, we + concatenate an internet address identifying the TCP with a port + identifier to create a socket which will be unique throughout all + networks connected together. + + A connection is fully specified by the pair of sockets at the ends. A + local socket may participate in many connections to different foreign + sockets. A connection can be used to carry data in both directions, + that is, it is "full duplex". + + TCPs are free to associate ports with processes however they choose. + However, several basic concepts seem necessary in any implementation. + + +[Page 10] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + There must be well-known sockets which the TCP associates only with + the "appropriate" processes by some means. We envision that processes + may "own" ports, and that processes can only initiate connections on + the ports they own. (Means for implementing ownership is a local + issue, but we envision a Request Port user command, or a method of + uniquely allocating a group of ports to a given process, e.g., by + associating the high order bits of a port name with a given process.) + + A connection is specified in the OPEN call by the local port and + foreign socket arguments. In return, the TCP supplies a (short) local + connection name by which the user refers to the connection in + subsequent calls. There are several things that must be remembered + about a connection. To store this information we imagine that there + is a data structure called a Transmission Control Block (TCB). One + implementation strategy would have the local connection name be a + pointer to the TCB for this connection. The OPEN call also specifies + whether the connection establishment is to be actively pursued, or to + be passively waited for. + + A passive OPEN request means that the process wants to accept incoming + connection requests rather than attempting to initiate a connection. 
+ Often the process requesting a passive OPEN will accept a connection + request from any caller. In this case a foreign socket of all zeros + is used to denote an unspecified socket. Unspecified foreign sockets + are allowed only on passive OPENs. + + A service process that wished to provide services for unknown other + processes could issue a passive OPEN request with an unspecified + foreign socket. Then a connection could be made with any process that + requested a connection to this local socket. It would help if this + local socket were known to be associated with this service. + + Well-known sockets are a convenient mechanism for a priori associating + a socket address with a standard service. For instance, the + "Telnet-Server" process might be permanently assigned to a particular + socket, and other sockets might be reserved for File Transfer, Remote + Job Entry, Text Generator, Echoer, and Sink processes (the last three + being for test purposes). A socket address might be reserved for + access to a "Look-Up" service which would return the specific socket + at which a newly created service would be provided. The concept of a + well-known socket is part of the TCP specification, but the assignment + of sockets to services is outside this specification. + + Processes can issue passive OPENs and wait for matching calls from + other processes and be informed by the TCP when connections have been + established. Two processes which issue calls to each other at the + same time are correctly connected. This flexibility is critical for + + + + [Page 11] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + the support of distributed computing in which components act + asynchronously with respect to each other. + + There are two cases for matching the sockets in the local request and + an incoming segment. In the first case, the local request has fully + specified the foreign socket. In this case, the match must be exact. 
+ In the second case, the local request has left the foreign socket + unspecified. In this case, any foreign socket is acceptable as long + as the local sockets match. + + If there are several pending passive OPENs (recorded in TCBs) with the + same local socket, an incoming segment should be matched to a request + with the specific foreign socket in the segment, if such a request + exists, before selecting a request with an unspecified foreign socket. + + The procedures to establish and clear connections utilize synchronize + (SYN) and finis (FIN) control flags and involve an exchange of three + messages. This exchange has been termed a three-way hand shake [4]. + + A connection is initiated by the rendezvous of an arriving segment + containing a SYN and a waiting TCB entry created by a user OPEN + command. The matching of local and foreign sockets determines when a + connection has been initiated. The connection becomes "established" + when sequence numbers have been synchronized in both directions. + + The clearing of a connection also involves the exchange of segments, + in this case carrying the FIN control flag. + +2.8. Data Communication + + The data that flows on a connection may be thought of as a stream of + octets, or as a sequence of records. In TCP the records are called + letters and are of variable length. The sending user indicates in + each SEND call whether the data in that call completes a letter by the + setting of the end-of-letter parameter. + + The length of a letter may be such that it must be broken into + segments before it can be transmitted to its destination. We assume + that the segments will normally be reassembled into a letter before + being passed to the receiving process. A segment may contain all or a + part of a letter, but a segment never contains parts of more than one + letter. The end of a letter is marked by the appearance of an EOL + control flag in a segment. 
A sending TCP is allowed to collect data + from the sending user and to send that data in segments at its own + convenience, until the end of letter is signaled; then it must send all + unsent data. When a receiving TCP has a complete letter, it must not + wait for more data from the sending TCP before passing the letter to + the receiving process. + + +[Page 12] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + There is a coupling between letters as sent and the use of buffers of + data that cross the TCP/user interface. Each time an end-of-letter + (EOL) flag is associated with data placed into the receiving user's + buffer, the buffer is returned to the user for processing even if the + buffer is not filled. If a letter is longer than the user's buffer, + the letter is passed to the user in buffer size units, the last of + which may be only partly full. The receiving TCP's buffer size may be + communicated to the sending TCP when the connection is being + established. + + The TCP is responsible for regulating the flow of segments on the + connections, as a way of preventing itself from becoming saturated or + overloaded with traffic. This is done using a window flow control + mechanism. The data receiving TCP reports to the data sending TCP a + window which is the range of sequence numbers of data octets that the data + receiving TCP is currently prepared to accept. + + TCP also provides a means to communicate to the receiver of data that + at some point further along in the data stream than the receiver is + currently reading there is urgent data. TCP does not attempt to + define what the user specifically does upon being notified of pending + urgent data, but the general notion is that the receiving process + should take action to read through to the end of the urgent data quickly. + +2.9.
Precedence and Security + + The TCP makes use of the internet protocol type of service field and + security option to provide precedence and security on a per connection + basis to TCP users. Not all TCP modules will necessarily function in + a multilevel secure environment, some may be limited to unclassified + use only, and others may operate at only one security level and + compartment. Consequently, some TCP implementations and services to + users may be limited to a subset of the multilevel secure case. + + TCP modules which operate in a multilevel secure environment should + properly mark outgoing segments with the security, compartment, and + precedence. Such TCP modules should also provide to their users or + higher level protocols such as Telnet or THP an interface to allow + them to specify the desired security level, compartment, and + precedence of connections. + +2.10. Robustness Principle + + TCP implementations should follow a general principle of robustness: + be conservative in what you do, be liberal in what you accept from + others. + + + + + [Page 13] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 14] + + +January 1980 + Transmission Control Protocol + + + + 3. FUNCTIONAL SPECIFICATION + +3.1. Header Format + + TCP segments are sent as internet datagrams. The Internet Protocol + header carries several information fields, including the source and + destination host addresses [2]. A TCP header follows the internet + header, supplying information specific to the TCP protocol. This + division allows for the existence of host level protocols other than + TCP. 
+ + TCP Header Format + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Port | Destination Port | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Acknowledgment Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Data | |U|A|E|R|S|F| | + | Offset| Reserved |R|C|O|S|Y|I| Window | + | | |G|K|L|T|N|N| | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum | Urgent Pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Options | Padding | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + TCP Header Format + + Note that one tick mark represents one bit position. + + Figure 3. + + Source Port: 16 bits + + The source port number. + + Destination Port: 16 bits + + The destination port number. + + + + + [Page 15] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Sequence Number: 32 bits + + The sequence number of the first data octet in this segment (except + when SYN is present). + + Acknowledgment Number: 32 bits + + If the ACK control bit is set this field contains the value of the + next sequence number the sender of the segment is expecting to + receive. Once a connection is established this is always sent. + + Data Offset: 4 bits + + The number of 32 bit words in the TCP Header. This indicates where + the data begins. The TCP header including options is an integral + number of 32 bits long. + + Reserved: 6 bits + + Reserved for future use. Must be zero. 
+ + Control Bits: 6 bits (from left to right): + + URG: Urgent Pointer field significant + ACK: Acknowledgment field significant + EOL: End of Letter + RST: Reset the connection + SYN: Synchronize sequence numbers + FIN: No more data from sender + + Window: 16 bits + + The number of data octets beginning with the one indicated in the + acknowledgment field which the sender of this segment is willing to + accept. + + Checksum: 16 bits + + The checksum field is the 16 bit one's complement of the one's + complement sum of all 16 bit words in the header and text. If a + segment contains an odd number of header and text octets to be + checksummed, the last octet is padded on the right with zeros to + form a 16 bit word for checksum purposes. The pad is not + transmitted as part of the segment. While computing the checksum, + the checksum field itself is replaced with zeros. + + The checksum also covers a 96 bit pseudo header conceptually + prefixed to the TCP header. This pseudo header contains the Source + + +[Page 16] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Address, the Destination Address, the Protocol, and TCP length. + This gives the TCP protection against misrouted segments. This + information is carried in the Internet Protocol and is transferred + across the TCP/Network interface in the arguments or results of + calls by the TCP on the IP. + + +--------------------------+ + | Source Address | + +--------------------------+ + | Destination Address | + +--------------------------+ + | zero | PTCL | TCP Length | + +--------------------------+ + + The TCP Length is the TCP header plus the data length in octets + (this is not an explicitly transmitted quantity, but is computed + from the total length, and the header length). + + Urgent Pointer: 16 bits + + This field communicates the current value of the urgent pointer as a + positive offset from the sequence number in this segment.
The + urgent pointer points to the sequence number of the octet following + the urgent data. This field should only be interpreted in segments + with the URG control bit set. + + Options: variable + + Options may occupy space at the end of the TCP header and are a + multiple of 8 bits in length. All options are included in the + checksum. An option may begin on any octet boundary. There are two + cases for the format of an option: + + Case 1: A single octet of option-kind. + + Case 2: An octet of option-kind, an octet of option-length, and + the actual option-data octets. + + The option-length counts the two octets of option-kind and + option-length as well as the option-data octets. + + Note that the list of options may be shorter than the data offset + field might imply. The content of the header beyond the + End-of-Option option should be header padding (i.e., zero). + + A TCP must implement all options. + + + + + [Page 17] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Currently defined options include (kind indicated in octal): + + Kind Length Meaning + ---- ------ ------- + 0 - End of option list. + 1 - No-Operation. + 100 - Reserved. + 105 4 Buffer Size. + + + Specific Option Definitions + + End of Option List + + +--------+ + |00000000| + +--------+ + Kind=0 + + This option code indicates the end of the option list. This + might not coincide with the end of the TCP header according to + the Data Offset field. This is used at the end of all options, + not the end of each option, and need only be used if the end of + the options would not otherwise coincide with the end of the TCP + header. + + No-Operation + + +--------+ + |00000001| + +--------+ + Kind=1 + + This option code may be used between options, for example, to + align the beginning of a subsequent option on a word boundary. 
+ There is no guarantee that senders will use this option, so + receivers must be prepared to process options even if they do + not begin on a word boundary. + + Buffer Size + + +--------+--------+---------+--------+ + |01000101|00000100| buffer size | + +--------+--------+---------+--------+ + Kind=105 Length=4 + + + + + +[Page 18] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Buffer Size Option Data: 16 bits + + If this option is present, then it communicates the receive + buffer size at the TCP which sends this segment. This field + should only be sent in the initial connection request (i.e., + in segments with the SYN control bit set). If this option is + not used, the default buffer size of one octet is assumed. + + Padding: variable + + The TCP header padding is used to ensure that the TCP header ends + and data begins on a 32 bit boundary. The padding is composed of + zeros. + +3.2. Terminology + + Before we can discuss very much about the operation of the TCP we need + to introduce some detailed terminology. The maintenance of a TCP + connection requires the remembering of several variables. We conceive + of these variables being stored in a connection record called a + Transmission Control Block or TCB. Among the variables stored in the + TCB are the local and remote socket numbers, the security and + precedence of the connection, pointers to the user's send and receive + buffers, pointers to the retransmit queue and to the current segment. + In addition several variables relating to the send and receive + sequence numbers are stored in the TCB. 
+ + Send Sequence Variables + + SND.UNA - send unacknowledged + SND.NXT - send sequence + SND.WND - send window + SND.BS - send buffer size + SND.UP - send urgent pointer + SND.WL - send sequence number used for last window update + SND.LBB - send last buffer beginning + ISS - initial send sequence number + + Receive Sequence Variables + + RCV.NXT - receive sequence + RCV.WND - receive window + RCV.BS - receive buffer size + RCV.UP - receive urgent pointer + RCV.LBB - receive last buffer beginning + IRS - initial receive sequence number + + + + + [Page 19] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + The following diagrams may help to relate some of these variables to + the sequence space. + + Send Sequence Space + + 1 2 3 4 + ----------|----------|----------|---------- + SND.UNA SND.NXT SND.UNA + +SND.WND + + 1 - old sequence numbers which have been acknowledged + 2 - sequence numbers of unacknowledged data + 3 - sequence numbers allowed for new data transmission + 4 - future sequence numbers which are not yet allowed + + Send Sequence Space + + Figure 4. + + + + Receive Sequence Space + + 1 2 3 + ----------|----------|---------- + RCV.NXT RCV.NXT + +RCV.WND + + 1 - old sequence numbers which have been acknowledged + 2 - sequence numbers allowed for new reception + 3 - future sequence numbers which are not yet allowed + + Receive Sequence Space + + Figure 5. + + + + There are also some variables used frequently in the discussion that + take their values from the fields of the current segment. + + + + + + + + + + +[Page 20] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Current Segment Variables + + SEG.SEQ - segment sequence number + SEG.ACK - segment acknowledgment number + SEG.LEN - segment length + SEG.WND - segment window + SEG.UP - segment urgent pointer + SEG.PRC - segment precedence value + + A connection progresses through a series of states during its + lifetime. 
The states are: LISTEN, SYN-SENT, SYN-RECEIVED, + ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING, + and the fictional state CLOSED. CLOSED is fictional because it + represents the state when there is no TCB, and therefore, no + connection. Briefly the meanings of the states are: + + LISTEN - represents waiting for a connection request from any remote + TCP and port. + + SYN-SENT - represents waiting for a matching connection request + after having sent a connection request. + + SYN-RECEIVED - represents waiting for a confirming connection + request acknowledgment after having both received and sent a + connection request. + + ESTABLISHED - represents an open connection, ready to transmit and + receive data segments. + + FIN-WAIT-1 - represents waiting for a connection termination request + from the remote TCP, or an acknowledgment of the connection + termination request previously sent. + + FIN-WAIT-2 - represents waiting for a connection termination request + from the remote TCP. + + TIME-WAIT - represents waiting for enough time to pass to be sure + the remote TCP received the acknowledgment of its connection + termination request. + + CLOSE-WAIT - represents waiting for a connection termination request + from the local user. + + CLOSING - represents waiting for a connection termination request + acknowledgment from the remote TCP. + + CLOSED - represents no connection state at all. + + + + [Page 21] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + A TCP connection progresses from one state to another in response to + events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, + ABORT, and STATUS; the incoming segments, particularly those + containing the SYN and FIN flags; and timeouts. + + The Glossary contains a more complete list of terms and their + definitions. 
+ + The state diagram in figure 6 only illustrates state changes, together + with the causing events and resulting actions, but addresses neither + error conditions nor actions which are not connected with state + changes. In a later section, more detail is offered with respect to + the reaction of the TCP to events. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 22] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + +---------+ ---------\ active OPEN + | CLOSED | \ ----------- + +---------+<---------\ \ create TCB + | ^ \ \ snd SYN + passive OPEN | | CLOSE \ \ + ------------ | | ---------- \ \ + create TCB | | delete TCB \ \ + V | \ \ + +---------+ CLOSE | \ + | LISTEN | ---------- | | + +---------+ delete TCB | | + rcv SYN | | SEND | | + ----------- | | ------- | V + +---------+ snd SYN,ACK / \ snd SYN +---------+ + | |<----------------- ------------------>| | + | SYN | rcv SYN | SYN | + | RCVD |<-----------------------------------------------| SENT | + | | snd ACK | | + | |------------------ -------------------| | + +---------+ rcv ACK of SYN \ / rcv SYN,ACK +---------+ + | -------------- | | ----------- + | x | | snd ACK + | V V + | CLOSE +---------+ + | ------- | ESTAB | + | snd FIN +---------+ + | CLOSE | | rcv FIN + V ------- | | ------- + +---------+ snd FIN / \ snd ACK +---------+ + | FIN |<----------------- ------------------>| CLOSE | + | WAIT-1 |------------------ -------------------| WAIT | + +---------+ rcv FIN \ / CLOSE +---------+ + | rcv ACK of FIN ------- | | ------- + | -------------- snd ACK | | snd FIN + V x V V + +---------+ +---------+ + |FINWAIT-2| | CLOSING | + +---------+ +---------+ + | rcv FIN | rcv ACK of FIN + | ------- Timeout=2MSL | -------------- + V snd ACK ------------ V delete TCB + +---------+ delete TCB +---------+ + |TIME WAIT|----------------->| CLOSED | + +---------+ +---------+ + + TCP Connection State Diagram + Figure 6. 
+ + + [Page 23] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.3. Sequence Numbers + + A fundamental notion in the design is that every octet of data sent + over a TCP connection has a sequence number. Since every octet is + sequenced, each of them can be acknowledged. The acknowledgment + mechanism employed is cumulative so that an acknowledgment of sequence + number X indicates that all octets up to but not including X have been + received. This mechanism allows for straight-forward duplicate + detection in the presence of retransmission. Numbering of octets + within a segment is that the first data octet immediately following + the header is the lowest numbered, and the following octets are + numbered consecutively. + + It is essential to remember that the actual sequence number space is + finite, though very large. This space ranges from 0 to 2**32 - 1. + Since the space is finite, all arithmetic dealing with sequence + numbers must be performed modulo 2**32. This unsigned arithmetic + preserves the relationship of sequence numbers as they cycle from + 2**32 - 1 to 0 again. There are some subtleties to computer modulo + arithmetic, so great care should be taken in programming the + comparison of such values. The typical kinds of sequence number + comparisons which the TCP must perform include: + + (a) Determining that an acknowledgment refers to some sequence + number sent but not yet acknowledged. + + (b) Determining that all sequence numbers occupied by a segment + have been acknowledged (e.g., to remove the segment from a + retransmission queue). + + (c) Determining that an incoming segment contains sequence numbers + which are expected (i.e., that the segment "overlaps" the + receive window). 
+ + + + + + + + + + + + + + + + + +[Page 24] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + On send connections the following comparisons are needed: + + older sequence numbers newer sequence numbers + + + SND.UNA SEG.ACK SND.NXT + | | | + ----|----XXXXXXX------XXXXXXXXXX---------XXXXXX----|---- + | | | | | | + | | | + Segment 1 Segment 2 Segment 3 + + <----- sequence space -----> + + Sending Sequence Space Information + + Figure 7. + + SND.UNA = oldest unacknowledged sequence number + + SND.NXT = next sequence number to be sent + + SEG.ACK = acknowledgment (next sequence number expected by the + acknowledging TCP) + + SEG.SEQ = first sequence number of a segment + + SEG.SEQ+SEG.LEN-1 = last sequence number of a segment + + A new acknowledgment (called an "acceptable ack"), is one for which + the inequality below holds: + + SND.UNA < SEG.ACK =< SND.NXT + + Note that all arithmetic is modulo 2**32 and that comparisons are + unsigned. "=<" means "less than or equal". + + A segment on the retransmission queue is fully acknowledged if the sum + of its sequence number and length is less than or equal to the + acknowledgment value in the incoming segment. + + SEG.LEN is the number of octets occupied by the data in the segment. + It is important to note that SEG.LEN must be non-zero; segments which + do not occupy any sequence space (e.g., empty acknowledgment segments) + are never placed on the retransmission queue, so would not go through + this particular test. + + + + + [Page 25] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + On receive connections the following comparisons are needed: + + older sequence numbers newer sequence numbers + + + RCV.NXT RCV.NXT+RCV.WND + | | + ---------XXX|XXX------XXXXXXXXXX---------XXX|XX--------- + | | | | | + | | | + Segment 1 Segment 2 Segment 3 + + <----- sequence space -----> + + Receiving Sequence Space Information + + Figure 8. 
+ + RCV.NXT = next sequence number expected on incoming segments + + RCV.NXT+RCV.WND = last sequence number expected on incoming + segments, plus one + + SEG.SEQ = first sequence number occupied by the incoming segment + + SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming + segment + + A segment is judged to occupy a portion of valid receive sequence + space if + + 0 =< (SEG.SEQ+SEG.LEN-1 - RCV.NXT) < (RCV.NXT+RCV.WND - RCV.NXT) + + SEG.SEQ+SEG.LEN-1 is the last sequence number occupied by the segment; + RCV.NXT is the next sequence number expected on an incoming segment; + and RCV.NXT+RCV.WND is the right edge of the receive window. + + Actually, it is a little more complicated than this. Due to zero + windows and zero length segments, we have four cases for the + acceptability of an incoming segment: + + + + + + + + + + +[Page 26] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Segment Receive Test + Length Window + ------- ------- ------------------------------------------- + + 0 0 SEG.SEQ = RCV.NXT + + 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + + >0 0 not acceptable + + >0 >0 RCV.NXT < SEG.SEQ+SEG.LEN =< RCV.NXT+RCV.WND + + Note that the acceptance test for a segment, since it requires the end + of a segment to lie in the window, is somewhat more restrictive than + is absolutely necessary. If at least the first sequence number of the + segment lies in the receive window, or if some part of the segment + lies in the receive window, then the segment might be judged + acceptable. Thus, in figure 8, at least segments 1 and 2 are + acceptable by the strict rule, and segment 3 may or may not be, + depending on the strictness of interpretation of the rule. + + Note that when the receive window is zero no segments should be + acceptable except ACK segments. Thus, it should be possible for a TCP + to maintain a zero receive window while transmitting data and + receiving ACKs. 
+ + We have taken advantage of the numbering scheme to protect certain + control information as well. This is achieved by implicitly including + some control flags in the sequence space so they can be retransmitted + and acknowledged without confusion (i.e., one and only one copy of the + control will be acted upon). Control information is not physically + carried in the segment data space. Consequently, we must adopt rules + for implicitly assigning sequence numbers to control. The SYN and FIN + are the only controls requiring this protection, and these controls + are used only at connection opening and closing. For sequence number + purposes, the SYN is considered to occur before the first actual data + octet of the segment in which it occurs, while the FIN is considered + to occur after the last actual data octet in a segment in which it + occurs. The segment length includes both data and sequence space + occupying controls. When a SYN is present then SEG.SEQ is the + sequence number of the SYN. + + Initial Sequence Number Selection + + The protocol places no restriction on a particular connection being + used over and over again. A connection is defined by a pair of + sockets. New instances of a connection will be referred to as + incarnations of the connection. The problem that arises owing to this + + + [Page 27] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + is -- "how does the TCP identify duplicate segments from previous + incarnations of the connection?" This problem becomes apparent if the + connection is being opened and closed in quick succession, or if the + connection breaks with loss of memory and is then reestablished. + + To avoid confusion we must prevent segments from one incarnation of a + connection from being used while the same sequence numbers may still + be present in the network from an earlier incarnation. 
We want to + assure this, even if a TCP crashes and loses all knowledge of the + sequence numbers it has been using. When new connections are created, + an initial sequence number (ISN) generator is employed which selects a + new 32 bit ISN. The generator is bound to a (possibly fictitious) 32 + bit clock whose low order bit is incremented roughly every 4 + microseconds. Thus, the ISN cycles approximately every 4.55 hours. + Since we assume that segments will stay in the network no more than + tens of seconds or minutes, at worst, we can reasonably assume that + ISN's will be unique. + + For each connection there is a send sequence number and a receive + sequence number. The initial send sequence number (ISS) is chosen by + the data sending TCP, and the initial receive sequence number (IRS) is + learned during the connection establishing procedure. + + For a connection to be established or initialized, the two TCPs must + synchronize on each other's initial sequence numbers. This is done in + an exchange of connection establishing messages carrying a control bit + called "SYN" (for synchronize) and the initial sequence numbers. As a + shorthand, messages carrying the SYN bit are also called "SYNs". + Hence, the solution requires a suitable mechanism for picking an + initial sequence number and a slightly involved handshake to exchange + the ISN's. A "three way handshake" is necessary because sequence + numbers are not tied to a global clock in the network, and TCPs may + have different mechanisms for picking the ISN's. The receiver of the + first SYN has no way of knowing whether the segment was an old delayed + one or not, unless it remembers the last sequence number used on the + connection (which is not always possible), and so it must ask the + sender to verify this SYN. + + The "three way handshake" and the advantages of a "clock-driven" + scheme are discussed in [4]. 
+ + Knowing When to Keep Quiet + + To be sure that a TCP does not create a segment that carries a + sequence number which may be duplicated by an old segment remaining in + the network, the TCP must keep quiet for a maximum segment lifetime + (MSL) before assigning any sequence numbers upon starting up or + recovering from a crash in which memory of sequence numbers in use was + + +[Page 28] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + lost. For this specification the MSL is taken to be 2 minutes. This + is an engineering choice, and may be changed if experience indicates + it is desirable to do so. Note that if a TCP is reinitialized in some + sense, yet retains its memory of sequence numbers in use, then it need + not wait at all; it must only be sure to use sequence numbers larger + than those recently used. + + It should be noted that this strategy does not protect against + spoofing or other replay type duplicate message problems. + +3.4. Establishing a connection + + The "three-way handshake" is the procedure used to establish a + connection. This procedure normally is initiated by one TCP and + responded to by another TCP. The procedure also works if two TCPs + simultaneously initiate the procedure. When a simultaneous attempt + occurs, each TCP receives a "SYN" segment which carries no + acknowledgment after it has sent a "SYN". Of course, the arrival of + an old duplicate "SYN" segment can potentially make it appear, to the + recipient, that a simultaneous connection initiation is in progress. + Proper use of "reset" segments can disambiguate these cases. Several + examples of connection initiation follow. 
Although these examples do + not show connection synchronization using data-carrying segments, this + is perfectly legitimate, so long as the receiving TCP doesn't deliver + the data to the user until it is clear the data is valid (i.e., the + data must be buffered at the receiver until the connection reaches the + ESTABLISHED state). The three-way handshake reduces the possibility + of false connections. It is the implementation of a trade-off between + memory and messages to provide information for this checking. + + The simplest three-way handshake is shown in figure 9 below. The + figures should be interpreted in the following way. Each line is + numbered for reference purposes. Right arrows (-->) indicate + departure of a TCP segment from TCP A to TCP B, or arrival of a + segment at B from A. Left arrows (<--), indicate the reverse. + Ellipsis (...) indicates a segment which is still in the network + (delayed). An "XXX" indicates a segment which is lost or rejected. + Comments appear in parentheses. TCP states represent the state AFTER + the departure or arrival of the segment (whose contents are shown in + the center of each line). Segment contents are shown in abbreviated + form, with sequence number, control flags, and ACK field. Other + fields such as window, addresses, lengths, and text have been left out + in the interest of clarity. + + + + + + + + [Page 29] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + + + TCP A TCP B + + 1. CLOSED LISTEN + + 2. SYN-SENT --> --> SYN-RECEIVED + + 3. ESTABLISHED <-- <-- SYN-RECEIVED + + 4. ESTABLISHED --> --> ESTABLISHED + + 5. ESTABLISHED --> --> ESTABLISHED + + Basic 3-Way Handshake for Connection Synchronization + + Figure 9. + + In line 2 of figure 9, TCP A begins by sending a SYN segment + indicating that it will use sequence numbers starting with sequence + number 100. In line 3, TCP B sends a SYN and acknowledges the SYN it + received from TCP A. 
Note that the acknowledgment field indicates TCP + B is now expecting to hear sequence 101, acknowledging the SYN which + occupied sequence 100. + + At line 4, TCP A responds with an empty segment containing an ACK for + TCP B's SYN; and in line 5, TCP A sends some data. Note that the + sequence number of the segment in line 5 is the same as in line 4 + because the ACK does not occupy sequence number space (if it did, we + would wind up ACKing ACK's!). + + Simultaneous initiation is only slightly more complex, as is shown in + figure 10. Each TCP cycles from CLOSED to SYN-SENT to SYN-RECEIVED to + ESTABLISHED. + + The principal reason for the three-way handshake is to prevent old + duplicate connection initiations from causing confusion. To deal with + this, a special control message, reset, has been devised. If the + receiving TCP is in a non-synchronized state (i.e., SYN-SENT, + SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. + If the TCP is in one of the synchronized states (ESTABLISHED, + FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING), it aborts the + connection and informs its user. We discuss this latter case under + "half-open" connections below. + + + + + + +[Page 30] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. CLOSED CLOSED + + 2. SYN-SENT --> ... + + 3. SYN-RECEIVED <-- <-- SYN-SENT + + 4. ... --> SYN-RECEIVED + + 5. SYN-RECEIVED --> ... + + 6. ESTABLISHED <-- <-- SYN-RECEIVED + + 7. ... --> ESTABLISHED + + Simultaneous Connection Synchronization + + Figure 10. + + + + TCP A TCP B + + 1. CLOSED LISTEN + + 2. SYN-SENT --> ... + + 3. (duplicate) ... --> SYN-RECEIVED + + 4. SYN-SENT <-- <-- SYN-RECEIVED + + 5. SYN-SENT --> --> LISTEN + + + 6. ... --> SYN-RECEIVED + + 7. SYN-SENT <-- <-- SYN-RECEIVED + + 8. ESTABLISHED --> --> ESTABLISHED + + Recovery from Old Duplicate SYN + + Figure 11. 
+ + As a simple example of recovery from old duplicates, consider + + + [Page 31] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + figure 11. At line 3, an old duplicate SYN arrives at TCP B. TCP B + cannot tell that this is an old duplicate, so it responds normally + (line 4). TCP A detects that the ACK field is incorrect and returns a + RST (reset) with its SEQ field selected to make the segment + believable. TCP B, on receiving the RST, returns to the LISTEN state. + When the original SYN (pun intended) finally arrives at line 6, the + synchronization proceeds normally. If the SYN at line 6 had arrived + before the RST, a more complex exchange might have occurred with RST's + sent in both directions. + + Half-Open Connections and Other Anomalies + + An established connection is said to be "half-open" if one of the + TCPs has closed or aborted the connection at its end without the + knowledge of the other, or if the two ends of the connection have + become desynchronized owing to a crash that resulted in loss of + memory. Such connections will automatically become reset if an + attempt is made to send data in either direction. However, half-open + connections are expected to be unusual, and the recovery procedure is + mildly involved. + + If at site A the connection no longer exists, then an attempt by the + user at site B to send any data on it will result in the site B TCP + receiving a reset control message. Such a message should indicate to + the site B TCP that something is wrong, and it is expected to abort + the connection. + + Assume that two user processes A and B are communicating with one + another when a crash occurs causing loss of memory to A's TCP. + Depending on the operating system supporting A's TCP, it is likely + that some error recovery mechanism exists. When the TCP is up again, + A is likely to start again from the beginning or from a recovery + point. 
As a result, A will probably try to OPEN the connection again + or try to SEND on the connection it believes open. In the latter + case, it receives the error message "connection not open" from the + local (A's) TCP. In an attempt to establish the connection, A's TCP + will send a segment containing SYN. This scenario leads to the + example shown in figure 12. After TCP A crashes, the user attempts to + re-open the connection. TCP B, in the meantime, thinks the connection + is open. + + + + + + + + + + +[Page 32] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. (CRASH) (send 300,receive 100) + + 2. CLOSED ESTABLISHED + + 3. SYN-SENT --> --> (??) + + 4. (!!) <-- <-- ESTABLISHED + + 5. SYN-SENT --> --> (Abort!!) + + 6. CLOSED + + 7. SYN-SENT --> --> + + Half-Open Connection Discovery + + Figure 12. + + When the SYN arrives at line 3, TCP B, being in a synchronized state, + responds with an acknowledgment indicating what sequence it next + expects to hear (ACK 100). TCP A sees that this segment does not + acknowledge anything it sent and, being unsynchronized, sends a reset + (RST) because it has detected a half-open connection. TCP B aborts at + line 5. TCP A will continue to try to establish the connection; the + problem is now reduced to the basic 3-way handshake of figure 9. + + An interesting alternative case occurs when TCP A crashes and TCP B + tries to send data on what it thinks is a synchronized connection. + This is illustrated in figure 13. In this case, the data arriving at + TCP A from TCP B (line 2) is unacceptable because no such connection + exists, so TCP A sends a RST. The RST is acceptable so TCP B + processes it and aborts the connection. + + + + + + + + + + + + + + + [Page 33] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + + + TCP A TCP B + + 1. (CRASH) (send 300,receive 100) + + 2. (??) <-- <-- ESTABLISHED + + 3. --> --> (ABORT!!) 
+ + Active Side Causes Half-Open Connection Discovery + + Figure 13. + + In figure 14, we find the two TCPs A and B with passive connections + waiting for SYN. An old duplicate arriving at TCP B (line 2) stirs B + into action. A SYN-ACK is returned (line 3) and causes TCP A to + generate a RST (the ACK in line 3 is not acceptable). TCP B accepts + the reset and returns to its passive LISTEN state. + + + + TCP A TCP B + + 1. LISTEN LISTEN + + 2. ... --> SYN-RECEIVED + + 3. (??) <-- <-- SYN-RECEIVED + + 4. --> --> (return to LISTEN!) + + 5. LISTEN LISTEN + + Old Duplicate SYN Initiates a Reset on two Passive Sockets + + Figure 14. + + A variety of other cases are possible, all of which are accounted for + by the following rules for RST generation and processing. + + Reset Generation + + As a general rule, reset (RST) should be sent whenever a segment + arrives which apparently is not intended for the current or a future + incarnation of the connection. A reset should not be sent if it is + not clear that this is the case. Thus, if any segment arrives for a + nonexistent connection, a reset should be sent. If a segment ACKs + + +[Page 34] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + something which has never been sent on the current connection, then + one of the following two cases applies. + + 1. If the connection is in any non-synchronized state (LISTEN, + SYN-SENT, SYN-RECEIVED) or if the connection does not exist, a reset + (RST) should be formed and sent for any segment that acknowledges + something not yet sent. The RST should take its SEQ field from the + ACK field of the offending segment (if the ACK control bit was set), + and its ACK bit should be reset (zero), except to refuse an initial + SYN. A reset is also sent if an incoming segment has a security level + or compartment which does not exactly match the level and compartment + requested for the connection. 
If the precedence of the incoming + segment is less than the precedence level requested a reset is sent. + + 2. If the connection is in a synchronized state (ESTABLISHED, + FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING), any + unacceptable segment should elicit only an empty acknowledgment + segment containing the current send-sequence number and an + acknowledgment indicating the next sequence number expected to be + received. + + Reset Processing + + All reset (RST) segments are validated by checking their SEQ-fields. + A reset is valid if its sequence number is in the window. In the case + of a RST received in response to an initial SYN any sequence number is + acceptable if the ACK field acknowledges the SYN. + + The receiver of a RST first validates it, then changes state. If the + receiver was in the LISTEN state, it ignores it. If the receiver was + in SYN-RECEIVED state and had previously been in the LISTEN state, + then the receiver returns to the LISTEN state, otherwise the receiver + aborts the connection and goes to the CLOSED state. If the receiver + was in any other state, it aborts the connection and advises the user + and goes to the CLOSED state. + +3.5. Closing a Connection + + CLOSE is an operation meaning "I have no more data to send." The + notion of closing a full-duplex connection is subject to ambiguous + interpretation, of course, since it may not be obvious how to treat + the receiving side of the connection. We have chosen to treat CLOSE + in a simplex fashion. The user who CLOSEs may continue to RECEIVE + until he is told that the other side has CLOSED also. Thus, a program + could initiate several SENDs followed by a CLOSE, and then continue to + RECEIVE until signaled that a RECEIVE failed because the other side + has CLOSED. 
We assume that the TCP will signal a user, even if no + RECEIVEs are outstanding, that the other side has closed, so the user + + + [Page 35] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + can terminate his side gracefully. A TCP will reliably deliver all + buffers SENT before the connection was CLOSED so a user who expects no + data in return need only wait to hear the connection was CLOSED + successfully to know that all his data was received at the destination + TCP. + + There are essentially three cases: + + 1) The user initiates by telling the TCP to CLOSE the connection + + 2) The remote TCP initiates by sending a FIN control signal + + 3) Both users CLOSE simultaneously + + Case 1: Local user initiates the close + + In this case, a FIN segment can be constructed and placed on the + outgoing segment queue. No further SENDs from the user will be + accepted by the TCP, and it enters the FIN-WAIT-1 state. RECEIVEs + are allowed in this state. All segments preceding and including FIN + will be retransmitted until acknowledged. When the other TCP has + both acknowledged the FIN and sent a FIN of its own, the first TCP + can ACK this FIN. It should be noted that a TCP receiving a FIN + will ACK but not send its own FIN until its user has CLOSED the + connection also. + + Case 2: TCP receives a FIN from the network + + If an unsolicited FIN arrives from the network, the receiving TCP + can ACK it and tell the user that the connection is closing. The + user should respond with a CLOSE, upon which the TCP can send a FIN + to the other TCP. The TCP then waits until its own FIN is + acknowledged whereupon it deletes the connection. If an ACK is not + forthcoming, after a timeout the connection is aborted and the user + is told. + + Case 3: both users close simultaneously + + A simultaneous CLOSE by users at both ends of a connection causes + FIN segments to be exchanged. 
When all segments preceding the FINs + have been processed and acknowledged, each TCP can ACK the FIN it + has received. Both will, upon receiving these ACKs, delete the + connection. + + + + + + + +[Page 36] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. ESTABLISHED ESTABLISHED + + 2. (Close) + FIN-WAIT-1 --> --> CLOSE-WAIT + + 3. FIN-WAIT-2 <-- <-- CLOSE-WAIT + + 4. (Close) + TIME-WAIT <-- <-- CLOSING + + 5. TIME-WAIT --> --> CLOSED + + 6. (2 MSL) + CLOSED + + Normal Close Sequence + + Figure 15. + + + + TCP A TCP B + + 1. ESTABLISHED ESTABLISHED + + 2. (Close) (Close) + FIN-WAIT-1 --> ... FIN-WAIT-1 + <-- <-- + ... --> + + 3. CLOSING --> ... CLOSING + <-- <-- + ... --> + + 4. CLOSED CLOSED + + Simultaneous Close Sequence + + Figure 16. + + + + + + + + [Page 37] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.6. Precedence and Security + + The intent is that a connection be allowed only between ports operating + with exactly the same security and compartment values and at the + higher of the precedence levels requested by the two ports. + + The precedence levels are: + + flash override - 111 + flash - 110 + immediate - 10X + priority - 01X + routine - 00X + + The security levels are: + + top secret - 11 + secret - 10 + confidential - 01 + unclassified - 00 + + The compartments are assigned by the Defense Communications Agency. + The defaults are precedence: routine, security: unclassified, + compartment: zero. A host which does not implement precedence or + security features should clear these fields to zero for segments it + sends. + + A connection attempt with mismatched security/compartment values or a + lower precedence value should be rejected by sending a reset. 
+ + Note that TCP modules which operate only at the default value of + precedence will still have to check the precedence of incoming + segments and possibly raise the precedence level they use on the + connection. + +3.7. Data Communication + + Once the connection is established data is communicated by the + exchange of segments. Because segments may be lost due to errors + (checksum test failure), or network congestion, TCP uses + retransmission (after a timeout) to ensure delivery of every segment. + Duplicate segments may arrive due to network or TCP retransmission. + As discussed in the section on sequence numbers the TCP performs + certain tests on the sequence and acknowledgment numbers in the + segments to verify their acceptability. + + The sender of data keeps track of the next sequence number to use in + the variable SND.NXT. The receiver of data keeps track of the next + + +[Page 38] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + sequence number to expect in the variable RCV.NXT. The sender of data + keeps track of the oldest unacknowledged sequence number in the + variable SND.UNA. If the data flow is momentarily idle and all data + sent has been acknowledged then the three variables will be equal. + + When the sender creates a segment and transmits it the sender advances + SND.NXT. When the receiver accepts a segment it advances RCV.NXT and + sends an acknowledgment. When the data sender receives an + acknowledgment it advances SND.UNA. The extent to which the values of + these variables differ is a measure of the delay in the communication. + + Normally the amount by which the variables are advanced is the length + of the data in the segment. However, when letters are used there are + special provisions for coordinating the sequence numbers, the letter + boundaries, and the receive buffer boundaries. 
+ + End of Letter Sequence Number Adjustments + + There is provision in TCP for the receiver of data to optionally + communicate to the sender of data on a connection at the time of the + connection synchronization the receiver's buffer size. If this is + done the receiver must use this fixed size of buffers for the lifetime + of the connection. If a buffer size is communicated then there is a + coordination between receive buffers, letters, and sequence numbers. + + Each time a buffer is completed either due to being filled or due to + an end of letter, the sequence number is incremented through the end + of that buffer. + + That is, whenever an EOL is transmitted, the sender advances its send + sequence number, SND.NXT, by an amount sufficient to consume all the + unused space in the receiver's buffer. The amount of space consumed + in this fashion is subtracted from the send window just as is the + space consumed by actual data. + + And, whenever an EOL is received, the receiver advances its receive + sequence number, RCV.NXT, by an amount sufficient to consume all the + unused space in the receiver's buffer. The amount of space consumed + in this fashion is subtracted from the receive window just as is the + space consumed by actual data. + + + + + + + + + + + [Page 39] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + older sequence numbers newer sequence numbers + + | Buffer 1 | Buffer 2 + | | + ----+-------------------------------+----------------- + XXXXXXXXXXXXXXXXXXXXX+++++++++++ + | | | + |<-----SEG.LEN------>| | + | | | + | | | + SEG.SEQ A B + + XXX - data octets from segment + +++ - phantom data + + <----- sequence space -----> + + End of Letter Adjustment + + Figure 17. + + In the case illustrated above, if the segment does not carry an EOL + flag, the next value of SND.NXT or RCV.NXT will be A. If it does + carry an EOL flag, the next value will be B. 
+ + The exchange of buffer size and sequencing information is done in + units of octets. If no buffer size is stated, then the buffer size is + assumed to be 1 octet. The receiver tells the sender the size of the + buffer in a SYN segment that contains the 16 bit buffer size data in + an option field in the TCP header. + + Each EOL advances the sequence number (SN) to the next buffer boundary + + While LBB < SEG.SEQ+SEG.LEN + Do LBB <- LBB + BS End + SN <- LBB + + where LBB is the Last Buffer Beginning, and BS is the buffer size. + + The CLOSE user call implies an end of letter, as does the FIN control + flag in an incoming segment. + + The Communication of Urgent Information + + The objective of the TCP urgent mechanism is to allow the sending user + to stimulate the receiving user to accept some urgent data and to + permit the receiving TCP to indicate to the receiving user when all + the currently known urgent data has been received by the user. + + +[Page 40] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + This mechanism permits a point in the data stream to be designated as + the end of "urgent" information. Whenever this point is in advance of + the receive sequence number (RCV.NXT) at the receiving TCP, that TCP + should tell the user to go into "urgent mode"; when the receive + sequence number catches up to the urgent pointer, the TCP should tell + the user to go into "normal mode". If the urgent pointer is updated + while the user is in "urgent" mode, the update will be invisible to the + user. + + The method employs an urgent field which is carried in all segments + transmitted. The URG control flag indicates that the urgent field is + meaningful and should be added to the segment sequence number to yield + the urgent pointer. The absence of this flag indicates that the + urgent pointer has not changed. + + To send an urgent indication the user must also send at least one data + octet. 
If the sending user also indicates end of letter, timely + delivery of the urgent information to the destination process is + enhanced. + + Managing the Window + + The window sent in each segment indicates the range of sequence numbers + the sender of the window (the data receiver) is currently prepared to + accept. There is an assumption that this is related to the currently + available data buffer space for this connection. The window + information is a guideline to be aimed at. + + Indicating a large window encourages transmissions. If more data + arrives than can be accepted, it will be discarded. This will result + in excessive retransmissions, adding unnecessarily to the load on the + network and the TCPs. Indicating a small window may restrict the + transmission of data to the point of introducing a round trip delay + between each new segment transmitted. + + The mechanisms provided allow a TCP to advertise a large window and to + subsequently advertise a much smaller window without having accepted + that much data. This, so called "shrinking the window," is strongly + discouraged. The robustness principle dictates that TCPs will not + shrink the window themselves, but will be prepared for such behavior + on the part of other TCPs. + + The sending TCP must be prepared to accept and send at least one octet + of new data even if the send window is zero. The sending TCP should + regularly retransmit to the receiving TCP even when the window is + zero. Two minutes is recommended for the retransmission interval when + the window is zero. This retransmission is essential to guarantee + + + + [Page 41] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + that when either TCP has a zero window the re-opening of the window + will be reliably reported to the other. + + The sending TCP packages the data to be transmitted into segments + which fit the current window, and may repackage segments on the + retransmission queue.
Such repackaging is not required, but may be + helpful. + + Users must keep reading connections they close for sending until the + TCP says no more data. + + In a connection with a one-way data flow, the window information will + be carried in acknowledgment segments that all have the same sequence + number so there will be no way to reorder them if they arrive out of + order. This is not a serious problem, but it will allow the window + information to be on occasion temporarily based on old reports from + the data receiver. + +3.8. Interfaces + + There are of course two interfaces of concern: the user/TCP interface + and the TCP/IP interface. We have a fairly elaborate model of the + user/TCP interface, but only a sketch of the interface to the lower + level protocol module. + + User/TCP Interface + + The functional description of user commands to the TCP is, at best, + fictional, since every operating system will have different + facilities. Consequently, we must warn readers that different TCP + implementations may have different user interfaces. However, all + TCPs must provide a certain minimum set of services to guarantee + that all TCP implementations can support the same protocol + hierarchy. This section specifies the functional interfaces + required of all TCP implementations. + + TCP User Commands + + The following sections functionally characterize a USER/TCP + interface. The notation used is similar to most procedure or + function calls in high level languages, but this usage is not + meant to rule out trap type service calls (e.g., SVCs, UUOs, + EMTs). + + The user commands described below specify the basic functions the + TCP must perform to support interprocess communication. + Individual implementations should define their own exact format, + and may provide combinations or subsets of the basic functions in + + +[Page 42] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + single calls. 
In particular, some implementations may wish to + automatically OPEN a connection on the first SEND or RECEIVE + issued by the user for a given connection. + + In providing interprocess communication facilities, the TCP must + not only accept commands, but must also return information to the + processes it serves. The latter consists of: + + (a) general information about a connection (e.g., interrupts, + remote close, binding of unspecified foreign socket). + + (b) replies to specific user commands indicating success or + various types of failure. + + Open + + Format: OPEN (local port, foreign socket, active/passive + [, buffer size] [, timeout] [, precedence] + [, security/compartment]) -> local connection name + + We assume that the local TCP is aware of the identity of the + processes it serves and will check the authority of the process + to use the connection specified. Depending upon the + implementation of the TCP, the local network and TCP identifiers + for the source address will either be supplied by the TCP or by + the processes that serve it (e.g., the program which interfaces + the TCP network). These considerations are the result of + concern about security, to the extent that no TCP be able to + masquerade as another one, and so on. Similarly, no process can + masquerade as another without the collusion of the TCP. + + If the active/passive flag is set to passive, then this is a + call to LISTEN for an incoming connection. A passive open may + have either a fully specified foreign socket to wait for a + particular connection or an unspecified foreign socket to wait + for any call. A fully specified passive call can be made active + by the subsequent execution of a SEND. + + A full-duplex transmission control block (TCB) is created and + partially filled in with data from the OPEN command parameters. + + On an active OPEN command, the TCP will begin the procedure to + synchronize (i.e., establish) the connection at once. 
+ + The buffer size, if present, indicates that the caller will + always receive data from the connection in that size of buffers. + This buffer size is a measure of the buffer between the user and + + + + [Page 43] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + the local TCP. The buffer size between the two TCPs may be + different. + + The timeout, if present, permits the caller to set up a timeout + for all buffers transmitted on the connection. If a buffer is + not successfully delivered to the destination within the timeout + period, the TCP will abort the connection. The present global + default is 30 seconds. The buffer retransmission rate may vary; + most likely, it will be related to the measured time for + responses from the remote TCP. + + The TCP or some component of the operating system will verify + the user's authority to open a connection with the specified + precedence or security/compartment. The absence of precedence + or security/compartment specification in the OPEN call indicates + the default values should be used. + + TCP will accept incoming requests as matching only if the + security/compartment information is exactly the same and only if + the precedence is equal to or higher than the precedence + requested in the OPEN call. + + The precedence for the connection is the higher of the values + requested in the OPEN call and received from the incoming + request, and fixed at that value for the life of the connection. + + Depending on the TCP implementation, either a local connection + name will be returned to the user by the TCP, or the user will + specify this local connection name (in which case another + parameter is needed in the call). The local connection name can + then be used as a short hand term for the connection defined by + the <local socket, foreign socket> pair.
+ + Send + + Format: SEND(local connection name, buffer address, byte count, + EOL flag, URGENT flag [, timeout]) + + This call causes the data contained in the indicated user buffer + to be sent on the indicated connection. If the connection has + not been opened, the SEND is considered an error. Some + implementations may allow users to SEND first; in which case, an + automatic OPEN would be done. If the calling process is not + authorized to use this connection, an error is returned. + + If the EOL flag is set, the data is the End Of a Letter, and the + EOL bit will be set in the last TCP segment created from the + + + +[Page 44] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + buffer. If the EOL flag is not set, subsequent SENDs will + appear to be part of the same letter. + + If the URGENT flag is set, segments resulting from this call + will have the urgent pointer set to indicate that some of the + data associated with this call is urgent. This facility, for + example, can be used to simulate "break" signals from terminals + or error or completion codes from I/O devices. The semantics of + this signal to the receiving process are unspecified. The + receiving TCP will signal the urgent condition to the receiving + process as long as the urgent pointer indicates that data + preceding the urgent pointer has not been consumed by the + receiving process. The purpose of urgent is to stimulate the + receiver to accept some urgent data and to indicate to the + receiver when all the currently known urgent data has been + received. + + The number of times the sending user's TCP signals urgent will + not necessarily be equal to the number of times the receiving + user will be notified of the presence of urgent data. 
+ + If no foreign socket was specified in the OPEN, but the + connection is established (e.g., because a LISTENing connection + has become specific due to a foreign segment arriving for the + local socket), then the designated buffer is sent to the implied + foreign socket. In general, users who make use of OPEN with an + unspecified foreign socket can make use of SEND without ever + explicitly knowing the foreign socket address. + + However, if a SEND is attempted before the foreign socket + becomes specified, an error will be returned. Users can use the + STATUS call to determine the status of the connection. In some + implementations the TCP may notify the user when an unspecified + socket is bound. + + If a timeout is specified, then the current timeout for this + connection is changed to the new one. + + In the simplest implementation, SEND would not return control to + the sending process until either the transmission was complete + or the timeout had been exceeded. However, this simple method + is both subject to deadlocks (for example, both sides of the + connection might try to do SENDs before doing any RECEIVEs) and + offers poor performance, so it is not recommended. A more + sophisticated implementation would return immediately to allow + the process to run concurrently with network I/O, and, + furthermore, to allow multiple SENDs to be in progress. + + + + [Page 45] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Multiple SENDs are served in first come, first served order, so + the TCP will queue those it cannot service immediately. + + We have implicitly assumed an asynchronous user interface in + which a SEND later elicits some kind of SIGNAL or + pseudo-interrupt from the serving TCP. An alternative is to + return a response immediately. For instance, SENDs might return + immediate local acknowledgment, even if the segment sent had not + been acknowledged by the distant TCP. 
We could optimistically + assume eventual success. If we are wrong, the connection will + close anyway due to the timeout. In implementations of this + kind (synchronous), there will still be some asynchronous + signals, but these will deal with the connection itself, and not + with specific segments or letters. + + NOTA BENE: In order for the process to distinguish among error + or success indications for different SENDs, it might be + appropriate for the buffer address to be returned along with the + coded response to the SEND request. TCP-to-user signals are + discussed below, indicating the information which should be + returned to the calling process. + + Receive + + Format: RECEIVE (local connection name, buffer address, byte + count) + + This command allocates a receiving buffer associated with the + specified connection. If no OPEN precedes this command or the + calling process is not authorized to use this connection, an + error is returned. + + In the simplest implementation, control would not return to the + calling program until either the buffer was filled, or some + error occurred, but this scheme is highly subject to deadlocks. + A more sophisticated implementation would permit several + RECEIVEs to be outstanding at once. These would be filled as + segments arrive. This strategy permits increased throughput at + the cost of a more elaborate scheme (possibly asynchronous) to + notify the calling program that a letter has been received or a + buffer filled. + + If insufficient buffer space is given to reassemble a complete + letter, the EOL flag will not be set in the response to the + RECEIVE. The buffer will be filled with as much data as it can + hold. The last buffer required to hold the letter is returned + with EOL signaled.
+ + + +[Page 46] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + The remaining parts of a partly delivered letter will be placed + in buffers as they are made available via successive RECEIVEs. + If a number of RECEIVEs are outstanding, they may be filled with + parts of a single long letter or with at most one letter each. + The return codes associated with each RECEIVE will indicate what + is contained in the buffer. + + If a buffer size was given in the OPEN call, then all buffers + presented in RECEIVE calls must be of exactly that size, or an + error indication will be returned. + + The URGENT flag will be set only if the receiving user has + previously been informed via a TCP-to-user signal, that urgent + data is waiting. The receiving user should thus be in + "read-fast" mode. If the URGENT flag is on, additional urgent + data remains. If the URGENT flag is off, this call to RECEIVE + has returned all the urgent data, and the user may now leave + "read-fast" mode. + + To distinguish among several outstanding RECEIVEs and to take + care of the case that a letter is smaller than the buffer + supplied, the return code is accompanied by both a buffer + pointer and a byte count indicating the actual length of the + letter received. + + Alternative implementations of RECEIVE might have the TCP + allocate buffer storage, or the TCP might share a ring buffer + with the user. Variations of this kind will produce obvious + variation in user interface to the TCP. + + Close + + Format: CLOSE(local connection name) + + This command causes the connection specified to be closed. If + the connection is not open or the calling process is not + authorized to use this connection, an error is returned. + Closing connections is intended to be a graceful operation in + the sense that outstanding SENDs will be transmitted (and + retransmitted), as flow control permits, until all have been + serviced. 
Thus, it should be acceptable to make several SEND + calls, followed by a CLOSE, and expect all the data to be sent + to the destination. It should also be clear that users should + continue to RECEIVE on CLOSING connections, since the other side + may be trying to transmit the last of its data. Thus, CLOSE + means "I have no more to send" but does not mean "I will not + receive any more." It may happen (if the user level protocol is + not well thought out) that the closing side is unable to get rid + + + [Page 47] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + of all its data before timing out. In this event, CLOSE turns + into ABORT, and the closing TCP gives up. + + The user may CLOSE the connection at any time on his own + initiative, or in response to various prompts from the TCP + (e.g., remote close executed, transmission timeout exceeded, + destination inaccessible). + + Because closing a connection requires communication with the + foreign TCP, connections may remain in the closing state for a + short time. Attempts to reopen the connection before the TCP + replies to the CLOSE command will result in error responses. + + Close also implies end of letter. + + Status + + Format: STATUS(local connection name) + + This is an implementation dependent user command and could be + excluded without adverse effect. Information returned would + typically come from the TCB associated with the connection. + + This command returns a data block containing the following + information: + + local socket, + foreign socket, + local connection name, + receive window, + send window, + connection state, + number of buffers awaiting acknowledgment, + number of buffers pending receipt (including partial ones), + receive buffer size, + urgent state, + precedence, + security/compartment, + and default transmission timeout. 
+ + Depending on the state of the connection, or on the + implementation itself, some of this information may not be + available or meaningful. If the calling process is not + authorized to use this connection, an error is returned. This + prevents unauthorized processes from gaining information about a + connection. + + + + +[Page 48] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Abort + + Format: ABORT (local connection name) + + This command causes all pending SENDs and RECEIVES to be + aborted, the TCB to be removed, and a special RESET message to + be sent to the TCP on the other side of the connection. + Depending on the implementation, users may receive abort + indications for each outstanding SEND or RECEIVE, or may simply + receive an ABORT-acknowledgment. + + TCP-to-User Messages + + It is assumed that the operating system environment provides a + means for the TCP to asynchronously signal the user program. When + the TCP does signal a user program, certain information is passed + to the user. Often in the specification the information will be + an error message. In other cases there will be information + relating to the completion of processing a SEND or RECEIVE or + other user call. + + The following information is provided: + + Local Connection Name Always + Response String Always + Buffer Address Send & Receive + Byte count (counts bytes received) Receive + End-of-Letter flag Receive + End-of-Urgent flag Receive + + TCP/Network Interface + + The TCP calls on a lower level protocol module to actually send and + receive information over a network. One case is that of the ARPA + internetwork system where the lower level module is the Internet + Protocol [2]. In most cases the following simple interface would be + adequate. 
+ + + + + + + + + + + + + + [Page 49] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + The following two calls satisfy the requirements for the TCP to + internet protocol module communication: + + SEND (dest, TOS, TTL, BufPTR, len, Id, DF, options => result) + + where: + + dest = destination address + TOS = type of service + TTL = time to live + BufPTR = buffer pointer + len = length of buffer + Id = Identifier + DF = Don't Fragment + options = internet option data + result = response + OK = datagram sent ok + Error = error in arguments or local network error + + Note that the precedence is included in the TOS and the + security/compartment is passed as an option. + + RECV (BufPTR => result, source, dest, prot, TOS, len) + + where: + + BufPTR = buffer pointer + result = response + OK = datagram received ok + Error = error in arguments + source = source address + dest = destination address + prot = protocol + TOS = type of service + options = internet option data + len = length of buffer + + Note that the precedence is in the TOS, and the + security/compartment is an option. + + When the TCP sends a segment, it executes the SEND call supplying + all the arguments. The internet protocol module, on receiving + this call, checks the arguments and prepares and sends the + message. If the arguments are good and the segment is accepted by + the local network, the call returns successfully. If either the + arguments are bad, or the segment is not accepted by the local + network, the call returns unsuccessfully. On unsuccessful + returns, a reasonable report should be made as to the cause of the + + +[Page 50] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + problem, but the details of such reports are up to individual + implementations. + + When a segment arrives at the internet protocol module from the + local network, either there is a pending RECV call from TCP or + there is not. 
In the first case, the pending call is satisfied by + passing the information from the segment to the TCP. In the + second case, the TCP is notified of a pending segment. + + The notification of a TCP may be via a pseudo interrupt or similar + mechanism, as appropriate in the particular operating system + environment of the implementation. + + A TCP's RECV call may then either be immediately satisfied by a + pending segment, or the call may be pending until a segment + arrives. + + We note that the Internet Protocol provides arguments for a type + of service and for a time to live. TCP uses the following + settings for these parameters: + + Type of Service = Precedence: none, Package: stream, + Reliability: higher, Preference: speed, Speed: higher; or + 00011111. + + Time to Live = one minute, or 00111100. + + Note that the assumed maximum segment lifetime is two minutes. + Here we explicitly ask that a segment be destroyed if it + cannot be delivered by the internet system within one minute. + + + + + + + + + + + + + + + + + + + + + [Page 51] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.9. Event Processing + + The activity of the TCP can be characterized as responding to events. + The events that occur can be cast into three categories: user calls, + arriving segments, and timeouts. This section describes the + processing the TCP does in response to each of the events. In many + cases the processing required depends on the state of the connection. + + Events that occur: + + User Calls + + OPEN + SEND + RECEIVE + CLOSE + ABORT + STATUS + + Arriving Segments + + SEGMENT ARRIVES + + Timeouts + + USER TIMEOUT + RETRANSMISSION TIMEOUT + + The model of the TCP/user interface is that user commands receive an + immediate return and possibly a delayed response via an event or + pseudo interrupt. In the following descriptions, the term "signal" + means cause a delayed response. + + Error responses are given as character strings. 
For example, user + commands referencing connections that do not exist receive "error: + connection not open". + + Please note in the following that all arithmetic on sequence numbers, + acknowledgment numbers, windows, et cetera, is modulo 2**32 the size + of the sequence number space. Also note that "=<" means less than or + equal to. + + A natural way to think about processing incoming segments is to + imagine that they are first tested for proper sequence number (i.e., + that their contents lie in the range of the expected "receive window" + in the sequence number space) and then that they are generally queued + and processed in sequence number order. + + + +[Page 52] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + When a segment overlaps other already received segments we reconstruct + the segment to contain just the new data, and adjust the header fields + to be consistent. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 53] + + + January 1980 +Transmission Control Protocol +Functional Specification + OPEN Call + + + + OPEN Call + + CLOSED STATE (i.e., TCB does not exist) + + Create a new transmission control block (TCB) to hold connection + state information. Fill in local socket identifier, foreign + socket, precedence, security/compartment, and user timeout + information. Verify the security and precedence requested are + allowed for this user, if not return "error: precedence not + allowed" or "error: security/compartment not allowed." If active + and the foreign socket is unspecified, return "error: foreign + socket unspecified"; if active and the foreign socket is + specified, issue a SYN segment. An initial send sequence number + (ISS) is selected and the TCP receive buffer size is selected (if + applicable). A SYN segment of the form is sent + (this may include the buffer size option if applicable). 
Set + SND.UNA to ISS, SND.NXT to ISS+1, SND.LBB to ISS+1, enter SYN-SENT + state, and return. + + If the caller does not have access to the local socket specified, + return "error: connection illegal for this process". If there is + no room to create a new connection, return "error: insufficient + resources". + + LISTEN STATE + SYN-SENT STATE + SYN-RECEIVED STATE + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + Return "error: connection already exists". + + + + + + + + + + + + + + +[Page 54] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEND Call + + + + SEND Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, then + return "error: connection illegal for this process". + + Otherwise, return "error: connection does not exist". + + LISTEN STATE + + If the foreign socket is specified, then change the connection + from passive to active, select an ISS, and select the receive + buffer size. Send a SYN segment, set SND.UNA to ISS, SND.NXT to + ISS+1 and SND.LBB to ISS+1. Enter SYN-SENT state. Data + associated with SEND may be sent with SYN segment or queued for + transmission after entering ESTABLISHED state. The urgent bit if + requested in the command should be sent with the first data + segment sent as a result of this command. If there is no room to + queue the request, respond with "error: insufficient resources". + If Foreign socket was not specified, then return "error: foreign + socket unspecified". + + SYN-SENT STATE + + Queue for processing after the connection is ESTABLISHED. + Typically, nothing can be sent yet, anyway, because the send + window has not yet been set by the other side. If no space, + return "error: insufficient resources". + + SYN-RECEIVED STATE + + Queue for later processing after entering ESTABLISHED state. If + no space to queue, respond with "error: insufficient resources".
+ + ESTABLISHED STATE + + Segmentize the buffer, send or queue it for output, with a + piggybacked acknowledgment (acknowledgment value = RCV.NXT) with + the data. If there is insufficient space to remember this buffer, + simply return "error: insufficient resources". + + If remote buffer size is not one octet, and, if this is the end of + a letter, do the following end-of-letter/buffer-size adjustment + processing: + + + + + [Page 55] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEND Call + + + + if EOL = 0 then + + SND.NXT <- SEG.SEQ + SEG.LEN + + if EOL = 1 then + + While SND.LBB < SEG.SEQ + SEG.LEN + Do SND.LBB <- SND.LBB + SND.BS End + SND.NXT <- SND.LBB + + If the urgent flag is set, then SND.UP <- SND.NXT-1 and set the + urgent pointer in the outgoing segment. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + + Return "error: connection closing" and do not service request. + + CLOSE-WAIT STATE + + Segmentize any text to be sent and queue for output. If there is + insufficient space to remember the SEND, return "error: + insufficient resources" + + CLOSING STATE + + Respond with "error: connection closing" + + + + + + + + + + + + + + + + + + + + + +[Page 56] + + +January 1980 + Transmission Control Protocol + Functional Specification +RECEIVE Call + + + + RECEIVE Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should no have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + SYN-SENT STATE + SYN-RECEIVED STATE + + Queue for processing after entering ESTABLISHED state. If there + is no room to queue this request, respond with "error: + insufficient resources". + + ESTABLISHED STATE + + If insufficient incoming segments are queued to satisfy the + request, queue the request. If there is no queue space to + remember the RECEIVE, respond with "error: insufficient + resources". 
+ + Reassemble queued incoming segments into receive buffer and return + to user. Mark "end of letter" (EOL) if this is the case. + + If RCV.UP is in advance of the data currently being passed to the + user notify the user of the presence of urgent data. + + When the TCP takes responsibility for delivering data to the user + that fact must be communicated to the sender via an + acknowledgment. The formation of such an acknowledgment is + described below in the discussion of processing an incoming + segment. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + Reassemble and return a letter, or as much as will fit, in the + user buffer. Queue the request if it cannot be serviced + immediately. + + + + + + + + [Page 57] + + + January 1980 +Transmission Control Protocol +Functional Specification + RECEIVE Call + + + + TIME-WAIT STATE + CLOSE-WAIT STATE + + Since the remote side has already sent FIN, RECEIVEs must be + satisfied by text already reassembled, but not yet delivered to + the user. If no reassembled segment text is awaiting delivery, + the RECEIVE should get a "error: connection closing" response. + Otherwise, any remaining text can be used to satisfy the RECEIVE. + + CLOSING STATE + + Return "error: connection closing" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 58] + + +January 1980 + Transmission Control Protocol + Functional Specification +CLOSE Call + + + + CLOSE Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should no have access to such a connection, return + "error: connection illegal for this process". + + Otherwise, return "error: connection does not exist". + + LISTEN STATE + + Any outstanding RECEIVEs should be returned with "error: closing" + responses. Delete TCB, return "ok". + + SYN-SENT STATE + + Delete the TCB and return "error: closing" responses to any + queued SENDs, or RECEIVEs. 
+ + SYN-RECEIVED STATE + + Queue for processing after entering ESTABLISHED state or + segmentize and send FIN segment. If the latter, enter FIN-WAIT-1 + state. + + ESTABLISHED STATE + + Queue this until all preceding SENDs have been segmentized, then + form a FIN segment and send it. In any case, enter FIN-WAIT-1 + state. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + Strictly speaking, this is an error and should receive a "error: + connection closing" response. An "ok" response would be + acceptable, too, as long as a second FIN is not emitted (the first + FIN may be retransmitted though). + + + + + + + + + + + + [Page 59] + + + January 1980 +Transmission Control Protocol +Functional Specification + CLOSE Call + + + + TIME-WAIT STATE + + Strictly speaking, this is an error and should receive a "error: + connection closing" response. An "ok" response would be + acceptable, too. However, since the FIN has been sent and + acknowledged, nothing should be sent (or retransmitted). + + CLOSE-WAIT STATE + + Queue this request until all preceding SENDs have been + segmentized; then send a FIN segment, enter CLOSING state. + + CLOSING STATE + + Respond with "error: connection closing" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 60] + + +January 1980 + Transmission Control Protocol + Functional Specification +ABORT Call + + + + ABORT Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should no have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + + Any outstanding RECEIVEs should be returned with "error: + connection reset" responses. Delete TCB, return "ok". + + SYN-SENT STATE + + Delete the TCB and return "reset" responses to any queued SENDs, + or RECEIVEs. + + SYN-RECEIVED STATE + + Send a RST of the form: + + + + and return any unprocessed SENDs, or RECEIVEs with "reset" code, + delete the TCB. 
+ + ESTABLISHED STATE + + Send a reset segment: + + + + All queued SENDs and RECEIVEs should be given "reset" responses; + all segments queued for transmission (except for the RST formed + above) or retransmission should be flushed, delete the TCB. + + + + + + + + + + + + + [Page 61] + + + January 1980 +Transmission Control Protocol +Functional Specification + ABORT Call + + + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + A reset segment (RST) should be formed and sent: + + + + Outstanding SENDs, RECEIVEs, CLOSEs, and/or segments queued for + retransmission, or segmentizing, should be flushed, with + "connection reset" notification to the user, delete the TCB. + + TIME-WAIT STATE + + Respond with "ok" and delete the TCB. + + CLOSE-WAIT STATE + + Flush any pending SENDs and RECEIVEs, returning "connection reset" + responses for them. Form and send a RST segment: + + + + Flush all segment queues and delete the TCB. + + CLOSING STATE + + Respond with "ok" and delete the TCB; flush any remaining segment + queues. If a CLOSE command is still pending, respond "error: + connection reset". + + + + + + + + + + + + + + + + + + + + +[Page 62] + + +January 1980 + Transmission Control Protocol + Functional Specification +STATUS Call + + + + STATUS Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + + Return "state = LISTEN", and the TCB pointer. + + SYN-SENT STATE + + Return "state = SYN-SENT", and the TCB pointer. + + SYN-RECEIVED STATE + + Return "state = SYN-RECEIVED", and the TCB pointer. + + ESTABLISHED STATE + + Return "state = ESTABLISHED", and the TCB pointer. + + FIN-WAIT-1 STATE + + Return "state = FIN-WAIT-1", and the TCB pointer. + + FIN-WAIT-2 STATE + + Return "state = FIN-WAIT-2", and the TCB pointer. + + TIME-WAIT STATE + + Return "state = TIME-WAIT", and the TCB pointer. 
+ + CLOSE-WAIT STATE + + Return "state = CLOSE-WAIT", and the TCB pointer. + + CLOSING STATE + + Return "state = CLOSING", and the TCB pointer. + + + + + + [Page 63] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + SEGMENT ARRIVES + + If the state is CLOSED (i.e., TCB does not exist) then + + all data in the incoming segment is discarded. An incoming + segment containing a RST is discarded. An incoming segment not + containing a RST causes a RST to be sent in response. The + acknowledgment and sequence field values are selected to make the + reset sequence acceptable to the TCP that sent the offending + segment. + + If the ACK bit is off, sequence number zero is used, + + + + If the ACK bit is on, + + + + Return. + + If the state is LISTEN then + + first check for an ACK + + Any acknowledgment is bad if it arrives on a connection still in + the LISTEN state. An acceptable reset segment should be formed + for any arriving ACK-bearing segment, except another RST. The + RST should be formatted as follows: + + + + Return. + + An incoming RST should be ignored. Return. + + if there was no ACK then check for a SYN + + If the SYN bit is set, check the security. If the + security/compartment on the incoming segment does not exactly + match the security/compartment in the TCB then send a reset and + return. If the SEG.PRC is less than the TCB.PRC then send a + reset and return. If the SEG.PRC is greater than the TCB.PRC + then set TCB.PRC<-SEG.PRC. Now RCV.NXT and RCV.LBB are set to + SEG.SEQ+1, IRS is set to SEG.SEQ and any other control or text + should be queued for processing later. ISS should be selected + and a SYN segment sent of the form: + + +[Page 64] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + + + SND.NXT and SND.LBB are set to ISS+1 and SND.UNA to ISS. The + connection state should be changed to SYN-RECEIVED. 
Note that + any other incoming control or data (combined with SYN) will be + processed in the SYN-RECEIVED state, but processing of SYN and + ACK should not be repeated. If the listen was not fully + specified (i.e., the foreign socket was not fully specified), + then the unspecified fields should be filled in now. + + if there was no SYN but there was other text or control + + Any other control or text-bearing segment (not containing SYN) + must have an ACK and thus would be discarded by the ACK + processing. An incoming RST segment could not be valid, since + it could not have been sent in response to anything sent by this + incarnation of the connection. So you are unlikely to get here, + but if you do, drop the segment, and return. + + If the state is SYN-SENT then + + first check for an ACK + + If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, or the + security/compartment in the segment does not exactly match the + security/compartment in the TCB, or the precedence in the + segment is less than the precedence in the TCB, send a reset + + + + and discard the segment. Return. + + If SND.UNA =< SEG.ACK =< SND.NXT and the security/compartment + and precedence are acceptable then the ACK is acceptable. + SND.UNA should be advanced to equal SEG.ACK, and any segments on + the retransmission queue which are thereby acknowledged should + be removed. + + if the ACK is ok (or there is no ACK), check the RST bit + + If the RST bit is set then signal the user "error: connection + reset", enter CLOSED state, drop the segment, delete TCB, and + return. + + if the ACK is ok (or there is no ACK) and it was not a RST, check + the SYN bit + + + + [Page 65] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + If the SYN bit is on and the security/compartment and precedence + are acceptable then, RCV.NXT and RCV.LBB are set to SEG.SEQ+1, + IRS is set to SEG.SEQ. 
If SND.UNA > ISS (our SYN has been + ACKed), change the connection state to ESTABLISHED, otherwise + enter SYN-RECEIVED. In any case, form an ACK segment: + + + + and send it. Data or controls which were queued for + transmission may be included. + + If SEG.PRC is greater than TCB.PRC set TCB.PRC<-SEG.PRC. + + If there are other controls or text in the segment then continue + processing at the fifth step below where the URG bit is checked, + otherwise return. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 66] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + Otherwise, + + first check sequence number + + SYN-RECEIVED STATE + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + Segments are processed in sequence. Initial tests on arrival + are used to discard old duplicates, but further processing is + done in SEG.SEQ order. If a segment's contents straddle the + boundary between old and new, only the new parts should be + processed. + + There are four cases for the acceptability test for an incoming + segment: + + Segment Receive Test + Length Window + ------- ------- ------------------------------------------- + + 0 0 SEG.SEQ = RCV.NXT + + 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + + >0 0 not acceptable + + >0 >0 RCV.NXT < SEG.SEQ+SEG.LEN =< RCV.NXT+RCV.WND + + Note that the test above guarantees that the last sequence + number used by the segment lies in the receive-window. If the + RCV.WND is zero, no segments will be acceptable, but special + allowance should be made to accept valid ACKs, URGs and RSTs. + + If an incoming segment is not acceptable, an acknowledgment + should be sent in reply: + + + + If the incoming segment is unacceptable, drop it and return. 
+ + + + + + [Page 67] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + second check security and precedence + + If the security/compartment and precedence in the segment do not + exactly match the security/compartment and precedence in the TCB + then form a reset and return. + + Note this check is placed following the sequence check to prevent + a segment from an old connection between these parts with a + different security or precedence from causing an abort of the + current connection. + + third check the ACK field, + + SYN-RECEIVED STATE + + If the RST bit is off and SND.UNA < SEG.ACK =< SND.NXT then set + SND.UNA <- SEG.ACK, remove any acknowledged segments from the + retransmission queue, and enter ESTABLISHED state. + + If the segment acknowledgment is not acceptable, form a reset + segment, + + + + and send it, unless the incoming segment is an RST (or there is + no ACK), in which case, it should be discarded, then return. + + ESTABLISHED STATE + + If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK. + Any segments on the retransmission queue which are thereby + entirely acknowledged are removed. Users should receive + positive acknowledgments for buffers which have been SENT and + fully acknowledged (i.e., SEND buffer should be returned with + "ok" response). If the ACK is a duplicate, it can be ignored. + + If the segment passes the sequence number and acknowledgment + number tests, the send window should be updated. If + SND.WL =< SEG.SEQ, set SND.WND <- SEG.WND and set + SND.WL <- SEG.SEQ. + + If the remote buffer size is not one, then the + end-of-letter/buffer-size adjustment to sequence numbers may + have an effect on the next expected sequence number to be + acknowledged. 
It is possible that the remote TCP will + acknowledge with a SEG.ACK equal to a sequence number of an + + + +[Page 68] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + octet that was skipped over at the end of a letter. This is a mild + error on the remote TCP's part, but not cause for alarm. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + In addition to the processing for the ESTABLISHED state, if the + retransmission queue is empty, the user's CLOSE can be + acknowledged ("ok") but do not delete the TCB. + + TIME-WAIT STATE + + The only thing that can arrive in this state is a retransmission + of the remote FIN. Acknowledge it, and restart the 2 MSL + timeout. + + CLOSE-WAIT STATE + + Do the same processing as for the ESTABLISHED state. + + CLOSING STATE + + If the ACK acknowledges our FIN then delete the TCB (enter the + CLOSED state), otherwise ignore the segment. + + fourth check the RST bit, + + SYN-RECEIVED STATE + + If the RST bit is set then, if the segment has passed sequence + and acknowledgment tests, it is valid. If this connection was + initiated with a passive OPEN (i.e., came from the LISTEN + state), then return this connection to LISTEN state. The user + need not be informed. If this connection was initiated with an + active OPEN (i.e., came from SYN-SENT state) then the connection + was refused, signal the user "connection refused". In either + case, all segments on the retransmission queue should be + removed. + + + + + + + + + + + + [Page 69] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + ESTABLISHED + FIN-WAIT-1 + FIN-WAIT-2 + CLOSE-WAIT + CLOSING STATE + + If the RST bit is set then, any outstanding RECEIVEs and SEND + should receive "reset" responses. All segment queues should be + flushed. Users should also receive an unsolicited general + "connection reset" signal. Enter the CLOSED state, delete the + TCB, and return. 
+ + TIME-WAIT + + Enter the CLOSED state, delete the TCB, and return. + + fifth, check the SYN bit, + + SYN-RECEIVED + ESTABLISHED STATE + + If the SYN bit is set, check the segment sequence number against + the receive window. The segment sequence number must be in the + receive window; if not, ignore the segment. If the SYN is on + and SEG.SEQ = IRS then everything is ok and no action is needed; + but if they are not equal, there is an error and a reset must be + sent. + + If a reset must be sent it is formed as follows: + + + + The connection must be aborted as if a RST had been received. + + FIN-WAIT STATE-1 + FIN-WAIT STATE-2 + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + This case should not occur, since a duplicate of the SYN which + started the current connection incarnation will have been + filtered in the SEG.SEQ processing. Other SYN's will have been + rejected by this test as well (see SYN processing for + ESTABLISHED state). + + + + +[Page 70] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + sixth, check the URG bit, + + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal + the user that the remote side has urgent data if the urgent + pointer (RCV.UP) is in advance of the data consumed. If the + user has already been signaled (or is still in the "urgent + mode") for this continuous sequence of urgent data, do not + signal the user again. + + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING + + This should not occur, since a FIN has been received from the + remote side. Ignore the URG. + + seventh, process the segment text, + + ESTABLISHED STATE + + Once in the ESTABLISHED state, it is possible to deliver segment + text to user RECEIVE buffers. Text from segments can be moved + into buffers until either the buffer is full or the segment is + empty. 
If the segment empties and carries an EOL flag, then the + user is informed, when the buffer is returned, that an EOL has + been received. + + If buffer size is not one octet, then do the following + end-of-letter/buffer-size adjustment processing: + + if EOL = 0 then + + RCV.NXT <- SEG.SEQ + SEG.LEN + + if EOL = 1 then + + While RCV.LBB < SEG.SEQ+SEG.LEN + Do RCV.LBB <- RCV.LBB + RCV.BS End + RCV.NXT <- RCV.LBB + + When the TCP takes responsibility for delivering the data to the + user it must also acknowledge the receipt of the data. Send an + acknowledgment of the form: + + + [Page 71] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + + + This acknowledgment should be piggybacked on a segment being + transmitted if possible without incurring undue delay. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + If there are outstanding RECEIVEs, they should be satisfied, if + possible, with the text of this segment; remaining text should + be queued for further processing. If a RECEIVE is satisfied, + the user should be notified, with "end-of-letter" (EOL) signal, + if appropriate. + + TIME-WAIT STATE + CLOSE-WAIT STATE + + This should not occur, since a FIN has been received from the + remote side. Ignore the segment text. + + eighth, check the FIN bit, + + Send an acknowledgment for the FIN. Signal the user "connection + closing", and return any pending RECEIVEs with same message. Note + that FIN implies EOL for any segment text not yet delivered to the + user. If the current state is ESTABLISHED, enter the CLOSE-WAIT + state. If the current state is FIN-WAIT-1, enter the CLOSING + state. If the current state is FIN-WAIT-2, enter the TIME-WAIT + state. + + and return. 
+ + + + + + + + + + + + + + + + + + +[Page 72] + + +January 1980 + Transmission Control Protocol + Functional Specification +USER TIMEOUT + + + + USER TIMEOUT + + For any state if the user timeout expires, flush all queues, signal + the user "error: connection aborted due to user timeout" in general + and for any outstanding calls, delete the TCB, and return. + + RETRANSMISSION TIMEOUT + + For any state if the retransmission timeout expires on a segment in + the retransmission queue, send the segment at the front of the + retransmission queue again, reinitialize the retransmission timer, + and return. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 73] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 74] + + +January 1980 + Transmission Control Protocol + + + + GLOSSARY + + + +1822 + BBN Report 1822, "The Specification of the Interconnection of + a Host and an IMP". The specification of interface between a + host and the ARPANET. + +ACK + A control bit (acknowledge) occupying no sequence space, which + indicates that the acknowledgment field of this segment + specifies the next sequence number the sender of this segment + is expecting to receive, hence acknowledging receipt of all + previous sequence numbers. + +ARPANET message + The unit of transmission between a host and an IMP in the + ARPANET. The maximum size is about 1012 octets (8096 bits). + +ARPANET packet + A unit of transmission used internally in the ARPANET between + IMPs. The maximum size is about 126 octets (1008 bits). + +buffer size + An option (buffer size) used to state the receive data buffer + size of the sender of this option. May only be sent in a + segment that also carries a SYN. + +connection + A logical communication path identified by a pair of sockets. + +datagram + A message sent in a packet switched computer communications + network. 
+ +Destination Address + The destination address, usually the network and host + identifiers. + +EOL + A control bit (End of Letter) occupying no sequence space, + indicating that this segment ends a logical letter with the + last data octet in the segment. If this end of letter causes + a less than full buffer to be released to the user and the + connection buffer size is not one octet then the + end-of-letter/buffer-size adjustment to the receive sequence + number must be made. + + + + [Page 75] + + + January 1980 +Transmission Control Protocol +Glossary + + + +FIN + A control bit (finis) occupying one sequence number, which + indicates that the sender will send no more data or control + occupying sequence space. + +fragment + A portion of a logical unit of data, in particular an internet + fragment is a portion of an internet datagram. + +FTP + A file transfer protocol. + +header + Control information at the beginning of a message, segment, + fragment, packet or block of data. + +host + A computer. In particular a source or destination of messages + from the point of view of the communication network. + +Identification + An Internet Protocol field. This identifying value assigned + by the sender aids in assembling the fragments of a datagram. + +IMP + The Interface Message Processor, the packet switch of the + ARPANET. + +internet address + A source or destination address specific to the host level. + +internet datagram + The unit of data exchanged between an internet module and the + higher level protocol together with the internet header. + +internet fragment + A portion of the data of an internet datagram with an internet + header. + +IP + Internet Protocol. + +IRS + The Initial Receive Sequence number. The first sequence + number used by the sender on a connection. + + + + + +[Page 76] + + +January 1980 + Transmission Control Protocol + Glossary + + + +ISN + The Initial Sequence Number. The first sequence number used + on a connection, (either ISS or IRS). 
Selected on a clock + based procedure. + +ISS + The Initial Send Sequence number. The first sequence number + used by the sender on a connection. + +leader + Control information at the beginning of a message or block of + data. In particular, in the ARPANET, the control information + on an ARPANET message at the host-IMP interface. + +left sequence + This is the next sequence number to be acknowledged by the + data receiving TCP (or the lowest currently unacknowledged + sequence number) and is sometimes referred to as the left edge + of the send window. + +letter + A logical unit of data, in particular the logical unit of data + transmitted between processes via TCP. + +local packet + The unit of transmission within a local network. + +module + An implementation, usually in software, of a protocol or other + procedure. + +MSL + Maximum Segment Lifetime, the time a TCP segment can exist in + the internetwork system. Arbitrarily defined to be 2 minutes. + +octet + An eight bit byte. + +Options + An Option field may contain several options, and each option + may be several octets in length. The options are used + primarily in testing situations; for example, to carry + timestamps. Both the Internet Protocol and TCP provide for + options fields. + +packet + A package of data with a header which may or may not be + + + + [Page 77] + + + January 1980 +Transmission Control Protocol +Glossary + + + + logically complete. More often a physical packaging than a + logical packaging of data. + +port + The portion of a socket that specifies which logical input or + output channel of a process is associated with the data. + +process + A program in execution. A source or destination of data from + the point of view of the TCP or other host-to-host protocol. + +PSN + A Packet Switched Network. For example, the ARPANET. 
+ +RCV.BS + receive buffer size, the remote buffer size + +RCV.LBB + receive last buffer beginning + +RCV.NXT + receive next sequence number + +RCV.UP + receive urgent pointer + +RCV.WND + receive window + +receive last buffer beginning + This is the sequence number of the first octet of the most + recent buffer. This value is used in calculating the next + sequence number when a segment contains an end of letter + indication. + +receive next sequence number + This is the next sequence number the local TCP is expecting to + receive. + +receive window + This represents the sequence numbers the local (receiving) TCP + is willing to receive. Thus, the local TCP considers that + segments overlapping the range RCV.NXT to + RCV.NXT + RCV.WND - 1 carry acceptable data or control. + Segments containing sequence numbers entirely outside of this + range are considered duplicates and discarded. + + + + +[Page 78] + + +January 1980 + Transmission Control Protocol + Glossary + + + +RST + A control bit (reset), occupying no sequence space, indicating + that the receiver should delete the connection without further + interaction. The receiver can determine, based on the + sequence number and acknowledgment fields of the incoming + segment, whether it should honor the reset command or ignore + it. In no case does receipt of a segment containing RST give + rise to a RST in response. + +RTP + Real Time Protocol: A host-to-host protocol for communication + of time critical information. + +Rubber EOL + An end of letter (EOL) requiring a sequence number adjustment + to align the beginning of the next letter on a buffer + boundary. + +SEG.ACK + segment acknowledgment + +SEG.LEN + segment length + +SEG.PRC + segment precedence value + +SEG.SEQ + segment sequence + +SEG.UP + segment urgent pointer field + +SEG.WND + segment window field + +segment + A logical unit of data, in particular a TCP segment is the + unit of data transferred between a pair of TCP modules. 
+ +segment acknowledgment + The sequence number in the acknowledgment field of the + arriving segment. + +segment length + The amount of sequence number space occupied by a segment, + including any controls which occupy sequence space. + + + + [Page 79] + + + January 1980 +Transmission Control Protocol +Glossary + + + +segment sequence + The number in the sequence field of the arriving segment. + +send last buffer beginning + This is the sequence number of the first octet of the most + recent buffer. This value is used in calculating the next + sequence number when a segment contains an end of letter + indication. + +send sequence + This is the next sequence number the local (sending) TCP will + use on the connection. It is initially selected from an + initial sequence number curve (ISN) and is incremented for + each octet of data or sequenced control transmitted. + +send window + This represents the sequence numbers which the remote + (receiving) TCP is willing to receive. It is the value of the + window field specified in segments from the remote (data + receiving) TCP. The range of sequence numbers which may be + emitted by a TCP lies between SND.NXT and + SND.UNA + SND.WND - 1. + +SND.BS + send buffer size, the local buffer size + +SND.LBB + send last buffer beginning + +SND.NXT + send sequence + +SND.UNA + left sequence + +SND.UP + send urgent pointer + +SND.WL + send sequence number at last window update + +SND.WND + send window + +socket + An address which specifically includes a port identifier, that + is, the concatenation of an Internet Address with a TCP port. + + + +[Page 80] + + +January 1980 + Transmission Control Protocol + Glossary + + + +Source Address + The source address, usually the network and host identifiers. + +SYN + A control bit in the incoming segment, occupying one sequence + number, used at the initiation of a connection, to indicate + where the sequence numbering will start. 
+ +TCB + Transmission control block, the data structure that records + the state of a connection. + +TCB.PRC + The precedence of the connection. + +TCP + Transmission Control Protocol: A host-to-host protocol for + reliable communication in internetwork environments. + +TOS + Type of Service, an Internet Protocol field. + +Type of Service + An Internet Protocol field which indicates the type of service + for this internet fragment. + +URG + A control bit (urgent), occupying no sequence space, used to + indicate that the receiving user should be notified to do + urgent processing as long as there is data to be consumed with + sequence numbers less than the value indicated in the urgent + pointer. + +urgent pointer + A control field meaningful only when the URG bit is on. This + field communicates the value of the urgent pointer which + indicates the data octet associated with the sending user's + urgent call. + + + + + + + + + + + + + [Page 81] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 82] + + +January 1980 + Transmission Control Protocol + + + + REFERENCES + + + +[1] Cerf, V., and R. Kahn, "A Protocol for Packet Network + Intercommunication," IEEE Transactions on Communications, + Vol. COM-22, No. 5, pp 637-648, May 1974. + +[2] Postel, J. (ed.), "DOD Standard Internet Protocol," Defense + Advanced Research Projects Agency, Information Processing + Techniques Office, RFC 760, IEN 128, January 1980. + +[3] Feinler, E. and J. Postel, ARPANET Protocol Handbook, Network + Information Center, SRI International, Menlo Park, CA, + January 1978. + +[4] Dalal, Y. and C. Sunshine, "Connection Management in Transport + Protocols," Computer Networks, Vol. 2, No. 6, pp. 454-473, + December 1978. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 83] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 84] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt new file mode 100644 index 0000000..4f13551 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt @@ -0,0 +1,174 @@ + + +RFC 768 J. Postel + ISI + 28 August 1980 + + + + User Datagram Protocol + ---------------------- + +Introduction +------------ + +This User Datagram Protocol (UDP) is defined to make available a +datagram mode of packet-switched computer communication in the +environment of an interconnected set of computer networks. This +protocol assumes that the Internet Protocol (IP) [1] is used as the +underlying protocol. + +This protocol provides a procedure for application programs to send +messages to other programs with a minimum of protocol mechanism. The +protocol is transaction oriented, and delivery and duplicate protection +are not guaranteed. Applications requiring ordered reliable delivery of +streams of data should use the Transmission Control Protocol (TCP) [2]. + +Format +------ + + + 0 7 8 15 16 23 24 31 + +--------+--------+--------+--------+ + | Source | Destination | + | Port | Port | + +--------+--------+--------+--------+ + | | | + | Length | Checksum | + +--------+--------+--------+--------+ + | + | data octets ... + +---------------- ... + + User Datagram Header Format + +Fields +------ + +Source Port is an optional field, when meaningful, it indicates the port +of the sending process, and may be assumed to be the port to which a +reply should be addressed in the absence of any other information. If +not used, a value of zero is inserted. 
+ + + + + +Postel [page 1] + + + 28 Aug 1980 +User Datagram Protocol RFC 768 +Fields + + + +Destination Port has a meaning within the context of a particular +internet destination address. + +Length is the length in octets of this user datagram including this +header and the data. (This means the minimum value of the length is +eight.) + +Checksum is the 16-bit one's complement of the one's complement sum of a +pseudo header of information from the IP header, the UDP header, and the +data, padded with zero octets at the end (if necessary) to make a +multiple of two octets. + +The pseudo header conceptually prefixed to the UDP header contains the +source address, the destination address, the protocol, and the UDP +length. This information gives protection against misrouted datagrams. +This checksum procedure is the same as is used in TCP. + + 0 7 8 15 16 23 24 31 + +--------+--------+--------+--------+ + | source address | + +--------+--------+--------+--------+ + | destination address | + +--------+--------+--------+--------+ + | zero |protocol| UDP length | + +--------+--------+--------+--------+ + +If the computed checksum is zero, it is transmitted as all ones (the +equivalent in one's complement arithmetic). An all zero transmitted +checksum value means that the transmitter generated no checksum (for +debugging or for higher level protocols that don't care). + +User Interface +-------------- + +A user interface should allow + + the creation of new receive ports, + + receive operations on the receive ports that return the data octets + and an indication of source port and source address, + + and an operation that allows a datagram to be sent, specifying the + data, source and destination ports and addresses to be sent. 
+ + + + + + +[page 2] Postel + + +28 Aug 1980 +RFC 768 User Datagram Protocol + IP Interface + + + +IP Interface +------------- + +The UDP module must be able to determine the source and destination +internet addresses and the protocol field from the internet header. One +possible UDP/IP interface would return the whole internet datagram +including all of the internet header in response to a receive operation. +Such an interface would also allow the UDP to pass a full internet +datagram complete with header to the IP to send. The IP would verify +certain fields for consistency and compute the internet header checksum. + +Protocol Application +-------------------- + +The major uses of this protocol are the Internet Name Server [3], and the +Trivial File Transfer [4]. + +Protocol Number +--------------- + +This is protocol 17 (21 octal) when used in the Internet Protocol. +Other protocol numbers are listed in [5]. + +References +---------- + +[1] Postel, J., "Internet Protocol," RFC 760, USC/Information + Sciences Institute, January 1980. + +[2] Postel, J., "Transmission Control Protocol," RFC 761, + USC/Information Sciences Institute, January 1980. + +[3] Postel, J., "Internet Name Server," USC/Information Sciences + Institute, IEN 116, August 1979. + +[4] Sollins, K., "The TFTP Protocol," Massachusetts Institute of + Technology, IEN 133, January 1980. + +[5] Postel, J., "Assigned Numbers," USC/Information Sciences + Institute, RFC 762, January 1980. + + + + + + + + + +Postel [page 3] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt new file mode 100644 index 0000000..5c659e8 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt @@ -0,0 +1,1218 @@ + + +Network Working Group J. 
Postel +Request for Comments: 792 ISI + September 1981 +Updates: RFCs 777, 760 +Updates: IENs 109, 128 + + INTERNET CONTROL MESSAGE PROTOCOL + + DARPA INTERNET PROGRAM + PROTOCOL SPECIFICATION + + + +Introduction + + The Internet Protocol (IP) [1] is used for host-to-host datagram + service in a system of interconnected networks called the + Catenet [2]. The network connecting devices are called Gateways. + These gateways communicate between themselves for control purposes + via a Gateway to Gateway Protocol (GGP) [3,4]. Occasionally a + gateway or destination host will communicate with a source host, for + example, to report an error in datagram processing. For such + purposes this protocol, the Internet Control Message Protocol (ICMP), + is used. ICMP, uses the basic support of IP as if it were a higher + level protocol, however, ICMP is actually an integral part of IP, and + must be implemented by every IP module. + + ICMP messages are sent in several situations: for example, when a + datagram cannot reach its destination, when the gateway does not have + the buffering capacity to forward a datagram, and when the gateway + can direct the host to send traffic on a shorter route. + + The Internet Protocol is not designed to be absolutely reliable. The + purpose of these control messages is to provide feedback about + problems in the communication environment, not to make IP reliable. + There are still no guarantees that a datagram will be delivered or a + control message will be returned. Some datagrams may still be + undelivered without any report of their loss. The higher level + protocols that use IP must implement their own reliability procedures + if reliable communication is required. + + The ICMP messages typically report errors in the processing of + datagrams. To avoid the infinite regress of messages about messages + etc., no ICMP messages are sent about ICMP messages. 
Also ICMP + messages are only sent about errors in handling fragment zero of + fragmented datagrams. (Fragment zero has the fragment offset equal to + zero). + + + + + + + + [Page 1] + + + September 1981 +RFC 792 + + + +Message Formats + + ICMP messages are sent using the basic IP header. The first octet of + the data portion of the datagram is an ICMP type field; the value of + this field determines the format of the remaining data. Any field + labeled "unused" is reserved for later extensions and must be zero + when sent, but receivers should not use these fields (except to + include them in the checksum). Unless otherwise noted under the + individual format descriptions, the values of the internet header + fields are as follows: + + Version + + 4 + + IHL + + Internet header length in 32-bit words. + + Type of Service + + 0 + + Total Length + + Length of internet header and data in octets. + + Identification, Flags, Fragment Offset + + Used in fragmentation, see [1]. + + Time to Live + + Time to live in seconds; as this field is decremented at each + machine in which the datagram is processed, the value in this + field should be at least as great as the number of gateways which + this datagram will traverse. + + Protocol + + ICMP = 1 + + Header Checksum + + The 16 bit one's complement of the one's complement sum of all 16 + bit words in the header. For computing the checksum, the checksum + field should be zero. This checksum may be replaced in the + future. + + +[Page 2] + + +September 1981 +RFC 792 + + + + Source Address + + The address of the gateway or host that composes the ICMP message. + Unless otherwise noted, this can be any of a gateway's addresses. + + Destination Address + + The address of the gateway or host to which the message should be + sent.
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 3] + + + September 1981 +RFC 792 + + + +Destination Unreachable Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 3 + + Code + + 0 = net unreachable; + + 1 = host unreachable; + + 2 = protocol unreachable; + + 3 = port unreachable; + + 4 = fragmentation needed and DF set; + + 5 = source route failed. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + + +[Page 4] + + +September 1981 +RFC 792 + + + + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + If, according to the information in the gateway's routing tables, + the network specified in the internet destination field of a + datagram is unreachable, e.g., the distance to the network is + infinity, the gateway may send a destination unreachable message + to the internet source host of the datagram. 
In addition, in some + networks, the gateway may be able to determine if the internet + destination host is unreachable. Gateways in these networks may + send destination unreachable messages to the source host when the + destination host is unreachable. + + If, in the destination host, the IP module cannot deliver the + datagram because the indicated protocol module or process port is + not active, the destination host may send a destination + unreachable message to the source host. + + Another case is when a datagram must be fragmented to be forwarded + by a gateway yet the Don't Fragment flag is on. In this case the + gateway must discard the datagram and may return a destination + unreachable message. + + Codes 0, 1, 4, and 5 may be received from a gateway. Codes 2 and + 3 may be received from a host. + + + + + + + + + + + + + + + + + + + + + + [Page 5] + + + September 1981 +RFC 792 + + + +Time Exceeded Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 11 + + Code + + 0 = time to live exceeded in transit; + + 1 = fragment reassembly time exceeded. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. 
This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + If the gateway processing a datagram finds the time to live field + + +[Page 6] + + +September 1981 +RFC 792 + + + + is zero it must discard the datagram. The gateway may also notify + the source host via the time exceeded message. + + If a host reassembling a fragmented datagram cannot complete the + reassembly due to missing fragments within its time limit it + discards the datagram, and it may send a time exceeded message. + + If fragment zero is not available then no time exceeded need be + sent at all. + + Code 0 may be received from a gateway. Code 1 may be received + from a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 7] + + + September 1981 +RFC 792 + + + +Parameter Problem Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 12 + + Code + + 0 = pointer indicates the error. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Pointer + + If code = 0, identifies the octet where an error was detected. 
+ + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + + + +[Page 8] + + +September 1981 +RFC 792 + + + + Description + + If the gateway or host processing a datagram finds a problem with + the header parameters such that it cannot complete processing the + datagram it must discard the datagram. One potential source of + such a problem is with incorrect arguments in an option. The + gateway or host may also notify the source host via the parameter + problem message. This message is only sent if the error caused + the datagram to be discarded. + + The pointer identifies the octet of the original datagram's header + where the error was detected (it may be in the middle of an + option). For example, 1 indicates something is wrong with the + Type of Service, and (if there are options present) 20 indicates + something is wrong with the type code of the first option. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 9] + + + September 1981 +RFC 792 + + + +Source Quench Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address of the original datagram's data. 
+ + ICMP Fields: + + Type + + 4 + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + A gateway may discard internet datagrams if it does not have the + buffer space needed to queue the datagrams for output to the next + network on the route to the destination network. If a gateway + + +[Page 10] + + +September 1981 +RFC 792 + + + + discards a datagram, it may send a source quench message to the + internet source host of the datagram. A destination host may also + send a source quench message if datagrams arrive too fast to be + processed. The source quench message is a request to the host to + cut back the rate at which it is sending traffic to the internet + destination. The gateway may send a source quench message for + every message that it discards. On receipt of a source quench + message, the source host should cut back the rate at which it is + sending traffic to the specified destination until it no longer + receives source quench messages from the gateway. The source host + can then gradually increase the rate at which it sends traffic to + the destination until it again receives source quench messages. + + The gateway or host may send the source quench message when it + approaches its capacity limit rather than waiting until the + capacity is exceeded. This means that the data datagram which + triggered the source quench message may be delivered. 
+ + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 11] + + + September 1981 +RFC 792 + + + +Redirect Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gateway Internet Address | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address of the original datagram's data. + + ICMP Fields: + + Type + + 5 + + Code + + 0 = Redirect datagrams for the Network. + + 1 = Redirect datagrams for the Host. + + 2 = Redirect datagrams for the Type of Service and Network. + + 3 = Redirect datagrams for the Type of Service and Host. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Gateway Internet Address + + Address of the gateway to which traffic for the network specified + in the internet destination network field of the original + datagram's data should be sent. + + + + +[Page 12] + + +September 1981 +RFC 792 + + + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + The gateway sends a redirect message to a host in the following + situation. 
A gateway, G1, receives an internet datagram from a + host on a network to which the gateway is attached. The gateway, + G1, checks its routing table and obtains the address of the next + gateway, G2, on the route to the datagram's internet destination + network, X. If G2 and the host identified by the internet source + address of the datagram are on the same network, a redirect + message is sent to the host. The redirect message advises the + host to send its traffic for network X directly to gateway G2 as + this is a shorter path to the destination. The gateway forwards + the original datagram's data to its internet destination. + + For datagrams with the IP source route options and the gateway + address in the destination address field, a redirect message is + not sent even if there is a better route to the ultimate + destination than the next address in the source route. + + Codes 0, 1, 2, and 3 may be received from a gateway. + + + + + + + + + + + + + + + + + + + + + + + [Page 13] + + + September 1981 +RFC 792 + + + +Echo or Echo Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Data ... + +-+-+-+-+- + + IP Fields: + + Addresses + + The address of the source in an echo message will be the + destination of the echo reply message. To form an echo reply + message, the source and destination addresses are simply reversed, + the type code changed to 0, and the checksum recomputed. + + IP Fields: + + Type + + 8 for echo message; + + 0 for echo reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. 
+ For computing the checksum , the checksum field should be zero. + If the total length is odd, the received data is padded with one + octet of zeros for computing the checksum. This checksum may be + replaced in the future. + + Identifier + + If code = 0, an identifier to aid in matching echos and replies, + may be zero. + + Sequence Number + + +[Page 14] + + +September 1981 +RFC 792 + + + + If code = 0, a sequence number to aid in matching echos and + replies, may be zero. + + Description + + The data received in the echo message must be returned in the echo + reply message. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the echo requests. For + example, the identifier might be used like a port in TCP or UDP to + identify a session, and the sequence number might be incremented + on each echo request sent. The echoer returns these same values + in the echo reply. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 15] + + + September 1981 +RFC 792 + + + +Timestamp or Timestamp Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Originate Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Receive Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Transmit Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Addresses + + The address of the source in a timestamp message will be the + destination of the timestamp reply message. 
To form a timestamp + reply message, the source and destination addresses are simply + reversed, the type code changed to 14, and the checksum + recomputed. + + IP Fields: + + Type + + 13 for timestamp message; + + 14 for timestamp reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Identifier + + + + +[Page 16] + + +September 1981 +RFC 792 + + + + If code = 0, an identifier to aid in matching timestamp and + replies, may be zero. + + Sequence Number + + If code = 0, a sequence number to aid in matching timestamp and + replies, may be zero. + + Description + + The data received (a timestamp) in the message is returned in the + reply together with an additional timestamp. The timestamp is 32 + bits of milliseconds since midnight UT. One use of these + timestamps is described by Mills [5]. + + The Originate Timestamp is the time the sender last touched the + message before sending it, the Receive Timestamp is the time the + echoer first touched it on receipt, and the Transmit Timestamp is + the time the echoer last touched the message on sending it. + + If the time is not available in miliseconds or cannot be provided + with respect to midnight UT then any time can be inserted in a + timestamp provided the high order bit of the timestamp is also set + to indicate this non-standard value. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the requests. For example, + the identifier might be used like a port in TCP or UDP to identify + a session, and the sequence number might be incremented on each + request sent. The destination returns these same values in the + reply. + + Code 0 may be received from a gateway or a host. 
+ + + + + + + + + + + + + + + + + + [Page 17] + + + September 1981 +RFC 792 + + + +Information Request or Information Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Addresses + + The address of the source in a information request message will be + the destination of the information reply message. To form a + information reply message, the source and destination addresses + are simply reversed, the type code changed to 16, and the checksum + recomputed. + + IP Fields: + + Type + + 15 for information request message; + + 16 for information reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Identifier + + If code = 0, an identifier to aid in matching request and replies, + may be zero. + + Sequence Number + + If code = 0, a sequence number to aid in matching request and + replies, may be zero. + + +[Page 18] + + +September 1981 +RFC 792 + + + + Description + + This message may be sent with the source network in the IP header + source and destination address fields zero (which means "this" + network). The replying IP module should send the reply with the + addresses fully specified. This message is a way for a host to + find out the number of the network it is on. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the requests. 
For example, + the identifier might be used like a port in TCP or UDP to identify + a session, and the sequence number might be incremented on each + request sent. The destination returns these same values in the + reply. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 19] + + + September 1981 +RFC 792 + + + +Summary of Message Types + + 0 Echo Reply + + 3 Destination Unreachable + + 4 Source Quench + + 5 Redirect + + 8 Echo + + 11 Time Exceeded + + 12 Parameter Problem + + 13 Timestamp + + 14 Timestamp Reply + + 15 Information Request + + 16 Information Reply + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 20] + + +September 1981 +RFC 792 + + + +References + + [1] Postel, J. (ed.), "Internet Protocol - DARPA Internet Program + Protocol Specification," RFC 791, USC/Information Sciences + Institute, September 1981. + + [2] Cerf, V., "The Catenet Model for Internetworking," IEN 48, + Information Processing Techniques Office, Defense Advanced + Research Projects Agency, July 1978. + + [3] Strazisar, V., "Gateway Routing: An Implementation + Specification", IEN 30, Bolt Beranek and Newman, April 1979. + + [4] Strazisar, V., "How to Build a Gateway", IEN 109, Bolt Beranek + and Newman, August 1979. + + [5] Mills, D., "DCNET Internet Clock Service," RFC 778, COMSAT + Laboratories, April 1981. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 21] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt new file mode 100644 index 0000000..d5cd5eb --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt @@ -0,0 +1,171 @@ + + +Network Working Group Charles Hornig +Request for Comments: 894 Symbolics Cambridge Research Center + April 1984 + + A Standard for the Transmission of IP Datagrams over Ethernet Networks + + +Status of this Memo + + This RFC specifies a standard method of encapsulating Internet + Protocol (IP) [1] datagrams on an Ethernet [2]. This RFC specifies a + standard protocol for the ARPA-Internet community. + +Introduction + + This memo applies to the Ethernet (10-megabit/second, 48-bit + addresses). The procedure for transmission of IP datagrams on the + Experimental Ethernet (3-megabit/second, 8-bit addresses) is + described in [3]. + +Frame Format + + IP datagrams are transmitted in standard Ethernet frames. The type + field of the Ethernet frame must contain the value hexadecimal 0800. + The data field contains the IP header followed immediately by the IP + data. + + The minimum length of the data field of a packet sent over an + Ethernet is 46 octets. If necessary, the data field should be padded + (with octets of zero) to meet the Ethernet minimum frame size. This + padding is not part of the IP packet and is not included in the total + length field of the IP header. + + The maximum length of the data field of a packet sent over an + Ethernet is 1500 octets, thus the maximum length of an IP datagram + sent over an Ethernet is 1500 octets. Implementations are encouraged + to support full-length packets. Gateway implementations MUST be + prepared to accept full-length packets and fragment them if + necessary.
If a system cannot receive full-length packets, it should + take steps to discourage others from sending them, such as using the + TCP Maximum Segment Size option [4]. + + Note: Datagrams on the Ethernet may be longer than the general + Internet default maximum packet size of 576 octets. Hosts connected + to an Ethernet should keep this in mind when sending datagrams to + hosts not on the same Ethernet. It may be appropriate to send + smaller datagrams to avoid unnecessary fragmentation at intermediate + gateways. Please see [4] for further information on this point. + + + + + +Hornig [Page 1] + + + +RFC 894 April 1984 + + +Address Mappings + + The mapping of 32-bit Internet addresses to 48-bit Ethernet addresses + can be done several ways. A static table could be used, or a dynamic + discovery procedure could be used. + + Static Table + + Each host could be provided with a table of all other hosts on the + local network with both their Ethernet and Internet addresses. + + Dynamic Discovery + + Mappings between 32-bit Internet addresses and 48-bit Ethernet + addresses could be accomplished through the Address Resolution + Protocol (ARP) [5]. Internet addresses are assigned arbitrarily + on some Internet network. Each host's implementation must know + its own Internet address and respond to Ethernet Address + Resolution packets appropriately. It should also use ARP to + translate Internet addresses to Ethernet addresses when needed. + + Broadcast Address + + The broadcast Internet address (the address on that network with a + host part of all binary ones) should be mapped to the broadcast + Ethernet address (of all binary ones, FF-FF-FF-FF-FF-FF hex). + + The use of the ARP dynamic discovery procedure is strongly + recommended. + +Trailer Formats + + Some versions of Unix 4.2bsd use a different encapsulation method in + order to get better network performance with the VAX virtual memory + architecture. 
Consenting systems on the same Ethernet may use this + format between themselves. + + No host is required to implement it, and no datagrams in this format + should be sent to any host unless the sender has positive knowledge + that the recipient will be able to interpret them. Details of the + trailer encapsulation may be found in [6]. + + (Note: At the present time Unix 4.2bsd will either always use + trailers or never use them (per interface), depending on a boot-time + option. This is expected to be changed in the future. Unix 4.2bsd + also uses a non-standard Internet broadcast address with a host part + of all zeroes, this may also be changed in the future.) + + + +Hornig [Page 2] + + + +RFC 894 April 1984 + + +Byte Order + + As described in Appendix B of the Internet Protocol + specification [1], the IP datagram is transmitted over the Ethernet + as a series of 8-bit bytes. + +References + + [1] Postel, J., "Internet Protocol", RFC-791, USC/Information + Sciences Institute, September 1981. + + [2] "The Ethernet - A Local Area Network", Version 1.0, Digital + Equipment Corporation, Intel Corporation, Xerox Corporation, + September 1980. + + [3] Postel, J., "A Standard for the Transmission of IP Datagrams + over Experimental Ethernet Networks", RFC-895, USC/Information + Sciences Institute, April 1984. + + [4] Postel, J., "The TCP Maximum Segment Size Option and Related + Topics", RFC-879, USC/Information Sciences Institute, November 1983. + + [5] Plummer, D., "An Ethernet Address Resolution Protocol", RFC-826, + Symbolics Cambridge Research Center, November 1982. + + [6] Leffler, S., and M. Karels, "Trailer Encapsulations", RFC-893, + University of California at Berkeley, April 1984. 
+ + + + + + + + + + + + + + + + + + + + + + + +Hornig [Page 3] + diff --git a/roles/dotfiles/files/.emacs.d/ac-comphist.dat b/roles/dotfiles/files/.emacs.d/ac-comphist.dat new file mode 100644 index 0000000..35eafff --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ac-comphist.dat @@ -0,0 +1 @@ +(nil) diff --git a/roles/dotfiles/files/.emacs.d/ensure.el b/roles/dotfiles/files/.emacs.d/ensure.el new file mode 100644 index 0000000..07f0b88 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ensure.el @@ -0,0 +1,35 @@ +(defun ensure-package (package) + (unless (package-installed-p package) + (package-install package))) + +(unless (file-directory-p "/home/kyle/.emacs.d/elpa/archives/melpa") + (package-refresh-contents)) + +(let ((initial-package-list + '(auto-complete + cargo + ;; chess + cider + geiser + ;; gnugo + go ;; play the game + go-autocomplete + go-direx + go-guru + go-mode + jedi + keychain-environment + lua-mode + luarocks + magit + markdown-mode + paredit + pelican-mode + projectile + racket-mode + rust-mode + scpaste + slime + undo-tree))) + (dolist (package initial-package-list) + (ensure-package package))) diff --git a/roles/dotfiles/files/.emacs.d/ido.last b/roles/dotfiles/files/.emacs.d/ido.last new file mode 100644 index 0000000..d19df78 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ido.last @@ -0,0 +1,29 @@ +;;; -*- coding: utf-8 -*- + +;; ----- ido-last-directory-list ----- +( + ("/home/k.isom/kodiak/" . "ktos/") + ("/home/k.isom/" . 
"kodiak/") +) + +;; ----- ido-work-directory-list ----- +( + "/home/k.isom/kodiak/ktos/" +) + +;; ----- ido-work-file-list ----- +( + "README.org" +) + +;; ----- ido-dir-file-cache ----- +( + ("/home/k.isom/" (25647 820 345033 248000) ".local/" ".python_history" ".Xresources" ".aws.sh" ".aws/" "Downloads/" ".docker/" "./" ".npm/" ".sudo_as_admin_successful" ".bash_history" ".cache/" ".dmrc" ".bazelrc" ".bash_logout" ".profile.bak" "tmp/" ".config/" ".java/" ".mozilla/" "../" ".bashrc" "Pictures/" "src/" "obs.img" ".amplify/" ".gitconfig" "Music/" "Public/" ".pyenv/" "git/" ".lesshst" "token.txt" ".emacs.d/" ".profile" "snap/" ".viminfo" "kodiak/" ".yarn/" ".bazel/" "Documents/" ".GlobalProtect/" "Templates/" ".pki/" ".Xauthority" "Videos/" ".gnupg/" ".xsession-errors" "token.txt~" "java_error_in_clion_.hprof" "Desktop/" ".xsession-errors.old" ".ssh/") + + ("/home/k.isom/kodiak/ktos/" (25646 63617 378004 643000) "scripts/" "kodiak.xml" "./" "devtools.xml" "build-stack-from-source.xml" "device-table" "../" "cuda-tensorrt.xml" "nvidia-drivers.xml" "README.org" "rfs.xml" "initfs/" "update-syspart.sh" "rootfs/" "makefile" "xorg.xml" "reprepro.xml" "ifs.xml" ".git/") + + ("/home/k.isom/kodiak/" (25646 63612 290013 440000) "./" "vehicle/" "ktos/" "../") +) + +;; ----- ido-unc-hosts-cache ----- +t diff --git a/roles/dotfiles/files/.emacs.d/init.el b/roles/dotfiles/files/.emacs.d/init.el new file mode 100644 index 0000000..0464564 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/init.el @@ -0,0 +1,209 @@ +;;; startup without syntax highlighting +;;; (global-font-lock-mode 0) + +;; set up package handling +(require 'package) + +(setq gnutls-algorithm-priority "NORMAL:-VERS-TLS1.3") +(add-to-list 'package-archives + '("melpa" . 
"https://melpa.org/packages/")) + +(package-initialize) +(require 'cl) +(let* ((home-dir (getenv "HOME")) + (ensure-lisp (concatenate 'string home-dir "/.emacs.d/ensure.el"))) + (load ensure-lisp)) + +;; reduce brain damage +(tool-bar-mode 0) +(menu-bar-mode 0) +(setq inhibit-startup-screen t) +(setq display-time-24hr-format t) +(display-time-mode) +(column-number-mode) + +;; useful when writing +(global-set-key (kbd "C-c w") 'count-words) + +;; remove whitespace to make room for more cyberspace +(add-hook 'before-save-hook 'delete-trailing-whitespace) + +;; hippie-expand is the best +(require 'hippie-exp) +(require 'auto-complete) +(global-auto-complete-mode t) +(ac-set-trigger-key "") +(global-set-key (kbd "") 'ac-expand) + +;; eshell is pretty okay +(global-set-key (kbd "C-x m") 'eshell) + +;; ido-mode makes finding files way more awesome +;; note: C-x C-f C-f will kick back to normal find-file for when ido's tab +;; completion is getting in the way. +(require 'ido) +(ido-mode 1) + +;; magit, not yours +(require 'magit) +(global-set-key (kbd "C-x g") 'magit-status) + +;; undo-tree is undo done right +(require 'undo-tree) +(global-undo-tree-mode) + +;; i like refilling paragraphs +(global-set-key (kbd "M-q") 'fill-paragraph) + +;; i install things to /usr/local +(add-to-list 'exec-path "/home/kyle/bin") +(add-to-list 'exec-path "/usr/local/bin") + +;; tell me where i'm at +(column-number-mode) + +;;; i like cua-rectangle +(cua-mode t) +(cua-selection-mode 'emacs) +(global-set-key (kbd "M-RET") 'cua-rectangle-mark-mode) + +(require 'scpaste) +(setq scpaste-http-destination "https://p.kyleisom.net" + scpaste-scp-destination "p.kyleisom.net:/var/www/sites/p/") + +;;; useful for writing +(global-set-key (kbd "C-x w") 'count-words) + +;;; used with pollen +(global-set-key (kbd "C-c C-d") + (lambda () (interactive) (insert "\u25ca"))) +(add-to-list 'auto-mode-alist '("\\.poly.pm\\'" . 
text-mode)) + +(require 'markdown-mode) + +;; python stuff +(add-hook 'python-mode-hook 'jedi:setup) +(setq jedi:complete-on-dot t) ; optional + +;; golang stuff +(setq gofmt-command "goimports") +(require 'go-mode) +(add-hook 'before-save-hook 'gofmt-before-save) + +(when (file-exists-p (expand-file-name "~/quicklisp/slime-helper.el")) + (load (expand-file-name "~/quicklisp/slime-helper.el")) + (ensure-package 'slime) + ;; Replace "sbcl" with the path to your implementation + (setq inferior-lisp-program "sbcl") + (slime-setup '(slime-fancy + slime-autodoc + slime-indentation)) + + (setq slime-net-coding-system 'utf-8-unix + slime-truncate-lines nil) + + (setq lisp-lambda-list-keyword-parameter-alignment t + lisp-lambda-list-keyword-alignment t)) + +(add-to-list 'auto-mode-alist '("\\.ros\\'" . lisp-mode)) + + +(add-hook 'clojure-mode-hook #'enable-paredit-mode) +(add-hook 'lisp-mode-hook #'enable-paredit-mode) +(add-hook 'lisp-interaction-mode-hook #'enable-paredit-mode) +(add-hook 'scheme-mode-hook #'enable-paredit-mode) + +;;; rust stuff +(add-hook 'rust-mode-hook #'racer-mode) +(add-hook 'racer-mode-hook #'eldoc-mode) +(add-hook 'racer-mode-hook #'company-mode) + +(require 'rust-mode) +(define-key rust-mode-map (kbd "TAB") #'company-indent-or-complete-common) +(setq company-tooltip-align-annotations t) + +;;; Project Interaction Library for Emacs +(require 'projectile) +(define-key projectile-mode-map (kbd "s-p") 'projectile-command-map) +(define-key projectile-mode-map (kbd "C-c p") 'projectile-command-map) +(setq projectile-project-search-path '("~/src/" "~/code/")) +(projectile-mode +1) + + +;;; +;;; _:_ +;;; '-.-' +;;; () __.'.__ +;;; .-:--:-. 
|_______| +;;; () \____/ \=====/ +;;; /\ {====} )___( +;;; (\=, //\\ )__( /_____\ +;;; __ |'-'-'| // .\ ( ) /____\ | | +;;; / \ |_____| (( \_ \ )__( | | | | +;;; \__/ |===| )) `\_) /____\ | | | | +;;; /____\ | | (/ \ | | | | | | +;;; | | | | | _.-'| | | | | | | +;;; |__| )___( )___( /____\ /____\ /_____\ +;;; (====) (=====) (=====) (======) (======) (=======) +;;; }===={ }====={ }====={ }======{ }======{ }======={ +;;; (______)(_______)(_______)(________)(________)(_________) +(setq chess-ai-depth 2) + + +(custom-set-variables + ;; custom-set-variables was added by Custom. + ;; If you edit it by hand, you could mess it up, so be careful. + ;; Your init file should contain only one such instance. + ;; If there is more than one, they won't work right. + '(ansi-color-names-vector + ["#2d3743" "#ff4242" "#74af68" "#dbdb95" "#34cae2" "#008b8b" "#00ede1" "#e1e1e0"]) + '(chess-default-display (quote chess-plain)) + '(custom-safe-themes + (quote + ("bf390ecb203806cbe351b966a88fc3036f3ff68cd2547db6ee3676e87327b311" "e1943fd6568d49ec819ee3711c266a8a120e452ba08569045dd8f50cc5ec5dd3" "4561c67b0764aa6343d710bb0a6f3a96319252b2169d371802cc94adfea5cfc9" "5f95ce79b4a8870b3486b04de22ca2e0785b287da8779f512cdd847f42266989" default))) + '(custom-theme-directory "~/.emacs.d/themes") + '(global-font-lock-mode t) + '(package-selected-packages + (quote + (yaml-mode projectile company-racer ac-racer racer erlang go-rename blackboard-bold-mode blacken jedi minimal-theme monochrome-theme monotropic-theme nimbus-theme noctilux-theme nord-theme nordless-theme northcode-theme paganini-theme paper-theme melancholy-theme go-imports guile-scheme slime chess pelican-mode gnugo go go-autocomplete go-direx go-guru go-mode markdown-mode irfc scpaste cargo undo-tree magit auto-complete)))) +(custom-set-faces + ;; custom-set-faces was added by Custom. + ;; If you edit it by hand, you could mess it up, so be careful. + ;; Your init file should contain only one such instance. 
+ ;; If there is more than one, they won't work right. + ) + +(setq +DEFAULT-THEME+ "weyland-yutani") +(defun toggle-fontlock () ; switch global font-lock off when it is on, otherwise load the default theme and switch it on + (if global-font-lock-mode ; test the mode variable: calling (font-lock-mode) would enable the mode instead of testing it + (progn + (message "disabling font-lock-mode") + (global-font-lock-mode 0)) + (progn + (message "enabling font-lock-mode") + (load-theme (intern +DEFAULT-THEME+)) ; load-theme requires a symbol, not a string + (global-font-lock-mode t)))) + +(put 'upcase-region 'disabled nil) +(put 'downcase-region 'disabled nil) + +(keychain-refresh-environment) +(require 'ox-publish) +(setq org-publish-project-alist + '(("notes" + :base-directory "~/notes/" + :publishing-directory "/ssh:phobos.wntrmute.net:/var/www/sites/tmp/" + :publishing-function org-html-publish-to-html + :headline-levels 4 ; Just the default for this project. + :auto-preamble t) + ("notes-static" + :base-directory "~/notes/" + :base-extension "css\\|js\\|png\\|jpg\\|gif\\|pdf\\|mp3\\|ogg\\|swf" + :publishing-directory "/ssh:phobos.wntrmute.net:/var/www/sites/tmp/" + :recursive t + :publishing-function org-publish-attachment))) + +;;; Load fira-code support. +(when (window-system) + (set-frame-font "Ubuntu Mono 13")) +;; (load "~/.emacs.d/fira-code.el") diff --git a/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el b/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el new file mode 100644 index 0000000..9fc3326 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el @@ -0,0 +1,256 @@ +;;; eink-dark-theme.el --- Emacs theme with a dark background. + +;; Copyright (C) 2015, K. Isom + +;; Author: K. Isom +;; https://git.kyleisom.net/style/eink-emacs +;; Version: 0.2 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. 
+ +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;; This file is not part of Emacs. + +;;; Commentary: + +;;; Code: + + (deftheme eink-dark) + (let ((class '((class color) (min-colors 89))) + (fg1 "#b3b3b3") + (fg2 "#a3a3a3") + (fg3 "#949494") + (fg4 "#858585") + (bg1 "#1d1f21") + (bg2 "#2c2e30") + (bg3 "#3b3d3f") + (bg4 "#4c4d4f") + (key2 "#bbbbbb") + (key3 "#9d9d9d") + (builtin "#b3b3b3") + (keyword "#b3b3b3") + (const "#b3b3b3") + (comment "#696969") + (func "#b3b3b3") + (str "#b3b3b3") + (type "#b3b3b3") + (var "#b3b3b3") + (warning "#cd2626")) + (custom-theme-set-faces + 'eink-dark + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold ,class :foreground ,keyword)))) + `(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class 
(:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + `(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + `(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class 
(:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class (:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class (:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + 
`(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground ,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + `(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class 
(:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + `(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class (:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background 
,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class (:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-preview-common ((,class (:foreground ,fg3)))) ; dropped a duplicate :foreground ,bg2 that this value always overrode + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + `(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-tooltip-annotation ((,class (:foreground ,const)))) ; was misspelled "tooltop", so the spec never applied + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-tooltip-selection ((,class 
(:background ,bg3 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-java-font-lock-protected-face ((t (:foreground ,keyword)))) ; was misspelled "jde-jave-", unlike its sibling faces + `(jde-java-font-lock-number-face ((t (:foreground ,var)))))) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory load-file-name)))) + +(provide-theme 'eink-dark) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; eink-dark-theme.el ends here + diff --git a/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el b/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el new file mode 100644 index 0000000..a70e395 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el @@ -0,0 +1,256 @@ +;;; eink-light-theme.el --- Emacs theme with a light background. 
+ +;; Copyright (C) 2015, K. Isom + +;; Author: K. Isom +;; https://git.kyleisom.net/style/eink-emacs +;; Version: 0.2 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;; This file is not part of Emacs. + +;;; Commentary: + +;;; Code: + + (deftheme eink-light) + (let ((class '((class color) (min-colors 89))) + (fg1 "#1c1c1c") + (fg2 "#2b2b2b") + (fg3 "#3a3a3a") + (fg4 "#4b4b4b") + (bg1 "#fffafa") + (bg2 "#e8e3e3") + (bg3 "#d1cdcd") + (bg4 "#bbb8b8") + (key2 "#313131") + (key3 "#1a1a1a") + (builtin "#1c1c1c") + (keyword "#1c1c1c") + (const "#1c1c1c") + (comment "#7f7f7f") + (func "#1c1c1c") + (str "#1c1c1c") + (type "#1c1c1c") + (var "#1c1c1c") + (warning "#cd2626")) + (custom-theme-set-faces + 'eink-light + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold ,class :foreground ,keyword)))) + 
`(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class (:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + `(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + 
`(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class (:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class (:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class 
(:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + `(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground ,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + 
`(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class (:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + `(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class 
(:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class (:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-preview-common ((,class (:foreground ,fg3)))) ; dropped a duplicate :foreground ,bg2 that this value always overrode + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + 
`(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-tooltop-annotation ((,class (:foreground ,const)))) + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-tooltip-selection ((,class (:background ,bg3 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-jave-font-lock-protected-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-number-face ((t (:foreground ,var)))))) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory 
load-file-name)))) + +(provide-theme 'eink-light) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; eink-light-theme.el ends here + diff --git a/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el b/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el new file mode 100644 index 0000000..2b4b3af --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el @@ -0,0 +1,271 @@ + + +;;; weyland-yutani-theme.el --- Emacs theme with a dark background. + +;; Copyright (C) 2014 , Joe Staursky + +;; Author: Joe Staursky +;; +;; Version: 0.1 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;; This file is not part of Emacs. 
+ +;;; Commentary: + +;;; Code: + + (deftheme weyland-yutani) + (let ((class '((class color) (min-colors 89))) + (fg1 "#a0a8b8") + (fg2 "#9299a8") + (fg3 "#848b98") + (fg4 "#777d88") + (bg1 "#141e20") + (bg2 "#232d2f") + (bg3 "#333c3e") + (bg4 "#444d4e") + (key2 "#a0e88b") + (key3 "#82c96e") + (builtin "#a3646f") + (keyword "#93e57c") + (const "#d1d68b") + (comment "#565766") + (func "#beb7f7") + (str "#627e95") + (type "#5992c2") + (var "#9e79b3") + (warning "#fcbec9")) + (custom-theme-set-faces + 'weyland-yutani + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(company-tooltip-annotation-selection ((,class (:foreground ,func)))) + + `(company-tooltip-annotation ((,class (:foreground ,const)))) + + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold ,class :foreground ,keyword)))) + `(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class (:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + 
`(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + `(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class (:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class 
(:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class (:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + `(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground 
,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + `(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class (:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + 
`(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class (:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class 
(:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + + + + `(company-echo ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-preview-common ((,class (:foreground ,bg2 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(company-tooltop-search ((,class (:inherit region)))) + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + `(company-tooltip-selection ((,class (:background ,bg3 :foreground ,fg3)))) + `(company-tooltop-annotation ((,class (:foreground ,const)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltop-search-selection ((,class 
(:foreground ,const)))) + `(company-tooltop-annotation-selection ((,class (:foreground ,const)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-jave-font-lock-protected-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-number-face ((t (:foreground ,var)))) + + + )) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory load-file-name)))) + +(provide-theme 'weyland-yutani) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; weyland-yutani-theme.el ends here diff --git a/roles/dotfiles/files/.emacs.d/transient/history.el b/roles/dotfiles/files/.emacs.d/transient/history.el new file mode 100644 index 0000000..90b5a84 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/transient/history.el @@ -0,0 +1 @@ +nil \ No newline at end of file diff --git a/roles/dotfiles/files/.gitconfig 
b/roles/dotfiles/files/.gitconfig new file mode 100644 index 0000000..2b41646 --- /dev/null +++ b/roles/dotfiles/files/.gitconfig @@ -0,0 +1,19 @@ +[user] + name = Kyle Isom + email = kyle@imap.cc + +[color] + ui = false + +[core] + excludesfile = /home/kyle/.gitignore_global + editor = mg + +[http] + cookiefile = /home/kyle/.gitcookies + +[init] + defaultBranch = master + +[push] + default = simple diff --git a/roles/dotfiles/files/.gitignore_global b/roles/dotfiles/files/.gitignore_global new file mode 100644 index 0000000..93d91d9 --- /dev/null +++ b/roles/dotfiles/files/.gitignore_global @@ -0,0 +1,5 @@ +*~ +*# +.#* +.*.sw? +tags diff --git a/roles/dotfiles/files/.hgrc b/roles/dotfiles/files/.hgrc new file mode 100644 index 0000000..876b14d --- /dev/null +++ b/roles/dotfiles/files/.hgrc @@ -0,0 +1,34 @@ +# example user config (see 'hg help config' for more info) +[ui] +# name and email, e.g. +# username = Jane Doe +username = Kyle Isom +editor = /usr/bin/mg + +# We recommend enabling tweakdefaults to get slight improvements to +# the UI over time. Make sure to set HGPLAIN in the environment when +# writing scripts! 
+tweakdefaults = True + +# uncomment to disable color in command output +# (see 'hg help color' for details) +color = never + +# uncomment to disable command output pagination +# (see 'hg help pager' for details) +paginate = never + +[extensions] +# uncomment the lines below to enable some popular extensions +# (see 'hg help extensions' for more info) +# +histedit = +rebase = +shelve = +uncommit = +hgext.mq= +hgext.patchbomb= +purge= + +[diff] +git = True diff --git a/roles/dotfiles/files/.mg b/roles/dotfiles/files/.mg new file mode 100644 index 0000000..7943a79 --- /dev/null +++ b/roles/dotfiles/files/.mg @@ -0,0 +1,3 @@ +column-number-mode +backup-to-home-directory +bksp-mode diff --git a/roles/dotfiles/files/.profile_custom b/roles/dotfiles/files/.profile_custom new file mode 100644 index 0000000..c69fa53 --- /dev/null +++ b/roles/dotfiles/files/.profile_custom @@ -0,0 +1,33 @@ +# ~/.profile: executed by the command interpreter for login shells. +# This file is not read by bash(1), if ~/.bash_profile or ~/.bash_login +# exists. +# see /usr/share/doc/bash/examples/startup-files for examples. +# the files are located in the bash-doc package. + +# the default umask is set in /etc/profile; for setting the umask +# for ssh logins, install and configure the libpam-umask package. +#umask 022 + +# if running bash +if [ -n "$BASH_VERSION" ]; then + # include .bashrc if it exists + if [ -f "$HOME/.bashrc" ]; then + . 
"$HOME/.bashrc" + fi +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/bin" ] ; then + PATH="$HOME/bin:$PATH" +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/.local/bin" ] ; then + PATH="$HOME/.local/bin:$PATH" +fi + +[ -f ~/.cargo/env ] && source $HOME/.cargo/env + +alias co='git checkout' +alias st='git status' +alias prb='git pull --rebase' diff --git a/roles/dotfiles/files/.vim/autoload/plug.vim b/roles/dotfiles/files/.vim/autoload/plug.vim new file mode 100644 index 0000000..4e05630 --- /dev/null +++ b/roles/dotfiles/files/.vim/autoload/plug.vim @@ -0,0 +1,2526 @@ +" vim-plug: Vim plugin manager +" ============================ +" +" Download plug.vim and put it in ~/.vim/autoload +" +" curl -fLo ~/.vim/autoload/plug.vim --create-dirs \ +" https://raw.githubusercontent.com/junegunn/vim-plug/master/plug.vim +" +" Edit your .vimrc +" +" call plug#begin('~/.vim/plugged') +" +" " Make sure you use single quotes +" +" " Shorthand notation; fetches https://github.com/junegunn/vim-easy-align +" Plug 'junegunn/vim-easy-align' +" +" " Any valid git URL is allowed +" Plug 'https://github.com/junegunn/vim-github-dashboard.git' +" +" " Multiple Plug commands can be written in a single line using | separators +" Plug 'SirVer/ultisnips' | Plug 'honza/vim-snippets' +" +" " On-demand loading +" Plug 'scrooloose/nerdtree', { 'on': 'NERDTreeToggle' } +" Plug 'tpope/vim-fireplace', { 'for': 'clojure' } +" +" " Using a non-master branch +" Plug 'rdnetto/YCM-Generator', { 'branch': 'stable' } +" +" " Using a tagged release; wildcard allowed (requires git 1.9.2 or above) +" Plug 'fatih/vim-go', { 'tag': '*' } +" +" " Plugin options +" Plug 'nsf/gocode', { 'tag': 'v.20150303', 'rtp': 'vim' } +" +" " Plugin outside ~/.vim/plugged with post-update hook +" Plug 'junegunn/fzf', { 'dir': '~/.fzf', 'do': './install --all' } +" +" " Unmanaged plugin (manually installed and updated) +" Plug '~/my-prototype-plugin' +" +" " 
Initialize plugin system +" call plug#end() +" +" Then reload .vimrc and :PlugInstall to install plugins. +" +" Plug options: +" +"| Option | Description | +"| ----------------------- | ------------------------------------------------ | +"| `branch`/`tag`/`commit` | Branch/tag/commit of the repository to use | +"| `rtp` | Subdirectory that contains Vim plugin | +"| `dir` | Custom directory for the plugin | +"| `as` | Use different name for the plugin | +"| `do` | Post-update hook (string or funcref) | +"| `on` | On-demand loading: Commands or ``-mappings | +"| `for` | On-demand loading: File types | +"| `frozen` | Do not update unless explicitly specified | +" +" More information: https://github.com/junegunn/vim-plug +" +" +" Copyright (c) 2017 Junegunn Choi +" +" MIT License +" +" Permission is hereby granted, free of charge, to any person obtaining +" a copy of this software and associated documentation files (the +" "Software"), to deal in the Software without restriction, including +" without limitation the rights to use, copy, modify, merge, publish, +" distribute, sublicense, and/or sell copies of the Software, and to +" permit persons to whom the Software is furnished to do so, subject to +" the following conditions: +" +" The above copyright notice and this permission notice shall be +" included in all copies or substantial portions of the Software. +" +" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +" LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +if exists('g:loaded_plug') + finish +endif +let g:loaded_plug = 1 + +let s:cpo_save = &cpo +set cpo&vim + +let s:plug_src = 'https://github.com/junegunn/vim-plug.git' +let s:plug_tab = get(s:, 'plug_tab', -1) +let s:plug_buf = get(s:, 'plug_buf', -1) +let s:mac_gui = has('gui_macvim') && has('gui_running') +let s:is_win = has('win32') +let s:nvim = has('nvim-0.2') || (has('nvim') && exists('*jobwait') && !s:is_win) +let s:vim8 = has('patch-8.0.0039') && exists('*job_start') +let s:me = resolve(expand(':p')) +let s:base_spec = { 'branch': 'master', 'frozen': 0 } +let s:TYPE = { +\ 'string': type(''), +\ 'list': type([]), +\ 'dict': type({}), +\ 'funcref': type(function('call')) +\ } +let s:loaded = get(s:, 'loaded', {}) +let s:triggers = get(s:, 'triggers', {}) + +function! plug#begin(...) + if a:0 > 0 + let s:plug_home_org = a:1 + let home = s:path(fnamemodify(expand(a:1), ':p')) + elseif exists('g:plug_home') + let home = s:path(g:plug_home) + elseif !empty(&rtp) + let home = s:path(split(&rtp, ',')[0]) . '/plugged' + else + return s:err('Unable to determine plug home. Try calling plug#begin() with a path argument.') + endif + if fnamemodify(home, ':t') ==# 'plugin' && fnamemodify(home, ':h') ==# s:first_rtp + return s:err('Invalid plug home. '.home.' is a standard Vim runtime path and is not allowed.') + endif + + let g:plug_home = home + let g:plugs = {} + let g:plugs_order = [] + let s:triggers = {} + + call s:define_commands() + return 1 +endfunction + +function! s:define_commands() + command! -nargs=+ -bar Plug call plug#() + if !executable('git') + return s:err('`git` executable not found. Most commands will not be available. To suppress this message, prepend `silent!` to `call plug#begin(...)`.') + endif + command! -nargs=* -bar -bang -complete=customlist,s:names PlugInstall call s:install(0, []) + command! -nargs=* -bar -bang -complete=customlist,s:names PlugUpdate call s:update(0, []) + command! -nargs=0 -bar -bang PlugClean call s:clean(0) + command! 
-nargs=0 -bar PlugUpgrade if s:upgrade() | execute 'source' s:esc(s:me) | endif + command! -nargs=0 -bar PlugStatus call s:status() + command! -nargs=0 -bar PlugDiff call s:diff() + command! -nargs=? -bar -bang -complete=file PlugSnapshot call s:snapshot(0, ) +endfunction + +function! s:to_a(v) + return type(a:v) == s:TYPE.list ? a:v : [a:v] +endfunction + +function! s:to_s(v) + return type(a:v) == s:TYPE.string ? a:v : join(a:v, "\n") . "\n" +endfunction + +function! s:glob(from, pattern) + return s:lines(globpath(a:from, a:pattern)) +endfunction + +function! s:source(from, ...) + let found = 0 + for pattern in a:000 + for vim in s:glob(a:from, pattern) + execute 'source' s:esc(vim) + let found = 1 + endfor + endfor + return found +endfunction + +function! s:assoc(dict, key, val) + let a:dict[a:key] = add(get(a:dict, a:key, []), a:val) +endfunction + +function! s:ask(message, ...) + call inputsave() + echohl WarningMsg + let answer = input(a:message.(a:0 ? ' (y/N/a) ' : ' (y/N) ')) + echohl None + call inputrestore() + echo "\r" + return (a:0 && answer =~? '^a') ? 2 : (answer =~? '^y') ? 1 : 0 +endfunction + +function! s:ask_no_interrupt(...) + try + return call('s:ask', a:000) + catch + return 0 + endtry +endfunction + +function! s:lazy(plug, opt) + return has_key(a:plug, a:opt) && + \ (empty(s:to_a(a:plug[a:opt])) || + \ !isdirectory(a:plug.dir) || + \ len(s:glob(s:rtp(a:plug), 'plugin')) || + \ len(s:glob(s:rtp(a:plug), 'after/plugin'))) +endfunction + +function! plug#end() + if !exists('g:plugs') + return s:err('Call plug#begin() first') + endif + + if exists('#PlugLOD') + augroup PlugLOD + autocmd! + augroup END + augroup! 
PlugLOD + endif + let lod = { 'ft': {}, 'map': {}, 'cmd': {} } + + if exists('g:did_load_filetypes') + filetype off + endif + for name in g:plugs_order + if !has_key(g:plugs, name) + continue + endif + let plug = g:plugs[name] + if get(s:loaded, name, 0) || !s:lazy(plug, 'on') && !s:lazy(plug, 'for') + let s:loaded[name] = 1 + continue + endif + + if has_key(plug, 'on') + let s:triggers[name] = { 'map': [], 'cmd': [] } + for cmd in s:to_a(plug.on) + if cmd =~? '^.\+' + if empty(mapcheck(cmd)) && empty(mapcheck(cmd, 'i')) + call s:assoc(lod.map, cmd, name) + endif + call add(s:triggers[name].map, cmd) + elseif cmd =~# '^[A-Z]' + let cmd = substitute(cmd, '!*$', '', '') + if exists(':'.cmd) != 2 + call s:assoc(lod.cmd, cmd, name) + endif + call add(s:triggers[name].cmd, cmd) + else + call s:err('Invalid `on` option: '.cmd. + \ '. Should start with an uppercase letter or ``.') + endif + endfor + endif + + if has_key(plug, 'for') + let types = s:to_a(plug.for) + if !empty(types) + augroup filetypedetect + call s:source(s:rtp(plug), 'ftdetect/**/*.vim', 'after/ftdetect/**/*.vim') + augroup END + endif + for type in types + call s:assoc(lod.ft, type, name) + endfor + endif + endfor + + for [cmd, names] in items(lod.cmd) + execute printf( + \ 'command! 
-nargs=* -range -bang -complete=file %s call s:lod_cmd(%s, "", , , , %s)', + \ cmd, string(cmd), string(names)) + endfor + + for [map, names] in items(lod.map) + for [mode, map_prefix, key_prefix] in + \ [['i', '', ''], ['n', '', ''], ['v', '', 'gv'], ['o', '', '']] + execute printf( + \ '%snoremap %s %s:call lod_map(%s, %s, %s, "%s")', + \ mode, map, map_prefix, string(map), string(names), mode != 'i', key_prefix) + endfor + endfor + + for [ft, names] in items(lod.ft) + augroup PlugLOD + execute printf('autocmd FileType %s call lod_ft(%s, %s)', + \ ft, string(ft), string(names)) + augroup END + endfor + + call s:reorg_rtp() + filetype plugin indent on + if has('vim_starting') + if has('syntax') && !exists('g:syntax_on') + syntax enable + end + else + call s:reload_plugins() + endif +endfunction + +function! s:loaded_names() + return filter(copy(g:plugs_order), 'get(s:loaded, v:val, 0)') +endfunction + +function! s:load_plugin(spec) + call s:source(s:rtp(a:spec), 'plugin/**/*.vim', 'after/plugin/**/*.vim') +endfunction + +function! s:reload_plugins() + for name in s:loaded_names() + call s:load_plugin(g:plugs[name]) + endfor +endfunction + +function! s:trim(str) + return substitute(a:str, '[\/]\+$', '', '') +endfunction + +function! s:version_requirement(val, min) + for idx in range(0, len(a:min) - 1) + let v = get(a:val, idx, 0) + if v < a:min[idx] | return 0 + elseif v > a:min[idx] | return 1 + endif + endfor + return 1 +endfunction + +function! s:git_version_requirement(...) + if !exists('s:git_version') + let s:git_version = map(split(split(s:system('git --version'))[2], '\.'), 'str2nr(v:val)') + endif + return s:version_requirement(s:git_version, a:000) +endfunction + +function! s:progress_opt(base) + return a:base && !s:is_win && + \ s:git_version_requirement(1, 7, 1) ? '--progress' : '' +endfunction + +if s:is_win + function! s:rtp(spec) + return s:path(a:spec.dir . get(a:spec, 'rtp', '')) + endfunction + + function! 
s:path(path) + return s:trim(substitute(a:path, '/', '\', 'g')) + endfunction + + function! s:dirpath(path) + return s:path(a:path) . '\' + endfunction + + function! s:is_local_plug(repo) + return a:repo =~? '^[a-z]:\|^[%~]' + endfunction +else + function! s:rtp(spec) + return s:dirpath(a:spec.dir . get(a:spec, 'rtp', '')) + endfunction + + function! s:path(path) + return s:trim(a:path) + endfunction + + function! s:dirpath(path) + return substitute(a:path, '[/\\]*$', '/', '') + endfunction + + function! s:is_local_plug(repo) + return a:repo[0] =~ '[/$~]' + endfunction +endif + +function! s:err(msg) + echohl ErrorMsg + echom '[vim-plug] '.a:msg + echohl None +endfunction + +function! s:warn(cmd, msg) + echohl WarningMsg + execute a:cmd 'a:msg' + echohl None +endfunction + +function! s:esc(path) + return escape(a:path, ' ') +endfunction + +function! s:escrtp(path) + return escape(a:path, ' ,') +endfunction + +function! s:remove_rtp() + for name in s:loaded_names() + let rtp = s:rtp(g:plugs[name]) + execute 'set rtp-='.s:escrtp(rtp) + let after = globpath(rtp, 'after') + if isdirectory(after) + execute 'set rtp-='.s:escrtp(after) + endif + endfor +endfunction + +function! s:reorg_rtp() + if !empty(s:first_rtp) + execute 'set rtp-='.s:first_rtp + execute 'set rtp-='.s:last_rtp + endif + + " &rtp is modified from outside + if exists('s:prtp') && s:prtp !=# &rtp + call s:remove_rtp() + unlet! s:middle + endif + + let s:middle = get(s:, 'middle', &rtp) + let rtps = map(s:loaded_names(), 's:rtp(g:plugs[v:val])') + let afters = filter(map(copy(rtps), 'globpath(v:val, "after")'), '!empty(v:val)') + let rtp = join(map(rtps, 'escape(v:val, ",")'), ',') + \ . ','.s:middle.',' + \ . join(map(afters, 'escape(v:val, ",")'), ',') + let &rtp = substitute(substitute(rtp, ',,*', ',', 'g'), '^,\|,$', '', 'g') + let s:prtp = &rtp + + if !empty(s:first_rtp) + execute 'set rtp^='.s:first_rtp + execute 'set rtp+='.s:last_rtp + endif +endfunction + +function! s:doautocmd(...) 
+ if exists('#'.join(a:000, '#')) + execute 'doautocmd' ((v:version > 703 || has('patch442')) ? '' : '') join(a:000) + endif +endfunction + +function! s:dobufread(names) + for name in a:names + let path = s:rtp(g:plugs[name]).'/**' + for dir in ['ftdetect', 'ftplugin'] + if len(finddir(dir, path)) + if exists('#BufRead') + doautocmd BufRead + endif + return + endif + endfor + endfor +endfunction + +function! plug#load(...) + if a:0 == 0 + return s:err('Argument missing: plugin name(s) required') + endif + if !exists('g:plugs') + return s:err('plug#begin was not called') + endif + let names = a:0 == 1 && type(a:1) == s:TYPE.list ? a:1 : a:000 + let unknowns = filter(copy(names), '!has_key(g:plugs, v:val)') + if !empty(unknowns) + let s = len(unknowns) > 1 ? 's' : '' + return s:err(printf('Unknown plugin%s: %s', s, join(unknowns, ', '))) + end + let unloaded = filter(copy(names), '!get(s:loaded, v:val, 0)') + if !empty(unloaded) + for name in unloaded + call s:lod([name], ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + endfor + call s:dobufread(unloaded) + return 1 + end + return 0 +endfunction + +function! s:remove_triggers(name) + if !has_key(s:triggers, a:name) + return + endif + for cmd in s:triggers[a:name].cmd + execute 'silent! delc' cmd + endfor + for map in s:triggers[a:name].map + execute 'silent! unmap' map + execute 'silent! iunmap' map + endfor + call remove(s:triggers, a:name) +endfunction + +function! s:lod(names, types, ...) + for name in a:names + call s:remove_triggers(name) + let s:loaded[name] = 1 + endfor + call s:reorg_rtp() + + for name in a:names + let rtp = s:rtp(g:plugs[name]) + for dir in a:types + call s:source(rtp, dir.'/**/*.vim') + endfor + if a:0 + if !s:source(rtp, a:1) && !empty(s:glob(rtp, a:2)) + execute 'runtime' a:1 + endif + call s:source(rtp, a:2) + endif + call s:doautocmd('User', name) + endfor +endfunction + +function! 
s:lod_ft(pat, names) + let syn = 'syntax/'.a:pat.'.vim' + call s:lod(a:names, ['plugin', 'after/plugin'], syn, 'after/'.syn) + execute 'autocmd! PlugLOD FileType' a:pat + call s:doautocmd('filetypeplugin', 'FileType') + call s:doautocmd('filetypeindent', 'FileType') +endfunction + +function! s:lod_cmd(cmd, bang, l1, l2, args, names) + call s:lod(a:names, ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + call s:dobufread(a:names) + execute printf('%s%s%s %s', (a:l1 == a:l2 ? '' : (a:l1.','.a:l2)), a:cmd, a:bang, a:args) +endfunction + +function! s:lod_map(map, names, with_prefix, prefix) + call s:lod(a:names, ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + call s:dobufread(a:names) + let extra = '' + while 1 + let c = getchar(0) + if c == 0 + break + endif + let extra .= nr2char(c) + endwhile + + if a:with_prefix + let prefix = v:count ? v:count : '' + let prefix .= '"'.v:register.a:prefix + if mode(1) == 'no' + if v:operator == 'c' + let prefix = "\" . prefix + endif + let prefix .= v:operator + endif + call feedkeys(prefix, 'n') + endif + call feedkeys(substitute(a:map, '^', "\", '') . extra) +endfunction + +function! plug#(repo, ...) + if a:0 > 1 + return s:err('Invalid number of arguments (1..2)') + endif + + try + let repo = s:trim(a:repo) + let opts = a:0 == 1 ? s:parse_options(a:1) : s:base_spec + let name = get(opts, 'as', fnamemodify(repo, ':t:s?\.git$??')) + let spec = extend(s:infer_properties(name, repo), opts) + if !has_key(g:plugs, name) + call add(g:plugs_order, name) + endif + let g:plugs[name] = spec + let s:loaded[name] = get(s:loaded, name, 0) + catch + return s:err(v:exception) + endtry +endfunction + +function! 
s:parse_options(arg) + let opts = copy(s:base_spec) + let type = type(a:arg) + if type == s:TYPE.string + let opts.tag = a:arg + elseif type == s:TYPE.dict + call extend(opts, a:arg) + if has_key(opts, 'dir') + let opts.dir = s:dirpath(expand(opts.dir)) + endif + else + throw 'Invalid argument type (expected: string or dictionary)' + endif + return opts +endfunction + +function! s:infer_properties(name, repo) + let repo = a:repo + if s:is_local_plug(repo) + return { 'dir': s:dirpath(expand(repo)) } + else + if repo =~ ':' + let uri = repo + else + if repo !~ '/' + throw printf('Invalid argument: %s (implicit `vim-scripts'' expansion is deprecated)', repo) + endif + let fmt = get(g:, 'plug_url_format', 'https://git::@github.com/%s.git') + let uri = printf(fmt, repo) + endif + return { 'dir': s:dirpath(g:plug_home.'/'.a:name), 'uri': uri } + endif +endfunction + +function! s:install(force, names) + call s:update_impl(0, a:force, a:names) +endfunction + +function! s:update(force, names) + call s:update_impl(1, a:force, a:names) +endfunction + +function! plug#helptags() + if !exists('g:plugs') + return s:err('plug#begin was not called') + endif + for spec in values(g:plugs) + let docd = join([s:rtp(spec), 'doc'], '/') + if isdirectory(docd) + silent! execute 'helptags' s:esc(docd) + endif + endfor + return 1 +endfunction + +function! 
s:syntax() + syntax clear + syntax region plug1 start=/\%1l/ end=/\%2l/ contains=plugNumber + syntax region plug2 start=/\%2l/ end=/\%3l/ contains=plugBracket,plugX + syn match plugNumber /[0-9]\+[0-9.]*/ contained + syn match plugBracket /[[\]]/ contained + syn match plugX /x/ contained + syn match plugDash /^-/ + syn match plugPlus /^+/ + syn match plugStar /^*/ + syn match plugMessage /\(^- \)\@<=.*/ + syn match plugName /\(^- \)\@<=[^ ]*:/ + syn match plugSha /\%(: \)\@<=[0-9a-f]\{4,}$/ + syn match plugTag /(tag: [^)]\+)/ + syn match plugInstall /\(^+ \)\@<=[^:]*/ + syn match plugUpdate /\(^* \)\@<=[^:]*/ + syn match plugCommit /^ \X*[0-9a-f]\{7,9} .*/ contains=plugRelDate,plugEdge,plugTag + syn match plugEdge /^ \X\+$/ + syn match plugEdge /^ \X*/ contained nextgroup=plugSha + syn match plugSha /[0-9a-f]\{7,9}/ contained + syn match plugRelDate /([^)]*)$/ contained + syn match plugNotLoaded /(not loaded)$/ + syn match plugError /^x.*/ + syn region plugDeleted start=/^\~ .*/ end=/^\ze\S/ + syn match plugH2 /^.*:\n-\+$/ + syn keyword Function PlugInstall PlugStatus PlugUpdate PlugClean + hi def link plug1 Title + hi def link plug2 Repeat + hi def link plugH2 Type + hi def link plugX Exception + hi def link plugBracket Structure + hi def link plugNumber Number + + hi def link plugDash Special + hi def link plugPlus Constant + hi def link plugStar Boolean + + hi def link plugMessage Function + hi def link plugName Label + hi def link plugInstall Function + hi def link plugUpdate Type + + hi def link plugError Error + hi def link plugDeleted Ignore + hi def link plugRelDate Comment + hi def link plugEdge PreProc + hi def link plugSha Identifier + hi def link plugTag Constant + + hi def link plugNotLoaded Comment +endfunction + +function! s:lpad(str, len) + return a:str . repeat(' ', a:len - len(a:str)) +endfunction + +function! s:lines(msg) + return split(a:msg, "[\r\n]") +endfunction + +function! 
s:lastline(msg) + return get(s:lines(a:msg), -1, '') +endfunction + +function! s:new_window() + execute get(g:, 'plug_window', 'vertical topleft new') +endfunction + +function! s:plug_window_exists() + let buflist = tabpagebuflist(s:plug_tab) + return !empty(buflist) && index(buflist, s:plug_buf) >= 0 +endfunction + +function! s:switch_in() + if !s:plug_window_exists() + return 0 + endif + + if winbufnr(0) != s:plug_buf + let s:pos = [tabpagenr(), winnr(), winsaveview()] + execute 'normal!' s:plug_tab.'gt' + let winnr = bufwinnr(s:plug_buf) + execute winnr.'wincmd w' + call add(s:pos, winsaveview()) + else + let s:pos = [winsaveview()] + endif + + setlocal modifiable + return 1 +endfunction + +function! s:switch_out(...) + call winrestview(s:pos[-1]) + setlocal nomodifiable + if a:0 > 0 + execute a:1 + endif + + if len(s:pos) > 1 + execute 'normal!' s:pos[0].'gt' + execute s:pos[1] 'wincmd w' + call winrestview(s:pos[2]) + endif +endfunction + +function! s:finish_bindings() + nnoremap R :call retry() + nnoremap D :PlugDiff + nnoremap S :PlugStatus + nnoremap U :call status_update() + xnoremap U :call status_update() + nnoremap ]] :silent! call section('') + nnoremap [[ :silent! call section('b') +endfunction + +function! s:prepare(...) + if empty(getcwd()) + throw 'Invalid current working directory. Cannot proceed.' + endif + + for evar in ['$GIT_DIR', '$GIT_WORK_TREE'] + if exists(evar) + throw evar.' detected. Cannot proceed.' + endif + endfor + + call s:job_abort() + if s:switch_in() + if b:plug_preview == 1 + pc + endif + enew + else + call s:new_window() + endif + + nnoremap q :if b:plug_preview==1pcendifbd + if a:0 == 0 + call s:finish_bindings() + endif + let b:plug_preview = -1 + let s:plug_tab = tabpagenr() + let s:plug_buf = winbufnr(0) + call s:assign_name() + + for k in ['', 'L', 'o', 'X', 'd', 'dd'] + execute 'silent! 
unmap ' k + endfor + setlocal buftype=nofile bufhidden=wipe nobuflisted nolist noswapfile nowrap cursorline modifiable nospell + if exists('+colorcolumn') + setlocal colorcolumn= + endif + setf vim-plug + if exists('g:syntax_on') + call s:syntax() + endif +endfunction + +function! s:assign_name() + " Assign buffer name + let prefix = '[Plugins]' + let name = prefix + let idx = 2 + while bufexists(name) + let name = printf('%s (%s)', prefix, idx) + let idx = idx + 1 + endwhile + silent! execute 'f' fnameescape(name) +endfunction + +function! s:chsh(swap) + let prev = [&shell, &shellcmdflag, &shellredir] + if s:is_win + set shell=cmd.exe shellcmdflag=/c shellredir=>%s\ 2>&1 + elseif a:swap + set shell=sh shellredir=>%s\ 2>&1 + endif + return prev +endfunction + +function! s:bang(cmd, ...) + try + let [sh, shellcmdflag, shrd] = s:chsh(a:0) + " FIXME: Escaping is incomplete. We could use shellescape with eval, + " but it won't work on Windows. + let cmd = a:0 ? s:with_cd(a:cmd, a:1) : a:cmd + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + let g:_plug_bang = (s:is_win && has('gui_running') ? 'silent ' : '').'!'.escape(cmd, '#!%') + execute "normal! :execute g:_plug_bang\\" + finally + unlet g:_plug_bang + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry + return v:shell_error ? 'Exit status: ' . v:shell_error : '' +endfunction + +function! s:regress_bar() + let bar = substitute(getline(2)[1:-2], '.*\zs=', 'x', '') + call s:progress_bar(2, bar, len(bar)) +endfunction + +function! s:is_updated(dir) + return !empty(s:system_chomp('git log --pretty=format:"%h" "HEAD...HEAD@{1}"', a:dir)) +endfunction + +function! s:do(pull, force, todo) + for [name, spec] in items(a:todo) + if !isdirectory(spec.dir) + continue + endif + let installed = has_key(s:update.new, name) + let updated = installed ? 
0 : + \ (a:pull && index(s:update.errors, name) < 0 && s:is_updated(spec.dir)) + if a:force || installed || updated + execute 'cd' s:esc(spec.dir) + call append(3, '- Post-update hook for '. name .' ... ') + let error = '' + let type = type(spec.do) + if type == s:TYPE.string + if spec.do[0] == ':' + if !get(s:loaded, name, 0) + let s:loaded[name] = 1 + call s:reorg_rtp() + endif + call s:load_plugin(spec) + try + execute spec.do[1:] + catch + let error = v:exception + endtry + if !s:plug_window_exists() + cd - + throw 'Warning: vim-plug was terminated by the post-update hook of '.name + endif + else + let error = s:bang(spec.do) + endif + elseif type == s:TYPE.funcref + try + let status = installed ? 'installed' : (updated ? 'updated' : 'unchanged') + call spec.do({ 'name': name, 'status': status, 'force': a:force }) + catch + let error = v:exception + endtry + else + let error = 'Invalid hook type' + endif + call s:switch_in() + call setline(4, empty(error) ? (getline(4) . 'OK') + \ : ('x' . getline(4)[1:] . error)) + if !empty(error) + call add(s:update.errors, name) + call s:regress_bar() + endif + cd - + endif + endfor +endfunction + +function! s:hash_match(a, b) + return stridx(a:a, a:b) == 0 || stridx(a:b, a:a) == 0 +endfunction + +function! s:checkout(spec) + let sha = a:spec.commit + let output = s:system('git rev-parse HEAD', a:spec.dir) + if !v:shell_error && !s:hash_match(sha, s:lines(output)[0]) + let output = s:system( + \ 'git fetch --depth 999999 && git checkout '.s:esc(sha).' --', a:spec.dir) + endif + return output +endfunction + +function! s:finish(pull) + let new_frozen = len(filter(keys(s:update.new), 'g:plugs[v:val].frozen')) + if new_frozen + let s = new_frozen > 1 ? 's' : '' + call append(3, printf('- Installed %d frozen plugin%s', new_frozen, s)) + endif + call append(3, '- Finishing ... ') | 4 + redraw + call plug#helptags() + call plug#end() + call setline(4, getline(4) . 
'Done!') + redraw + let msgs = [] + if !empty(s:update.errors) + call add(msgs, "Press 'R' to retry.") + endif + if a:pull && len(s:update.new) < len(filter(getline(5, '$'), + \ "v:val =~ '^- ' && v:val !~# 'Already up.to.date'")) + call add(msgs, "Press 'D' to see the updated changes.") + endif + echo join(msgs, ' ') + call s:finish_bindings() +endfunction + +function! s:retry() + if empty(s:update.errors) + return + endif + echo + call s:update_impl(s:update.pull, s:update.force, + \ extend(copy(s:update.errors), [s:update.threads])) +endfunction + +function! s:is_managed(name) + return has_key(g:plugs[a:name], 'uri') +endfunction + +function! s:names(...) + return sort(filter(keys(g:plugs), 'stridx(v:val, a:1) == 0 && s:is_managed(v:val)')) +endfunction + +function! s:check_ruby() + silent! ruby require 'thread'; VIM::command("let g:plug_ruby = '#{RUBY_VERSION}'") + if !exists('g:plug_ruby') + redraw! + return s:warn('echom', 'Warning: Ruby interface is broken') + endif + let ruby_version = split(g:plug_ruby, '\.') + unlet g:plug_ruby + return s:version_requirement(ruby_version, [1, 8, 7]) +endfunction + +function! s:update_impl(pull, force, args) abort + let sync = index(a:args, '--sync') >= 0 || has('vim_starting') + let args = filter(copy(a:args), 'v:val != "--sync"') + let threads = (len(args) > 0 && args[-1] =~ '^[1-9][0-9]*$') ? + \ remove(args, -1) : get(g:, 'plug_threads', 16) + + let managed = filter(copy(g:plugs), 's:is_managed(v:key)') + let todo = empty(args) ? filter(managed, '!v:val.frozen || !isdirectory(v:val.dir)') : + \ filter(managed, 'index(args, v:key) >= 0') + + if empty(todo) + return s:warn('echo', 'No plugin to '. (a:pull ? 'update' : 'install')) + endif + + if !s:is_win && s:git_version_requirement(2, 3) + let s:git_terminal_prompt = exists('$GIT_TERMINAL_PROMPT') ? 
$GIT_TERMINAL_PROMPT : '' + let $GIT_TERMINAL_PROMPT = 0 + for plug in values(todo) + let plug.uri = substitute(plug.uri, + \ '^https://git::@github\.com', 'https://github.com', '') + endfor + endif + + if !isdirectory(g:plug_home) + try + call mkdir(g:plug_home, 'p') + catch + return s:err(printf('Invalid plug directory: %s. '. + \ 'Try to call plug#begin with a valid directory', g:plug_home)) + endtry + endif + + if has('nvim') && !exists('*jobwait') && threads > 1 + call s:warn('echom', '[vim-plug] Update Neovim for parallel installer') + endif + + let use_job = s:nvim || s:vim8 + let python = (has('python') || has('python3')) && !use_job + let ruby = has('ruby') && !use_job && (v:version >= 703 || v:version == 702 && has('patch374')) && !(s:is_win && has('gui_running')) && threads > 1 && s:check_ruby() + + let s:update = { + \ 'start': reltime(), + \ 'all': todo, + \ 'todo': copy(todo), + \ 'errors': [], + \ 'pull': a:pull, + \ 'force': a:force, + \ 'new': {}, + \ 'threads': (python || ruby || use_job) ? min([len(todo), threads]) : 1, + \ 'bar': '', + \ 'fin': 0 + \ } + + call s:prepare(1) + call append(0, ['', '']) + normal! 2G + silent! redraw + + let s:clone_opt = get(g:, 'plug_shallow', 1) ? + \ '--depth 1' . (s:git_version_requirement(1, 7, 10) ? ' --no-single-branch' : '') : '' + + if has('win32unix') + let s:clone_opt .= ' -c core.eol=lf -c core.autocrlf=input' + endif + + let s:submodule_opt = s:git_version_requirement(2, 8) ? 
' --jobs='.threads : '' + + " Python version requirement (>= 2.7) + if python && !has('python3') && !ruby && !use_job && s:update.threads > 1 + redir => pyv + silent python import platform; print platform.python_version() + redir END + let python = s:version_requirement( + \ map(split(split(pyv)[0], '\.'), 'str2nr(v:val)'), [2, 6]) + endif + + if (python || ruby) && s:update.threads > 1 + try + let imd = &imd + if s:mac_gui + set noimd + endif + if ruby + call s:update_ruby() + else + call s:update_python() + endif + catch + let lines = getline(4, '$') + let printed = {} + silent! 4,$d _ + for line in lines + let name = s:extract_name(line, '.', '') + if empty(name) || !has_key(printed, name) + call append('$', line) + if !empty(name) + let printed[name] = 1 + if line[0] == 'x' && index(s:update.errors, name) < 0 + call add(s:update.errors, name) + end + endif + endif + endfor + finally + let &imd = imd + call s:update_finish() + endtry + else + call s:update_vim() + while use_job && sync + sleep 100m + if s:update.fin + break + endif + endwhile + endif +endfunction + +function! s:log4(name, msg) + call setline(4, printf('- %s (%s)', a:msg, a:name)) + redraw +endfunction + +function! s:update_finish() + if exists('s:git_terminal_prompt') + let $GIT_TERMINAL_PROMPT = s:git_terminal_prompt + endif + if s:switch_in() + call append(3, '- Updating ...') | 4 + for [name, spec] in items(filter(copy(s:update.all), 'index(s:update.errors, v:key) < 0 && (s:update.force || s:update.pull || has_key(s:update.new, v:key))')) + let [pos, _] = s:logpos(name) + if !pos + continue + endif + if has_key(spec, 'commit') + call s:log4(name, 'Checking out '.spec.commit) + let out = s:checkout(spec) + elseif has_key(spec, 'tag') + let tag = spec.tag + if tag =~ '\*' + let tags = s:lines(s:system('git tag --list '.s:shellesc(tag).' 
--sort -version:refname 2>&1', spec.dir)) + if !v:shell_error && !empty(tags) + let tag = tags[0] + call s:log4(name, printf('Latest tag for %s -> %s', spec.tag, tag)) + call append(3, '') + endif + endif + call s:log4(name, 'Checking out '.tag) + let out = s:system('git checkout -q '.s:esc(tag).' -- 2>&1', spec.dir) + else + let branch = s:esc(get(spec, 'branch', 'master')) + call s:log4(name, 'Merging origin/'.branch) + let out = s:system('git checkout -q '.branch.' -- 2>&1' + \. (has_key(s:update.new, name) ? '' : ('&& git merge --ff-only origin/'.branch.' 2>&1')), spec.dir) + endif + if !v:shell_error && filereadable(spec.dir.'/.gitmodules') && + \ (s:update.force || has_key(s:update.new, name) || s:is_updated(spec.dir)) + call s:log4(name, 'Updating submodules. This may take a while.') + let out .= s:bang('git submodule update --init --recursive'.s:submodule_opt.' 2>&1', spec.dir) + endif + let msg = s:format_message(v:shell_error ? 'x': '-', name, out) + if v:shell_error + call add(s:update.errors, name) + call s:regress_bar() + silent execute pos 'd _' + call append(4, msg) | 4 + elseif !empty(out) + call setline(pos, msg[0]) + endif + redraw + endfor + silent 4 d _ + try + call s:do(s:update.pull, s:update.force, filter(copy(s:update.all), 'index(s:update.errors, v:key) < 0 && has_key(v:val, "do")')) + catch + call s:warn('echom', v:exception) + call s:warn('echo', '') + return + endtry + call s:finish(s:update.pull) + call setline(1, 'Updated. Elapsed time: ' . split(reltimestr(reltime(s:update.start)))[0] . ' sec.') + call s:switch_out('normal! gg') + endif +endfunction + +function! s:job_abort() + if (!s:nvim && !s:vim8) || !exists('s:jobs') + return + endif + + for [name, j] in items(s:jobs) + if s:nvim + silent! call jobstop(j.jobid) + elseif s:vim8 + silent! call job_stop(j.jobid) + endif + if j.new + call s:system('rm -rf ' . s:shellesc(g:plugs[name].dir)) + endif + endfor + let s:jobs = {} +endfunction + +function! 
s:last_non_empty_line(lines) + let len = len(a:lines) + for idx in range(len) + let line = a:lines[len-idx-1] + if !empty(line) + return line + endif + endfor + return '' +endfunction + +function! s:job_out_cb(self, data) abort + let self = a:self + let data = remove(self.lines, -1) . a:data + let lines = map(split(data, "\n", 1), 'split(v:val, "\r", 1)[-1]') + call extend(self.lines, lines) + " To reduce the number of buffer updates + let self.tick = get(self, 'tick', -1) + 1 + if !self.running || self.tick % len(s:jobs) == 0 + let bullet = self.running ? (self.new ? '+' : '*') : (self.error ? 'x' : '-') + let result = self.error ? join(self.lines, "\n") : s:last_non_empty_line(self.lines) + call s:log(bullet, self.name, result) + endif +endfunction + +function! s:job_exit_cb(self, data) abort + let a:self.running = 0 + let a:self.error = a:data != 0 + call s:reap(a:self.name) + call s:tick() +endfunction + +function! s:job_cb(fn, job, ch, data) + if !s:plug_window_exists() " plug window closed + return s:job_abort() + endif + call call(a:fn, [a:job, a:data]) +endfunction + +function! s:nvim_cb(job_id, data, event) dict abort + return a:event == 'stdout' ? + \ s:job_cb('s:job_out_cb', self, 0, join(a:data, "\n")) : + \ s:job_cb('s:job_exit_cb', self, 0, a:data) +endfunction + +function! s:spawn(name, cmd, opts) + let job = { 'name': a:name, 'running': 1, 'error': 0, 'lines': [''], + \ 'batchfile': (s:is_win && (s:nvim || s:vim8)) ? tempname().'.bat' : '', + \ 'new': get(a:opts, 'new', 0) } + let s:jobs[a:name] = job + let cmd = has_key(a:opts, 'dir') ? s:with_cd(a:cmd, a:opts.dir) : a:cmd + if !empty(job.batchfile) + call writefile(["@echo off\r", cmd . "\r"], job.batchfile) + let cmd = job.batchfile + endif + let argv = add(s:is_win ? 
['cmd', '/c'] : ['sh', '-c'], cmd) + + if s:nvim + call extend(job, { + \ 'on_stdout': function('s:nvim_cb'), + \ 'on_exit': function('s:nvim_cb'), + \ }) + let jid = jobstart(argv, job) + if jid > 0 + let job.jobid = jid + else + let job.running = 0 + let job.error = 1 + let job.lines = [jid < 0 ? argv[0].' is not executable' : + \ 'Invalid arguments (or job table is full)'] + endif + elseif s:vim8 + let jid = job_start(s:is_win ? join(argv, ' ') : argv, { + \ 'out_cb': function('s:job_cb', ['s:job_out_cb', job]), + \ 'exit_cb': function('s:job_cb', ['s:job_exit_cb', job]), + \ 'out_mode': 'raw' + \}) + if job_status(jid) == 'run' + let job.jobid = jid + else + let job.running = 0 + let job.error = 1 + let job.lines = ['Failed to start job'] + endif + else + let job.lines = s:lines(call('s:system', [cmd])) + let job.error = v:shell_error != 0 + let job.running = 0 + endif +endfunction + +function! s:reap(name) + let job = s:jobs[a:name] + if job.error + call add(s:update.errors, a:name) + elseif get(job, 'new', 0) + let s:update.new[a:name] = 1 + endif + let s:update.bar .= job.error ? 'x' : '=' + + let bullet = job.error ? 'x' : '-' + let result = job.error ? join(job.lines, "\n") : s:last_non_empty_line(job.lines) + call s:log(bullet, a:name, empty(result) ? 'OK' : result) + call s:bar() + + if has_key(job, 'batchfile') && !empty(job.batchfile) + call delete(job.batchfile) + endif + call remove(s:jobs, a:name) +endfunction + +function! s:bar() + if s:switch_in() + let total = len(s:update.all) + call setline(1, (s:update.pull ? 'Updating' : 'Installing'). + \ ' plugins ('.len(s:update.bar).'/'.total.')') + call s:progress_bar(2, s:update.bar, total) + call s:switch_out() + endif +endfunction + +function! s:logpos(name) + for i in range(4, line('$')) + if getline(i) =~# '^[-+x*] '.a:name.':' + for j in range(i + 1, line('$')) + if getline(j) !~ '^ ' + return [i, j - 1] + endif + endfor + return [i, i] + endif + endfor + return [0, 0] +endfunction + +function! 
s:log(bullet, name, lines) + if s:switch_in() + let [b, e] = s:logpos(a:name) + if b > 0 + silent execute printf('%d,%d d _', b, e) + if b > winheight('.') + let b = 4 + endif + else + let b = 4 + endif + " FIXME For some reason, nomodifiable is set after :d in vim8 + setlocal modifiable + call append(b - 1, s:format_message(a:bullet, a:name, a:lines)) + call s:switch_out() + endif +endfunction + +function! s:update_vim() + let s:jobs = {} + + call s:bar() + call s:tick() +endfunction + +function! s:tick() + let pull = s:update.pull + let prog = s:progress_opt(s:nvim || s:vim8) +while 1 " Without TCO, Vim stack is bound to explode + if empty(s:update.todo) + if empty(s:jobs) && !s:update.fin + call s:update_finish() + let s:update.fin = 1 + endif + return + endif + + let name = keys(s:update.todo)[0] + let spec = remove(s:update.todo, name) + let new = empty(globpath(spec.dir, '.git', 1)) + + call s:log(new ? '+' : '*', name, pull ? 'Updating ...' : 'Installing ...') + redraw + + let has_tag = has_key(spec, 'tag') + if !new + let [error, _] = s:git_validate(spec, 0) + if empty(error) + if pull + let fetch_opt = (has_tag && !empty(globpath(spec.dir, '.git/shallow'))) ? '--depth 99999999' : '' + call s:spawn(name, printf('git fetch %s %s 2>&1', fetch_opt, prog), { 'dir': spec.dir }) + else + let s:jobs[name] = { 'running': 0, 'lines': ['Already installed'], 'error': 0 } + endif + else + let s:jobs[name] = { 'running': 0, 'lines': s:lines(error), 'error': 1 } + endif + else + call s:spawn(name, + \ printf('git clone %s %s %s %s 2>&1', + \ has_tag ? '' : s:clone_opt, + \ prog, + \ s:shellesc(spec.uri), + \ s:shellesc(s:trim(spec.dir))), { 'new': 1 }) + endif + + if !s:jobs[name].running + call s:reap(name) + endif + if len(s:jobs) >= s:update.threads + break + endif +endwhile +endfunction + +function! s:update_python() +let py_exe = has('python') ? 
'python' : 'python3' +execute py_exe "<< EOF" +import datetime +import functools +import os +try: + import queue +except ImportError: + import Queue as queue +import random +import re +import shutil +import signal +import subprocess +import tempfile +import threading as thr +import time +import traceback +import vim + +G_NVIM = vim.eval("has('nvim')") == '1' +G_PULL = vim.eval('s:update.pull') == '1' +G_RETRIES = int(vim.eval('get(g:, "plug_retries", 2)')) + 1 +G_TIMEOUT = int(vim.eval('get(g:, "plug_timeout", 60)')) +G_CLONE_OPT = vim.eval('s:clone_opt') +G_PROGRESS = vim.eval('s:progress_opt(1)') +G_LOG_PROB = 1.0 / int(vim.eval('s:update.threads')) +G_STOP = thr.Event() +G_IS_WIN = vim.eval('s:is_win') == '1' + +class PlugError(Exception): + def __init__(self, msg): + self.msg = msg +class CmdTimedOut(PlugError): + pass +class CmdFailed(PlugError): + pass +class InvalidURI(PlugError): + pass +class Action(object): + INSTALL, UPDATE, ERROR, DONE = ['+', '*', 'x', '-'] + +class Buffer(object): + def __init__(self, lock, num_plugs, is_pull): + self.bar = '' + self.event = 'Updating' if is_pull else 'Installing' + self.lock = lock + self.maxy = int(vim.eval('winheight(".")')) + self.num_plugs = num_plugs + + def __where(self, name): + """ Find first line with name in current buffer. Return line num. """ + found, lnum = False, 0 + matcher = re.compile('^[-+x*] {0}:'.format(name)) + for line in vim.current.buffer: + if matcher.search(line) is not None: + found = True + break + lnum += 1 + + if not found: + lnum = -1 + return lnum + + def header(self): + curbuf = vim.current.buffer + curbuf[0] = self.event + ' plugins ({0}/{1})'.format(len(self.bar), self.num_plugs) + + num_spaces = self.num_plugs - len(self.bar) + curbuf[1] = '[{0}{1}]'.format(self.bar, num_spaces * ' ') + + with self.lock: + vim.command('normal! 
2G') + vim.command('redraw') + + def write(self, action, name, lines): + first, rest = lines[0], lines[1:] + msg = ['{0} {1}{2}{3}'.format(action, name, ': ' if first else '', first)] + msg.extend([' ' + line for line in rest]) + + try: + if action == Action.ERROR: + self.bar += 'x' + vim.command("call add(s:update.errors, '{0}')".format(name)) + elif action == Action.DONE: + self.bar += '=' + + curbuf = vim.current.buffer + lnum = self.__where(name) + if lnum != -1: # Found matching line num + del curbuf[lnum] + if lnum > self.maxy and action in set([Action.INSTALL, Action.UPDATE]): + lnum = 3 + else: + lnum = 3 + curbuf.append(msg, lnum) + + self.header() + except vim.error: + pass + +class Command(object): + CD = 'cd /d' if G_IS_WIN else 'cd' + + def __init__(self, cmd, cmd_dir=None, timeout=60, cb=None, clean=None): + self.cmd = cmd + if cmd_dir: + self.cmd = '{0} {1} && {2}'.format(Command.CD, cmd_dir, self.cmd) + self.timeout = timeout + self.callback = cb if cb else (lambda msg: None) + self.clean = clean if clean else (lambda: None) + self.proc = None + + @property + def alive(self): + """ Returns true only if command still running. """ + return self.proc and self.proc.poll() is None + + def execute(self, ntries=3): + """ Execute the command with ntries if CmdTimedOut. + Returns the output of the command if no Exception. + """ + attempt, finished, limit = 0, False, self.timeout + + while not finished: + try: + attempt += 1 + result = self.try_command() + finished = True + return result + except CmdTimedOut: + if attempt != ntries: + self.notify_retry() + self.timeout += limit + else: + raise + + def notify_retry(self): + """ Retry required for command, notify user. """ + for count in range(3, 0, -1): + if G_STOP.is_set(): + raise KeyboardInterrupt + msg = 'Timeout. 
Will retry in {0} second{1} ...'.format( + count, 's' if count != 1 else '') + self.callback([msg]) + time.sleep(1) + self.callback(['Retrying ...']) + + def try_command(self): + """ Execute a cmd & poll for callback. Returns list of output. + Raises CmdFailed -> return code for Popen isn't 0 + Raises CmdTimedOut -> command exceeded timeout without new output + """ + first_line = True + + try: + tfile = tempfile.NamedTemporaryFile(mode='w+b') + preexec_fn = not G_IS_WIN and os.setsid or None + self.proc = subprocess.Popen(self.cmd, stdout=tfile, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, shell=True, + preexec_fn=preexec_fn) + thrd = thr.Thread(target=(lambda proc: proc.wait()), args=(self.proc,)) + thrd.start() + + thread_not_started = True + while thread_not_started: + try: + thrd.join(0.1) + thread_not_started = False + except RuntimeError: + pass + + while self.alive: + if G_STOP.is_set(): + raise KeyboardInterrupt + + if first_line or random.random() < G_LOG_PROB: + first_line = False + line = '' if G_IS_WIN else nonblock_read(tfile.name) + if line: + self.callback([line]) + + time_diff = time.time() - os.path.getmtime(tfile.name) + if time_diff > self.timeout: + raise CmdTimedOut(['Timeout!']) + + thrd.join(0.5) + + tfile.seek(0) + result = [line.decode('utf-8', 'replace').rstrip() for line in tfile] + + if self.proc.returncode != 0: + raise CmdFailed([''] + result) + + return result + except: + self.terminate() + raise + + def terminate(self): + """ Terminate process and cleanup. 
""" + if self.alive: + if G_IS_WIN: + os.kill(self.proc.pid, signal.SIGINT) + else: + os.killpg(self.proc.pid, signal.SIGTERM) + self.clean() + +class Plugin(object): + def __init__(self, name, args, buf_q, lock): + self.name = name + self.args = args + self.buf_q = buf_q + self.lock = lock + self.tag = args.get('tag', 0) + + def manage(self): + try: + if os.path.exists(self.args['dir']): + self.update() + else: + self.install() + with self.lock: + thread_vim_command("let s:update.new['{0}'] = 1".format(self.name)) + except PlugError as exc: + self.write(Action.ERROR, self.name, exc.msg) + except KeyboardInterrupt: + G_STOP.set() + self.write(Action.ERROR, self.name, ['Interrupted!']) + except: + # Any exception except those above print stack trace + msg = 'Trace:\n{0}'.format(traceback.format_exc().rstrip()) + self.write(Action.ERROR, self.name, msg.split('\n')) + raise + + def install(self): + target = self.args['dir'] + if target[-1] == '\\': + target = target[0:-1] + + def clean(target): + def _clean(): + try: + shutil.rmtree(target) + except OSError: + pass + return _clean + + self.write(Action.INSTALL, self.name, ['Installing ...']) + callback = functools.partial(self.write, Action.INSTALL, self.name) + cmd = 'git clone {0} {1} {2} {3} 2>&1'.format( + '' if self.tag else G_CLONE_OPT, G_PROGRESS, self.args['uri'], + esc(target)) + com = Command(cmd, None, G_TIMEOUT, callback, clean(target)) + result = com.execute(G_RETRIES) + self.write(Action.DONE, self.name, result[-1:]) + + def repo_uri(self): + cmd = 'git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url' + command = Command(cmd, self.args['dir'], G_TIMEOUT,) + result = command.execute(G_RETRIES) + return result[-1] + + def update(self): + actual_uri = self.repo_uri() + expect_uri = self.args['uri'] + regex = re.compile(r'^(?:\w+://)?(?:[^@/]*@)?([^:/]*(?::[0-9]*)?)[:/](.*?)(?:\.git)?/?$') + ma = regex.match(actual_uri) + mb = regex.match(expect_uri) + if ma is None or mb is 
None or ma.groups() != mb.groups(): + msg = ['', + 'Invalid URI: {0}'.format(actual_uri), + 'Expected {0}'.format(expect_uri), + 'PlugClean required.'] + raise InvalidURI(msg) + + if G_PULL: + self.write(Action.UPDATE, self.name, ['Updating ...']) + callback = functools.partial(self.write, Action.UPDATE, self.name) + fetch_opt = '--depth 99999999' if self.tag and os.path.isfile(os.path.join(self.args['dir'], '.git/shallow')) else '' + cmd = 'git fetch {0} {1} 2>&1'.format(fetch_opt, G_PROGRESS) + com = Command(cmd, self.args['dir'], G_TIMEOUT, callback) + result = com.execute(G_RETRIES) + self.write(Action.DONE, self.name, result[-1:]) + else: + self.write(Action.DONE, self.name, ['Already installed']) + + def write(self, action, name, msg): + self.buf_q.put((action, name, msg)) + +class PlugThread(thr.Thread): + def __init__(self, tname, args): + super(PlugThread, self).__init__() + self.tname = tname + self.args = args + + def run(self): + thr.current_thread().name = self.tname + buf_q, work_q, lock = self.args + + try: + while not G_STOP.is_set(): + name, args = work_q.get_nowait() + plug = Plugin(name, args, buf_q, lock) + plug.manage() + work_q.task_done() + except queue.Empty: + pass + +class RefreshThread(thr.Thread): + def __init__(self, lock): + super(RefreshThread, self).__init__() + self.lock = lock + self.running = True + + def run(self): + while self.running: + with self.lock: + thread_vim_command('noautocmd normal! a') + time.sleep(0.33) + + def stop(self): + self.running = False + +if G_NVIM: + def thread_vim_command(cmd): + vim.session.threadsafe_call(lambda: vim.command(cmd)) +else: + def thread_vim_command(cmd): + vim.command(cmd) + +def esc(name): + return '"' + name.replace('"', '\"') + '"' + +def nonblock_read(fname): + """ Read a file with nonblock flag. Return the last line. 
""" + fread = os.open(fname, os.O_RDONLY | os.O_NONBLOCK) + buf = os.read(fread, 100000).decode('utf-8', 'replace') + os.close(fread) + + line = buf.rstrip('\r\n') + left = max(line.rfind('\r'), line.rfind('\n')) + if left != -1: + left += 1 + line = line[left:] + + return line + +def main(): + thr.current_thread().name = 'main' + nthreads = int(vim.eval('s:update.threads')) + plugs = vim.eval('s:update.todo') + mac_gui = vim.eval('s:mac_gui') == '1' + + lock = thr.Lock() + buf = Buffer(lock, len(plugs), G_PULL) + buf_q, work_q = queue.Queue(), queue.Queue() + for work in plugs.items(): + work_q.put(work) + + start_cnt = thr.active_count() + for num in range(nthreads): + tname = 'PlugT-{0:02}'.format(num) + thread = PlugThread(tname, (buf_q, work_q, lock)) + thread.start() + if mac_gui: + rthread = RefreshThread(lock) + rthread.start() + + while not buf_q.empty() or thr.active_count() != start_cnt: + try: + action, name, msg = buf_q.get(True, 0.25) + buf.write(action, name, ['OK'] if not msg else msg) + buf_q.task_done() + except queue.Empty: + pass + except KeyboardInterrupt: + G_STOP.set() + + if mac_gui: + rthread.stop() + rthread.join() + +main() +EOF +endfunction + +function! s:update_ruby() + ruby << EOF + module PlugStream + SEP = ["\r", "\n", nil] + def get_line + buffer = '' + loop do + char = readchar rescue return + if SEP.include? char.chr + buffer << $/ + break + else + buffer << char + end + end + buffer + end + end unless defined?(PlugStream) + + def esc arg + %["#{arg.gsub('"', '\"')}"] + end + + def killall pid + pids = [pid] + if /mswin|mingw|bccwin/ =~ RUBY_PLATFORM + pids.each { |pid| Process.kill 'INT', pid.to_i rescue nil } + else + unless `which pgrep 2> /dev/null`.empty? + children = pids + until children.empty? 
+ children = children.map { |pid| + `pgrep -P #{pid}`.lines.map { |l| l.chomp } + }.flatten + pids += children + end + end + pids.each { |pid| Process.kill 'TERM', pid.to_i rescue nil } + end + end + + def compare_git_uri a, b + regex = %r{^(?:\w+://)?(?:[^@/]*@)?([^:/]*(?::[0-9]*)?)[:/](.*?)(?:\.git)?/?$} + regex.match(a).to_a.drop(1) == regex.match(b).to_a.drop(1) + end + + require 'thread' + require 'fileutils' + require 'timeout' + running = true + iswin = VIM::evaluate('s:is_win').to_i == 1 + pull = VIM::evaluate('s:update.pull').to_i == 1 + base = VIM::evaluate('g:plug_home') + all = VIM::evaluate('s:update.todo') + limit = VIM::evaluate('get(g:, "plug_timeout", 60)') + tries = VIM::evaluate('get(g:, "plug_retries", 2)') + 1 + nthr = VIM::evaluate('s:update.threads').to_i + maxy = VIM::evaluate('winheight(".")').to_i + vim7 = VIM::evaluate('v:version').to_i <= 703 && RUBY_PLATFORM =~ /darwin/ + cd = iswin ? 'cd /d' : 'cd' + tot = VIM::evaluate('len(s:update.todo)') || 0 + bar = '' + skip = 'Already installed' + mtx = Mutex.new + take1 = proc { mtx.synchronize { running && all.shift } } + logh = proc { + cnt = bar.length + $curbuf[1] = "#{pull ? 'Updating' : 'Installing'} plugins (#{cnt}/#{tot})" + $curbuf[2] = '[' + bar.ljust(tot) + ']' + VIM::command('normal! 2G') + VIM::command('redraw') + } + where = proc { |name| (1..($curbuf.length)).find { |l| $curbuf[l] =~ /^[-+x*] #{name}:/ } } + log = proc { |name, result, type| + mtx.synchronize do + ing = ![true, false].include?(type) + bar += type ? '=' : 'x' unless ing + b = case type + when :install then '+' when :update then '*' + when true, nil then '-' else + VIM::command("call add(s:update.errors, '#{name}')") + 'x' + end + result = + if type || type.nil? 
+ ["#{b} #{name}: #{result.lines.to_a.last || 'OK'}"] + elsif result =~ /^Interrupted|^Timeout/ + ["#{b} #{name}: #{result}"] + else + ["#{b} #{name}"] + result.lines.map { |l| " " << l } + end + if lnum = where.call(name) + $curbuf.delete lnum + lnum = 4 if ing && lnum > maxy + end + result.each_with_index do |line, offset| + $curbuf.append((lnum || 4) - 1 + offset, line.gsub(/\e\[./, '').chomp) + end + logh.call + end + } + bt = proc { |cmd, name, type, cleanup| + tried = timeout = 0 + begin + tried += 1 + timeout += limit + fd = nil + data = '' + if iswin + Timeout::timeout(timeout) do + tmp = VIM::evaluate('tempname()') + system("(#{cmd}) > #{tmp}") + data = File.read(tmp).chomp + File.unlink tmp rescue nil + end + else + fd = IO.popen(cmd).extend(PlugStream) + first_line = true + log_prob = 1.0 / nthr + while line = Timeout::timeout(timeout) { fd.get_line } + data << line + log.call name, line.chomp, type if name && (first_line || rand < log_prob) + first_line = false + end + fd.close + end + [$? == 0, data.chomp] + rescue Timeout::Error, Interrupt => e + if fd && !fd.closed? + killall fd.pid + fd.close + end + cleanup.call if cleanup + if e.is_a?(Timeout::Error) && tried < tries + 3.downto(1) do |countdown| + s = countdown > 1 ? 's' : '' + log.call name, "Timeout. Will retry in #{countdown} second#{s} ...", type + sleep 1 + end + log.call name, 'Retrying ...', type + retry + end + [false, e.is_a?(Interrupt) ? "Interrupted!" : "Timeout!"] + end + } + main = Thread.current + threads = [] + watcher = Thread.new { + if vim7 + while VIM::evaluate('getchar(1)') + sleep 0.1 + end + else + require 'io/console' # >= Ruby 1.9 + nil until IO.console.getch == 3.chr + end + mtx.synchronize do + running = false + threads.each { |t| t.raise Interrupt } unless vim7 + end + threads.each { |t| t.join rescue nil } + main.kill + } + refresh = Thread.new { + while true + mtx.synchronize do + break unless running + VIM::command('noautocmd normal! 
a') + end + sleep 0.2 + end + } if VIM::evaluate('s:mac_gui') == 1 + + clone_opt = VIM::evaluate('s:clone_opt') + progress = VIM::evaluate('s:progress_opt(1)') + nthr.times do + mtx.synchronize do + threads << Thread.new { + while pair = take1.call + name = pair.first + dir, uri, tag = pair.last.values_at *%w[dir uri tag] + exists = File.directory? dir + ok, result = + if exists + chdir = "#{cd} #{iswin ? dir : esc(dir)}" + ret, data = bt.call "#{chdir} && git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url", nil, nil, nil + current_uri = data.lines.to_a.last + if !ret + if data =~ /^Interrupted|^Timeout/ + [false, data] + else + [false, [data.chomp, "PlugClean required."].join($/)] + end + elsif !compare_git_uri(current_uri, uri) + [false, ["Invalid URI: #{current_uri}", + "Expected: #{uri}", + "PlugClean required."].join($/)] + else + if pull + log.call name, 'Updating ...', :update + fetch_opt = (tag && File.exist?(File.join(dir, '.git/shallow'))) ? '--depth 99999999' : '' + bt.call "#{chdir} && git fetch #{fetch_opt} #{progress} 2>&1", name, :update, nil + else + [true, skip] + end + end + else + d = esc dir.sub(%r{[\\/]+$}, '') + log.call name, 'Installing ...', :install + bt.call "git clone #{clone_opt unless tag} #{progress} #{uri} #{d} 2>&1", name, :install, proc { + FileUtils.rm_rf dir + } + end + mtx.synchronize { VIM::command("let s:update.new['#{name}'] = 1") } if !exists && ok + log.call name, result, ok + end + } if running + end + end + threads.each { |t| t.join rescue nil } + logh.call + refresh.kill if refresh + watcher.kill +EOF +endfunction + +function! s:shellesc_cmd(arg) + let escaped = substitute(a:arg, '[&|<>()@^]', '^&', 'g') + let escaped = substitute(escaped, '%', '%%', 'g') + let escaped = substitute(escaped, '"', '\\^&', 'g') + let escaped = substitute(escaped, '\(\\\+\)\(\\^\)', '\1\1\2', 'g') + return '^"'.substitute(escaped, '\(\\\+\)$', '\1\1', '').'^"' +endfunction + +function! 
s:shellesc(arg) + if &shell =~# 'cmd.exe$' + return s:shellesc_cmd(a:arg) + endif + return shellescape(a:arg) +endfunction + +function! s:glob_dir(path) + return map(filter(s:glob(a:path, '**'), 'isdirectory(v:val)'), 's:dirpath(v:val)') +endfunction + +function! s:progress_bar(line, bar, total) + call setline(a:line, '[' . s:lpad(a:bar, a:total) . ']') +endfunction + +function! s:compare_git_uri(a, b) + " See `git help clone' + " https:// [user@] github.com[:port] / junegunn/vim-plug [.git] + " [git@] github.com[:port] : junegunn/vim-plug [.git] + " file:// / junegunn/vim-plug [/] + " / junegunn/vim-plug [/] + let pat = '^\%(\w\+://\)\='.'\%([^@/]*@\)\='.'\([^:/]*\%(:[0-9]*\)\=\)'.'[:/]'.'\(.\{-}\)'.'\%(\.git\)\=/\?$' + let ma = matchlist(a:a, pat) + let mb = matchlist(a:b, pat) + return ma[1:2] ==# mb[1:2] +endfunction + +function! s:format_message(bullet, name, message) + if a:bullet != 'x' + return [printf('%s %s: %s', a:bullet, a:name, s:lastline(a:message))] + else + let lines = map(s:lines(a:message), '" ".v:val') + return extend([printf('x %s:', a:name)], lines) + endif +endfunction + +function! s:with_cd(cmd, dir) + return printf('cd%s %s && %s', s:is_win ? ' /d' : '', s:shellesc(a:dir), a:cmd) +endfunction + +function! s:system(cmd, ...) + try + let [sh, shellcmdflag, shrd] = s:chsh(1) + let cmd = a:0 > 0 ? s:with_cd(a:cmd, a:1) : a:cmd + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + return system(s:is_win ? '('.cmd.')' : cmd) + finally + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry +endfunction + +function! s:system_chomp(...) + let ret = call('s:system', a:000) + return v:shell_error ? '' : substitute(ret, '\n$', '', '') +endfunction + +function! 
s:git_validate(spec, check_branch) + let err = '' + if isdirectory(a:spec.dir) + let result = s:lines(s:system('git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url', a:spec.dir)) + let remote = result[-1] + if v:shell_error + let err = join([remote, 'PlugClean required.'], "\n") + elseif !s:compare_git_uri(remote, a:spec.uri) + let err = join(['Invalid URI: '.remote, + \ 'Expected: '.a:spec.uri, + \ 'PlugClean required.'], "\n") + elseif a:check_branch && has_key(a:spec, 'commit') + let result = s:lines(s:system('git rev-parse HEAD 2>&1', a:spec.dir)) + let sha = result[-1] + if v:shell_error + let err = join(add(result, 'PlugClean required.'), "\n") + elseif !s:hash_match(sha, a:spec.commit) + let err = join([printf('Invalid HEAD (expected: %s, actual: %s)', + \ a:spec.commit[:6], sha[:6]), + \ 'PlugUpdate required.'], "\n") + endif + elseif a:check_branch + let branch = result[0] + " Check tag + if has_key(a:spec, 'tag') + let tag = s:system_chomp('git describe --exact-match --tags HEAD 2>&1', a:spec.dir) + if a:spec.tag !=# tag && a:spec.tag !~ '\*' + let err = printf('Invalid tag: %s (expected: %s). Try PlugUpdate.', + \ (empty(tag) ? 'N/A' : tag), a:spec.tag) + endif + " Check branch + elseif a:spec.branch !=# branch + let err = printf('Invalid branch: %s (expected: %s). Try PlugUpdate.', + \ branch, a:spec.branch) + endif + if empty(err) + let [ahead, behind] = split(s:lastline(s:system(printf( + \ 'git rev-list --count --left-right HEAD...origin/%s', + \ a:spec.branch), a:spec.dir)), '\t') + if !v:shell_error && ahead + if behind + " Only mention PlugClean if diverged, otherwise it's likely to be + " pushable (and probably not that messed up). 
+ let err = printf( + \ "Diverged from origin/%s (%d commit(s) ahead and %d commit(s) behind!\n" + \ .'Backup local changes and run PlugClean and PlugUpdate to reinstall it.', a:spec.branch, ahead, behind) + else + let err = printf("Ahead of origin/%s by %d commit(s).\n" + \ .'Cannot update until local changes are pushed.', + \ a:spec.branch, ahead) + endif + endif + endif + endif + else + let err = 'Not found' + endif + return [err, err =~# 'PlugClean'] +endfunction + +function! s:rm_rf(dir) + if isdirectory(a:dir) + call s:system((s:is_win ? 'rmdir /S /Q ' : 'rm -rf ') . s:shellesc(a:dir)) + endif +endfunction + +function! s:clean(force) + call s:prepare() + call append(0, 'Searching for invalid plugins in '.g:plug_home) + call append(1, '') + + " List of valid directories + let dirs = [] + let errs = {} + let [cnt, total] = [0, len(g:plugs)] + for [name, spec] in items(g:plugs) + if !s:is_managed(name) + call add(dirs, spec.dir) + else + let [err, clean] = s:git_validate(spec, 1) + if clean + let errs[spec.dir] = s:lines(err)[0] + else + call add(dirs, spec.dir) + endif + endif + let cnt += 1 + call s:progress_bar(2, repeat('=', cnt), total) + normal! 2G + redraw + endfor + + let allowed = {} + for dir in dirs + let allowed[s:dirpath(fnamemodify(dir, ':h:h'))] = 1 + let allowed[dir] = 1 + for child in s:glob_dir(dir) + let allowed[child] = 1 + endfor + endfor + + let todo = [] + let found = sort(s:glob_dir(g:plug_home)) + while !empty(found) + let f = remove(found, 0) + if !has_key(allowed, f) && isdirectory(f) + call add(todo, f) + call append(line('$'), '- ' . f) + if has_key(errs, f) + call append(line('$'), ' ' . errs[f]) + endif + let found = filter(found, 'stridx(v:val, f) != 0') + end + endwhile + + 4 + redraw + if empty(todo) + call append(line('$'), 'Already clean.') + else + let s:clean_count = 0 + call append(3, ['Directories to delete:', '']) + redraw! 
+ if a:force || s:ask_no_interrupt('Delete all directories?') + call s:delete([6, line('$')], 1) + else + call setline(4, 'Cancelled.') + nnoremap d :set opfunc=delete_opg@ + nmap dd d_ + xnoremap d :call delete_op(visualmode(), 1) + echo 'Delete the lines (d{motion}) to delete the corresponding directories' + endif + endif + 4 + setlocal nomodifiable +endfunction + +function! s:delete_op(type, ...) + call s:delete(a:0 ? [line("'<"), line("'>")] : [line("'["), line("']")], 0) +endfunction + +function! s:delete(range, force) + let [l1, l2] = a:range + let force = a:force + while l1 <= l2 + let line = getline(l1) + if line =~ '^- ' && isdirectory(line[2:]) + execute l1 + redraw! + let answer = force ? 1 : s:ask('Delete '.line[2:].'?', 1) + let force = force || answer > 1 + if answer + call s:rm_rf(line[2:]) + setlocal modifiable + call setline(l1, '~'.line[1:]) + let s:clean_count += 1 + call setline(4, printf('Removed %d directories.', s:clean_count)) + setlocal nomodifiable + endif + endif + let l1 += 1 + endwhile +endfunction + +function! s:upgrade() + echo 'Downloading the latest version of vim-plug' + redraw + let tmp = tempname() + let new = tmp . '/plug.vim' + + try + let out = s:system(printf('git clone --depth 1 %s %s', s:plug_src, tmp)) + if v:shell_error + return s:err('Error upgrading vim-plug: '. out) + endif + + if readfile(s:me) ==# readfile(new) + echo 'vim-plug is already up-to-date' + return 0 + else + call rename(s:me, s:me . '.old') + call rename(new, s:me) + unlet g:loaded_plug + echo 'vim-plug has been upgraded' + return 1 + endif + finally + silent! call s:rm_rf(tmp) + endtry +endfunction + +function! s:upgrade_specs() + for spec in values(g:plugs) + let spec.frozen = get(spec, 'frozen', 0) + endfor +endfunction + +function! 
s:status() + call s:prepare() + call append(0, 'Checking plugins') + call append(1, '') + + let ecnt = 0 + let unloaded = 0 + let [cnt, total] = [0, len(g:plugs)] + for [name, spec] in items(g:plugs) + let is_dir = isdirectory(spec.dir) + if has_key(spec, 'uri') + if is_dir + let [err, _] = s:git_validate(spec, 1) + let [valid, msg] = [empty(err), empty(err) ? 'OK' : err] + else + let [valid, msg] = [0, 'Not found. Try PlugInstall.'] + endif + else + if is_dir + let [valid, msg] = [1, 'OK'] + else + let [valid, msg] = [0, 'Not found.'] + endif + endif + let cnt += 1 + let ecnt += !valid + " `s:loaded` entry can be missing if PlugUpgraded + if is_dir && get(s:loaded, name, -1) == 0 + let unloaded = 1 + let msg .= ' (not loaded)' + endif + call s:progress_bar(2, repeat('=', cnt), total) + call append(3, s:format_message(valid ? '-' : 'x', name, msg)) + normal! 2G + redraw + endfor + call setline(1, 'Finished. '.ecnt.' error(s).') + normal! gg + setlocal nomodifiable + if unloaded + echo "Press 'L' on each line to load plugin, or 'U' to update" + nnoremap L :call status_load(line('.')) + xnoremap L :call status_load(line('.')) + end +endfunction + +function! s:extract_name(str, prefix, suffix) + return matchstr(a:str, '^'.a:prefix.' \zs[^:]\+\ze:.*'.a:suffix.'$') +endfunction + +function! s:status_load(lnum) + let line = getline(a:lnum) + let name = s:extract_name(line, '-', '(not loaded)') + if !empty(name) + call plug#load(name) + setlocal modifiable + call setline(a:lnum, substitute(line, ' (not loaded)$', '', '')) + setlocal nomodifiable + endif +endfunction + +function! s:status_update() range + let lines = getline(a:firstline, a:lastline) + let names = filter(map(lines, 's:extract_name(v:val, "[x-]", "")'), '!empty(v:val)') + if !empty(names) + echo + execute 'PlugUpdate' join(names) + endif +endfunction + +function! s:is_preview_window_open() + silent! wincmd P + if &previewwindow + wincmd p + return 1 + endif +endfunction + +function! 
s:find_name(lnum) + for lnum in reverse(range(1, a:lnum)) + let line = getline(lnum) + if empty(line) + return '' + endif + let name = s:extract_name(line, '-', '') + if !empty(name) + return name + endif + endfor + return '' +endfunction + +function! s:preview_commit() + if b:plug_preview < 0 + let b:plug_preview = !s:is_preview_window_open() + endif + + let sha = matchstr(getline('.'), '^ \X*\zs[0-9a-f]\{7,9}') + if empty(sha) + return + endif + + let name = s:find_name(line('.')) + if empty(name) || !has_key(g:plugs, name) || !isdirectory(g:plugs[name].dir) + return + endif + + if exists('g:plug_pwindow') && !s:is_preview_window_open() + execute g:plug_pwindow + execute 'e' sha + else + execute 'pedit' sha + wincmd P + endif + setlocal previewwindow filetype=git buftype=nofile nobuflisted modifiable + try + let [sh, shellcmdflag, shrd] = s:chsh(1) + let cmd = 'cd '.s:shellesc(g:plugs[name].dir).' && git show --no-color --pretty=medium '.sha + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + execute 'silent %!' cmd + finally + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry + setlocal nomodifiable + nnoremap q :q + wincmd p +endfunction + +function! s:section(flags) + call search('\(^[x-] \)\@<=[^:]\+:', a:flags) +endfunction + +function! s:format_git_log(line) + let indent = ' ' + let tokens = split(a:line, nr2char(1)) + if len(tokens) != 5 + return indent.substitute(a:line, '\s*$', '', '') + endif + let [graph, sha, refs, subject, date] = tokens + let tag = matchstr(refs, 'tag: [^,)]\+') + let tag = empty(tag) ? ' ' : ' ('.tag.') ' + return printf('%s%s%s%s%s (%s)', indent, graph, sha, tag, subject, date) +endfunction + +function! s:append_ul(lnum, text) + call append(a:lnum, ['', a:text, repeat('-', len(a:text))]) +endfunction + +function! 
s:diff() + call s:prepare() + call append(0, ['Collecting changes ...', '']) + let cnts = [0, 0] + let bar = '' + let total = filter(copy(g:plugs), 's:is_managed(v:key) && isdirectory(v:val.dir)') + call s:progress_bar(2, bar, len(total)) + for origin in [1, 0] + let plugs = reverse(sort(items(filter(copy(total), (origin ? '' : '!').'(has_key(v:val, "commit") || has_key(v:val, "tag"))')))) + if empty(plugs) + continue + endif + call s:append_ul(2, origin ? 'Pending updates:' : 'Last update:') + for [k, v] in plugs + let range = origin ? '..origin/'.v.branch : 'HEAD@{1}..' + let cmd = 'git log --graph --color=never '.join(map(['--pretty=format:%x01%h%x01%d%x01%s%x01%cr', range], 's:shellesc(v:val)')) + if has_key(v, 'rtp') + let cmd .= ' -- '.s:shellesc(v.rtp) + endif + let diff = s:system_chomp(cmd, v.dir) + if !empty(diff) + let ref = has_key(v, 'tag') ? (' (tag: '.v.tag.')') : has_key(v, 'commit') ? (' '.v.commit) : '' + call append(5, extend(['', '- '.k.':'.ref], map(s:lines(diff), 's:format_git_log(v:val)'))) + let cnts[origin] += 1 + endif + let bar .= '=' + call s:progress_bar(2, bar, len(total)) + normal! 2G + redraw + endfor + if !cnts[origin] + call append(5, ['', 'N/A']) + endif + endfor + call setline(1, printf('%d plugin(s) updated.', cnts[0]) + \ . (cnts[1] ? printf(' %d plugin(s) have pending updates.', cnts[1]) : '')) + + if cnts[0] || cnts[1] + nnoremap (plug-preview) :silent! call preview_commit() + if empty(maparg("\", 'n')) + nmap (plug-preview) + endif + if empty(maparg('o', 'n')) + nmap o (plug-preview) + endif + endif + if cnts[0] + nnoremap X :call revert() + echo "Press 'X' on each block to revert the update" + endif + normal! gg + setlocal nomodifiable +endfunction + +function! s:revert() + if search('^Pending updates', 'bnW') + return + endif + + let name = s:find_name(line('.')) + if empty(name) || !has_key(g:plugs, name) || + \ input(printf('Revert the update of %s? (y/N) ', name)) !~? 
'^y' + return + endif + + call s:system('git reset --hard HEAD@{1} && git checkout '.s:esc(g:plugs[name].branch).' --', g:plugs[name].dir) + setlocal modifiable + normal! "_dap + setlocal nomodifiable + echo 'Reverted' +endfunction + +function! s:snapshot(force, ...) abort + call s:prepare() + setf vim + call append(0, ['" Generated by vim-plug', + \ '" '.strftime("%c"), + \ '" :source this file in vim to restore the snapshot', + \ '" or execute: vim -S snapshot.vim', + \ '', '', 'PlugUpdate!']) + 1 + let anchor = line('$') - 3 + let names = sort(keys(filter(copy(g:plugs), + \'has_key(v:val, "uri") && !has_key(v:val, "commit") && isdirectory(v:val.dir)'))) + for name in reverse(names) + let sha = s:system_chomp('git rev-parse --short HEAD', g:plugs[name].dir) + if !empty(sha) + call append(anchor, printf("silent! let g:plugs['%s'].commit = '%s'", name, sha)) + redraw + endif + endfor + + if a:0 > 0 + let fn = expand(a:1) + if filereadable(fn) && !(a:force || s:ask(a:1.' already exists. Overwrite?')) + return + endif + call writefile(getline(1, '$'), fn) + echo 'Saved as '.a:1 + silent execute 'e' s:esc(fn) + setf vim + endif +endfunction + +function! s:split_rtp() + return split(&rtp, '\\\@ f :Import fmt +" +" Drop fmt +" au Filetype go nnoremap F :Drop fmt +" +" Import the word under your cursor +" au Filetype go nnoremap k +" \ :exe 'Import ' . expand('') +" +" The backslash '\' is the default maplocalleader, so it is possible that +" your vim is set to use a different character (:help maplocalleader). +" +" Options: +" +" g:go_import_commands [default=1] +" +" Flag to indicate whether to enable the commands listed above. +" +if exists("b:did_ftplugin_go_import") + finish +endif + +if !exists("g:go_import_commands") + let g:go_import_commands = 1 +endif + +if g:go_import_commands + command! -buffer -nargs=? -complete=customlist,go#complete#Package Drop call s:SwitchImport(0, '', ) + command! 
-buffer -nargs=1 -complete=customlist,go#complete#Package Import call s:SwitchImport(1, '', ) + command! -buffer -nargs=* -complete=customlist,go#complete#Package ImportAs call s:SwitchImport(1, ) +endif + +function! s:SwitchImport(enabled, localname, path) + let view = winsaveview() + let path = a:path + + " Quotes are not necessary, so remove them if provided. + if path[0] == '"' + let path = strpart(path, 1) + endif + if path[len(path)-1] == '"' + let path = strpart(path, 0, len(path) - 1) + endif + if path == '' + call s:Error('Import path not provided') + return + endif + + " Extract any site prefix (e.g. github.com/). + " If other imports with the same prefix are grouped separately, + " we will add this new import with them. + " Only up to and including the first slash is used. + let siteprefix = matchstr(path, "^[^/]*/") + + let qpath = '"' . path . '"' + if a:localname != '' + let qlocalpath = a:localname . ' ' . qpath + else + let qlocalpath = qpath + endif + let indentstr = 0 + let packageline = -1 " Position of package name statement + let appendline = -1 " Position to introduce new import + let deleteline = -1 " Position of line with existing import + let linesdelta = 0 " Lines added/removed + + " Find proper place to add/remove import. 
+ let line = 0 + while line <= line('$') + let linestr = getline(line) + + if linestr =~# '^package\s' + let packageline = line + let appendline = line + + elseif linestr =~# '^import\s\+(' + let appendstr = qlocalpath + let indentstr = 1 + let appendline = line + let firstblank = -1 + let lastprefix = "" + while line <= line("$") + let line = line + 1 + let linestr = getline(line) + let m = matchlist(getline(line), '^\()\|\(\s\+\)\(\S*\s*\)"\(.\+\)"\)') + if empty(m) + if siteprefix == "" && a:enabled + " must be in the first group + break + endif + " record this position, but keep looking + if firstblank < 0 + let firstblank = line + endif + continue + endif + if m[1] == ')' + " if there's no match, add it to the first group + if appendline < 0 && firstblank >= 0 + let appendline = firstblank + endif + break + endif + let lastprefix = matchstr(m[4], "^[^/]*/") + if a:localname != '' && m[3] != '' + let qlocalpath = printf('%-' . (len(m[3])-1) . 's %s', a:localname, qpath) + endif + let appendstr = m[2] . qlocalpath + let indentstr = 0 + if m[4] == path + let appendline = -1 + let deleteline = line + break + elseif m[4] < path + " don't set candidate position if we have a site prefix, + " we've passed a blank line, and this doesn't share the same + " site prefix. + if siteprefix == "" || firstblank < 0 || match(m[4], "^" . siteprefix) >= 0 + let appendline = line + endif + elseif siteprefix != "" && match(m[4], "^" . siteprefix) >= 0 + " first entry of site group + let appendline = line - 1 + break + endif + endwhile + break + + elseif linestr =~# '^import ' + if appendline == packageline + let appendstr = 'import ' . qlocalpath + let appendline = line - 1 + endif + let m = matchlist(linestr, '^import\(\s\+\)\(\S*\s*\)"\(.\+\)"') + if !empty(m) + if m[3] == path + let appendline = -1 + let deleteline = line + break + endif + if m[3] < path + let appendline = line + endif + if a:localname != '' && m[2] != '' + let qlocalpath = printf("%s %" . len(m[2])-1 . 
"s", a:localname, qpath) + endif + let appendstr = 'import' . m[1] . qlocalpath + endif + + elseif linestr =~# '^\(var\|const\|type\|func\)\>' + break + + endif + let line = line + 1 + endwhile + + " Append or remove the package import, as requested. + if a:enabled + if deleteline != -1 + call s:Error(qpath . ' already being imported') + elseif appendline == -1 + call s:Error('No package line found') + else + if appendline == packageline + call append(appendline + 0, '') + call append(appendline + 1, 'import (') + call append(appendline + 2, ')') + let appendline += 2 + let linesdelta += 3 + let appendstr = qlocalpath + let indentstr = 1 + endif + call append(appendline, appendstr) + execute appendline + 1 + if indentstr + execute 'normal >>' + endif + let linesdelta += 1 + endif + else + if deleteline == -1 + call s:Error(qpath . ' not being imported') + else + execute deleteline . 'd' + let linesdelta -= 1 + + if getline(deleteline-1) =~# '^import\s\+(' && getline(deleteline) =~# '^)' + " Delete empty import block + let deleteline -= 1 + execute deleteline . "d" + execute deleteline . "d" + let linesdelta -= 2 + endif + + if getline(deleteline) == '' && getline(deleteline - 1) == '' + " Delete spacing for removed line too. + execute deleteline . "d" + let linesdelta -= 1 + endif + endif + endif + + " Adjust view for any changes. + let view.lnum += linesdelta + let view.topline += linesdelta + if view.topline < 0 + let view.topline = 0 + endif + + " Put buffer back where it was. + call winrestview(view) + +endfunction + +function! s:Error(s) + echohl Error | echo a:s | echohl None +endfunction + +let b:did_ftplugin_go_import = 1 + +" vim:ts=4:sw=4:et diff --git a/roles/dotfiles/files/.vim/ftplugin/go/test.sh b/roles/dotfiles/files/.vim/ftplugin/go/test.sh new file mode 100755 index 0000000..d8a5b89 --- /dev/null +++ b/roles/dotfiles/files/.vim/ftplugin/go/test.sh @@ -0,0 +1,78 @@ +#!/bin/bash -e +# +# Copyright 2012 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. +# +# Tests for import.vim. + +cd $(dirname $0) + +cat > base.go <&1 -n "$1: " + vim -e -s -u /dev/null -U /dev/null --noplugin -c "source import.vim" \ + -c "$1" -c 'wq! test.go' base.go + # ensure blank lines are treated correctly + if ! gofmt test.go | cmp test.go -; then + echo 2>&1 "gofmt conflict" + gofmt test.go | diff -u test.go - | sed "s/^/ /" 2>&1 + fail=1 + return + fi + if ! [[ $(cat test.go) =~ $2 ]]; then + echo 2>&1 "$2 did not match" + cat test.go | sed "s/^/ /" 2>&1 + fail=1 + return + fi + echo 2>&1 "ok" +} + +# Tests for Import + +test_one "Import baz" '"baz".*"bytes"' +test_one "Import io/ioutil" '"io".*"io/ioutil".*"net"' +test_one "Import myc" '"io".*"myc".*"net"' # prefix of a site prefix +test_one "Import nat" '"io".*"nat".*"net"' +test_one "Import net/http" '"net".*"net/http".*"mycorp/foo"' +test_one "Import zoo" '"net".*"zoo".*"mycorp/foo"' +test_one "Import mycorp/bar" '"net".*"mycorp/bar".*"mycorp/foo"' +test_one "Import mycorp/goo" '"net".*"mycorp/foo".*"mycorp/goo"' + +# Tests for Drop + +cat > base.go <&1 "FAIL" + exit 1 +fi +echo 2>&1 "PASS" diff --git a/roles/dotfiles/files/.vimrc b/roles/dotfiles/files/.vimrc new file mode 100644 index 0000000..12cd9c6 --- /dev/null +++ b/roles/dotfiles/files/.vimrc @@ -0,0 +1,133 @@ +" General options +set backspace=indent,eol,start +set cindent autoindent +set confirm +set encoding=utf-8 +set incsearch +set hidden +set mouse=a +set nocompatible +set noexpandtab +set nohlsearch +set number +set ruler +set showcmd +set showmatch +set showmode +set tags=./tags,tags,/usr/src/sys/arch/amd64/tags,/var/db/libc.tags +set t_Co=256 +set ttyfast +source /usr/share/vim/vim82/ftplugin/man.vim + +filetype plugin on + +nnoremap :tag +nnoremap :pop + +nnoremap :bprev + +" fix glitches in certain terminals +" backspace +imap ^? ^H + +" f7 toggles spelling on/off +nn :setlocal spell! spell? 
+ +" view binary files as hex +" Convert to hex and back; does not save changes +nn :%!xxd -g 1 +nn :%!xxd -g 1 -r + +" makefile magic +" compiler stuff +let g:compiler_gcc_ignore_unmatched_lines=1 +let mapleader=',' +" quickfix :make +nmap m :wa:silent! make \| redraw! \| cw +vmap m :wa:silent! make \| redraw! \| cw +nn ,c :silent! make clean \| redraw! \| cw +" handy shortcuts +map h :ccl +map s :cw +map l :cl +" jump between messages +map n :cn +map p :cp + +" format selection +map f :!fmt + + +" @c comment, @u uncomment, @p print function name +let @u='0xx$xx^[' +let @c='I/*^[A*/^[' +let @p='ofprintf(stderr, "%s\n", __func__);^[' + +:ab #d #define +:ab #i #include + +autocmd FileType make setlocal noexpandtab +autocmd FileType c setlocal noexpandtab +autocmd FileType cc setlocal noexpandtab +autocmd FileType python setlocal expandtab shiftwidth=4 softtabstop=4 +autocmd FileType ada setlocal expandtab shiftwidth=3 softtabstop=3 tabstop=3 + +" Plugins + +" Initialization +call plug#begin('~/.vim/bundle') + +Plug 'scrooloose/nerdtree' +Plug 'junegunn/fzf' +Plug 'fatih/vim-go', { 'for': 'go' } +Plug 'ambv/black', { 'for': 'python' } +Plug 'mileszs/ack.vim' +Plug 'racer-rust/vim-racer', { 'for': 'rust' } + +" Themes +Plug 'KKPMW/oldbook-vim' +Plug 'agreco/vim-citylights' +Plug 'xdefrag/vim-beelzebub' +Plug 'logico-dev/typewriter' +Plug 'vim-scripts/wombat256.vim' + +call plug#end() + +" NERDTree +map o :NERDTree + +" FZF +nmap (fzf-maps-n) +xmap (fzf-maps-x) +omap (fzf-maps-o) +imap (fzf-complete-word) +imap (fzf-complete-path) +imap (fzf-complete-file-ag) +imap (fzf-complete-line) + +command! FZFBuffers call fzf#run({'source': map(range(1, bufnr('$')), 'bufname(v:val)'), 'sink': 'e', 'down': '30%'}) +map b :FZFBuffers + +" Ack +if executable('ag') + let g:ackprg = 'ag --vimgrep' +endif + +" The space is signficant. 
+map / :Ack + +" Go stuff +map i :GoImports +map i :GoImports + +let g:go_fmt_autosave = 1 +let g:go_fmt_command = "goimports" + +au FileType rust nmap gd (rust-def) +autocmd Filetype c,cpp inoremap t :wa:silent! make test \| redraw! \| cw +autocmd Filetype go map t :wa:GoTest +autocmd Filetype go map C-] :w:GoDef +autocmd Filetype go map C-\ :w:GoDefPop + + +colorscheme oldbook diff --git a/roles/dotfiles/files/bin/em b/roles/dotfiles/files/bin/em new file mode 100755 index 0000000..02e8fd3 --- /dev/null +++ b/roles/dotfiles/files/bin/em @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# em: emacsclient wrapper; adds -n when $DISPLAY is set and starts in $HOME when called without arguments +if [ -z "$DISPLAY" ] +then + NW="" +else + NW="-n" +fi + +if [ -z "$*" ] # "$*" is always a single word; "$@" made the test fail with 2+ arguments +then + cd "$HOME" +fi + +emacsclient $NW -c -a '' "$@" # $NW is left unquoted so an empty value adds no argument

: "π" U03C0 # GREEK SMALL LETTER PI + : "ψ" U03C8 # GREEK SMALL LETTER PSI + : "ψ" U03C8 # GREEK SMALL LETTER PSI + : "ρ" U03C1 # GREEK SMALL LETTER RHO + : "ρ" U03C1 # GREEK SMALL LETTER RHO + : "σ" U03C3 # GREEK SMALL LETTER SIGMA + : "σ" U03C3 # GREEK SMALL LETTER SIGMA + : "τ" U03C4 # GREEK SMALL LETTER TAU + : "τ" U03C4 # GREEK SMALL LETTER TAU + : "υ" U03C5 # GREEK SMALL LETTER UPSILON + : "υ" U03C5 # GREEK SMALL LETTER UPSILON + : "ς" U03C2 # GREEK SMALL LETTER FINAL SIGMA + : "ς" U03C2 # GREEK SMALL LETTER FINAL SIGMA + : "ω" U03C9 # GREEK SMALL LETTER OMEGA + : "ω" U03C9 # GREEK SMALL LETTER OMEGA + : "χ" U03C7 # GREEK SMALL LETTER CHI + : "χ" U03C7 # GREEK SMALL LETTER CHI + : "η" U03B7 # GREEK SMALL LETTER ETA + : "η" U03B7 # GREEK SMALL LETTER ETA + : "ζ" U03B6 # GREEK SMALL LETTER ZETA + : "ζ" U03B6 # GREEK SMALL LETTER ZETA + +# Capital greek letters. + : "Α" U0391 # GREEK CAPITAL LETTER ALPHA + : "Α" U0391 # GREEK CAPITAL LETTER ALPHA + : "Β" U0392 # GREEK CAPITAL LETTER BETA + : "Β" U0392 # GREEK CAPITAL LETTER BETA + : "Ξ" U039E # GREEK CAPITAL LETTER XI + : "Ξ" U039E # GREEK CAPITAL LETTER XI + : "Δ" U0394 # GREEK CAPITAL LETTER DELTA + : "Δ" U0394 # GREEK CAPITAL LETTER DELTA + : "Ε" U0395 # GREEK CAPITAL LETTER EPSILON + : "Ε" U0395 # GREEK CAPITAL LETTER EPSILON + : "Φ" U03A6 # GREEK CAPITAL LETTER PHI + : "Φ" U03A6 # GREEK CAPITAL LETTER PHI + : "Γ" U0393 # GREEK CAPITAL LETTER GAMMA + : "Γ" U0393 # GREEK CAPITAL LETTER GAMMA + : "Θ" U0398 # GREEK CAPITAL LETTER THETA + : "Θ" U0398 # GREEK CAPITAL LETTER THETA + : "Ι" U0399 # GREEK CAPITAL LETTER IOTA + : "Ι" U0399 # GREEK CAPITAL LETTER IOTA + : "Κ" U039A # GREEK CAPITAL LETTER KAPPA + : "Κ" U039A # GREEK CAPITAL LETTER KAPPA + : "Λ" U039B # GREEK CAPITAL LETTER LAMBDA + : "Λ" U039B # GREEK CAPITAL LETTER LAMBDA + : "Μ" U039C # GREEK CAPITAL LETTER MU + : "Μ" U039C # GREEK CAPITAL LETTER MU + : "Ν" U039D # GREEK CAPITAL LETTER NU + : "Ν" U039D # GREEK CAPITAL LETTER NU + : "Ο" U039F # GREEK 
CAPITAL LETTER OMICRON + : "Ο" U039F # GREEK CAPITAL LETTER OMICRON +