diff --git a/roles/dotfiles/files/.XCompose b/roles/dotfiles/files/.XCompose new file mode 100644 index 0000000..4939613 --- /dev/null +++ b/roles/dotfiles/files/.XCompose @@ -0,0 +1,118 @@ +include "/usr/X11R6/share/X11/locale/en_US.UTF-8/Compose" + +# Used for pollen + : "◊" U25ca # ◊ LOZENGE + +# LAMBDA LAMBDA LAMBDA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + +# Greek letters +# Source: https://gist.githubusercontent.com/carlobaldassi/8951743/raw/2b587c8147603d395bf2ec221eee348f27dabaa8/XCompose_greek + : "α" U03B1 # GREEK SMALL LETTER ALPHA + : "α" U03B1 # GREEK SMALL LETTER ALPHA + : "β" U03B2 # GREEK SMALL LETTER BETA + : "β" U03B2 # GREEK SMALL LETTER BETA + : "ξ" U03BE # GREEK SMALL LETTER XI + : "ξ" U03BE # GREEK SMALL LETTER XI + : "δ" U03B4 # GREEK SMALL LETTER DELTA + : "δ" U03B4 # GREEK SMALL LETTER DELTA + : "ε" U03B5 # GREEK SMALL LETTER EPSILON + : "ε" U03B5 # GREEK SMALL LETTER EPSILON + : "φ" U03C6 # GREEK SMALL LETTER PHI + : "φ" U03C6 # GREEK SMALL LETTER PHI + : "γ" U03B3 # GREEK SMALL LETTER GAMMA + : "γ" U03B3 # GREEK SMALL LETTER GAMMA + : "θ" U03B8 # GREEK SMALL LETTER THETA + : "θ" U03B8 # GREEK SMALL LETTER THETA + : "ι" U03B9 # GREEK SMALL LETTER IOTA + : "ι" U03B9 # GREEK SMALL LETTER IOTA + : "κ" U03BA # GREEK SMALL LETTER KAPPA + : "κ" U03BA # GREEK SMALL LETTER KAPPA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + : "λ" U03BB # GREEK SMALL LETTER LAMBDA + : "μ" U03BC # GREEK SMALL LETTER MU + : "μ" U03BC # GREEK SMALL LETTER MU + : "ν" U03BD # GREEK SMALL LETTER NU + : "ν" U03BD # GREEK SMALL LETTER NU + : "ο" U03BF # GREEK SMALL LETTER OMICRON + : "ο" U03BF # GREEK SMALL LETTER OMICRON +



: "Π" U03A0 # GREEK CAPITAL LETTER PI + : "Ψ" U03A8 # GREEK CAPITAL LETTER PSI + : "Ψ" U03A8 # GREEK CAPITAL LETTER PSI + : "Ρ" U03A1 # GREEK CAPITAL LETTER RHO + : "Ρ" U03A1 # GREEK CAPITAL LETTER RHO + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Τ" U03A4 # GREEK CAPITAL LETTER TAU + : "Τ" U03A4 # GREEK CAPITAL LETTER TAU + : "Υ" U03A5 # GREEK CAPITAL LETTER UPSILON + : "Υ" U03A5 # GREEK CAPITAL LETTER UPSILON + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Σ" U03A3 # GREEK CAPITAL LETTER SIGMA + : "Ω" U03A9 # GREEK CAPITAL LETTER OMEGA + : "Ω" U03A9 # GREEK CAPITAL LETTER OMEGA + : "Χ" U03A7 # GREEK CAPITAL LETTER CHI + : "Χ" U03A7 # GREEK CAPITAL LETTER CHI + : "Η" U0397 # GREEK CAPITAL LETTER ETA + : "Η" U0397 # GREEK CAPITAL LETTER ETA + : "Ζ" U0396 # GREEK CAPITAL LETTER ZETA + : "Ζ" U0396 # GREEK CAPITAL LETTER ZETA + + : "∀" U2200 # FOR ALL + : "∃" U2203 # THERE EXISTS + + + diff --git a/roles/dotfiles/files/.config/htop/htoprc b/roles/dotfiles/files/.config/htop/htoprc new file mode 100644 index 0000000..b25c04d --- /dev/null +++ b/roles/dotfiles/files/.config/htop/htoprc @@ -0,0 +1,26 @@ +# Beware! This file is rewritten by htop when settings are changed in the interface. +# The parser is also very primitive, and not human-friendly. 
+fields=0 48 17 18 38 39 40 2 46 47 49 1 +sort_key=46 +sort_direction=1 +hide_threads=0 +hide_kernel_threads=1 +hide_userland_threads=0 +shadow_other_users=0 +show_thread_names=0 +show_program_path=1 +highlight_base_name=0 +highlight_megabytes=1 +highlight_threads=1 +tree_view=0 +header_margin=1 +detailed_cpu_time=0 +cpu_count_from_zero=0 +update_process_names=0 +account_guest_in_cpu_meter=0 +color_scheme=0 +delay=15 +left_meters=AllCPUs Memory Swap +left_meter_modes=1 1 1 +right_meters=Tasks LoadAverage Uptime Memory Swap +right_meter_modes=2 2 2 2 2 diff --git a/roles/dotfiles/files/.config/i3/config b/roles/dotfiles/files/.config/i3/config new file mode 100644 index 0000000..5088c39 --- /dev/null +++ b/roles/dotfiles/files/.config/i3/config @@ -0,0 +1,184 @@ +# This file has been auto-generated by i3-config-wizard(1). +# It will not be overwritten, so edit it as you like. +# +# Should you change your keyboard layout some time, delete +# this file and re-run i3-config-wizard(1). +# + +# i3 config file (v4) +# +# Please see https://i3wm.org/docs/userguide.html for a complete reference! + +set $mod Mod4 + +# Font for window titles. Will also be used by the bar unless a different font +# is used in the bar {} block below. +font pango:monospace 8 + +# This font is widely installed, provides lots of unicode glyphs, right-to-left +# text rendering and scalability on retina/hidpi displays (thanks to pango). +#font pango:DejaVu Sans Mono 8 + +# The combination of xss-lock, nm-applet and pactl is a popular choice, so +# they are included here as an example. Modify as you see fit. + +# xss-lock grabs a logind suspend inhibit lock and will use i3lock to lock the +# screen before suspend. Use loginctl lock-session to lock your screen. +exec --no-startup-id xss-lock --transfer-sleep-lock -- i3lock --nofork + +# NetworkManager is the most popular way to manage wireless networks on Linux, +# and nm-applet is a desktop environment-independent system tray GUI for it. 
+exec --no-startup-id nm-applet + +# Use pactl to adjust volume in PulseAudio. +set $refresh_i3status killall -SIGUSR1 i3status +bindsym XF86AudioRaiseVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ +10% && $refresh_i3status +bindsym XF86AudioLowerVolume exec --no-startup-id pactl set-sink-volume @DEFAULT_SINK@ -10% && $refresh_i3status +bindsym XF86AudioMute exec --no-startup-id pactl set-sink-mute @DEFAULT_SINK@ toggle && $refresh_i3status +bindsym XF86AudioMicMute exec --no-startup-id pactl set-source-mute @DEFAULT_SOURCE@ toggle && $refresh_i3status + +# Use Mouse+$mod to drag floating windows to their wanted position +floating_modifier $mod + +# start a terminal +bindsym $mod+Return exec i3-sensible-terminal + +# kill focused window +bindsym $mod+Shift+q kill + +# start dmenu (a program launcher) +bindsym $mod+d exec dmenu_run +# There also is the (new) i3-dmenu-desktop which only displays applications +# shipping a .desktop file. It is a wrapper around dmenu, so you need that +# installed. 
+# bindsym $mod+d exec --no-startup-id i3-dmenu-desktop + +# change focus +bindsym $mod+j focus left +bindsym $mod+k focus down +bindsym $mod+l focus up +bindsym $mod+semicolon focus right + +# alternatively, you can use the cursor keys: +bindsym $mod+Left focus left +bindsym $mod+Down focus down +bindsym $mod+Up focus up +bindsym $mod+Right focus right + +# move focused window +bindsym $mod+Shift+j move left +bindsym $mod+Shift+k move down +bindsym $mod+Shift+l move up +bindsym $mod+Shift+semicolon move right + +# alternatively, you can use the cursor keys: +bindsym $mod+Shift+Left move left +bindsym $mod+Shift+Down move down +bindsym $mod+Shift+Up move up +bindsym $mod+Shift+Right move right + +# split in horizontal orientation +bindsym $mod+h split h + +# split in vertical orientation +bindsym $mod+v split v + +# enter fullscreen mode for the focused container +bindsym $mod+f fullscreen toggle + +# change container layout (stacked, tabbed, toggle split) +bindsym $mod+s layout stacking +bindsym $mod+w layout tabbed +bindsym $mod+e layout toggle split + +# toggle tiling / floating +bindsym $mod+Shift+space floating toggle + +# change focus between tiling / floating windows +bindsym $mod+space focus mode_toggle + +# focus the parent container +bindsym $mod+a focus parent + +# focus the child container +#bindsym $mod+d focus child + +# Define names for default workspaces for which we configure key bindings later on. +# We use variables to avoid repeating the names in multiple places. 
+set $ws1 "1" +set $ws2 "2" +set $ws3 "3" +set $ws4 "4" +set $ws5 "5" +set $ws6 "6" +set $ws7 "7" +set $ws8 "8" +set $ws9 "9" +set $ws10 "10" + +# switch to workspace +bindsym $mod+1 workspace number $ws1 +bindsym $mod+2 workspace number $ws2 +bindsym $mod+3 workspace number $ws3 +bindsym $mod+4 workspace number $ws4 +bindsym $mod+5 workspace number $ws5 +bindsym $mod+6 workspace number $ws6 +bindsym $mod+7 workspace number $ws7 +bindsym $mod+8 workspace number $ws8 +bindsym $mod+9 workspace number $ws9 +bindsym $mod+0 workspace number $ws10 + +# move focused container to workspace +bindsym $mod+Shift+1 move container to workspace number $ws1 +bindsym $mod+Shift+2 move container to workspace number $ws2 +bindsym $mod+Shift+3 move container to workspace number $ws3 +bindsym $mod+Shift+4 move container to workspace number $ws4 +bindsym $mod+Shift+5 move container to workspace number $ws5 +bindsym $mod+Shift+6 move container to workspace number $ws6 +bindsym $mod+Shift+7 move container to workspace number $ws7 +bindsym $mod+Shift+8 move container to workspace number $ws8 +bindsym $mod+Shift+9 move container to workspace number $ws9 +bindsym $mod+Shift+0 move container to workspace number $ws10 + +# reload the configuration file +bindsym $mod+Shift+c reload +# restart i3 inplace (preserves your layout/session, can be used to upgrade i3) +bindsym $mod+Shift+r restart +# exit i3 (logs you out of your X session) +bindsym $mod+Shift+e exec "i3-nagbar -t warning -m 'You pressed the exit shortcut. Do you really want to exit i3? This will end your X session.' -B 'Yes, exit i3' 'i3-msg exit'" + +# resize window (you can also use the mouse for that) +mode "resize" { + # These bindings trigger as soon as you enter the resize mode + + # Pressing left will shrink the window’s width. + # Pressing right will grow the window’s width. + # Pressing up will shrink the window’s height. + # Pressing down will grow the window’s height. 
+ bindsym j resize shrink width 10 px or 10 ppt + bindsym k resize grow height 10 px or 10 ppt + bindsym l resize shrink height 10 px or 10 ppt + bindsym semicolon resize grow width 10 px or 10 ppt + + # same bindings, but for the arrow keys + bindsym Left resize shrink width 10 px or 10 ppt + bindsym Down resize grow height 10 px or 10 ppt + bindsym Up resize shrink height 10 px or 10 ppt + bindsym Right resize grow width 10 px or 10 ppt + + # back to normal: Enter or Escape or $mod+r + bindsym Return mode "default" + bindsym Escape mode "default" + bindsym $mod+r mode "default" +} + +bindsym $mod+r mode "resize" + +# Start i3bar to display a workspace bar (plus the system information i3status +# finds out, if available) +bar { + status_command i3status +} + +# make sure qemu is floating +for_window [title="^QEMU"] floating enable diff --git a/roles/dotfiles/files/.config/i3status/config b/roles/dotfiles/files/.config/i3status/config new file mode 100644 index 0000000..c4c24ca --- /dev/null +++ b/roles/dotfiles/files/.config/i3status/config @@ -0,0 +1,65 @@ +general { + output_format = "i3bar" + colors = true + interval = 5 +} + +order += "disk /home" +order += "disk /usr/local" +order += "wireless iwm0" +order += "battery 0" +order += "cpu_temperature 0" +order += "load" +order += "tztime local" + +wireless iwm0 { + format_up = "W: [%essid] %ip" + format_down = "W: down" +} + +ethernet eth0 { + format_up = "E: %ip (%speed)" + format_down = "E: down" +} + +battery 0 { + format = "%status %percentage %remaining %emptytime" + format_down = "No battery" + status_chr = "CHR" + status_bat = "BAT" + status_unk = "UNK" + status_full = "FULL" + path = "/sys/class/power_supply/BAT%d/uevent" + low_threshold = 10 +} + +tztime local { + format = "%Y-%m-%d %H:%M:%S" +} + +load { + format = "%5min" +} + +cpu_temperature 0 { + format = "T: %degrees °C" + path = "/sys/devices/platform/coretemp.0/temp1_input" +} + +memory { + format = "%used" + threshold_degraded = "10%" + 
format_degraded = "MEMORY: %free" +} + +disk "/home" { + format = "%free" +} + +disk "/usr/local" { + format = "%free" +} + +read_file uptime { + path = "/proc/uptime" +} diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt new file mode 100644 index 0000000..68e0493 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc-index-latest.txt @@ -0,0 +1,88 @@ + +=========================================================================== + This is a list of the latest RFCs only. + To get the full list of RFCs, please look up rfc-index.txt +=========================================================================== + +8197 A SIP Response Code for Unwanted Calls. H. Schulzrinne. July 2017. + (Format: TXT=19114 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8197) + +8198 Aggressive Use of DNSSEC-Validated Cache. K. Fujiwara, A. Kato, W. + Kumari. July 2017. (Format: TXT=27918 bytes) (Updates RFC4035) + (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8198) + +8199 YANG Module Classification. D. Bogdanovic, B. Claise, C. Moberg. + July 2017. (Format: TXT=23080 bytes) (Status: INFORMATIONAL) (DOI: + 10.17487/RFC8199) + +8200 Internet Protocol, Version 6 (IPv6) Specification. S. Deering, R. + Hinden. July 2017. (Format: TXT=93658 bytes) (Obsoletes RFC2460) + (Also STD0086) (Status: INTERNET STANDARD) (DOI: 10.17487/RFC8200) + +8201 Path MTU Discovery for IP version 6. J. McCann, S. Deering, J. + Mogul, R. Hinden, Ed.. July 2017. (Format: TXT=42751 bytes) + (Obsoletes RFC1981) (Also STD0087) (Status: INTERNET STANDARD) (DOI: + 10.17487/RFC8201) + +8202 IS-IS Multi-Instance. L. Ginsberg, S. Previdi, W. Henderickx. June + 2017. (Format: TXT=35114 bytes) (Obsoletes RFC6822) (Status: + PROPOSED STANDARD) (DOI: 10.17487/RFC8202) + +8203 BGP Administrative Shutdown Communication. J. Snijders, J. Heitz, J. + Scudder. July 2017. 
(Format: TXT=12532 bytes) (Updates RFC4486) + (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8203) + +8212 Default External BGP (EBGP) Route Propagation Behavior without + Policies. J. Mauch, J. Snijders, G. Hankins. July 2017. (Format: + TXT=12552 bytes) (Updates RFC4271) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8212) + +8213 Security of Messages Exchanged between Servers and Relay Agents. B. + Volz, Y. Pal. August 2017. (Format: TXT=17657 bytes) (Status: + PROPOSED STANDARD) (DOI: 10.17487/RFC8213) + +8214 Virtual Private Wire Service Support in Ethernet VPN. S. Boutros, A. + Sajassi, S. Salam, J. Drake, J. Rabadan. August 2017. (Format: + TXT=34563 bytes) (Status: PROPOSED STANDARD) (DOI: 10.17487/RFC8214) + +8215 Local-Use IPv4/IPv6 Translation Prefix. T. Anderson. August 2017. + (Format: TXT=14846 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8215) + +8217 Clarifications for When to Use the name-addr Production in SIP + Messages. R. Sparks. August 2017. (Format: TXT=12829 bytes) (Updates + RFC3261, RFC3325, RFC3515, RFC3892, RFC4508, RFC5002, RFC5318, + RFC5360, RFC5502) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8217) + +8218 Multipath Extension for the Optimized Link State Routing Protocol + Version 2 (OLSRv2). J. Yi, B. Parrein. August 2017. (Format: + TXT=56286 bytes) (Status: EXPERIMENTAL) (DOI: 10.17487/RFC8218) + +8219 Benchmarking Methodology for IPv6 Transition Technologies. M. + Georgescu, L. Pislaru, G. Lencse. August 2017. (Format: TXT=66085 + bytes) (Status: INFORMATIONAL) (DOI: 10.17487/RFC8219) + +8227 MPLS-TP Shared-Ring Protection (MSRP) Mechanism for Ring Topology. + W. Cheng, L. Wang, H. Li, H. van Helvoort, J. Dong. August 2017. + (Format: TXT=128880 bytes) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8227) + +8228 Guidance on Designing Label Generation Rulesets (LGRs) Supporting + Variant Labels. A. Freytag. August 2017. 
(Format: TXT=50900 bytes) + (Status: INFORMATIONAL) (DOI: 10.17487/RFC8228) + +8229 TCP Encapsulation of IKE and IPsec Packets. T. Pauly, S. Touati, R. + Mantha. August 2017. (Format: TXT=56294 bytes) (Status: PROPOSED + STANDARD) (DOI: 10.17487/RFC8229) + +8234 Updates to MPLS Transport Profile (MPLS-TP) Linear Protection in + Automatic Protection Switching (APS) Mode. J. Ryoo, T. Cheung, H. + van Helvoort, I. Busi, G. Wen. August 2017. (Format: TXT=16898 + bytes) (Updates RFC7271) (Status: PROPOSED STANDARD) (DOI: + 10.17487/RFC8234) + +=========================================================================== + - This file last updated Wed Aug 30 02:30:01 PDT 2017 + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt new file mode 100644 index 0000000..60a0e9b --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc1701.txt @@ -0,0 +1,451 @@ + + + + + + +Network Working Group S. Hanks +Request for Comments: 1701 NetSmiths, Ltd. +Category: Informational T. Li + D. Farinacci + P. Traina + cisco Systems + October 1994 + + + Generic Routing Encapsulation (GRE) + +Status of this Memo + + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +Abstract + + This document specifies a protocol for performing encapsulation of an + arbitrary network layer protocol over another arbitrary network layer + protocol. + +Introduction + + A number of different proposals [RFC 1234, RFC 1226] currently exist + for the encapsulation of one protocol over another protocol. Other + types of encapsulations [RFC 1241, SDRP, RFC 1479] have been proposed + for transporting IP over IP for policy purposes. This memo describes + a protocol which is very similar to, but is more general than, the + above proposals. In attempting to be more general, many protocol + specific nuances have been ignored. 
The result is that this proposal + is may be less suitable for a situation where a specific "X over Y" + encapsulation has been described. It is the attempt of this protocol + to provide a simple, general purpose mechanism which is reduces the + problem of encapsulation from its current O(n^2) problem to a more + manageable state. This proposal also attempts to provide a + lightweight encapsulation for use in policy based routing. This memo + explicitly does not address the issue of when a packet should be + encapsulated. This memo acknowledges, but does not address problems + with mutual encapsulation [RFC 1326]. + + In the most general case, a system has a packet that needs to be + encapsulated and routed. We will call this the payload packet. The + payload is first encapsulated in a GRE packet, which possibly also + includes a route. The resulting GRE packet can then be encapsulated + in some other protocol and then forwarded. We will call this outer + + + +Hanks, Li, Farinacci & Traina [Page 1] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + protocol the delivery protocol. The algorithms for processing this + packet are discussed later. 
+ +Overall packet + + The entire encapsulated packet would then have the form: + + --------------------------------- + | | + | Delivery Header | + | | + --------------------------------- + | | + | GRE Header | + | | + --------------------------------- + | | + | Payload packet | + | | + --------------------------------- + +Packet header + + The GRE packet header has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C|R|K|S|s|Recur| Flags | Ver | Protocol Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum (optional) | Offset (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Key (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Sequence Number (optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Routing (optional) + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Flags and version (2 octets) + + The GRE flags are encoded in the first two octets. Bit 0 is the + most significant bit, bit 15 is the least significant bit. Bits + 13 through 15 are reserved for the Version field. Bits 5 through + 12 are reserved for future use and MUST be transmitted as zero. + + + + + + +Hanks, Li, Farinacci & Traina [Page 2] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + Checksum Present (bit 0) + + If the Checksum Present bit is set to 1, then the Checksum field + is present and contains valid information. + + If either the Checksum Present bit or the Routing Present bit are + set, BOTH the Checksum and Offset fields are present in the GRE + packet. + + Routing Present (bit 1) + + If the Routing Present bit is set to 1, then it indicates that the + Offset and Routing fields are present and contain valid + information. 
+ + If either the Checksum Present bit or the Routing Present bit are + set, BOTH the Checksum and Offset fields are present in the GRE + packet. + + Key Present (bit 2) + + If the Key Present bit is set to 1, then it indicates that the Key + field is present in the GRE header. Otherwise, the Key field is + not present in the GRE header. + + Sequence Number Present (bit 3) + + If the Sequence Number Present bit is set to 1, then it indicates + that the Sequence Number field is present. Otherwise, the + Sequence Number field is not present in the GRE header. + + Strict Source Route (bit 4) + + The meaning of the Strict Source route bit is defined in other + documents. It is recommended that this bit only be set to 1 if + all of the the Routing Information consists of Strict Source + Routes. + + Recursion Control (bits 5-7) + + Recursion control contains a three bit unsigned integer which + contains the number of additional encapsulations which are + permissible. This SHOULD default to zero. + + Version Number (bits 13-15) + + The Version Number field MUST contain the value 0. Other values + are outside of the scope of this document. + + + +Hanks, Li, Farinacci & Traina [Page 3] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + Protocol Type (2 octets) + + The Protocol Type field contains the protocol type of the payload + packet. In general, the value will be the Ethernet protocol type + field for the packet. Currently defined protocol types are listed + below. Additional values may be defined in other documents. + + Offset (2 octets) + + The offset field indicates the octet offset from the start of the + Routing field to the first octet of the active Source Route Entry + to be examined. This field is present if the Routing Present or + the Checksum Present bit is set to 1, and contains valid + information only if the Routing Present bit is set to 1. 
+ + Checksum (2 octets) + + The Checksum field contains the IP (one's complement) checksum of + the GRE header and the payload packet. This field is present if + the Routing Present or the Checksum Present bit is set to 1, and + contains valid information only if the Checksum Present bit is set + to 1. + + Key (4 octets) + + The Key field contains a four octet number which was inserted by + the encapsulator. It may be used by the receiver to authenticate + the source of the packet. The techniques for determining + authenticity are outside of the scope of this document. The Key + field is only present if the Key Present field is set to 1. + + Sequence Number (4 octets) + + The Sequence Number field contains an unsigned 32 bit integer + which is inserted by the encapsulator. It may be used by the + receiver to establish the order in which packets have been + transmitted from the encapsulator to the receiver. The exact + algorithms for the generation of the Sequence Number and the + semantics of their reception is outside of the scope of this + document. + + Routing (variable) + + The Routing field is optional and is present only if the Routing + Present bit is set to 1. + + + + + + +Hanks, Li, Farinacci & Traina [Page 4] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + The Routing field is a list of Source Route Entries (SREs). Each + SRE has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Routing Information ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The routing field is terminated with a "NULL" SRE containing an + address family of type 0x0000 and a length of 0. 
+ + Address Family (2 octets) + + The Address Family field contains a two octet value which indicates + the syntax and semantics of the Routing Information field. The + values for this field and the corresponding syntax and semantics for + Routing Information are defined in other documents. + + SRE Offset (1 octet) + + The SRE Offset field indicates the octet offset from the start of the + Routing Information field to the first octet of the active entry in + Source Route Entry to be examined. + + SRE Length (1 octet) + + The SRE Length field contains the number of octets in the SRE. If + the SRE Length is 0, this indicates this is the last SRE in the + Routing field. + + Routing Information (variable) + + The Routing Information field contains data which may be used in + routing this packet. The exact semantics of this field is defined in + other documents. + +Forwarding of GRE packets + + Normally, a system which is forwarding delivery layer packets will + not differentiate GRE packets from other packets in any way. + However, a GRE packet may be received by a system. In this case, the + system should use some delivery-specific means to determine that this + is a GRE packet. Once this is determined, the Key, Sequence Number + and Checksum fields if they contain valid information as indicated by + the corresponding flags may be checked. If the Routing Present bit + + + +Hanks, Li, Farinacci & Traina [Page 5] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + + is set to 1, then the Address Family field should be checked to + determine the semantics and use of the SRE Length, SRE Offset and + Routing Information fields. The exact semantics for processing a SRE + for each Address Family is defined in other documents. + + Once all SREs have been processed, then the source route is complete, + the GRE header should be removed, the payload's TTL MUST be + decremented (if one exists) and the payload packet should be + forwarded as a normal packet. 
The exact forwarding method depends on + the Protocol Type field. + +Current List of Protocol Types + + The following are currently assigned protocol types for GRE. Future + protocol types must be taken from DIX ethernet encoding. For + historical reasons, a number of other values have been used for some + protocols. The following table of values MUST be used to identify + the following protocols: + + Protocol Family PTYPE + --------------- ----- + Reserved 0000 + SNA 0004 + OSI network layer 00FE + PUP 0200 + XNS 0600 + IP 0800 + Chaos 0804 + RFC 826 ARP 0806 + Frame Relay ARP 0808 + VINES 0BAD + VINES Echo 0BAE + VINES Loopback 0BAF + DECnet (Phase IV) 6003 + Transparent Ethernet Bridging 6558 + Raw Frame Relay 6559 + Apollo Domain 8019 + Ethertalk (Appletalk) 809B + Novell IPX 8137 + RFC 1144 TCP/IP compression 876B + IP Autonomous Systems 876C + Secure Data 876D + Reserved FFFF + + See the IANA list of Ether Types for the complete list of these + values. + + URL = ftp://ftp.isi.edu/in-notes/iana/assignments/ethernet-numbers. + + + +Hanks, Li, Farinacci & Traina [Page 6] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + +References + + RFC 1479 + Steenstrup, M. "Inter-Domain Policy Routing Protocol + Specification: Version 1", RFC1479, BBN Systems and Technologies, + July 1993. + + RFC 1226 + Kantor, B. "Internet Protocol Encapsulation of AX.25 Frames", RFC + 1226, University of California, San Diego, May 1991. + + RFC 1234 + Provan, D. "Tunneling IPX Traffic through IP Networks", RFC 1234, + Novell, Inc., June 1991. + + RFC 1241 + Woodburn, R., and D. Mills, "Scheme for an Internet Encapsulation + Protocol: Version 1", RFC 1241, SAIC, University of Delaware, July + 1991. + + RFC 1326 + Tsuchiya, P., "Mutual Encapsulation Considered Dangerous", RFC + 1326, Bellcore, May 1992. + + SDRP + Estrin, D., Li, T., and Y. Rekhter, "Source Demand Routing + Protocol Specification (Version 1)", Work in Progress. 
+ + RFC 1702 + Hanks, S., Li, T., Farinacci, D., and P. Traina, "Generic Routing + Encapsulation over IPv4 networks", RFC 1702, NetSmiths, Ltd., + cisco Systems, October 1994. + +Security Considerations + + Security issues are not discussed in this memo. + + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 7] + +RFC 1701 Generic Routing Encapsulation (GRE) October 1994 + + +Acknowledgements + + The authors would like to acknowledge Yakov Rekhter (IBM) and Deborah + Estrin (USC) for their advice, encouragement and insightful comments. + +Authors' Addresses + + Stan Hanks + NetSmiths, Ltd. + 2025 Lincoln Highway + Edison NJ, 08817 + + EMail: stan@netsmiths.com + + + Tony Li + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: tli@cisco.com + + + Dino Farinacci + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: dino@cisco.com + + + Paul Traina + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: pst@cisco.com + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 8] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt new file mode 100644 index 0000000..50b57ae --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc1702.txt @@ -0,0 +1,227 @@ + + + + + + +Network Working Group S. Hanks +Request for Comments: 1702 NetSmiths, Ltd. +Category: Informational T. Li + D. Farinacci + P. Traina + cisco Systems + October 1994 + + + Generic Routing Encapsulation over IPv4 networks + +Status of this Memo + + This memo provides information for the Internet community. This memo + does not specify an Internet standard of any kind. Distribution of + this memo is unlimited. + +Introduction + + In an earlier memo [RFC 1701], we described GRE, a mechanism for + encapsulating arbitrary packets within an arbitrary transport + protocol. This is a companion memo which describes the use of GRE + with IP. 
This memo addresses the case of using IP as the delivery + protocol or the payload protocol and the special case of IP as both + the delivery and payload. This memo also describes using IP + addresses and autonomous system numbers as part of a GRE source + route. + +IP as a delivery protocol + + GRE packets which are encapsulated within IP will use IP protocol + type 47. + +IP as a payload protocol + + IP packets will be encapsulated with a Protocol Type field of 0x800. + + For the Address Family value of 0x800, the Routing Information field + will consist of a list of IP addresses and indicates an IP source + route. The first octet of the Routing Information field constitute a + 8 bit integer offset from the start of the Source Route Entry (SRE), + called the SRE Offset. The SRE Offset indicates the first octet of + the next IP address. The SRE Length field consists of the total + length of the IP Address List in octets. + + + + + + + +Hanks, Li, Farinacci & Traina [Page 1] + +RFC 1702 GRE over IPv4 networks October 1994 + + + This has the form: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | IP Address List ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + For the Address Family value of 0xfffe, the Routing Information field + will consist of a list of Autonomous System numbers and indicates an + AS source route. The third octet of the Routing Information field + contains an 8 bit unsigned integer offset from the start of the + Source Route Entry (SRE), called the SRE Offset. The SRE Offset + indicates the first octet of the next AS number. THe SRE Length + field consists of the total length of the AS Number list in octets. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Family | SRE Offset | SRE Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | AS Number List ... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +IP as both delivery and payload protocol + + When IP is encapsulated in IP, the TTL, TOS, and IP security options + MAY be copied from the payload packet into the same fields in the + delivery packet. The payload packet's TTL MUST be decremented when + the packet is decapsulated to insure that no packet lives forever. + +IP source routes + + When a system is processing a SRE with an Address Family indicating + an IP source route, it MUST use the SRE Offset to determine the next + destination IP address. If the next IP destination is this system, + the SRE Offset field should be increased by four (the size of an IP + address). If the SRE Offset is equal to the SRE Length in this SRE, + then the Offset field in the GRE header should be adjusted to point + to the next SRE (if any). This should be repeated until the next IP + destination is not this system or until the entire SRE has been + processed. + + If the source route is incomplete, then the Strict Source Route bit + is checked. If the source route is a strict source route and the + next IP destination is NOT an adjacent system, the packet MUST be + + + +Hanks, Li, Farinacci & Traina [Page 2] + +RFC 1702 GRE over IPv4 networks October 1994 + + + dropped. Otherwise, the system should use the IP address indicated + by the Offset field to replace the destination address in the + delivery header and forward the packet. + +Autonomous system source routes + + When a system is processing a SRE with an Address Family indicating + an AS source route, it MUST use the SRE Offset field to determine the + next autonomous system. 
If the next autonomous system is the local + autonomous system, the SRE Offset field should be increased by two + (the size of an autonomous system number). If the SRE Offset is + equal to the SRE Length in this SRE, then the Offset field in the GRE + header should be adjusted to point to the next SRE (if any). This + should be repeated until the next autonomous system number is not + equal to the local autonomous system number or until the entire SRE + has been processed. + + If the source route is incomplete, then the Strict Source Route bit + is checked. If the source route is a strict source route and the + next autonomous system is NOT an adjacent autonomous system, the + packet should be dropped. Otherwise, the system should use the + autonomous system number indicated by the SRE Offset field to replace + the destination address in the delivery header and forward the + packet. The exact mechanism for determining the next delivery + destination address given the AS number is outside of the scope of + this document. + +Security Considerations + + Security issues are not discussed in this memo. + + + + + + + + + + + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 3] + +RFC 1702 GRE over IPv4 networks October 1994 + + +Authors' Addresses + + Stan Hanks + NetSmiths, Ltd. + 2025 Lincoln Highway + Edison, NJ 08817 + + EMail: stan@netsmiths.com + + + Tony Li + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: tli@cisco.com + + + Dino Farinacci + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: dino@cisco.com + + + Paul Traina + cisco Systems, Inc. + 1525 O'Brien Drive + Menlo Park, CA 94025 + + EMail: pst@cisco.com + +References + + RFC 1701 + Hanks, S., Li, T, Farinacci, D., and P. Traina, "Generic Routing + Encapsulation", RFC 1701, NetSmiths, Ltd., and cisco Systems, + October 1994. 
+ + + + + + + + + + + + +Hanks, Li, Farinacci & Traina [Page 4] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt new file mode 100644 index 0000000..e31fae4 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2119.txt @@ -0,0 +1,171 @@ + + + + + + +Network Working Group S. Bradner +Request for Comments: 2119 Harvard University +BCP: 14 March 1997 +Category: Best Current Practice + + + Key words for use in RFCs to Indicate Requirement Levels + +Status of this Memo + + This document specifies an Internet Best Current Practices for the + Internet Community, and requests discussion and suggestions for + improvements. Distribution of this memo is unlimited. + +Abstract + + In many standards track documents several words are used to signify + the requirements in the specification. These words are often + capitalized. This document defines these words as they should be + interpreted in IETF documents. Authors who follow these guidelines + should incorporate this phrase near the beginning of their document: + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL + NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and + "OPTIONAL" in this document are to be interpreted as described in + RFC 2119. + + Note that the force of these words is modified by the requirement + level of the document in which they are used. + +1. MUST This word, or the terms "REQUIRED" or "SHALL", mean that the + definition is an absolute requirement of the specification. + +2. MUST NOT This phrase, or the phrase "SHALL NOT", mean that the + definition is an absolute prohibition of the specification. + +3. SHOULD This word, or the adjective "RECOMMENDED", mean that there + may exist valid reasons in particular circumstances to ignore a + particular item, but the full implications must be understood and + carefully weighed before choosing a different course. + +4. 
SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that + there may exist valid reasons in particular circumstances when the + particular behavior is acceptable or even useful, but the full + implications should be understood and the case carefully weighed + before implementing any behavior described with this label. + + + + + +Bradner Best Current Practice [Page 1] + +RFC 2119 RFC Key Words March 1997 + + +5. MAY This word, or the adjective "OPTIONAL", mean that an item is + truly optional. One vendor may choose to include the item because a + particular marketplace requires it or because the vendor feels that + it enhances the product while another vendor may omit the same item. + An implementation which does not include a particular option MUST be + prepared to interoperate with another implementation which does + include the option, though perhaps with reduced functionality. In the + same vein an implementation which does include a particular option + MUST be prepared to interoperate with another implementation which + does not include the option (except, of course, for the feature the + option provides.) + +6. Guidance in the use of these Imperatives + + Imperatives of the type defined in this memo must be used with care + and sparingly. In particular, they MUST only be used where it is + actually required for interoperation or to limit behavior which has + potential for causing harm (e.g., limiting retransmissions). For + example, they must not be used to try to impose a particular method + on implementors where the method is not required for + interoperability. + +7. Security Considerations + + These terms are frequently used to specify behavior with security + implications. The effects on security of not implementing a MUST or + SHOULD, or doing something the specification says MUST NOT or SHOULD + NOT be done may be very subtle. 
Document authors should take the time + to elaborate the security implications of not following + recommendations or requirements as most implementors will not have + had the benefit of the experience and discussion that produced the + specification. + +8. Acknowledgments + + The definitions of these terms are an amalgam of definitions taken + from a number of RFCs. In addition, suggestions have been + incorporated from a number of people including Robert Ullmann, Thomas + Narten, Neal McBurnett, and Robert Elz. + + + + + + + + + + + + +Bradner Best Current Practice [Page 2] + +RFC 2119 RFC Key Words March 1997 + + +9. Author's Address + + Scott Bradner + Harvard University + 1350 Mass. Ave. + Cambridge, MA 02138 + + phone - +1 617 495 3864 + + email - sob@harvard.edu + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Bradner Best Current Practice [Page 3] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt new file mode 100644 index 0000000..614926a --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2784.txt @@ -0,0 +1,507 @@ + + + + + + +Network Working Group D. Farinacci +Request for Comments: 2784 T. Li +Category: Standards Track Procket Networks + S. Hanks + Enron Communications + D. Meyer + Cisco Systems + P. Traina + Juniper Networks + March 2000 + + + Generic Routing Encapsulation (GRE) + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. 
+ +Abstract + + This document specifies a protocol for encapsulation of an arbitrary + network layer protocol over another arbitrary network layer protocol. + +1. Introduction + + A number of different proposals [RFC1234, RFC1226] currently exist + for the encapsulation of one protocol over another protocol. Other + types of encapsulations [RFC1241, RFC1479] have been proposed for + transporting IP over IP for policy purposes. This memo describes a + protocol which is very similar to, but is more general than, the + above proposals. In attempting to be more general, many protocol + specific nuances have been ignored. The result is that this proposal + may be less suitable for a situation where a specific "X over Y" + encapsulation has been described. It is the attempt of this protocol + to provide a simple, general purpose mechanism which reduces the + problem of encapsulation from its current O(n^2) size to a more + manageable size. This memo purposely does not address the issue of + when a packet should be encapsulated. This memo acknowledges, but + does not address problems such as mutual encapsulation [RFC1326]. + + + + +Farinacci, et al. Standards Track [Page 1] + +RFC 2784 Generic Routing Encapsulation March 2000 + + + In the most general case, a system has a packet that needs to be + encapsulated and delivered to some destination. We will call this + the payload packet. The payload is first encapsulated in a GRE + packet. The resulting GRE packet can then be encapsulated in some + other protocol and then forwarded. We will call this outer protocol + the delivery protocol. The algorithms for processing this packet are + discussed later. + + Finally this specification describes the intersection of GRE + currently deployed by multiple vendors. + + The keywords MUST, MUST NOT, MAY, OPTIONAL, REQUIRED, RECOMMENDED, + SHALL, SHALL NOT, SHOULD, SHOULD NOT are to be interpreted as defined + in RFC 2119 [RFC2119]. + +2. 
Structure of a GRE Encapsulated Packet + + A GRE encapsulated packet has the form: + + --------------------------------- + | | + | Delivery Header | + | | + --------------------------------- + | | + | GRE Header | + | | + --------------------------------- + | | + | Payload packet | + | | + --------------------------------- + + This specification is generally concerned with the structure of the + GRE header, although special consideration is given to some of the + issues surrounding IPv4 payloads. + +2.1. GRE Header + + The GRE packet header has the form: + + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |C| Reserved0 | Ver | Protocol Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum (optional) | Reserved1 (Optional) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + +Farinacci, et al. Standards Track [Page 2] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +2.2. Checksum Present (bit 0) + + If the Checksum Present bit is set to one, then the Checksum and the + Reserved1 fields are present and the Checksum field contains valid + information. Note that a compliant implementation MUST accept and + process this field. + +2.3. Reserved0 (bits 1-12) + + A receiver MUST discard a packet where any of bits 1-5 are non-zero, + unless that receiver implements RFC 1701. Bits 6-12 are reserved for + future use. These bits MUST be sent as zero and MUST be ignored on + receipt. + +2.3.1. Version Number (bits 13-15) + + The Version Number field MUST contain the value zero. + +2.4. Protocol Type (2 octets) + + The Protocol Type field contains the protocol type of the payload + packet. These Protocol Types are defined in [RFC1700] as "ETHER + TYPES" and in [ETYPES]. An implementation receiving a packet + containing a Protocol Type which is not listed in [RFC1700] or + [ETYPES] SHOULD discard the packet. + +2.5. 
Checksum (2 octets) + + The Checksum field contains the IP (one's complement) checksum of + all the 16 bit words in the GRE header and the payload packet. + For purposes of computing the checksum, the value of the checksum + field is zero. This field is present only if the Checksum Present bit + is set to one. + +2.6. Reserved1 (2 octets) + + The Reserved1 field is reserved for future use, and if present, MUST + be transmitted as zero. The Reserved1 field is present only when the + Checksum field is present (that is, Checksum Present bit is set to + one). + +3. IPv4 as a Payload + + When IPv4 is being carried as the GRE payload, the Protocol Type + field MUST be set to 0x800. + + + + + + +Farinacci, et al. Standards Track [Page 3] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +3.1. Forwarding Decapsulated IPv4 Payload Packets + + When a tunnel endpoint decapsulates a GRE packet which has an IPv4 + packet as the payload, the destination address in the IPv4 payload + packet header MUST be used to forward the packet and the TTL of the + payload packet MUST be decremented. Care should be taken when + forwarding such a packet, since if the destination address of the + payload packet is the encapsulator of the packet (i.e., the other end + of the tunnel), looping can occur. In this case, the packet MUST be + discarded. + +4. IPv4 as a Delivery Protocol + + The IPv4 protocol 47 [RFC1700] is used when GRE packets are + encapsulated in IPv4. See [RFC1122] for requirements relating to the + delivery of packets over IPv4 networks. + +5. Interoperation with RFC 1701 Compliant Implementations + + In RFC 1701, the field described here as Reserved0 contained a number + of flag bits which this specification deprecates. In particular, the + Routing Present, Key Present, Sequence Number Present, and Strict + Source Route bits have been deprecated, along with the Recursion + Control field. 
As a result, the GRE header will never contain the + Key, Sequence Number or Routing fields specified in RFC 1701. + + There are, however, existing implementations of RFC 1701. The + following sections describe correct interoperation with such + implementations. + +5.1. RFC 1701 Compliant Receiver + + An implementation complying to this specification will transmit the + Reserved0 field set to zero. An RFC 1701 compliant receiver will + interpret this as having the Routing Present, Key Present, Sequence + Number Present, and Strict Source Route bits set to zero, and will + not expect the RFC 1701 Key, Sequence Number or Routing fields to be + present. + +5.2. RFC 1701 Compliant Transmitter + + An RFC 1701 transmitter may set any of the Routing Present, Key + Present, Sequence Number Present, and Strict Source Route bits to + one, and thus may transmit the RFC 1701 Key, Sequence Number or + Routing fields in the GRE header. As stated in Section 2.3, a packet + with non-zero bits in any of bits 1-5 MUST be discarded unless the + receiver implements RFC 1701. + + + + +Farinacci, et al. Standards Track [Page 4] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +6. Security Considerations + + Security in a network using GRE should be relatively similar to + security in a normal IPv4 network, as routing using GRE follows the + same routing that IPv4 uses natively. Route filtering will remain + unchanged. However packet filtering requires either that a firewall + look inside the GRE packet or that the filtering is done on the GRE + tunnel endpoints. In those environments in which this is considered + to be a security issue it may be desirable to terminate the tunnel at + the firewall. + +7. IANA Considerations + + This section considers the assignment of additional GRE Version + Numbers and Protocol Types. + +7.1. GRE Version Numbers + + This document specifies GRE version number 0. GRE version number 1 is + used by PPTP [RFC2637]. 
Additional GRE version numbers are assigned + by IETF Consensus as defined in RFC 2434 [RFC2434]. + +7.2. Protocol Types + + GRE uses an ETHER Type for the Protocol Type. New ETHER TYPES are + assigned by Xerox Systems Institute [RFC1700]. + +8. Acknowledgments + + This document is derived from the original ideas of the authors of + RFC 1701 and RFC 1702. Hitoshi Asaeda, Scott Bradner, Randy Bush, + Brian Carpenter, Bill Fenner, Andy Malis, Thomas Narten, Dave Thaler, + Tim Gleeson and others provided many constructive and insightful + comments. + + + + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 5] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +9. Appendix -- Known Issues + + This document specifies the behavior of currently deployed GRE + implementations. As such, it does not attempt to address the + following known issues: + + o Interaction Path MTU Discovery (PMTU) [RFC1191] + + Existing implementations of GRE, when using IPv4 as the Delivery + Header, do not implement Path MTU discovery and do not set the + Don't Fragment bit in the Delivery Header. This can cause large + packets to become fragmented within the tunnel and reassembled at + the tunnel exit (independent of whether the payload packet is using + PMTU). If a tunnel entry point were to use Path MTU discovery, + however, that tunnel entry point would also need to relay ICMP + unreachable error messages (in particular the "fragmentation needed + and DF set" code) back to the originator of the packet, which is + not a requirement in this specification. Failure to properly relay + Path MTU information to an originator can result in the following + behavior: the originator sets the don't fragment bit, the packet + gets dropped within the tunnel, but since the originator doesn't + receive proper feedback, it retransmits with the same PMTU, causing + subsequently transmitted packets to be dropped. 
+ + o IPv6 as Delivery and/or Payload Protocol + + This specification describes the intersection of GRE currently + deployed by multiple vendors. IPv6 as delivery and/or payload + protocol is not included in the currently deployed versions of GRE. + + o Interaction with ICMP + + o Interaction with the Differentiated Services Architecture + + o Multiple and Looping Encapsulations + +10. REFERENCES + + [ETYPES] ftp://ftp.isi.edu/in-notes/iana/assignments/ethernet- + numbers + + [RFC1122] Braden, R., "Requirements for Internet hosts - + communication layers", STD 3, RFC 1122, October 1989. + + [RFC1191] Mogul, J. and S. Deering, "Path MTU Discovery", RFC 1191, + November 1990. + + + + + +Farinacci, et al. Standards Track [Page 6] + +RFC 2784 Generic Routing Encapsulation March 2000 + + + [RFC1226] Kantor, B., "Internet Protocol Encapsulation of AX.25 + Frames", RFC 1226, May 1991. + + [RFC1234] Provan, D., "Tunneling IPX Traffic through IP Networks", + RFC 1234, June 1991. + + [RFC1241] Woodburn, R. and D. Mills, "Scheme for an Internet + Encapsulation Protocol: Version 1", RFC 1241, July 1991. + + [RFC1326] Tsuchiya, P., "Mutual Encapsulation Considered Dangerous", + RFC 1326, May 1992. + + [RFC1479] Steenstrup, M., "Inter-Domain Policy Routing Protocol + Specification: Version 1", RFC 1479, July 1993. + + [RFC1700] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC + 1700, October 1994. + + [RFC1701] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic + Routing Encapsulation", RFC 1701, October 1994. + + [RFC1702] Hanks, S., Li, T., Farinacci, D. and P. Traina, "Generic + Routing Encapsulation over IPv4 networks", RFC 1702, + October 1994. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March, 1997. + + [RFC2408] Maughan, D., Schertler, M., Schneider, M. and J. Turner, + "Internet Security Association and Key Management Protocol + (ISAKMP)", RFC 2408, November 1998. + + [RFC2434] Narten, T. 
and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October, 1998. + + [RFC2637] Hamzeh, K., et al., "Point-to-Point Tunneling Protocol + (PPTP)", RFC 2637, July, 1999. + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 7] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +11. Authors' Addresses + + Dino Farinacci + Procket Networks + 3850 No. First St., Ste. C + San Jose, CA 95134 + + EMail: dino@procket.com + + + Tony Li + Procket Networks + 3850 No. First St., Ste. C + San Jose, CA 95134 + + Phone: +1 408 954 7903 + Fax: +1 408 987 6166 + EMail: tony1@home.net + + + Stan Hanks + Enron Communications + + EMail: stan_hanks@enron.net + + + David Meyer + Cisco Systems, Inc. + 170 Tasman Drive + San Jose, CA, 95134 + + EMail: dmm@cisco.com + + + Paul Traina + Juniper Networks + EMail: pst@juniper.net + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 8] + +RFC 2784 Generic Routing Encapsulation March 2000 + + +12. Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. 
+ + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Farinacci, et al. Standards Track [Page 9] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt new file mode 100644 index 0000000..b9ad20c --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc2960.txt @@ -0,0 +1,7507 @@ + + + + + + +Network Working Group R. Stewart +Request for Comments: 2960 Q. Xie +Category: Standards Track Motorola + K. Morneault + C. Sharp + Cisco + H. Schwarzbauer + Siemens + T. Taylor + Nortel Networks + I. Rytina + Ericsson + M. Kalla + Telcordia + L. Zhang + UCLA + V. Paxson + ACIRI + October 2000 + + + Stream Control Transmission Protocol + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2000). All Rights Reserved. + +Abstract + + This document describes the Stream Control Transmission Protocol + (SCTP). SCTP is designed to transport PSTN signaling messages over + IP networks, but is capable of broader applications. 
+ + SCTP is a reliable transport protocol operating on top of a + connectionless packet network such as IP. It offers the following + services to its users: + + -- acknowledged error-free non-duplicated transfer of user data, + -- data fragmentation to conform to discovered path MTU size, + + + + +Stewart, et al. Standards Track [Page 1] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + -- sequenced delivery of user messages within multiple streams, + with an option for order-of-arrival delivery of individual user + messages, + -- optional bundling of multiple user messages into a single SCTP + packet, and + -- network-level fault tolerance through supporting of multi- + homing at either or both ends of an association. + + The design of SCTP includes appropriate congestion avoidance behavior + and resistance to flooding and masquerade attacks. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 2] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Table of Contents + + 1. Introduction.................................................. 5 + 1.1 Motivation.................................................. 6 + 1.2 Architectural View of SCTP.................................. 6 + 1.3 Functional View of SCTP..................................... 7 + 1.3.1 Association Startup and Takedown........................ 8 + 1.3.2 Sequenced Delivery within Streams....................... 9 + 1.3.3 User Data Fragmentation................................. 9 + 1.3.4 Acknowledgement and Congestion Avoidance................ 9 + 1.3.5 Chunk Bundling ......................................... 10 + 1.3.6 Packet Validation....................................... 10 + 1.3.7 Path Management......................................... 11 + 1.4 Key Terms................................................... 11 + 1.5 Abbreviations............................................... 
15 + 1.6 Serial Number Arithmetic.................................... 15 + 2. Conventions.................................................... 16 + 3. SCTP packet Format............................................ 16 + 3.1 SCTP Common Header Field Descriptions....................... 17 + 3.2 Chunk Field Descriptions.................................... 18 + 3.2.1 Optional/Variable-length Parameter Format............... 20 + 3.3 SCTP Chunk Definitions...................................... 21 + 3.3.1 Payload Data (DATA)..................................... 22 + 3.3.2 Initiation (INIT)....................................... 24 + Optional or Variable Length Parameters.............. 26 + 3.3.3 Initiation Acknowledgement (INIT ACK)................... 30 + Optional or Variable Length Parameters.............. 33 + 3.3.4 Selective Acknowledgement (SACK)........................ 33 + 3.3.5 Heartbeat Request (HEARTBEAT)........................... 37 + 3.3.6 Heartbeat Acknowledgement (HEARTBEAT ACK)............... 38 + 3.3.7 Abort Association (ABORT)............................... 39 + 3.3.8 Shutdown Association (SHUTDOWN)......................... 40 + 3.3.9 Shutdown Acknowledgement (SHUTDOWN ACK)................. 40 + 3.3.10 Operation Error (ERROR)................................ 41 + Invalid Stream Identifier.......................... 42 + Missing Mandatory Parameter........................ 43 + Stale Cookie Error................................. 43 + Out of Resource.................................... 44 + Unresolvable Address............................... 44 + Unrecognized Chunk Type............................ 44 + Invalid Mandatory Parameter........................ 45 + Unrecognized Parameters............................ 45 + No User Data....................................... 46 + Cookie Received While Shutting Down............... 46 + 3.3.11 Cookie Echo (COOKIE ECHO).............................. 46 + 3.3.12 Cookie Acknowledgement (COOKIE ACK).................... 
47 + 3.3.13 Shutdown Complete (SHUTDOWN COMPLETE).................. 48 + 4. SCTP Association State Diagram................................. 48 + + + +Stewart, et al. Standards Track [Page 3] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5. Association Initialization..................................... 52 + 5.1 Normal Establishment of an Association...................... 52 + 5.1.1 Handle Stream Parameters................................ 54 + 5.1.2 Handle Address Parameters............................... 54 + 5.1.3 Generating State Cookie................................. 56 + 5.1.4 State Cookie Processing................................. 57 + 5.1.5 State Cookie Authentication............................. 57 + 5.1.6 An Example of Normal Association Establishment.......... 58 + 5.2 Handle Duplicate or unexpected INIT, INIT ACK, COOKIE ECHO, + and COOKIE ACK.............................................. 60 + 5.2.1 Handle Duplicate INIT in COOKIE-WAIT + or COOKIE-ECHOED States................................. 60 + 5.2.2 Unexpected INIT in States Other than CLOSED, + COOKIE-ECHOED, COOKIE-WAIT and SHUTDOWN-ACK-SENT........ 61 + 5.2.3 Unexpected INIT ACK..................................... 61 + 5.2.4 Handle a COOKIE ECHO when a TCB exists.................. 62 + An Example of a Association Restart................. 64 + 5.2.5 Handle Duplicate COOKIE ACK............................. 66 + 5.2.6 Handle Stale COOKIE Error............................... 66 + 5.3 Other Initialization Issues................................. 67 + 5.3.1 Selection of Tag Value.................................. 67 + 6. User Data Transfer............................................. 67 + 6.1 Transmission of DATA Chunks................................. 69 + 6.2 Acknowledgement on Reception of DATA Chunks................. 70 + 6.2.1 Tracking Peer's Receive Buffer Space.................... 73 + 6.3 Management Retransmission Timer............................. 
75 + 6.3.1 RTO Calculation......................................... 75 + 6.3.2 Retransmission Timer Rules.............................. 76 + 6.3.3 Handle T3-rtx Expiration................................ 77 + 6.4 Multi-homed SCTP Endpoints.................................. 78 + 6.4.1 Failover from Inactive Destination Address.............. 79 + 6.5 Stream Identifier and Stream Sequence Number................ 80 + 6.6 Ordered and Unordered Delivery.............................. 80 + 6.7 Report Gaps in Received DATA TSNs........................... 81 + 6.8 Adler-32 Checksum Calculation............................... 82 + 6.9 Fragmentation............................................... 83 + 6.10 Bundling .................................................. 84 + 7. Congestion Control .......................................... 85 + 7.1 SCTP Differences from TCP Congestion Control................ 85 + 7.2 SCTP Slow-Start and Congestion Avoidance.................... 87 + 7.2.1 Slow-Start.............................................. 87 + 7.2.2 Congestion Avoidance.................................... 89 + 7.2.3 Congestion Control...................................... 89 + 7.2.4 Fast Retransmit on Gap Reports.......................... 90 + 7.3 Path MTU Discovery.......................................... 91 + 8. Fault Management.............................................. 92 + 8.1 Endpoint Failure Detection.................................. 92 + 8.2 Path Failure Detection...................................... 92 + + + +Stewart, et al. Standards Track [Page 4] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 8.3 Path Heartbeat.............................................. 93 + 8.4 Handle "Out of the blue" Packets............................ 95 + 8.5 Verification Tag............................................ 96 + 8.5.1 Exceptions in Verification Tag Rules.................... 97 + 9. Termination of Association..................................... 
98 + 9.1 Abort of an Association..................................... 98 + 9.2 Shutdown of an Association.................................. 98 + 10. Interface with Upper Layer....................................101 + 10.1 ULP-to-SCTP................................................101 + 10.2 SCTP-to-ULP................................................111 + 11. Security Considerations.......................................114 + 11.1 Security Objectives........................................114 + 11.2 SCTP Responses To Potential Threats........................115 + 11.2.1 Countering Insider Attacks.............................115 + 11.2.2 Protecting against Data Corruption in the Network......115 + 11.2.3 Protecting Confidentiality.............................115 + 11.2.4 Protecting against Blind Denial of Service Attacks.....116 + Flooding...........................................116 + Blind Masquerade...................................118 + Improper Monopolization of Services................118 + 11.3 Protection against Fraud and Repudiation...................119 + 12. Recommended Transmission Control Block (TCB) Parameters.......120 + 12.1 Parameters necessary for the SCTP instance.................120 + 12.2 Parameters necessary per association (i.e. the TCB)........120 + 12.3 Per Transport Address Data.................................122 + 12.4 General Parameters Needed..................................123 + 13. IANA Considerations...........................................123 + 13.1 IETF-defined Chunk Extension...............................123 + 13.2 IETF-defined Chunk Parameter Extension.....................124 + 13.3 IETF-defined Additional Error Causes.......................124 + 13.4 Payload Protocol Identifiers...............................125 + 14. Suggested SCTP Protocol Parameter Values......................125 + 15. Acknowledgements..............................................126 + 16. 
Authors' Addresses............................................126 + 17. References....................................................128 + 18. Bibliography..................................................129 + Appendix A .......................................................131 + Appendix B .......................................................132 + Full Copyright Statement .........................................134 + +1. Introduction + + This section explains the reasoning behind the development of the + Stream Control Transmission Protocol (SCTP), the services it offers, + and the basic concepts needed to understand the detailed description + of the protocol. + + + + + +Stewart, et al. Standards Track [Page 5] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.1 Motivation + + TCP [RFC793] has performed immense service as the primary means of + reliable data transfer in IP networks. However, an increasing number + of recent applications have found TCP too limiting, and have + incorporated their own reliable data transfer protocol on top of UDP + [RFC768]. The limitations which users have wished to bypass include + the following: + + -- TCP provides both reliable data transfer and strict order-of- + transmission delivery of data. Some applications need reliable + transfer without sequence maintenance, while others would be + satisfied with partial ordering of the data. In both of these + cases the head-of-line blocking offered by TCP causes unnecessary + delay. + + -- The stream-oriented nature of TCP is often an inconvenience. + Applications must add their own record marking to delineate their + messages, and must make explicit use of the push facility to + ensure that a complete message is transferred in a reasonable + time. + + -- The limited scope of TCP sockets complicates the task of + providing highly-available data transfer capability using multi- + homed hosts. 
+ + -- TCP is relatively vulnerable to denial of service attacks, such + as SYN attacks. + + Transport of PSTN signaling across the IP network is an application + for which all of these limitations of TCP are relevant. While this + application directly motivated the development of SCTP, other + applications may find SCTP a good match to their requirements. + +1.2 Architectural View of SCTP + + SCTP is viewed as a layer between the SCTP user application ("SCTP + user" for short) and a connectionless packet network service such as + IP. The remainder of this document assumes SCTP runs on top of IP. + The basic service offered by SCTP is the reliable transfer of user + messages between peer SCTP users. It performs this service within + the context of an association between two SCTP endpoints. Section 10 + of this document sketches the API which should exist at the boundary + between the SCTP and the SCTP user layers. + + SCTP is connection-oriented in nature, but the SCTP association is a + broader concept than the TCP connection. SCTP provides the means for + each SCTP endpoint (Section 1.4) to provide the other endpoint + + + +Stewart, et al. Standards Track [Page 6] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + (during association startup) with a list of transport addresses + (i.e., multiple IP addresses in combination with an SCTP port) + through which that endpoint can be reached and from which it will + originate SCTP packets. The association spans transfers over all of + the possible source/destination combinations which may be generated + from each endpoint's lists. 
+ + _____________ _____________ + | SCTP User | | SCTP User | + | Application | | Application | + |-------------| |-------------| + | SCTP | | SCTP | + | Transport | | Transport | + | Service | | Service | + |-------------| |-------------| + | |One or more ---- One or more| | + | IP Network |IP address \/ IP address| IP Network | + | Service |appearances /\ appearances| Service | + |_____________| ---- |_____________| + + SCTP Node A |<-------- Network transport ------->| SCTP Node B + + Figure 1: An SCTP Association + +1.3 Functional View of SCTP + + The SCTP transport service can be decomposed into a number of + functions. These are depicted in Figure 2 and explained in the + remainder of this section. + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 7] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + SCTP User Application + + ----------------------------------------------------- + _____________ ____________________ + | | | Sequenced delivery | + | Association | | within streams | + | | |____________________| + | startup | + | | ____________________________ + | and | | User Data Fragmentation | + | | |____________________________| + | takedown | + | | ____________________________ + | | | Acknowledgement | + | | | and | + | | | Congestion Avoidance | + | | |____________________________| + | | + | | ____________________________ + | | | Chunk Bundling | + | | |____________________________| + | | + | | ________________________________ + | | | Packet Validation | + | | |________________________________| + | | + | | ________________________________ + | | | Path Management | + |_____________| |________________________________| + + Figure 2: Functional View of the SCTP Transport Service + +1.3.1 Association Startup and Takedown + + An association is initiated by a request from the SCTP user (see the + description of the ASSOCIATE (or SEND) primitive in Section 10). 
+ + A cookie mechanism, similar to one described by Karn and Simpson in + [RFC2522], is employed during the initialization to provide + protection against security attacks. The cookie mechanism uses a + four-way handshake, the last two legs of which are allowed to carry + user data for fast setup. The startup sequence is described in + Section 5 of this document. + + SCTP provides for graceful close (i.e., shutdown) of an active + association on request from the SCTP user. See the description of + the SHUTDOWN primitive in Section 10. SCTP also allows ungraceful + close (i.e., abort), either on request from the user (ABORT + + + +Stewart, et al. Standards Track [Page 8] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + primitive) or as a result of an error condition detected within the + SCTP layer. Section 9 describes both the graceful and the ungraceful + close procedures. + + SCTP does not support a half-open state (like TCP) wherein one side + may continue sending data while the other end is closed. When either + endpoint performs a shutdown, the association on each peer will stop + accepting new data from its user and only deliver data in queue at + the time of the graceful close (see Section 9). + +1.3.2 Sequenced Delivery within Streams + + The term "stream" is used in SCTP to refer to a sequence of user + messages that are to be delivered to the upper-layer protocol in + order with respect to other messages within the same stream. This is + in contrast to its usage in TCP, where it refers to a sequence of + bytes (in this document a byte is assumed to be eight bits). + + The SCTP user can specify at association startup time the number of + streams to be supported by the association. This number is + negotiated with the remote end (see Section 5.1.1). User messages + are associated with stream numbers (SEND, RECEIVE primitives, Section + 10). Internally, SCTP assigns a stream sequence number to each + message passed to it by the SCTP user. 
On the receiving side, SCTP + ensures that messages are delivered to the SCTP user in sequence + within a given stream. However, while one stream may be blocked + waiting for the next in-sequence user message, delivery from other + streams may proceed. + + SCTP provides a mechanism for bypassing the sequenced delivery + service. User messages sent using this mechanism are delivered to + the SCTP user as soon as they are received. + +1.3.3 User Data Fragmentation + + When needed, SCTP fragments user messages to ensure that the SCTP + packet passed to the lower layer conforms to the path MTU. On + receipt, fragments are reassembled into complete messages before + being passed to the SCTP user. + +1.3.4 Acknowledgement and Congestion Avoidance + + SCTP assigns a Transmission Sequence Number (TSN) to each user data + fragment or unfragmented message. The TSN is independent of any + stream sequence number assigned at the stream level. The receiving + + + + + + +Stewart, et al. Standards Track [Page 9] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + end acknowledges all TSNs received, even if there are gaps in the + sequence. In this way, reliable delivery is kept functionally + separate from sequenced stream delivery. + + The acknowledgement and congestion avoidance function is responsible + for packet retransmission when timely acknowledgement has not been + received. Packet retransmission is conditioned by congestion + avoidance procedures similar to those used for TCP. See Sections 6 + and 7 for a detailed description of the protocol procedures + associated with this function. + +1.3.5 Chunk Bundling + + As described in Section 3, the SCTP packet as delivered to the lower + layer consists of a common header followed by one or more chunks. + Each chunk may contain either user data or SCTP control information. + The SCTP user has the option to request bundling of more than one + user message into a single SCTP packet. 
The chunk bundling function + of SCTP is responsible for assembly of the complete SCTP packet and + its disassembly at the receiving end. + + During times of congestion an SCTP implementation MAY still perform + bundling even if the user has requested that SCTP not bundle. The + user's disabling of bundling only affects SCTP implementations that + may delay a small period of time before transmission (to attempt to + encourage bundling). When the user layer disables bundling, this + small delay is prohibited but not bundling that is performed during + congestion or retransmission. + +1.3.6 Packet Validation + + A mandatory Verification Tag field and a 32 bit checksum field (see + Appendix B for a description of the Adler-32 checksum) are included + in the SCTP common header. The Verification Tag value is chosen by + each end of the association during association startup. Packets + received without the expected Verification Tag value are discarded, + as a protection against blind masquerade attacks and against stale + SCTP packets from a previous association. The Adler-32 checksum + should be set by the sender of each SCTP packet to provide additional + protection against data corruption in the network. The receiver of + an SCTP packet with an invalid Adler-32 checksum silently discards + the packet. + + + + + + + + + +Stewart, et al. Standards Track [Page 10] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.3.7 Path Management + + The sending SCTP user is able to manipulate the set of transport + addresses used as destinations for SCTP packets through the + primitives described in Section 10. The SCTP path management + function chooses the destination transport address for each outgoing + SCTP packet based on the SCTP user's instructions and the currently + perceived reachability status of the eligible destination set. 
The + path management function monitors reachability through heartbeats + when other packet traffic is inadequate to provide this information + and advises the SCTP user when reachability of any far-end transport + address changes. The path management function is also responsible + for reporting the eligible set of local transport addresses to the + far end during association startup, and for reporting the transport + addresses returned from the far end to the SCTP user. + + At association start-up, a primary path is defined for each SCTP + endpoint, and is used for normal sending of SCTP packets. + + On the receiving end, the path management is responsible for + verifying the existence of a valid SCTP association to which the + inbound SCTP packet belongs before passing it for further processing. + + Note: Path Management and Packet Validation are done at the same + time, so although described separately above, in reality they cannot + be performed as separate items. + +1.4 Key Terms + + Some of the language used to describe SCTP has been introduced in the + previous sections. This section provides a consolidated list of the + key terms and their definitions. + + o Active destination transport address: A transport address on a + peer endpoint which a transmitting endpoint considers available + for receiving user messages. + + o Bundling: An optional multiplexing operation, whereby more than + one user message may be carried in the same SCTP packet. Each + user message occupies its own DATA chunk. + + o Chunk: A unit of information within an SCTP packet, consisting of + a chunk header and chunk-specific content. + + o Congestion Window (cwnd): An SCTP variable that limits the data, + in number of bytes, a sender can send to a particular destination + transport address before receiving an acknowledgement. + + + + +Stewart, et al. 
Standards Track [Page 11] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Cumulative TSN Ack Point: The TSN of the last DATA chunk + acknowledged via the Cumulative TSN Ack field of a SACK. + + o Idle destination address: An address that has not had user + messages sent to it within some length of time, normally the + HEARTBEAT interval or greater. + + o Inactive destination transport address: An address which is + considered inactive due to errors and unavailable to transport + user messages. + + o Message = user message: Data submitted to SCTP by the Upper Layer + Protocol (ULP). + + o Message Authentication Code (MAC): An integrity check mechanism + based on cryptographic hash functions using a secret key. + Typically, message authentication codes are used between two + parties that share a secret key in order to validate information + transmitted between these parties. In SCTP it is used by an + endpoint to validate the State Cookie information that is returned + from the peer in the COOKIE ECHO chunk. The term "MAC" has + different meanings in different contexts. SCTP uses this term + with the same meaning as in [RFC2104]. + + o Network Byte Order: Most significant byte first, a.k.a., Big + Endian. + + o Ordered Message: A user message that is delivered in order with + respect to all previous user messages sent within the stream the + message was sent on. + + o Outstanding TSN (at an SCTP endpoint): A TSN (and the associated + DATA chunk) that has been sent by the endpoint but for which it + has not yet received an acknowledgement. + + o Path: The route taken by the SCTP packets sent by one SCTP + endpoint to a specific destination transport address of its peer + SCTP endpoint. Sending to different destination transport + addresses does not necessarily guarantee getting separate paths. + + o Primary Path: The primary path is the destination and source + address that will be put into a packet outbound to the peer + endpoint by default. 
The definition includes the source address + since an implementation MAY wish to specify both destination and + source address to better control the return path taken by reply + chunks and on which interface the packet is transmitted when the + data sender is multi-homed. + + + + +Stewart, et al. Standards Track [Page 12] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Receiver Window (rwnd): An SCTP variable a data sender uses to + store the most recently calculated receiver window of its peer, in + number of bytes. This gives the sender an indication of the space + available in the receiver's inbound buffer. + + o SCTP association: A protocol relationship between SCTP endpoints, + composed of the two SCTP endpoints and protocol state information + including Verification Tags and the currently active set of + Transmission Sequence Numbers (TSNs), etc. An association can be + uniquely identified by the transport addresses used by the + endpoints in the association. Two SCTP endpoints MUST NOT have + more than one SCTP association between them at any given time. + + o SCTP endpoint: The logical sender/receiver of SCTP packets. On a + multi-homed host, an SCTP endpoint is represented to its peers as + a combination of a set of eligible destination transport addresses + to which SCTP packets can be sent and a set of eligible source + transport addresses from which SCTP packets can be received. All + transport addresses used by an SCTP endpoint must use the same + port number, but can use multiple IP addresses. A transport + address used by an SCTP endpoint must not be used by another SCTP + endpoint. In other words, a transport address is unique to an + SCTP endpoint. + + o SCTP packet (or packet): The unit of data delivery across the + interface between SCTP and the connectionless packet network + (e.g., IP). An SCTP packet includes the common SCTP header, + possible SCTP control chunks, and user data encapsulated within + SCTP DATA chunks. 
+ + o SCTP user application (SCTP user): The logical higher-layer + application entity which uses the services of SCTP, also called + the Upper-layer Protocol (ULP). + + o Slow Start Threshold (ssthresh): An SCTP variable. This is the + threshold which the endpoint will use to determine whether to + perform slow start or congestion avoidance on a particular + destination transport address. Ssthresh is in number of bytes. + + o Stream: A uni-directional logical channel established from one to + another associated SCTP endpoint, within which all user messages + are delivered in sequence except for those submitted to the + unordered delivery service. + + Note: The relationship between stream numbers in opposite directions + is strictly a matter of how the applications use them. It is the + responsibility of the SCTP user to create and manage these + correlations if they are so desired. + + + +Stewart, et al. Standards Track [Page 13] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o Stream Sequence Number: A 16-bit sequence number used internally + by SCTP to assure sequenced delivery of the user messages within a + given stream. One stream sequence number is attached to each user + message. + + o Tie-Tags: Verification Tags from a previous association. These + Tags are used within a State Cookie so that the newly restarting + association can be linked to the original association within the + endpoint that did not restart. + + o Transmission Control Block (TCB): An internal data structure + created by an SCTP endpoint for each of its existing SCTP + associations to other SCTP endpoints. TCB contains all the status + and operational information for the endpoint to maintain and + manage the corresponding association. + + o Transmission Sequence Number (TSN): A 32-bit sequence number used + internally by SCTP. 
One TSN is attached to each chunk containing + user data to permit the receiving SCTP endpoint to acknowledge its + receipt and detect duplicate deliveries. + + o Transport address: A Transport Address is traditionally defined + by Network Layer address, Transport Layer protocol and Transport + Layer port number. In the case of SCTP running over IP, a + transport address is defined by the combination of an IP address + and an SCTP port number (where SCTP is the Transport protocol). + + o Unacknowledged TSN (at an SCTP endpoint): A TSN (and the associated + DATA chunk) which has been received by the endpoint but for which + an acknowledgement has not yet been sent. Or in the opposite case, + for a packet that has been sent but no acknowledgement has been + received. + + o Unordered Message: Unordered messages are "unordered" with respect + to any other message, this includes both other unordered messages + as well as other ordered messages. Unordered message might be + delivered prior to or later than ordered messages sent on the same + stream. + + o User message: The unit of data delivery across the interface + between SCTP and its user. + + o Verification Tag: A 32 bit unsigned integer that is randomly + generated. The Verification Tag provides a key that allows a + receiver to verify that the SCTP packet belongs to the current + association and is not an old or stale packet from a previous + association. + + + + +Stewart, et al. Standards Track [Page 14] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +1.5. 
Abbreviations + + MAC - Message Authentication Code [RFC2104] + + RTO - Retransmission Time-out + + RTT - Round-trip Time + + RTTVAR - Round-trip Time Variation + + SCTP - Stream Control Transmission Protocol + + SRTT - Smoothed RTT + + TCB - Transmission Control Block + + TLV - Type-Length-Value Coding Format + + TSN - Transmission Sequence Number + + ULP - Upper-layer Protocol + +1.6 Serial Number Arithmetic + + It is essential to remember that the actual Transmission Sequence + Number space is finite, though very large. This space ranges from 0 + to 2**32 - 1. Since the space is finite, all arithmetic dealing with + Transmission Sequence Numbers must be performed modulo 2**32. This + unsigned arithmetic preserves the relationship of sequence numbers as + they cycle from 2**32 - 1 to 0 again. There are some subtleties to + computer modulo arithmetic, so great care should be taken in + programming the comparison of such values. When referring to TSNs, + the symbol "=<" means "less than or equal" (modulo 2**32). + + Comparisons and arithmetic on TSNs in this document SHOULD use Serial + Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. + + An endpoint SHOULD NOT transmit a DATA chunk with a TSN that is more + than 2**31 - 1 above the beginning TSN of its current send window. + Doing so will cause problems in comparing TSNs. + + Transmission Sequence Numbers wrap around when they reach 2**32 - 1. + That is, the next TSN a DATA chunk MUST use after transmitting TSN = + 2**32 - 1 is TSN = 0. + + Any arithmetic done on Stream Sequence Numbers SHOULD use Serial + Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 16. + + + + +Stewart, et al. Standards Track [Page 15] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + All other arithmetic and comparisons in this document use normal + arithmetic. + +2. 
Conventions + + The keywords MUST, MUST NOT, REQUIRED, SHALL, SHALL NOT, SHOULD, + SHOULD NOT, RECOMMENDED, NOT RECOMMENDED, MAY, and OPTIONAL, when + they appear in this document, are to be interpreted as described in + [RFC2119]. + +3. SCTP packet Format + + An SCTP packet is composed of a common header and chunks. A chunk + contains either control information or user data. + + The SCTP packet format is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Common Header | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk #1 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ... | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk #n | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Multiple chunks can be bundled into one SCTP packet up to the MTU + size, except for the INIT, INIT ACK, and SHUTDOWN COMPLETE chunks. + These chunks MUST NOT be bundled with any other chunk in a packet. + See Section 6.10 for more details on chunk bundling. + + If a user data message doesn't fit into one SCTP packet it can be + fragmented into multiple chunks using the procedure defined in + Section 6.9. + + All integer fields in an SCTP packet MUST be transmitted in network + byte order, unless otherwise stated. + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 16] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.1 SCTP Common Header Field Descriptions + + SCTP Common Header Format + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Port Number | Destination Port Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Verification Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Source Port Number: 16 bits (unsigned integer) + + This is the SCTP sender's port number. It can be used by the + receiver in combination with the source IP address, the SCTP + destination port and possibly the destination IP address to + identify the association to which this packet belongs. + + Destination Port Number: 16 bits (unsigned integer) + + This is the SCTP port number to which this packet is destined. + The receiving host will use this port number to de-multiplex the + SCTP packet to the correct receiving endpoint/application. + + Verification Tag: 32 bits (unsigned integer) + + The receiver of this packet uses the Verification Tag to validate + the sender of this SCTP packet. On transmit, the value of this + Verification Tag MUST be set to the value of the Initiate Tag + received from the peer endpoint during the association + initialization, with the following exceptions: + + - A packet containing an INIT chunk MUST have a zero Verification + Tag. + - A packet containing a SHUTDOWN-COMPLETE chunk with the T-bit + set MUST have the Verification Tag copied from the packet with + the SHUTDOWN-ACK chunk. + - A packet containing an ABORT chunk may have the verification + tag copied from the packet which caused the ABORT to be sent. + For details see Section 8.4 and 8.5. + + An INIT chunk MUST be the only chunk in the SCTP packet carrying it. 
+ + + + + + +Stewart, et al. Standards Track [Page 17] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Checksum: 32 bits (unsigned integer) + + This field contains the checksum of this SCTP packet. Its + calculation is discussed in Section 6.8. SCTP uses the Adler- + 32 algorithm as described in Appendix B for calculating the + checksum + +3.2 Chunk Field Descriptions + + The figure below illustrates the field format for the chunks to be + transmitted in the SCTP packet. Each chunk is formatted with a Chunk + Type field, a chunk-specific Flag field, a Chunk Length field, and a + Value field. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Chunk Value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Type: 8 bits (unsigned integer) + + This field identifies the type of information contained in the + Chunk Value field. It takes a value from 0 to 254. The value of + 255 is reserved for future use as an extension field. + + The values of Chunk Types are defined as follows: + + ID Value Chunk Type + ----- ---------- + 0 - Payload Data (DATA) + 1 - Initiation (INIT) + 2 - Initiation Acknowledgement (INIT ACK) + 3 - Selective Acknowledgement (SACK) + 4 - Heartbeat Request (HEARTBEAT) + 5 - Heartbeat Acknowledgement (HEARTBEAT ACK) + 6 - Abort (ABORT) + 7 - Shutdown (SHUTDOWN) + 8 - Shutdown Acknowledgement (SHUTDOWN ACK) + 9 - Operation Error (ERROR) + 10 - State Cookie (COOKIE ECHO) + 11 - Cookie Acknowledgement (COOKIE ACK) + 12 - Reserved for Explicit Congestion Notification Echo (ECNE) + 13 - Reserved for Congestion Window Reduced (CWR) + + + +Stewart, et al. 
Standards Track [Page 18] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 14 - Shutdown Complete (SHUTDOWN COMPLETE) + 15 to 62 - reserved by IETF + 63 - IETF-defined Chunk Extensions + 64 to 126 - reserved by IETF + 127 - IETF-defined Chunk Extensions + 128 to 190 - reserved by IETF + 191 - IETF-defined Chunk Extensions + 192 to 254 - reserved by IETF + 255 - IETF-defined Chunk Extensions + + Chunk Types are encoded such that the highest-order two bits specify + the action that must be taken if the processing endpoint does not + recognize the Chunk Type. + + 00 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it. + + 01 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it, and report the unrecognized + chunk in an ERROR Chunk using the 'Unrecognized Chunk Type' + cause of error. + + 10 - Skip this chunk and continue processing. + + 11 - Skip this chunk and continue processing, but report in an ERROR + Chunk using the 'Unrecognized Chunk Type' cause of error. + + Note: The ECNE and CWR chunk types are reserved for future use of + Explicit Congestion Notification (ECN). + + Chunk Flags: 8 bits + + The usage of these bits depends on the chunk type as given by the + Chunk Type. Unless otherwise specified, they are set to zero on + transmit and are ignored on receipt. + + Chunk Length: 16 bits (unsigned integer) + + This value represents the size of the chunk in bytes including the + Chunk Type, Chunk Flags, Chunk Length, and Chunk Value fields. + Therefore, if the Chunk Value field is zero-length, the Length + field will be set to 4. The Chunk Length field does not count any + padding. + + + + + + + + +Stewart, et al. Standards Track [Page 19] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Value: variable length + + The Chunk Value field contains the actual information to be + transferred in the chunk. 
The usage and format of this field is + dependent on the Chunk Type. + + The total length of a chunk (including Type, Length and Value fields) + MUST be a multiple of 4 bytes. If the length of the chunk is not a + multiple of 4 bytes, the sender MUST pad the chunk with all zero + bytes and this padding is not included in the chunk length field. + The sender should never pad with more than 3 bytes. The receiver + MUST ignore the padding bytes. + + SCTP defined chunks are described in detail in Section 3.3. The + guidelines for IETF-defined chunk extensions can be found in Section + 13.1 of this document. + +3.2.1 Optional/Variable-length Parameter Format + + Chunk values of SCTP control chunks consist of a chunk-type-specific + header of required fields, followed by zero or more parameters. The + optional and variable-length parameters contained in a chunk are + defined in a Type-Length-Value format as shown below. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Parameter Type | Parameter Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Parameter Value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Parameter Type: 16 bits (unsigned integer) + + The Type field is a 16 bit identifier of the type of parameter. + It takes a value of 0 to 65534. + + The value of 65535 is reserved for IETF-defined extensions. Values + other than those defined in specific SCTP chunk description are + reserved for use by IETF. + + + + + + + + + +Stewart, et al. Standards Track [Page 20] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Parameter Length: 16 bits (unsigned integer) + + The Parameter Length field contains the size of the parameter in + bytes, including the Parameter Type, Parameter Length, and + Parameter Value fields. 
Thus, a parameter with a zero-length + Parameter Value field would have a Length field of 4. The + Parameter Length does not include any padding bytes. + + Chunk Parameter Value: variable-length. + + The Parameter Value field contains the actual information to be + transferred in the parameter. + + The total length of a parameter (including Type, Parameter Length and + Value fields) MUST be a multiple of 4 bytes. If the length of the + parameter is not a multiple of 4 bytes, the sender pads the Parameter + at the end (i.e., after the Parameter Value field) with all zero + bytes. The length of the padding is not included in the parameter + length field. A sender SHOULD NOT pad with more than 3 bytes. The + receiver MUST ignore the padding bytes. + + The Parameter Types are encoded such that the highest-order two bits + specify the action that must be taken if the processing endpoint does + not recognize the Parameter Type. + + 00 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it. + + 01 - Stop processing this SCTP packet and discard it, do not process + any further chunks within it, and report the unrecognized + parameter in an 'Unrecognized Parameter Type' (in either an + ERROR or in the INIT ACK). + + 10 - Skip this parameter and continue processing. + + 11 - Skip this parameter and continue processing but report the + unrecognized parameter in an 'Unrecognized Parameter Type' (in + either an ERROR or in the INIT ACK). + + The actual SCTP parameters are defined in the specific SCTP chunk + sections. The rules for IETF-defined parameter extensions are + defined in Section 13.2. + +3.3 SCTP Chunk Definitions + + This section defines the format of the different SCTP chunk types. + + + + + +Stewart, et al. 
Standards Track [Page 21] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.1 Payload Data (DATA) (0) + + The following format MUST be used for the DATA chunk: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 0 | Reserved|U|B|E| Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Stream Identifier S | Stream Sequence Number n | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Payload Protocol Identifier | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / User Data (seq n of Stream S) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Reserved: 5 bits + + Should be set to all '0's and ignored by the receiver. + + U bit: 1 bit + + The (U)nordered bit, if set to '1', indicates that this is an + unordered DATA chunk, and there is no Stream Sequence Number + assigned to this DATA chunk. Therefore, the receiver MUST ignore + the Stream Sequence Number field. + + After re-assembly (if necessary), unordered DATA chunks MUST be + dispatched to the upper layer by the receiver without any attempt + to re-order. + + If an unordered user message is fragmented, each fragment of the + message MUST have its U bit set to '1'. + + B bit: 1 bit + + The (B)eginning fragment bit, if set, indicates the first fragment + of a user message. + + E bit: 1 bit + + The (E)nding fragment bit, if set, indicates the last fragment of + a user message. + + + + +Stewart, et al. Standards Track [Page 22] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An unfragmented user message shall have both the B and E bits set to + '1'. 
Setting both B and E bits to '0' indicates a middle fragment of + a multi-fragment user message, as summarized in the following table: + + B E Description + ============================================================ + | 1 0 | First piece of a fragmented user message | + +----------------------------------------------------------+ + | 0 0 | Middle piece of a fragmented user message | + +----------------------------------------------------------+ + | 0 1 | Last piece of a fragmented user message | + +----------------------------------------------------------+ + | 1 1 | Unfragmented Message | + ============================================================ + | Table 1: Fragment Description Flags | + ============================================================ + + When a user message is fragmented into multiple chunks, the TSNs are + used by the receiver to reassemble the message. This means that the + TSNs for each fragment of a fragmented user message MUST be strictly + sequential. + + Length: 16 bits (unsigned integer) + + This field indicates the length of the DATA chunk in bytes from + the beginning of the type field to the end of the user data field + excluding any padding. A DATA chunk with no user data field will + have Length set to 16 (indicating 16 bytes). + + TSN : 32 bits (unsigned integer) + + This value represents the TSN for this DATA chunk. The valid + range of TSN is from 0 to 4294967295 (2**32 - 1). TSN wraps back + to 0 after reaching 4294967295. + + Stream Identifier S: 16 bits (unsigned integer) + + Identifies the stream to which the following user data belongs. + + Stream Sequence Number n: 16 bits (unsigned integer) + + This value represents the stream sequence number of the following + user data within the stream S. Valid range is 0 to 65535. + + When a user message is fragmented by SCTP for transport, the same + stream sequence number MUST be carried in each of the fragments of + the message. + + + + +Stewart, et al. 
Standards Track [Page 23] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Payload Protocol Identifier: 32 bits (unsigned integer) + + This value represents an application (or upper layer) specified + protocol identifier. This value is passed to SCTP by its upper + layer and sent to its peer. This identifier is not used by SCTP + but can be used by certain network entities as well as the peer + application to identify the type of information being carried in + this DATA chunk. This field must be sent even in fragmented DATA + chunks (to make sure it is available for agents in the middle of + the network). + + The value 0 indicates no application identifier is specified by + the upper layer for this payload data. + + User Data: variable length + + This is the payload user data. The implementation MUST pad the + end of the data to a 4 byte boundary with all-zero bytes. Any + padding MUST NOT be included in the length field. A sender MUST + never add more than 3 bytes of padding. + +3.3.2 Initiation (INIT) (1) + + This chunk is used to initiate a SCTP association between two + endpoints. 
The format of the INIT chunk is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 1 | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initiate Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit (a_rwnd) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Outbound Streams | Number of Inbound Streams | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initial TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Optional/Variable-Length Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The INIT chunk contains the following parameters. Unless otherwise + noted, each parameter MUST only be included once in the INIT chunk. + + + + + +Stewart, et al. Standards Track [Page 24] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Fixed Parameters Status + ---------------------------------------------- + Initiate Tag Mandatory + Advertised Receiver Window Credit Mandatory + Number of Outbound Streams Mandatory + Number of Inbound Streams Mandatory + Initial TSN Mandatory + + Variable Parameters Status Type Value + ------------------------------------------------------------- + IPv4 Address (Note 1) Optional 5 + IPv6 Address (Note 1) Optional 6 + Cookie Preservative Optional 9 + Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + Host Name Address (Note 3) Optional 11 + Supported Address Types (Note 4) Optional 12 + + Note 1: The INIT chunks can contain multiple addresses that can be + IPv4 and/or IPv6 in any combination. + + Note 2: The ECN capable field is reserved for future use of Explicit + Congestion Notification. 
+ + Note 3: An INIT chunk MUST NOT contain more than one Host Name + address parameter. Moreover, the sender of the INIT MUST NOT combine + any other address types with the Host Name address in the INIT. The + receiver of INIT MUST ignore any other address types if the Host Name + address parameter is present in the received INIT chunk. + + Note 4: This parameter, when present, specifies all the address types + the sending endpoint can support. The absence of this parameter + indicates that the sending endpoint can support any address type. + + The Chunk Flags field in INIT is reserved and all bits in it should + be set to 0 by the sender and ignored by the receiver. The sequence + of parameters within an INIT can be processed in any order. + + Initiate Tag: 32 bits (unsigned integer) + + The receiver of the INIT (the responding end) records the value of + the Initiate Tag parameter. This value MUST be placed into the + Verification Tag field of every SCTP packet that the receiver of + the INIT transmits within this association. + + The Initiate Tag is allowed to have any value except 0. See + Section 5.3.1 for more on the selection of the tag value. + + + + + +Stewart, et al. Standards Track [Page 25] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + If the value of the Initiate Tag in a received INIT chunk is found + to be 0, the receiver MUST treat it as an error and close the + association by transmitting an ABORT. + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This value represents the dedicated buffer space, in number of + bytes, the sender of the INIT has reserved in association with + this window. During the life of the association this buffer space + SHOULD NOT be lessened (i.e., dedicated buffers taken away from + this association); however, an endpoint MAY change the value of + a_rwnd it sends in SACK chunks.
+ + Number of Outbound Streams (OS): 16 bits (unsigned integer) + + Defines the number of outbound streams the sender of this INIT + chunk wishes to create in this association. The value of 0 MUST + NOT be used. + + Note: A receiver of an INIT with the OS value set to 0 SHOULD + abort the association. + + Number of Inbound Streams (MIS) : 16 bits (unsigned integer) + + Defines the maximum number of streams the sender of this INIT + chunk allows the peer end to create in this association. The + value 0 MUST NOT be used. + + Note: There is no negotiation of the actual number of streams but + instead the two endpoints will use the min(requested, offered). + See Section 5.1.1 for details. + + Note: A receiver of an INIT with the MIS value of 0 SHOULD abort + the association. + + Initial TSN (I-TSN) : 32 bits (unsigned integer) + + Defines the initial TSN that the sender will use. The valid range + is from 0 to 4294967295. This field MAY be set to the value of + the Initiate Tag field. + + Optional/Variable Length Parameters in INIT + + The following parameters follow the Type-Length-Value format as + defined in Section 3.2.1. Any Type-Length-Value fields MUST come + after the fixed-length fields defined in the previous section. + + + + +Stewart, et al. Standards Track [Page 26] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IPv4 Address Parameter (5) + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 5 | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | IPv4 Address | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + IPv4 Address: 32 bits (unsigned integer) + + Contains an IPv4 address of the sending endpoint. It is binary + encoded. 
+ + IPv6 Address Parameter (6) + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 6 | Length = 20 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | IPv6 Address | + | | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IPv6 Address: 128 bits (unsigned integer) + + Contains an IPv6 address of the sending endpoint. It is binary + encoded. + + Note: A sender MUST NOT use an IPv4-mapped IPv6 address [RFC2373] + but should instead use an IPv4 Address Parameter for an IPv4 + address. + + Combined with the Source Port Number in the SCTP common header, + the value passed in an IPv4 or IPv6 Address parameter indicates a + transport address the sender of the INIT will support for the + association being initiated. That is, during the lifetime of this + association, this IP address can appear in the source address + field of an IP datagram sent from the sender of the INIT, and can + be used as a destination address of an IP datagram sent from the + receiver of the INIT. + + + + + +Stewart, et al. Standards Track [Page 27] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + More than one IP Address parameter can be included in an INIT + chunk when the INIT sender is multi-homed. Moreover, a multi- + homed endpoint may have access to different types of network, thus + more than one address type can be present in one INIT chunk, i.e., + IPv4 and IPv6 addresses are allowed in the same INIT chunk. + + If the INIT contains at least one IP Address parameter, then the + source address of the IP datagram containing the INIT chunk and + any additional address(es) provided within the INIT can be used as + destinations by the endpoint receiving the INIT.
If the INIT does + not contain any IP Address parameters, the endpoint receiving the + INIT MUST use the source address associated with the received IP + datagram as its sole destination address for the association. + + Note that not using any IP address parameters in the INIT and + INIT-ACK is an alternative to make an association more likely to + work across a NAT box. + + Cookie Preservative (9) + + The sender of the INIT shall use this parameter to suggest to the + receiver of the INIT for a longer life-span of the State Cookie. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 9 | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Suggested Cookie Life-span Increment (msec.) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Suggested Cookie Life-span Increment: 32 bits (unsigned integer) + + This parameter indicates to the receiver how much increment in + milliseconds the sender wishes the receiver to add to its default + cookie life-span. + + This optional parameter should be added to the INIT chunk by the + sender when it re-attempts establishing an association with a peer + to which its previous attempt of establishing the association failed + due to a stale cookie operation error. The receiver MAY choose to + ignore the suggested cookie life-span increase for its own security + reasons. + + + + + + + + +Stewart, et al. Standards Track [Page 28] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Host Name Address (11) + + The sender of INIT uses this parameter to pass its Host Name (in + place of its IP addresses) to its peer. The peer is responsible + for resolving the name. Using this parameter might make it more + likely for the association to work across a NAT box. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 11 | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Host Name / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Host Name: variable length + + This field contains a host name in "host name syntax" per RFC1123 + Section 2.1 [RFC1123]. The method for resolving the host name is + out of scope of SCTP. + + Note: At least one null terminator is included in the Host Name + string and must be included in the length. + + Supported Address Types (12) + + The sender of INIT uses this parameter to list all the address + types it can support. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 12 | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Address Type #1 | Address Type #2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | ...... + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Address Type: 16 bits (unsigned integer) + + This is filled with the type value of the corresponding address + TLV (e.g., IPv4 = 5, IPv6 = 6, Hostname = 11). + + + + + + + +Stewart, et al. Standards Track [Page 29] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.3 Initiation Acknowledgement (INIT ACK) (2): + + The INIT ACK chunk is used to acknowledge the initiation of an SCTP + association. + + The parameter part of INIT ACK is formatted similarly to the INIT + chunk. 
It uses two extra variable parameters: The State Cookie and + the Unrecognized Parameter: + + The format of the INIT ACK chunk is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 2 | Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initiate Tag | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Outbound Streams | Number of Inbound Streams | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Initial TSN | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Optional/Variable-Length Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Initiate Tag: 32 bits (unsigned integer) + + The receiver of the INIT ACK records the value of the Initiate Tag + parameter. This value MUST be placed into the Verification Tag + field of every SCTP packet that the INIT ACK receiver transmits + within this association. + + The Initiate Tag MUST NOT take the value 0. See Section 5.3.1 for + more on the selection of the Initiate Tag value. + + If the value of the Initiate Tag in a received INIT ACK chunk is + found to be 0, the receiver MUST treat it as an error and close + the association by transmitting an ABORT. + + + + + + + + + +Stewart, et al. Standards Track [Page 30] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This value represents the dedicated buffer space, in number of + bytes, the sender of the INIT ACK has reserved in association with + this window. During the life of the association this buffer space + SHOULD NOT be lessened (i.e., dedicated buffers taken away from + this association).
+ + Number of Outbound Streams (OS): 16 bits (unsigned integer) + + Defines the number of outbound streams the sender of this INIT ACK + chunk wishes to create in this association. The value of 0 MUST + NOT be used. + + Note: A receiver of an INIT ACK with the OS value set to 0 SHOULD + destroy the association discarding its TCB. + + Number of Inbound Streams (MIS) : 16 bits (unsigned integer) + + Defines the maximum number of streams the sender of this INIT ACK + chunk allows the peer end to create in this association. The + value 0 MUST NOT be used. + + Note: There is no negotiation of the actual number of streams but + instead the two endpoints will use the min(requested, offered). + See Section 5.1.1 for details. + + Note: A receiver of an INIT ACK with the MIS value set to 0 + SHOULD destroy the association discarding its TCB. + + Initial TSN (I-TSN) : 32 bits (unsigned integer) + + Defines the initial TSN that the INIT-ACK sender will use. The + valid range is from 0 to 4294967295. This field MAY be set to the + value of the Initiate Tag field. + + Fixed Parameters Status + ---------------------------------------------- + Initiate Tag Mandatory + Advertised Receiver Window Credit Mandatory + Number of Outbound Streams Mandatory + Number of Inbound Streams Mandatory + Initial TSN Mandatory + + + + + + + +Stewart, et al. Standards Track [Page 31] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Variable Parameters Status Type Value + ------------------------------------------------------------- + State Cookie Mandatory 7 + IPv4 Address (Note 1) Optional 5 + IPv6 Address (Note 1) Optional 6 + Unrecognized Parameters Optional 8 + Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + Host Name Address (Note 3) Optional 11 + + Note 1: The INIT ACK chunks can contain any number of IP address + parameters that can be IPv4 and/or IPv6 in any combination. 
+ + Note 2: The ECN capable field is reserved for future use of Explicit + Congestion Notification. + + Note 3: The INIT ACK chunks MUST NOT contain more than one Host Name + address parameter. Moreover, the sender of the INIT ACK MUST NOT + combine any other address types with the Host Name address in the + INIT ACK. The receiver of the INIT ACK MUST ignore any other address + types if the Host Name address parameter is present. + + IMPLEMENTATION NOTE: An implementation MUST be prepared to receive an + INIT ACK that is quite large (more than 1500 bytes) due to the + variable size of the state cookie AND the variable address list. For + example, if a responder to the INIT has 1000 IPv4 addresses it wishes + to send, it would need at least 8,000 bytes to encode this in the + INIT ACK. + + In combination with the Source Port carried in the SCTP common + header, each IP Address parameter in the INIT ACK indicates to the + receiver of the INIT ACK a valid transport address supported by the + sender of the INIT ACK for the lifetime of the association being + initiated. + + If the INIT ACK contains at least one IP Address parameter, then the + source address of the IP datagram containing the INIT ACK and any + additional address(es) provided within the INIT ACK may be used as + destinations by the receiver of the INIT-ACK. If the INIT ACK does + not contain any IP Address parameters, the receiver of the INIT-ACK + MUST use the source address associated with the received IP datagram + as its sole destination address for the association. + + The State Cookie and Unrecognized Parameters use the Type-Length- + Value format as defined in Section 3.2.1 and are described below. + The other fields are defined the same as their counterparts in the + INIT chunk. + + + + + +Stewart, et al.
Standards Track [Page 32] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Optional or Variable Length Parameters + + State Cookie + + Parameter Type Value: 7 + + Parameter Length: variable size, depending on Size of Cookie + + Parameter Value: + + This parameter value MUST contain all the necessary state and + parameter information required for the sender of this INIT ACK + to create the association, along with a Message Authentication + Code (MAC). See Section 5.1.3 for details on State Cookie + definition. + + Unrecognized Parameters: + + Parameter Type Value: 8 + + Parameter Length: Variable Size. + + Parameter Value: + + This parameter is returned to the originator of the INIT chunk + when the INIT contains an unrecognized parameter which has a + value that indicates that it should be reported to the sender. + This parameter value field will contain unrecognized parameters + copied from the INIT chunk complete with Parameter Type, Length + and Value fields. + +3.3.4 Selective Acknowledgement (SACK) (3): + + This chunk is sent to the peer endpoint to acknowledge received DATA + chunks and to inform the peer endpoint of gaps in the received + subsequences of DATA chunks as represented by their TSNs. + + The SACK MUST contain the Cumulative TSN Ack and Advertised Receiver + Window Credit (a_rwnd) parameters. + + By definition, the value of the Cumulative TSN Ack parameter is the + last TSN received before a break in the sequence of received TSNs + occurs; the next TSN value following this one has not yet been + received at the endpoint sending the SACK. This parameter therefore + acknowledges receipt of all TSNs less than or equal to its value. + + The handling of a_rwnd by the receiver of the SACK is discussed in + detail in Section 6.2.1. + + + +Stewart, et al. Standards Track [Page 33] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The SACK also contains zero or more Gap Ack Blocks. 
Each Gap Ack + Block acknowledges a subsequence of TSNs received following a break + in the sequence of received TSNs. By definition, all TSNs + acknowledged by Gap Ack Blocks are greater than the value of the + Cumulative TSN Ack. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 3 |Chunk Flags | Chunk Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cumulative TSN Ack | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Advertised Receiver Window Credit (a_rwnd) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of Gap Ack Blocks = N | Number of Duplicate TSNs = X | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gap Ack Block #1 Start | Gap Ack Block #1 End | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / / + \ ... \ + / / + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gap Ack Block #N Start | Gap Ack Block #N End | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Duplicate TSN 1 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / / + \ ... \ + / / + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Duplicate TSN X | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to all zeros on transmit and ignored on receipt. + + Cumulative TSN Ack: 32 bits (unsigned integer) + + This parameter contains the TSN of the last DATA chunk received in + sequence before a gap. + + Advertised Receiver Window Credit (a_rwnd): 32 bits (unsigned + integer) + + This field indicates the updated receive buffer space in bytes of + the sender of this SACK, see Section 6.2.1 for details. + + + +Stewart, et al. 
Standards Track [Page 34] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Number of Gap Ack Blocks: 16 bits (unsigned integer) + + Indicates the number of Gap Ack Blocks included in this SACK. + + Number of Duplicate TSNs: 16 bits (unsigned integer) + + This field contains the number of duplicate TSNs the endpoint has + received. Each duplicate TSN is listed following the Gap Ack + Block list. + + Gap Ack Blocks: + + These fields contain the Gap Ack Blocks. They are repeated for + each Gap Ack Block up to the number of Gap Ack Blocks defined in + the Number of Gap Ack Blocks field. All DATA chunks with TSNs + greater than or equal to (Cumulative TSN Ack + Gap Ack Block + Start) and less than or equal to (Cumulative TSN Ack + Gap Ack + Block End) of each Gap Ack Block are assumed to have been received + correctly. + + Gap Ack Block Start: 16 bits (unsigned integer) + + Indicates the Start offset TSN for this Gap Ack Block. To + calculate the actual TSN number the Cumulative TSN Ack is added to + this offset number. This calculated TSN identifies the first TSN + in this Gap Ack Block that has been received. + + Gap Ack Block End: 16 bits (unsigned integer) + + Indicates the End offset TSN for this Gap Ack Block. To calculate + the actual TSN number the Cumulative TSN Ack is added to this + offset number. This calculated TSN identifies the TSN of the last + DATA chunk received in this Gap Ack Block. + + For example, assume the receiver has the following DATA chunks newly + arrived at the time when it decides to send a Selective ACK, + + + + + + + + + + + + + + + +Stewart, et al.
Standards Track [Page 35] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + ---------- + | TSN=17 | + ---------- + | | <- still missing + ---------- + | TSN=15 | + ---------- + | TSN=14 | + ---------- + | | <- still missing + ---------- + | TSN=12 | + ---------- + | TSN=11 | + ---------- + | TSN=10 | + ---------- + + then, the parameter part of the SACK MUST be constructed as follows + (assuming the new a_rwnd is set to 4660 by the sender): + + +--------------------------------+ + | Cumulative TSN Ack = 12 | + +--------------------------------+ + | a_rwnd = 4660 | + +----------------+---------------+ + | num of block=2 | num of dup=0 | + +----------------+---------------+ + |block #1 strt=2 |block #1 end=3 | + +----------------+---------------+ + |block #2 strt=5 |block #2 end=5 | + +----------------+---------------+ + + + Duplicate TSN: 32 bits (unsigned integer) + + Indicates the number of times a TSN was received in duplicate + since the last SACK was sent. Every time a receiver gets a + duplicate TSN (before sending the SACK) it adds it to the list of + duplicates. The duplicate count is re-initialized to zero after + sending each SACK. + + For example, if a receiver were to get the TSN 19 three times it + would list 19 twice in the outbound SACK. After sending the SACK + if it received yet one more TSN 19 it would list 19 as a duplicate + once in the next outgoing SACK. + + + + + +Stewart, et al. Standards Track [Page 36] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.5 Heartbeat Request (HEARTBEAT) (4): + + An endpoint should send this chunk to its peer endpoint to probe the + reachability of a particular destination transport address defined in + the present association. + + The parameter field contains the Heartbeat Information which is a + variable length opaque data structure understood only by the sender. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 4 | Chunk Flags | Heartbeat Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Heartbeat Information TLV (Variable-Length) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Heartbeat Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and the Heartbeat Information field. + + Heartbeat Information: variable length + + Defined as a variable-length parameter using the format described + in Section 3.2.1, i.e.: + + Variable Parameters Status Type Value + ------------------------------------------------------------- + Heartbeat Info Mandatory 1 + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Heartbeat Info Type=1 | HB Info Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Sender-specific Heartbeat Info / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + + + + +Stewart, et al. Standards Track [Page 37] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The Sender-specific Heartbeat Info field should normally include + information about the sender's current time when this HEARTBEAT + chunk is sent and the destination transport address to which this + HEARTBEAT is sent (see Section 8.3). + +3.3.6 Heartbeat Acknowledgement (HEARTBEAT ACK) (5): + + An endpoint should send this chunk to its peer endpoint as a response + to a HEARTBEAT chunk (see Section 8.3). A HEARTBEAT ACK is always + sent to the source IP address of the IP datagram containing the + HEARTBEAT chunk to which this ack is responding. 
+ + The parameter field contains a variable length opaque data structure. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 5 | Chunk Flags | Heartbeat Ack Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / Heartbeat Information TLV (Variable-Length) / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Heartbeat Ack Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and the Heartbeat Information field. + + Heartbeat Information: variable length + + This field MUST contain the Heartbeat Information parameter of + the Heartbeat Request to which this Heartbeat Acknowledgement is + responding. + + Variable Parameters Status Type Value + ------------------------------------------------------------- + Heartbeat Info Mandatory 1 + + + + + + + + + +Stewart, et al. Standards Track [Page 38] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.7 Abort Association (ABORT) (6): + + The ABORT chunk is sent to the peer of an association to close the + association. The ABORT chunk may contain Cause Parameters to inform + the receiver the reason of the abort. DATA chunks MUST NOT be + bundled with ABORT. Control chunks (except for INIT, INIT ACK and + SHUTDOWN COMPLETE) MAY be bundled with an ABORT but they MUST be + placed before the ABORT in the SCTP packet, or they will be ignored + by the receiver. + + If an endpoint receives an ABORT with a format error or for an + association that doesn't exist, it MUST silently discard it. + Moreover, under any circumstances, an endpoint that receives an ABORT + MUST NOT respond to that ABORT by sending an ABORT of its own. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 6 |Reserved |T| Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / zero or more Error Causes / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Reserved: 7 bits + + Set to 0 on transmit and ignored on receipt. + + T bit: 1 bit + + The T bit is set to 0 if the sender had a TCB that it destroyed. + If the sender did not have a TCB it should set this bit to 1. + + Note: Special rules apply to this chunk for verification, please see + Section 8.5.1 for details. + + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and all the Error Cause fields present. + + See Section 3.3.10 for Error Cause definitions. + + + + + + +Stewart, et al. Standards Track [Page 39] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.8 Shutdown Association (SHUTDOWN) (7): + + An endpoint in an association MUST use this chunk to initiate a + graceful close of the association with its peer. This chunk has the + following format. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 7 | Chunk Flags | Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cumulative TSN Ack | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Length: 16 bits (unsigned integer) + + Indicates the length of the parameter. Set to 8. + + Cumulative TSN Ack: 32 bits (unsigned integer) + + This parameter contains the TSN of the last chunk received in + sequence before any gaps. 
+ + Note: Since the SHUTDOWN message does not contain Gap Ack Blocks, + it cannot be used to acknowledge TSNs received out of order. In a + SACK, lack of Gap Ack Blocks that were previously included + indicates that the data receiver reneged on the associated DATA + chunks. Since SHUTDOWN does not contain Gap Ack Blocks, the + receiver of the SHUTDOWN shouldn't interpret the lack of a Gap Ack + Block as a renege. (see Section 6.2 for information on reneging) + +3.3.9 Shutdown Acknowledgement (SHUTDOWN ACK) (8): + + This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + chunk at the completion of the shutdown process, see Section 9.2 for + details. + + The SHUTDOWN ACK chunk has no parameters. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 8 |Chunk Flags | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + +Stewart, et al. Standards Track [Page 40] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + +3.3.10 Operation Error (ERROR) (9): + + An endpoint sends this chunk to its peer endpoint to notify it of + certain error conditions. It contains one or more error causes. An + Operation Error is not considered fatal in and of itself, but may be + used with an ABORT chunk to report a fatal condition. It has the + following parameters: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 9 | Chunk Flags | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + \ \ + / one or more Error Causes / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. 
+ + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the chunk header + and all the Error Cause fields present. + + Error causes are defined as variable-length parameters using the + format described in 3.2.1, i.e.: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Cause-specific Information / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Cause Code: 16 bits (unsigned integer) + + Defines the type of error conditions being reported. + + + + + +Stewart, et al. Standards Track [Page 41] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Cause Code + Value Cause Code + --------- ---------------- + 1 Invalid Stream Identifier + 2 Missing Mandatory Parameter + 3 Stale Cookie Error + 4 Out of Resource + 5 Unresolvable Address + 6 Unrecognized Chunk Type + 7 Invalid Mandatory Parameter + 8 Unrecognized Parameters + 9 No User Data + 10 Cookie Received While Shutting Down + + Cause Length: 16 bits (unsigned integer) + + Set to the size of the parameter in bytes, including the Cause + Code, Cause Length, and Cause-Specific Information fields + + Cause-specific Information: variable length + + This field carries the details of the error condition. + + The subsections that follow define the error causes for SCTP. + Guidelines for the IETF to define new error cause values are + discussed in Section 13.3. + + Invalid Stream Identifier (1) + + Cause of error + --------------- + Invalid Stream Identifier: Indicates endpoint received a DATA chunk + sent to a nonexistent stream.
+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=1 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Stream Identifier | (Reserved) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Stream Identifier: 16 bits (unsigned integer) + + Contains the Stream Identifier of the DATA chunk received in + error. + + + + + + + +Stewart, et al. Standards Track [Page 42] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Reserved: 16 bits + + This field is reserved. It is set to all 0's on transmit and + Ignored on receipt. + + Missing Mandatory Parameter (2) + + Cause of error + --------------- + Missing Mandatory Parameter: Indicates that one or more mandatory + TLV parameters are missing in a received INIT or INIT ACK. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=2 | Cause Length=8+N*2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Number of missing params=N | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Missing Param Type #1 | Missing Param Type #2 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Missing Param Type #N-1 | Missing Param Type #N | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Number of Missing params: 32 bits (unsigned integer) + + This field contains the number of parameters contained in the + Cause-specific Information field. + + Missing Param Type: 16 bits (unsigned integer) + + Each field will contain the missing mandatory parameter number. + + Stale Cookie Error (3) + + Cause of error + -------------- + Stale Cookie Error: Indicates the receipt of a valid State Cookie + that has expired. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=3 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Measure of Staleness (usec.) 
| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Measure of Staleness: 32 bits (unsigned integer) + + This field contains the difference, in microseconds, between the + current time and the time the State Cookie expired. + + + +Stewart, et al. Standards Track [Page 43] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The sender of this error cause MAY choose to report how long past + expiration the State Cookie is by including a non-zero value in + the Measure of Staleness field. If the sender does not wish to + provide this information it should set the Measure of Staleness + field to the value of zero. + + Out of Resource (4) + + Cause of error + --------------- + Out of Resource: Indicates that the sender is out of resource. This + is usually sent in combination with or within an ABORT. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=4 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unresolvable Address (5) + + Cause of error + --------------- + Unresolvable Address: Indicates that the sender is not able to + resolve the specified address parameter (e.g., type of address is not + supported by the sender). This is usually sent in combination with + or within an ABORT. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=5 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unresolvable Address / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unresolvable Address: variable length + + The unresolvable address field contains the complete Type, Length + and Value of the address parameter (or Host Name parameter) that + contains the unresolvable address or host name. 
+ + Unrecognized Chunk Type (6) + + Cause of error + --------------- + Unrecognized Chunk Type: This error cause is returned to the + originator of the chunk if the receiver does not understand the chunk + and the upper bits of the 'Chunk Type' are set to 01 or 11. + + + + + +Stewart, et al. Standards Track [Page 44] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=6 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unrecognized Chunk / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unrecognized Chunk: variable length + + The Unrecognized Chunk field contains the unrecognized Chunk from + the SCTP packet complete with Chunk Type, Chunk Flags and Chunk + Length. + + Invalid Mandatory Parameter (7) + + Cause of error + --------------- + Invalid Mandatory Parameter: This error cause is returned to the + originator of an INIT or INIT ACK chunk when one of the mandatory + parameters is set to an invalid value. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=7 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unrecognized Parameters (8) + + Cause of error + --------------- + Unrecognized Parameters: This error cause is returned to the + originator of the INIT ACK chunk if the receiver does not recognize + one or more Optional TLV parameters in the INIT ACK chunk. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=8 | Cause Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Unrecognized Parameters / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Unrecognized Parameters: variable length + + The Unrecognized Parameters field contains the unrecognized + parameters copied from the INIT ACK chunk complete with TLV.
This + error cause is normally contained in an ERROR chunk bundled with + the COOKIE ECHO chunk when responding to the INIT ACK, when the + sender of the COOKIE ECHO chunk wishes to report unrecognized + parameters. + + + +Stewart, et al. Standards Track [Page 45] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + No User Data (9) + + Cause of error + --------------- + No User Data: This error cause is returned to the originator of a + DATA chunk if a received DATA chunk has no user data. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=9 | Cause Length=8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / TSN value / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + TSN value: 32 bits (unsigned integer) + + The TSN value field contains the TSN of the DATA chunk received + with no user data field. + + This cause code is normally returned in an ABORT chunk (see + Section 6.2) + + Cookie Received While Shutting Down (10) + + Cause of error + --------------- + Cookie Received While Shutting Down: A COOKIE ECHO was received + while the endpoint was in SHUTDOWN-ACK-SENT state. This error is + usually returned in an ERROR chunk bundled with the retransmitted + SHUTDOWN ACK. + + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Cause Code=10 | Cause Length=4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +3.3.11 Cookie Echo (COOKIE ECHO) (10): + + This chunk is used only during the initialization of an association. + It is sent by the initiator of an association to its peer to complete + the initialization process. This chunk MUST precede any DATA chunk + sent within the association, but MAY be bundled with one or more DATA + chunks in the same packet. + + + + + + + + + +Stewart, et al.
Standards Track [Page 46] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 10 |Chunk Flags | Length | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + / Cookie / + \ \ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + Length: 16 bits (unsigned integer) + + Set to the size of the chunk in bytes, including the 4 bytes of + the chunk header and the size of the Cookie. + + Cookie: variable size + + This field must contain the exact cookie received in the State + Cookie parameter from the previous INIT ACK. + + An implementation SHOULD make the cookie as small as possible to + insure interoperability. + +3.3.12 Cookie Acknowledgement (COOKIE ACK) (11): + + This chunk is used only during the initialization of an association. + It is used to acknowledge the receipt of a COOKIE ECHO chunk. This + chunk MUST precede any DATA or SACK chunk sent within the + association, but MAY be bundled with one or more DATA chunks or SACK + chunk in the same SCTP packet. + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 11 |Chunk Flags | Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Set to zero on transmit and ignored on receipt. + + + + + + + + +Stewart, et al. Standards Track [Page 47] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +3.3.13 Shutdown Complete (SHUTDOWN COMPLETE) (14): + + This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + ACK chunk at the completion of the shutdown process, see Section 9.2 + for details. + + The SHUTDOWN COMPLETE chunk has no parameters.
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type = 14 |Reserved |T| Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Chunk Flags: 8 bits + + Reserved: 7 bits + + Set to 0 on transmit and ignored on receipt. + + T bit: 1 bit + + The T bit is set to 0 if the sender had a TCB that it destroyed. + If the sender did not have a TCB it should set this bit to 1. + + Note: Special rules apply to this chunk for verification, please see + Section 8.5.1 for details. + +4. SCTP Association State Diagram + + During the lifetime of an SCTP association, the SCTP endpoint's + association progresses from one state to another in response to various + events. The events that may potentially advance an association's + state include: + + o SCTP user primitive calls, e.g., [ASSOCIATE], [SHUTDOWN], [ABORT], + + o Reception of INIT, COOKIE ECHO, ABORT, SHUTDOWN, etc., control + chunks, or + + o Some timeout events. + + The state diagram in the figures below illustrates state changes, + together with the causing events and resulting actions. Note that + some of the error conditions are not shown in the state diagram. + Full description of all special cases should be found in the text. + + + + + +Stewart, et al. Standards Track [Page 48] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Note: Chunk names are given in all capital letters, while parameter + names have the first letter capitalized, e.g., COOKIE ECHO chunk type + vs. State Cookie parameter. If more than one event/message can occur + which causes a state transition it is labeled (A), (B) etc.
+ + ----- -------- (frm any state) + / \ / rcv ABORT [ABORT] + rcv INIT | | | ---------- or ---------- + --------------- | v v delete TCB snd ABORT + generate Cookie \ +---------+ delete TCB + snd INIT ACK ---| CLOSED | + +---------+ + / \ [ASSOCIATE] + / \ --------------- + | | create TCB + | | snd INIT + | | strt init timer + rcv valid | | + COOKIE ECHO | v + (1) ---------------- | +------------+ + create TCB | | COOKIE-WAIT| (2) + snd COOKIE ACK | +------------+ + | | + | | rcv INIT ACK + | | ----------------- + | | snd COOKIE ECHO + | | stop init timer + | | strt cookie timer + | v + | +--------------+ + | | COOKIE-ECHOED| (3) + | +--------------+ + | | + | | rcv COOKIE ACK + | | ----------------- + | | stop cookie timer + v v + +---------------+ + | ESTABLISHED | + +---------------+ + + + + + + + + + + + +Stewart, et al. Standards Track [Page 49] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + (from the ESTABLISHED state only) + | + | + /--------+--------\ + [SHUTDOWN] / \ + -------------------| | + check outstanding | | + DATA chunks | | + v | + +---------+ | + |SHUTDOWN-| | rcv SHUTDOWN/check + |PENDING | | outstanding DATA + +---------+ | chunks + | |------------------ + No more outstanding | | + ---------------------| | + snd SHUTDOWN | | + strt shutdown timer | | + v v + +---------+ +-----------+ + (4) |SHUTDOWN-| | SHUTDOWN- | (5,6) + |SENT | | RECEIVED | + +---------+ +-----------+ + | \ | + (A) rcv SHUTDOWN ACK | \ | + ----------------------| \ | + stop shutdown timer | \rcv:SHUTDOWN | + send SHUTDOWN COMPLETE| \ (B) | + delete TCB | \ | + | \ | No more outstanding + | \ |----------------- + | \ | send SHUTDOWN ACK + (B)rcv SHUTDOWN | \ | strt shutdown timer + ----------------------| \ | + send SHUTDOWN ACK | \ | + start shutdown timer | \ | + move to SHUTDOWN- | \ | + ACK-SENT | | | + | v | + | +-----------+ + | | SHUTDOWN- | (7) + | | ACK-SENT | + | +----------+- + | | (C)rcv SHUTDOWN COMPLETE + | |----------------- + | | stop 
shutdown timer + | | delete TCB + | | + + + +Stewart, et al. Standards Track [Page 50] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + | | (D)rcv SHUTDOWN ACK + | |-------------- + | | stop shutdown timer + | | send SHUTDOWN COMPLETE + | | delete TCB + | | + \ +---------+ / + \-->| CLOSED |<--/ + +---------+ + + Figure 3: State Transition Diagram of SCTP + + Notes: + + 1) If the State Cookie in the received COOKIE ECHO is invalid (i.e., + failed to pass the integrity check), the receiver MUST silently + discard the packet. Or, if the received State Cookie is expired + (see Section 5.1.5), the receiver MUST send back an ERROR chunk. + In either case, the receiver stays in the CLOSED state. + + 2) If the T1-init timer expires, the endpoint MUST retransmit INIT + and re-start the T1-init timer without changing state. This MUST + be repeated up to 'Max.Init.Retransmits' times. After that, the + endpoint MUST abort the initialization process and report the + error to SCTP user. + + 3) If the T1-cookie timer expires, the endpoint MUST retransmit + COOKIE ECHO and re-start the T1-cookie timer without changing + state. This MUST be repeated up to 'Max.Init.Retransmits' times. + After that, the endpoint MUST abort the initialization process and + report the error to SCTP user. + + 4) In SHUTDOWN-SENT state the endpoint MUST acknowledge any received + DATA chunks without delay. + + 5) In SHUTDOWN-RECEIVED state, the endpoint MUST NOT accept any new + send request from its SCTP user. + + 6) In SHUTDOWN-RECEIVED state, the endpoint MUST transmit or + retransmit data and leave this state when all data in queue is + transmitted. + + 7) In SHUTDOWN-ACK-SENT state, the endpoint MUST NOT accept any new + send request from its SCTP user. + + The CLOSED state is used to indicate that an association is not + created (i.e., doesn't exist). + + + + +Stewart, et al. Standards Track [Page 51] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5. 
Association Initialization + + Before the first data transmission can take place from one SCTP + endpoint ("A") to another SCTP endpoint ("Z"), the two endpoints must + complete an initialization process in order to set up an SCTP + association between them. + + The SCTP user at an endpoint should use the ASSOCIATE primitive to + initialize an SCTP association to another SCTP endpoint. + + IMPLEMENTATION NOTE: From an SCTP-user's point of view, an + association may be implicitly opened, without an ASSOCIATE primitive + (see 10.1 B) being invoked, by the initiating endpoint's sending of + the first user data to the destination endpoint. The initiating SCTP + will assume default values for all mandatory and optional parameters + for the INIT/INIT ACK. + + Once the association is established, unidirectional streams are open + for data transfer on both ends (see Section 5.1.1). + +5.1 Normal Establishment of an Association + + The initialization process consists of the following steps (assuming + that SCTP endpoint "A" tries to set up an association with SCTP + endpoint "Z" and "Z" accepts the new association): + + A) "A" first sends an INIT chunk to "Z". In the INIT, "A" must + provide its Verification Tag (Tag_A) in the Initiate Tag field. + Tag_A SHOULD be a random number in the range of 1 to 4294967295 + (see 5.3.1 for Tag value selection). After sending the INIT, "A" + starts the T1-init timer and enters the COOKIE-WAIT state. + + B) "Z" shall respond immediately with an INIT ACK chunk. The + destination IP address of the INIT ACK MUST be set to the source + IP address of the INIT to which this INIT ACK is responding. In + the response, besides filling in other parameters, "Z" must set + the Verification Tag field to Tag_A, and also provide its own + Verification Tag (Tag_Z) in the Initiate Tag field. + + Moreover, "Z" MUST generate and send along with the INIT ACK a + State Cookie. See Section 5.1.3 for State Cookie generation. 
+ + Note: After sending out INIT ACK with the State Cookie parameter, + "Z" MUST NOT allocate any resources, nor keep any states for the + new association. Otherwise, "Z" will be vulnerable to resource + attacks. + + + + + +Stewart, et al. Standards Track [Page 52] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + C) Upon reception of the INIT ACK from "Z", "A" shall stop the T1- + init timer and leave COOKIE-WAIT state. "A" shall then send the + State Cookie received in the INIT ACK chunk in a COOKIE ECHO + chunk, start the T1-cookie timer, and enter the COOKIE-ECHOED + state. + + Note: The COOKIE ECHO chunk can be bundled with any pending + outbound DATA chunks, but it MUST be the first chunk in the packet + and until the COOKIE ACK is returned the sender MUST NOT send any + other packets to the peer. + + D) Upon reception of the COOKIE ECHO chunk, Endpoint "Z" will reply + with a COOKIE ACK chunk after building a TCB and moving to the + ESTABLISHED state. A COOKIE ACK chunk may be bundled with any + pending DATA chunks (and/or SACK chunks), but the COOKIE ACK chunk + MUST be the first chunk in the packet. + + IMPLEMENTATION NOTE: An implementation may choose to send the + Communication Up notification to the SCTP user upon reception of a + valid COOKIE ECHO chunk. + + E) Upon reception of the COOKIE ACK, endpoint "A" will move from the + COOKIE-ECHOED state to the ESTABLISHED state, stopping the T1- + cookie timer. It may also notify its ULP about the successful + establishment of the association with a Communication Up + notification (see Section 10). + + An INIT or INIT ACK chunk MUST NOT be bundled with any other chunk. + They MUST be the only chunks present in the SCTP packets that carry + them. + + An endpoint MUST send the INIT ACK to the IP address from which it + received the INIT. + + Note: T1-init timer and T1-cookie timer shall follow the same rules + given in Section 6.3. 
+ + If an endpoint receives an INIT, INIT ACK, or COOKIE ECHO chunk but + decides not to establish the new association due to missing mandatory + parameters in the received INIT or INIT ACK, invalid parameter + values, or lack of local resources, it MUST respond with an ABORT + chunk. It SHOULD also specify the cause of abort, such as the type + of the missing mandatory parameters, etc., by including the error + cause parameters with the ABORT chunk. The Verification Tag field in + the common header of the outbound SCTP packet containing the ABORT + chunk MUST be set to the Initiate Tag value of the peer. + + + + + +Stewart, et al. Standards Track [Page 53] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After the reception of the first DATA chunk in an association the + endpoint MUST immediately respond with a SACK to acknowledge the DATA + chunk. Subsequent acknowledgements should be done as described in + Section 6.2. + + When the TCB is created, each endpoint MUST set its internal + Cumulative TSN Ack Point to the value of its transmitted Initial TSN + minus one. + + IMPLEMENTATION NOTE: The IP addresses and SCTP port are generally + used as the key to find the TCB within an SCTP instance. + +5.1.1 Handle Stream Parameters + + In the INIT and INIT ACK chunks, the sender of the chunk shall + indicate the number of outbound streams (OS) it wishes to have in the + association, as well as the maximum inbound streams (MIS) it will + accept from the other endpoint. + + After receiving the stream configuration information from the other + side, each endpoint shall perform the following check: If the peer's + MIS is less than the endpoint's OS, meaning that the peer is + incapable of supporting all the outbound streams the endpoint wants + to configure, the endpoint MUST either use MIS outbound streams, or + abort the association and report to its upper layer the resources + shortage at its peer. 
+ + After the association is initialized, the valid outbound stream + identifier range for either endpoint shall be 0 to min(local OS, + remote MIS)-1. + +5.1.2 Handle Address Parameters + + During the association initialization, an endpoint shall use the + following rules to discover and collect the destination transport + address(es) of its peer. + + A) If there are no address parameters present in the received INIT or + INIT ACK chunk, the endpoint shall take the source IP address from + which the chunk arrives and record it, in combination with the + SCTP source port number, as the only destination transport address + for this peer. + + B) If there is a Host Name parameter present in the received INIT or + INIT ACK chunk, the endpoint shall resolve that host name to a + list of IP address(es) and derive the transport address(es) of + this peer by combining the resolved IP address(es) with the SCTP + source port. + + + +Stewart, et al. Standards Track [Page 54] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The endpoint MUST ignore any other IP address parameters if they + are also present in the received INIT or INIT ACK chunk. + + The time at which the receiver of an INIT resolves the host name + has potential security implications to SCTP. If the receiver of + an INIT resolves the host name upon the reception of the chunk, + and the mechanism the receiver uses to resolve the host name + involves potential long delay (e.g. DNS query), the receiver may + open itself up to resource attacks for the period of time while it + is waiting for the name resolution results before it can build the + State Cookie and release local resources. + + Therefore, in cases where the name translation involves potential + long delay, the receiver of the INIT MUST postpone the name + resolution till the reception of the COOKIE ECHO chunk from the + peer. 
In such a case, the receiver of the INIT SHOULD build the + State Cookie using the received Host Name (instead of destination + transport addresses) and send the INIT ACK to the source IP + address from which the INIT was received. + + The receiver of an INIT ACK shall always immediately attempt to + resolve the name upon the reception of the chunk. + + The receiver of the INIT or INIT ACK MUST NOT send user data + (piggy-backed or stand-alone) to its peer until the host name is + successfully resolved. + + If the name resolution is not successful, the endpoint MUST + immediately send an ABORT with "Unresolvable Address" error cause + to its peer. The ABORT shall be sent to the source IP address + from which the last peer packet was received. + + C) If there are only IPv4/IPv6 addresses present in the received INIT + or INIT ACK chunk, the receiver shall derive and record all the + transport address(es) from the received chunk AND the source IP + address that sent the INIT or INIT ACK. The transport address(es) + are derived by the combination of SCTP source port (from the + common header) and the IP address parameter(s) carried in the INIT + or INIT ACK chunk and the source IP address of the IP datagram. + The receiver should use only these transport addresses as + destination transport addresses when sending subsequent packets to + its peer. + + IMPLEMENTATION NOTE: In some cases (e.g., when the implementation + doesn't control the source IP address that is used for + transmitting), an endpoint might need to include in its INIT or + INIT ACK all possible IP addresses from which packets to the peer + could be transmitted. + + + +Stewart, et al. Standards Track [Page 55] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After all transport addresses are derived from the INIT or INIT ACK + chunk using the above rules, the endpoint shall select one of the + transport addresses as the initial primary path. 
+ + Note: The INIT-ACK MUST be sent to the source address of the INIT. + + The sender of INIT may include a 'Supported Address Types' parameter + in the INIT to indicate what types of address are acceptable. When + this parameter is present, the receiver of INIT (initiatee) MUST + either use one of the address types indicated in the Supported + Address Types parameter when responding to the INIT, or abort the + association with an "Unresolvable Address" error cause if it is + unwilling or incapable of using any of the address types indicated by + its peer. + + IMPLEMENTATION NOTE: In the case that the receiver of an INIT ACK + fails to resolve the address parameter due to an unsupported type, it + can abort the initiation process and then attempt a re-initiation by + using a 'Supported Address Types' parameter in the new INIT to + indicate what types of address it prefers. + +5.1.3 Generating State Cookie + + When sending an INIT ACK as a response to an INIT chunk, the sender + of INIT ACK creates a State Cookie and sends it in the State Cookie + parameter of the INIT ACK. Inside this State Cookie, the sender + should include a MAC (see [RFC2104] for an example), a time stamp on + when the State Cookie is created, and the lifespan of the State + Cookie, along with all the information necessary for it to establish + the association. 
+ + The following steps SHOULD be taken to generate the State Cookie: + + 1) Create an association TCB using information from both the received + INIT and the outgoing INIT ACK chunk, + + 2) In the TCB, set the creation time to the current time of day, and + the lifespan to the protocol parameter 'Valid.Cookie.Life', + + 3) From the TCB, identify and collect the minimal subset of + information needed to re-create the TCB, and generate a MAC using + this subset of information and a secret key (see [RFC2104] for an + example of generating a MAC), and + + 4) Generate the State Cookie by combining this subset of information + and the resultant MAC. + + + + + +Stewart, et al. Standards Track [Page 56] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + After sending the INIT ACK with the State Cookie parameter, the + sender SHOULD delete the TCB and any other local resource related to + the new association, so as to prevent resource attacks. + + The hashing method used to generate the MAC is strictly a private + matter for the receiver of the INIT chunk. The use of a MAC is + mandatory to prevent denial of service attacks. The secret key + SHOULD be random ([RFC1750] provides some information on randomness + guidelines); it SHOULD be changed reasonably frequently, and the + timestamp in the State Cookie MAY be used to determine which key + should be used to verify the MAC. + + An implementation SHOULD make the cookie as small as possible to + insure interoperability. + +5.1.4 State Cookie Processing + + When an endpoint (in the COOKIE WAIT state) receives an INIT ACK + chunk with a State Cookie parameter, it MUST immediately send a + COOKIE ECHO chunk to its peer with the received State Cookie. The + sender MAY also add any pending DATA chunks to the packet after the + COOKIE ECHO chunk. + + The endpoint shall also start the T1-cookie timer after sending out + the COOKIE ECHO chunk. 
If the timer expires, the endpoint shall + retransmit the COOKIE ECHO chunk and restart the T1-cookie timer. + This is repeated until either a COOKIE ACK is received or + 'Max.Init.Retransmits' is reached causing the peer endpoint to be + marked unreachable (and thus the association enters the CLOSED + state). + +5.1.5 State Cookie Authentication + + When an endpoint receives a COOKIE ECHO chunk from another endpoint + with which it has no association, it shall take the following + actions: + + 1) Compute a MAC using the TCB data carried in the State Cookie and + the secret key (note the timestamp in the State Cookie MAY be used + to determine which secret key to use). Reference [RFC2104] can be + used as a guideline for generating the MAC, + + 2) Authenticate the State Cookie as one that it previously generated + by comparing the computed MAC against the one carried in the State + Cookie. If this comparison fails, the SCTP packet, including the + COOKIE ECHO and any DATA chunks, should be silently discarded, + + + + + +Stewart, et al. Standards Track [Page 57] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 3) Compare the creation timestamp in the State Cookie to the current + local time. If the elapsed time is longer than the lifespan + carried in the State Cookie, then the packet, including the COOKIE + ECHO and any attached DATA chunks, SHOULD be discarded and the + endpoint MUST transmit an ERROR chunk with a "Stale Cookie" error + cause to the peer endpoint, + + 4) If the State Cookie is valid, create an association to the sender + of the COOKIE ECHO chunk with the information in the TCB data + carried in the COOKIE ECHO, and enter the ESTABLISHED state, + + 5) Send a COOKIE ACK chunk to the peer acknowledging reception of the + COOKIE ECHO. The COOKIE ACK MAY be bundled with an outbound DATA + chunk or SACK chunk; however, the COOKIE ACK MUST be the first + chunk in the SCTP packet.
+ + 6) Immediately acknowledge any DATA chunk bundled with the COOKIE + ECHO with a SACK (subsequent DATA chunk acknowledgement should + follow the rules defined in Section 6.2). As mentioned in step + 5), if the SACK is bundled with the COOKIE ACK, the COOKIE ACK + MUST appear first in the SCTP packet. + + If a COOKIE ECHO is received from an endpoint with which the receiver + of the COOKIE ECHO has an existing association, the procedures in + Section 5.2 should be followed. + +5.1.6 An Example of Normal Association Establishment + + In the following example, "A" initiates the association and then + sends a user message to "Z", then "Z" sends two user messages to "A" + later (assuming no bundling or fragmentation occurs): + + + + + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 58] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + {app sets association with Z} + (build TCB) + INIT [I-Tag=Tag_A + & other info] --------\ + (Start T1-init timer) \ + (Enter COOKIE-WAIT state) \---> (compose temp TCB and Cookie_Z) + + /--- INIT ACK [Veri Tag=Tag_A, + / I-Tag=Tag_Z, + (Cancel T1-init timer) <------/ Cookie_Z, & other info] + (destroy temp TCB) + COOKIE ECHO [Cookie_Z] ------\ + (Start T1-init timer) \ + (Enter COOKIE-ECHOED state) \---> (build TCB enter ESTABLISHED + state) + + + /---- COOKIE-ACK + / + (Cancel T1-init timer, <-----/ + Enter ESTABLISHED state) + {app sends 1st user data; strm 0} + DATA [TSN=initial TSN_A + Strm=0,Seq=1 & user data]--\ + (Start T3-rtx timer) \ + \-> + /----- SACK [TSN Ack=init + TSN_A,Block=0] + (Cancel T3-rtx timer) <------/ + + ... 
+ {app sends 2 messages;strm 0} + /---- DATA + / [TSN=init TSN_Z + <--/ Strm=0,Seq=1 & user data 1] + SACK [TSN Ack=init TSN_Z, /---- DATA + Block=0] --------\ / [TSN=init TSN_Z +1, + \/ Strm=0,Seq=2 & user data 2] + <------/\ + \ + \------> + + Figure 4: INITiation Example + + If the T1-init timer expires at "A" after the INIT or COOKIE ECHO + chunks are sent, the same INIT or COOKIE ECHO chunk with the same + Initiate Tag (i.e., Tag_A) or State Cookie shall be retransmitted and + + + +Stewart, et al. Standards Track [Page 59] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + the timer restarted. This shall be repeated Max.Init.Retransmits + times before "A" considers "Z" unreachable and reports the failure to + its upper layer (and thus the association enters the CLOSED state). + When retransmitting the INIT, the endpoint MUST follow the rules + defined in 6.3 to determine the proper timer value. + +5.2 Handle Duplicate or Unexpected INIT, INIT ACK, COOKIE ECHO, and + COOKIE ACK + + During the lifetime of an association (in one of the possible + states), an endpoint may receive from its peer endpoint one of the + setup chunks (INIT, INIT ACK, COOKIE ECHO, and COOKIE ACK). The + receiver shall treat such a setup chunk as a duplicate and process it + as described in this section. + + Note: An endpoint will not receive the chunk unless the chunk was + sent to a SCTP transport address and is from a SCTP transport address + associated with this endpoint. Therefore, the endpoint processes + such a chunk as part of its current association. 
+ + The following scenarios can cause duplicated or unexpected chunks: + + A) The peer has crashed without being detected, re-started itself and + sent out a new INIT chunk trying to restore the association, + + B) Both sides are trying to initialize the association at about the + same time, + + C) The chunk is from a stale packet that was used to establish the + present association or a past association that is no longer in + existence, + + D) The chunk is a false packet generated by an attacker, or + + E) The peer never received the COOKIE ACK and is retransmitting its + COOKIE ECHO. + + The rules in the following sections shall be applied in order to + identify and correctly handle these cases. + +5.2.1 INIT received in COOKIE-WAIT or COOKIE-ECHOED State (Item B) + + This usually indicates an initialization collision, i.e., each + endpoint is attempting, at about the same time, to establish an + association with the other endpoint. + + Upon receipt of an INIT in the COOKIE-WAIT or COOKIE-ECHOED state, an + endpoint MUST respond with an INIT ACK using the same parameters it + + + +Stewart, et al. Standards Track [Page 60] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + sent in its original INIT chunk (including its Initiation Tag, + unchanged). These original parameters are combined with those from + the newly received INIT chunk. The endpoint shall also generate a + State Cookie with the INIT ACK. The endpoint uses the parameters + sent in its INIT to calculate the State Cookie. + + After that, the endpoint MUST NOT change its state, the T1-init timer + shall be left running and the corresponding TCB MUST NOT be + destroyed. The normal procedures for handling State Cookies when a + TCB exists will resolve the duplicate INITs to a single association. + + For an endpoint that is in the COOKIE-ECHOED state it MUST populate + its Tie-Tags with the Tag information of itself and its peer (see + section 5.2.2 for a description of the Tie-Tags). 
+ +5.2.2 Unexpected INIT in States Other than CLOSED, COOKIE-ECHOED, + COOKIE-WAIT and SHUTDOWN-ACK-SENT + + Unless otherwise stated, upon reception of an unexpected INIT for + this association, the endpoint shall generate an INIT ACK with a + State Cookie. In the outbound INIT ACK the endpoint MUST copy its + current Verification Tag and peer's Verification Tag into a reserved + place within the state cookie. We shall refer to these locations as + the Peer's-Tie-Tag and the Local-Tie-Tag. The outbound SCTP packet + containing this INIT ACK MUST carry a Verification Tag value equal to + the Initiation Tag found in the unexpected INIT. And the INIT ACK + MUST contain a new Initiation Tag (randomly generated see Section + 5.3.1). Other parameters for the endpoint SHOULD be copied from the + existing parameters of the association (e.g. number of outbound + streams) into the INIT ACK and cookie. + + After sending out the INIT ACK, the endpoint shall take no further + actions, i.e., the existing association, including its current state, + and the corresponding TCB MUST NOT be changed. + + Note: Only when a TCB exists and the association is not in a COOKIE- + WAIT state are the Tie-Tags populated. For a normal association INIT + (i.e. the endpoint is in a COOKIE-WAIT state), the Tie-Tags MUST be + set to 0 (indicating that no previous TCB existed). The INIT ACK and + State Cookie are populated as specified in section 5.2.1. + +5.2.3 Unexpected INIT ACK + + If an INIT ACK is received by an endpoint in any state other than the + COOKIE-WAIT state, the endpoint should discard the INIT ACK chunk. + An unexpected INIT ACK usually indicates the processing of an old or + duplicated INIT chunk. + + + + +Stewart, et al. 
Standards Track [Page 61] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.2.4 Handle a COOKIE ECHO when a TCB exists + + When a COOKIE ECHO chunk is received by an endpoint in any state for + an existing association (i.e., not in the CLOSED state) the following + rules shall be applied: + + 1) Compute a MAC as described in Step 1 of Section 5.1.5, + + 2) Authenticate the State Cookie as described in Step 2 of Section + 5.1.5 (this is case C or D above). + + 3) Compare the timestamp in the State Cookie to the current time. If + the State Cookie is older than the lifespan carried in the State + Cookie and the Verification Tags contained in the State Cookie do + not match the current association's Verification Tags, the packet, + including the COOKIE ECHO and any DATA chunks, should be + discarded. The endpoint also MUST transmit an ERROR chunk with a + "Stale Cookie" error cause to the peer endpoint (this is case C or + D in section 5.2). + + If both Verification Tags in the State Cookie match the + Verification Tags of the current association, consider the State + Cookie valid (this is case E of section 5.2) even if the lifespan + is exceeded. + + 4) If the State Cookie proves to be valid, unpack the TCB into a + temporary TCB. + + 5) Refer to Table 2 to determine the correct action to be taken. + + + + + + + + + + + + + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 62] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + ++------------+------------+---------------+--------------+-------------+ +| Local Tag | Peer's Tag | Local-Tie-Tag |Peer's-Tie-Tag| Action/ | +| | | | | Description | ++------------+------------+---------------+--------------+-------------+ +| X | X | M | M | (A) | ++------------+------------+---------------+--------------+-------------+ +| M | X | A | A | (B) | ++------------+------------+---------------+--------------+-------------+ +| M | 0 | A | A | (B) | ++------------+------------+---------------+--------------+-------------+ +| X | M | 0 | 0 | (C) | ++------------+------------+---------------+--------------+-------------+ +| M | M | A | A | (D) | ++======================================================================+ +| Table 2: Handling of a COOKIE ECHO when a TCB exists | ++======================================================================+ + + Legend: + + X - Tag does not match the existing TCB + M - Tag matches the existing TCB. + 0 - No Tie-Tag in Cookie (unknown). + A - All cases, i.e. M, X or 0. + + Note: For any case not shown in Table 2, the cookie should be + silently discarded. + + Action + + A) In this case, the peer may have restarted. When the endpoint
recognizes this potential 'restart', the existing session is
treated the same as if it received an ABORT followed by a new
COOKIE ECHO with the following exceptions:

- Any SCTP DATA Chunks MAY be retained (this is an
implementation-specific option).

- A notification of RESTART SHOULD be sent to the ULP instead of
a "COMMUNICATION LOST" notification.

All the congestion control parameters (e.g., cwnd, ssthresh)
related to this peer MUST be reset to their initial values (see
Section 6.2.1).

After this the endpoint shall enter the ESTABLISHED state. Standards Track [Page 63] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes + the peer has restarted (Action A), it MUST NOT set up a new + association but instead resend the SHUTDOWN ACK and send an ERROR + chunk with a "Cookie Received while Shutting Down" error cause to + its peer. + + B) In this case, both sides may be attempting to start an association + at about the same time but the peer endpoint started its INIT + after responding to the local endpoint's INIT. Thus it may have + picked a new Verification Tag not being aware of the previous Tag + it had sent this endpoint. The endpoint should stay in or enter + the ESTABLISHED state but it MUST update its peer's Verification + Tag from the State Cookie, stop any init or cookie timers that may + be running and send a COOKIE ACK. + + C) In this case, the local endpoint's cookie has arrived late. + Before it arrived, the local endpoint sent an INIT and received an + INIT ACK and finally sent a COOKIE ECHO with the peer's same tag + but a new tag of its own. The cookie should be silently + discarded. The endpoint SHOULD NOT change states and should leave + any timers running. + + D) When both local and remote tags match the endpoint should always + enter the ESTABLISHED state, if it has not already done so. It + should stop any init or cookie timers that may be running and send + a COOKIE ACK. + + Note: The "peer's Verification Tag" is the tag received in the + Initiate Tag field of the INIT or INIT ACK chunk. + + An Example of an Association Restart + + In the following example, "A" initiates the association after a + restart has occurred. Endpoint "Z" had no knowledge of the restart + until the exchange (i.e. Heartbeats had not yet detected the failure + of "A"). (assuming no bundling or fragmentation occurs): + + + + + + + + + + + + + + + +Stewart, et al.
Standards Track [Page 65] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.2.5 Handle Duplicate COOKIE-ACK. + + At any state other than COOKIE-ECHOED, an endpoint should silently + discard a received COOKIE ACK chunk. + +5.2.6 Handle Stale COOKIE Error + + Receipt of an ERROR chunk with a "Stale Cookie" error cause indicates + one of a number of possible events: + + A) That the association failed to completely setup before the State + Cookie issued by the sender was processed. + + B) An old State Cookie was processed after setup completed. + + C) An old State Cookie is received from someone that the receiver is + not interested in having an association with and the ABORT chunk + was lost. + + When processing an ERROR chunk with a "Stale Cookie" error cause an + endpoint should first examine if an association is in the process of + being setup, i.e. the association is in the COOKIE-ECHOED state. In + all cases if the association is not in the COOKIE-ECHOED state, the + ERROR chunk should be silently discarded. + + If the association is in the COOKIE-ECHOED state, the endpoint may + elect one of the following three alternatives. + + 1) Send a new INIT chunk to the endpoint to generate a new State + Cookie and re-attempt the setup procedure. + + 2) Discard the TCB and report to the upper layer the inability to + setup the association. + + 3) Send a new INIT chunk to the endpoint, adding a Cookie + Preservative parameter requesting an extension to the lifetime of + the State Cookie. When calculating the time extension, an + implementation SHOULD use the RTT information measured based on + the previous COOKIE ECHO / ERROR exchange, and should add no more + than 1 second beyond the measured RTT, due to long State Cookie + lifetimes making the endpoint more subject to a replay attack. + + + + + + + + + + +Stewart, et al. 
Standards Track [Page 66] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +5.3 Other Initialization Issues + +5.3.1 Selection of Tag Value + + Initiate Tag values should be selected from the range of 1 to 2**32 - + 1. It is very important that the Initiate Tag value be randomized to + help protect against "man in the middle" and "sequence number" + attacks. The methods described in [RFC1750] can be used for the + Initiate Tag randomization. Careful selection of Initiate Tags is + also necessary to prevent old duplicate packets from previous + associations being mistakenly processed as belonging to the current + association. + + Moreover, the Verification Tag value used by either endpoint in a + given association MUST NOT change during the lifetime of an + association. A new Verification Tag value MUST be used each time the + endpoint tears-down and then re-establishes an association to the + same peer. + +6. User Data Transfer + + Data transmission MUST only happen in the ESTABLISHED, SHUTDOWN- + PENDING, and SHUTDOWN-RECEIVED states. The only exception to this is + that DATA chunks are allowed to be bundled with an outbound COOKIE + ECHO chunk when in COOKIE-WAIT state. + + DATA chunks MUST only be received according to the rules below in + ESTABLISHED, SHUTDOWN-PENDING, SHUTDOWN-SENT. A DATA chunk received + in CLOSED is out of the blue and SHOULD be handled per 8.4. A DATA + chunk received in any other state SHOULD be discarded. + + A SACK MUST be processed in ESTABLISHED, SHUTDOWN-PENDING, and + SHUTDOWN-RECEIVED. An incoming SACK MAY be processed in COOKIE- + ECHOED. A SACK in the CLOSED state is out of the blue and SHOULD be + processed according to the rules in 8.4. A SACK chunk received in + any other state SHOULD be discarded. + + + A SCTP receiver MUST be able to receive a minimum of 1500 bytes in + one SCTP packet. 
This means that a SCTP endpoint MUST NOT indicate + less than 1500 bytes in its Initial a_rwnd sent in the INIT or INIT + ACK. + + For transmission efficiency, SCTP defines mechanisms for bundling of + small user messages and fragmentation of large user messages. The + following diagram depicts the flow of user messages through SCTP. + + + + + +Stewart, et al. Standards Track [Page 67] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + In this section the term "data sender" refers to the endpoint that + transmits a DATA chunk and the term "data receiver" refers to the + endpoint that receives a DATA chunk. A data receiver will transmit + SACK chunks. + + +--------------------------+ + | User Messages | + +--------------------------+ + SCTP user ^ | + ==================|==|======================================= + | v (1) + +------------------+ +--------------------+ + | SCTP DATA Chunks | |SCTP Control Chunks | + +------------------+ +--------------------+ + ^ | ^ | + | v (2) | v (2) + +--------------------------+ + | SCTP packets | + +--------------------------+ + SCTP ^ | + ===========================|==|=========================== + | v + Connectionless Packet Transfer Service (e.g., IP) + + Notes: + + 1) When converting user messages into DATA chunks, an endpoint + will fragment user messages larger than the current association + path MTU into multiple DATA chunks. Standards Track [Page 68] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +6.1 Transmission of DATA Chunks + + This document is specified as if there is a single retransmission + timer per destination transport address, but implementations MAY have + a retransmission timer for each DATA chunk. 
+ + The following general rules MUST be applied by the data sender for + transmission and/or retransmission of outbound DATA chunks: + + A) At any given time, the data sender MUST NOT transmit new data to + any destination transport address if its peer's rwnd indicates + that the peer has no buffer space (i.e. rwnd is 0, see Section + 6.2.1). However, regardless of the value of rwnd (including if it + is 0), the data sender can always have one DATA chunk in flight to + the receiver if allowed by cwnd (see rule B below). This rule + allows the sender to probe for a change in rwnd that the sender + missed due to the SACK having been lost in transit from the data + receiver to the data sender. + + B) At any given time, the sender MUST NOT transmit new data to a + given transport address if it has cwnd or more bytes of data + outstanding to that transport address. + + C) When the time comes for the sender to transmit, before sending new + DATA chunks, the sender MUST first transmit any outstanding DATA + chunks which are marked for retransmission (limited by the current + cwnd). + + D) Then, the sender can send out as many new DATA chunks as Rule A + and Rule B above allow. + + Multiple DATA chunks committed for transmission MAY be bundled in a + single packet. Furthermore, DATA chunks being retransmitted MAY be + bundled with new DATA chunks, as long as the resulting packet size + does not exceed the path MTU. Standards Track [Page 69] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IMPLEMENTATION NOTE: When the window is full (i.e., transmission is + disallowed by Rule A and/or Rule B), the sender MAY still accept send + requests from its upper layer, but MUST transmit no more DATA chunks + until some or all of the outstanding DATA chunks are acknowledged and + transmission is allowed by Rule A and Rule B again. 
+ + Whenever a transmission or retransmission is made to any address, if + the T3-rtx timer of that address is not currently running, the sender + MUST start that timer. If the timer for that address is already + running, the sender MUST restart the timer if the earliest (i.e., + lowest TSN) outstanding DATA chunk sent to that address is being + retransmitted. Otherwise, the data sender MUST NOT restart the + timer. + + When starting or restarting the T3-rtx timer, the timer value must be + adjusted according to the timer rules defined in Sections 6.3.2, and + 6.3.3. + + Note: The data sender SHOULD NOT use a TSN that is more than 2**31 - + 1 above the beginning TSN of the current send window. + +6.2 Acknowledgement on Reception of DATA Chunks + + The SCTP endpoint MUST always acknowledge the reception of each valid + DATA chunk. + + The guidelines on delayed acknowledgement algorithm specified in + Section 4.2 of [RFC2581] SHOULD be followed. Specifically, an + acknowledgement SHOULD be generated for at least every second packet + (not every second DATA chunk) received, and SHOULD be generated + within 200 ms of the arrival of any unacknowledged DATA chunk. In + some situations it may be beneficial for an SCTP transmitter to be + more conservative than the algorithms detailed in this document + allow. However, an SCTP transmitter MUST NOT be more aggressive than + the following algorithms allow. + + A SCTP receiver MUST NOT generate more than one SACK for every + incoming packet, other than to update the offered window as the + receiving application consumes new data. + + IMPLEMENTATION NOTE: The maximum delay for generating an + acknowledgement may be configured by the SCTP administrator, either + statically or dynamically, in order to meet the specific timing + requirement of the protocol being carried. + + An implementation MUST NOT allow the maximum delay to be configured + to be more than 500 ms. 
In other words an implementation MAY lower + this value below 500ms but MUST NOT raise it above 500ms. + + + +Stewart, et al. Standards Track [Page 70] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Acknowledgements MUST be sent in SACK chunks unless shutdown was + requested by the ULP in which case an endpoint MAY send an + acknowledgement in the SHUTDOWN chunk. A SACK chunk can acknowledge + the reception of multiple DATA chunks. See Section 3.3.4 for SACK + chunk format. In particular, the SCTP endpoint MUST fill in the + Cumulative TSN Ack field to indicate the latest sequential TSN (of a + valid DATA chunk) it has received. Any received DATA chunks with TSN + greater than the value in the Cumulative TSN Ack field SHOULD also be + reported in the Gap Ack Block fields. + + Note: The SHUTDOWN chunk does not contain Gap Ack Block fields. + Therefore, the endpoint should use a SACK instead of the SHUTDOWN + chunk to acknowledge DATA chunks received out of order . + + When a packet arrives with duplicate DATA chunk(s) and with no new + DATA chunk(s), the endpoint MUST immediately send a SACK with no + delay. If a packet arrives with duplicate DATA chunk(s) bundled with + new DATA chunks, the endpoint MAY immediately send a SACK. Normally + receipt of duplicate DATA chunks will occur when the original SACK + chunk was lost and the peer's RTO has expired. The duplicate TSN + number(s) SHOULD be reported in the SACK as duplicate. + + When an endpoint receives a SACK, it MAY use the Duplicate TSN + information to determine if SACK loss is occurring. Further use of + this data is for future study. + + The data receiver is responsible for maintaining its receive buffers. + The data receiver SHOULD notify the data sender in a timely manner of + changes in its ability to receive data. How an implementation + manages its receive buffers is dependent on many factors (e.g., + Operating System, memory management system, amount of memory, etc.). 
+ However, the data sender strategy defined in Section 6.2.1 is based + on the assumption of receiver operation similar to the following: + + A) At initialization of the association, the endpoint tells the + peer how much receive buffer space it has allocated to the + association in the INIT or INIT ACK. The endpoint sets a_rwnd + to this value. + + B) As DATA chunks are received and buffered, decrement a_rwnd by + the number of bytes received and buffered. This is, in effect, + closing rwnd at the data sender and restricting the amount of + data it can transmit. + + C) As DATA chunks are delivered to the ULP and released from the + receive buffers, increment a_rwnd by the number of bytes + delivered to the upper layer. This is, in effect, opening up + rwnd on the data sender and allowing it to send more data. The + + + +Stewart, et al. Standards Track [Page 71] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + data receiver SHOULD NOT increment a_rwnd unless it has + released bytes from its receive buffer. For example, if the + receiver is holding fragmented DATA chunks in a reassembly + queue, it should not increment a_rwnd. + + D) When sending a SACK, the data receiver SHOULD place the current + value of a_rwnd into the a_rwnd field. The data receiver + SHOULD take into account that the data sender will not + retransmit DATA chunks that are acked via the Cumulative TSN + Ack (i.e., will drop from its retransmit queue). + + Under certain circumstances, the data receiver may need to drop DATA + chunks that it has received but hasn't released from its receive + buffers (i.e., delivered to the ULP). These DATA chunks may have + been acked in Gap Ack Blocks. For example, the data receiver may be + holding data in its receive buffers while reassembling a fragmented + user message from its peer when it runs out of receive buffer space. + It may drop these DATA chunks even though it has acknowledged them in + Gap Ack Blocks. 
If a data receiver drops DATA chunks, it MUST NOT + include them in Gap Ack Blocks in subsequent SACKs until they are + received again via retransmission. In addition, the endpoint should + take into account the dropped data when calculating its a_rwnd. + + An endpoint SHOULD NOT revoke a SACK and discard data. Only in + extreme circumstance should an endpoint use this procedure (such as + out of buffer space). Standards Track [Page 72] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + + {App sends 3 messages; strm 0} + DATA [TSN=7,Strm=0,Seq=3] ------------> (ack delayed) + (Start T3-rtx timer) + + DATA [TSN=8,Strm=0,Seq=4] ------------> (send ack) + /------- SACK [TSN Ack=8,block=0] + (cancel T3-rtx timer) <-----/ + + DATA [TSN=9,Strm=0,Seq=5] ------------> (ack delayed) + (Start T3-rtx timer) + ... + {App sends 1 message; strm 1} + (bundle SACK with DATA) + /----- SACK [TSN Ack=9,block=0] \ + / DATA [TSN=6,Strm=1,Seq=2] + (cancel T3-rtx timer) <------/ (Start T3-rtx timer) + + (ack delayed) + (send ack) + SACK [TSN Ack=6,block=0] -------------> (cancel T3-rtx timer) + + Figure 7: Delayed Acknowledgment Example + + If an endpoint receives a DATA chunk with no user data (i.e., the + Length field is set to 16) it MUST send an ABORT with error cause set + to "No User Data". + + An endpoint SHOULD NOT send a DATA chunk with no user data part. + +6.2.1 Processing a Received SACK + + Each SACK an endpoint receives contains an a_rwnd value. This value + represents the amount of buffer space the data receiver, at the time + of transmitting the SACK, has left of its total receive buffer space + (as specified in the INIT/INIT ACK). Using a_rwnd, Cumulative TSN + Ack and Gap Ack Blocks, the data sender can develop a representation + of the peer's receive buffer space. + + One of the problems the data sender must take into account when + processing a SACK is that a SACK can be received out of order. 
That + is, a SACK sent by the data receiver can pass an earlier SACK and be + received first by the data sender. If a SACK is received out of + order, the data sender can develop an incorrect view of the peer's + receive buffer space. + + + + + +Stewart, et al. Standards Track [Page 73] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Since there is no explicit identifier that can be used to detect + out-of-order SACKs, the data sender must use heuristics to determine + if a SACK is new. + + An endpoint SHOULD use the following rules to calculate the rwnd, + using the a_rwnd value, the Cumulative TSN Ack and Gap Ack Blocks in + a received SACK. + + A) At the establishment of the association, the endpoint initializes + the rwnd to the Advertised Receiver Window Credit (a_rwnd) the + peer specified in the INIT or INIT ACK. + + B) Any time a DATA chunk is transmitted (or retransmitted) to a peer, + the endpoint subtracts the data size of the chunk from the rwnd of + that peer. + + C) Any time a DATA chunk is marked for retransmission (via either + T3-rtx timer expiration (Section 6.3.3) or via fast retransmit + (Section 7.2.4)), add the data size of those chunks to the rwnd. + + Note: If the implementation is maintaining a timer on each DATA + chunk then only DATA chunks whose timer expired would be marked + for retransmission. + + D) Any time a SACK arrives, the endpoint performs the following: + + i) If Cumulative TSN Ack is less than the Cumulative TSN Ack + Point, then drop the SACK. Since Cumulative TSN Ack is + monotonically increasing, a SACK whose Cumulative TSN Ack is + less than the Cumulative TSN Ack Point indicates an out-of- + order SACK. + + ii) Set rwnd equal to the newly received a_rwnd minus the + number of bytes still outstanding after processing the + Cumulative TSN Ack and the Gap Ack Blocks.
+ + iii) If the SACK is missing a TSN that was previously + acknowledged via a Gap Ack Block (e.g., the data receiver + reneged on the data), then mark the corresponding DATA chunk as + available for retransmit: Mark it as missing for fast + retransmit as described in Section 7.2.4 and if no retransmit + timer is running for the destination address to which the DATA + chunk was originally transmitted, then T3-rtx is started for + that destination address. + + + + + + + +Stewart, et al. Standards Track [Page 74] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +6.3 Management of Retransmission Timer + + An SCTP endpoint uses a retransmission timer T3-rtx to ensure data + delivery in the absence of any feedback from its peer. The duration + of this timer is referred to as RTO (retransmission timeout). + + When an endpoint's peer is multi-homed, the endpoint will calculate a + separate RTO for each different destination transport address of its + peer endpoint. + + The computation and management of RTO in SCTP follows closely how TCP + manages its retransmission timer. To compute the current RTO, an + endpoint maintains two state variables per destination transport + address: SRTT (smoothed round-trip time) and RTTVAR (round-trip time + variation). + +6.3.1 RTO Calculation + + The rules governing the computation of SRTT, RTTVAR, and RTO are as + follows: + + C1) Until an RTT measurement has been made for a packet sent to the + given destination transport address, set RTO to the protocol + parameter 'RTO.Initial'. + + C2) When the first RTT measurement R is made, set SRTT <- R, RTTVAR + <- R/2, and RTO <- SRTT + 4 * RTTVAR. + + C3) When a new RTT measurement R' is made, set + + RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| + SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' + + Note: The value of SRTT used in the update to RTTVAR is its value + before updating SRTT itself using the second assignment.
+ + After the computation, update RTO <- SRTT + 4 * RTTVAR. + + C4) When data is in flight and when allowed by rule C5 below, a new + RTT measurement MUST be made each round trip. Furthermore, new + RTT measurements SHOULD be made no more than once per round-trip + for a given destination transport address. There are two reasons + for this recommendation: First, it appears that measuring more + frequently often does not in practice yield any significant + benefit [ALLMAN99]; second, if measurements are made more often, + then the values of RTO.Alpha and RTO.Beta in rule C3 above should + be adjusted so that SRTT and RTTVAR still adjust to changes at + roughly the same rate (in terms of how many round trips it takes + + + +Stewart, et al. Standards Track [Page 75] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + them to reflect new values) as they would if making only one + measurement per round-trip and using RTO.Alpha and RTO.Beta as + given in rule C3. However, the exact nature of these adjustments + remains a research issue. + + C5) Karn's algorithm: RTT measurements MUST NOT be made using packets + that were retransmitted (and thus for which it is ambiguous + whether the reply was for the first instance of the packet or a + later instance). + + C6) Whenever RTO is computed, if it is less than RTO.Min seconds then + it is rounded up to RTO.Min seconds. The reason for this rule is + that RTOs that do not have a high minimum value are susceptible + to unnecessary timeouts [ALLMAN99]. + + C7) A maximum value may be placed on RTO provided it is at least + RTO.max seconds. + + There is no requirement for the clock granularity G used for + computing RTT measurements and the different state variables, other + than: + + G1) Whenever RTTVAR is computed, if RTTVAR = 0, then adjust RTTVAR <- + G. + + Experience [ALLMAN99] has shown that finer clock granularities (<= + 100 msec) perform somewhat better than more coarse granularities. 
+ +6.3.2 Retransmission Timer Rules + + The rules for managing the retransmission timer are as follows: + + R1) Every time a DATA chunk is sent to any address (including a + retransmission), if the T3-rtx timer of that address is not + running, start it running so that it will expire after the RTO of + that address. The RTO used here is that obtained after any + doubling due to previous T3-rtx timer expirations on the + corresponding destination address as discussed in rule E2 below. + + R2) Whenever all outstanding data sent to an address have been + acknowledged, turn off the T3-rtx timer of that address. + + R3) Whenever a SACK is received that acknowledges the DATA chunk with + the earliest outstanding TSN for that address, restart T3-rtx + timer for that address with its current RTO (if there is still + outstanding data on that address). + + + + + +Stewart, et al. Standards Track [Page 76] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + R4) Whenever a SACK is received missing a TSN that was previously + acknowledged via a Gap Ack Block, start T3-rtx for the + destination address to which the DATA chunk was originally + transmitted if it is not already running. + + The following example shows the use of various timer rules (assuming + the receiver uses delayed acks). + + Endpoint A Endpoint Z + {App begins to send} + Data [TSN=7,Strm=0,Seq=3] ------------> (ack delayed) + (Start T3-rtx timer) + {App sends 1 message; strm 1} + (bundle ack with data) + DATA [TSN=8,Strm=0,Seq=4] ----\ /-- SACK [TSN Ack=7,Block=0] + \ / DATA [TSN=6,Strm=1,Seq=2] + \ / (Start T3-rtx timer) + \ + / \ + (Re-start T3-rtx timer) <------/ \--> (ack delayed) + (ack delayed) + {send ack} + SACK [TSN Ack=6,Block=0] --------------> (Cancel T3-rtx timer) + .. 
+ (send ack) + (Cancel T3-rtx timer) <-------------- SACK [TSN Ack=8,Block=0] + + Figure 8 - Timer Rule Examples + +6.3.3 Handle T3-rtx Expiration + + Whenever the retransmission timer T3-rtx expires for a destination + address, do the following: + + E1) For the destination address for which the timer expires, adjust + its ssthresh with rules defined in Section 7.2.3 and set the cwnd + <- MTU. + + E2) For the destination address for which the timer expires, set RTO + <- RTO * 2 ("back off the timer"). The maximum value discussed + in rule C7 above (RTO.max) may be used to provide an upper bound + to this doubling operation. + + E3) Determine how many of the earliest (i.e., lowest TSN) outstanding + DATA chunks for the address for which the T3-rtx has expired will + fit into a single packet, subject to the MTU constraint for the + path corresponding to the destination transport address to which + the retransmission is being sent (this may be different from the + + + +Stewart, et al. Standards Track [Page 77] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + address for which the timer expires [see Section 6.4]). Call + this value K. Bundle and retransmit those K DATA chunks in a + single packet to the destination endpoint. + + E4) Start the retransmission timer T3-rtx on the destination address + to which the retransmission is sent, if rule R1 above indicates + to do so. The RTO to be used for starting T3-rtx should be the + one for the destination address to which the retransmission is + sent, which, when the receiver is multi-homed, may be different + from the destination address for which the timer expired (see + Section 6.4 below). 
+ + After retransmitting, once a new RTT measurement is obtained (which + can happen only when new data has been sent and acknowledged, per + rule C5, or for a measurement made from a HEARTBEAT [see Section + 8.3]), the computation in rule C3 is performed, including the + computation of RTO, which may result in "collapsing" RTO back down + after it has been subject to doubling (rule E2). + + Note: Any DATA chunks that were sent to the address for which the + T3-rtx timer expired but did not fit in one MTU (rule E3 above), + should be marked for retransmission and sent as soon as cwnd allows + (normally when a SACK arrives). + + The final rule for managing the retransmission timer concerns + failover (see Section 6.4.1): + + F1) Whenever an endpoint switches from the current destination + transport address to a different one, the current retransmission + timers are left running. As soon as the endpoint transmits a + packet containing DATA chunk(s) to the new transport address, + start the timer on that transport address, using the RTO value of + the destination address to which the data is being sent, if rule + R1 indicates to do so. + +6.4 Multi-homed SCTP Endpoints + + An SCTP endpoint is considered multi-homed if there are more than one + transport address that can be used as a destination address to reach + that endpoint. + + Moreover, the ULP of an endpoint shall select one of the multiple + destination addresses of a multi-homed peer endpoint as the primary + path (see Sections 5.1.2 and 10.1 for details). + + By default, an endpoint SHOULD always transmit to the primary path, + unless the SCTP user explicitly specifies the destination transport + address (and possibly source transport address) to use. + + + +Stewart, et al. Standards Track [Page 78] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An endpoint SHOULD transmit reply chunks (e.g., SACK, HEARTBEAT ACK, + etc.) 
to the same destination transport address from which it + received the DATA or control chunk to which it is replying. This + rule should also be followed if the endpoint is bundling DATA chunks + together with the reply chunk. + + However, when acknowledging multiple DATA chunks received in packets + from different source addresses in a single SACK, the SACK chunk may + be transmitted to one of the destination transport addresses from + which the DATA or control chunks being acknowledged were received. + + When a receiver of a duplicate DATA chunk sends a SACK to a multi- + homed endpoint it MAY be beneficial to vary the destination address + and not use the source address of the DATA chunk. The reason being + that receiving a duplicate from a multi-homed endpoint might indicate + that the return path (as specified in the source address of the DATA + chunk) for the SACK is broken. + + Furthermore, when its peer is multi-homed, an endpoint SHOULD try to + retransmit a chunk to an active destination transport address that is + different from the last destination address to which the DATA chunk + was sent. + + Retransmissions do not affect the total outstanding data count. + However, if the DATA chunk is retransmitted onto a different + destination address, both the outstanding data counts on the new + destination address and the old destination address to which the data + chunk was last sent shall be adjusted accordingly. + +6.4.1 Failover from Inactive Destination Address + + Some of the transport addresses of a multi-homed SCTP endpoint may + become inactive due to either the occurrence of certain error + conditions (see Section 8.2) or adjustments from SCTP user. 
+ + When there is outbound data to send and the primary path becomes + inactive (e.g., due to failures), or where the SCTP user explicitly + requests to send data to an inactive destination transport address, + before reporting an error to its ULP, the SCTP endpoint should try to + send the data to an alternate active destination transport address if + one exists. + + When retransmitting data, if the endpoint is multi-homed, it should + consider each source-destination address pair in its retransmission + selection policy. When retransmitting the endpoint should attempt to + pick the most divergent source-destination pair from the original + source-destination pair to which the packet was transmitted. + + + + +Stewart, et al. Standards Track [Page 79] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Note: Rules for picking the most divergent source-destination pair + are an implementation decision and are not specified within this + document. + +6.5 Stream Identifier and Stream Sequence Number + + Every DATA chunk MUST carry a valid stream identifier. If an + endpoint receives a DATA chunk with an invalid stream identifier, it + shall acknowledge the reception of the DATA chunk following the + normal procedure, immediately send an ERROR chunk with cause set to + "Invalid Stream Identifier" (see Section 3.3.10) and discard the DATA + chunk. The endpoint may bundle the ERROR chunk in the same packet as + the SACK as long as the ERROR follows the SACK. + + The stream sequence number in all the streams shall start from 0 when + the association is established. Also, when the stream sequence + number reaches the value 65535 the next stream sequence number shall + be set to 0. + +6.6 Ordered and Unordered Delivery + + Within a stream, an endpoint MUST deliver DATA chunks received with + the U flag set to 0 to the upper layer according to the order of + their stream sequence number. 
If DATA chunks arrive out of order of + their stream sequence number, the endpoint MUST hold the received + DATA chunks from delivery to the ULP until they are re-ordered. + + However, an SCTP endpoint can indicate that no ordered delivery is + required for a particular DATA chunk transmitted within the stream by + setting the U flag of the DATA chunk to 1. + + When an endpoint receives a DATA chunk with the U flag set to 1, it + must bypass the ordering mechanism and immediately deliver the data + to the upper layer (after re-assembly if the user data is fragmented + by the data sender). + + This provides an effective way of transmitting "out-of-band" data in + a given stream. Also, a stream can be used as an "unordered" stream + by simply setting the U flag to 1 in all DATA chunks sent through + that stream. + + IMPLEMENTATION NOTE: When sending an unordered DATA chunk, an + implementation may choose to place the DATA chunk in an outbound + packet that is at the head of the outbound transmission queue if + possible. + + + + + + +Stewart, et al. Standards Track [Page 80] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The 'Stream Sequence Number' field in a DATA chunk with U flag set to + 1 has no significance. The sender can fill it with arbitrary value, + but the receiver MUST ignore the field. + + Note: When transmitting ordered and unordered data, an endpoint does + not increment its Stream Sequence Number when transmitting a DATA + chunk with U flag set to 1. + +6.7 Report Gaps in Received DATA TSNs + + Upon the reception of a new DATA chunk, an endpoint shall examine the + continuity of the TSNs received. If the endpoint detects a gap in + the received DATA chunk sequence, it SHOULD send a SACK with Gap Ack + Blocks immediately. The data receiver continues sending a SACK after + receipt of each SCTP packet that doesn't fill the gap. 
+ + Based on the Gap Ack Block from the received SACK, the endpoint can + calculate the missing DATA chunks and make decisions on whether to + retransmit them (see Section 6.2.1 for details). + + Multiple gaps can be reported in one single SACK (see Section 3.3.4). + + When its peer is multi-homed, the SCTP endpoint SHOULD always try to + send the SACK to the same destination address from which the last + DATA chunk was received. + + Upon the reception of a SACK, the endpoint MUST remove all DATA + chunks which have been acknowledged by the SACK's Cumulative TSN Ack + from its transmit queue. The endpoint MUST also treat all the DATA + chunks with TSNs not included in the Gap Ack Blocks reported by the + SACK as "missing". The number of "missing" reports for each + outstanding DATA chunk MUST be recorded by the data sender in order + to make retransmission decisions. See Section 7.2.4 for details. + + The following example shows the use of SACK to report a gap. + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 81] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Endpoint A Endpoint Z + {App sends 3 messages; strm 0} + DATA [TSN=6,Strm=0,Seq=2] ---------------> (ack delayed) + (Start T3-rtx timer) + + DATA [TSN=7,Strm=0,Seq=3] --------> X (lost) + + DATA [TSN=8,Strm=0,Seq=4] ---------------> (gap detected, + immediately send ack) + /----- SACK [TSN Ack=6,Block=1, + / Strt=2,End=2] + <-----/ + (remove 6 from out-queue, + and mark 7 as "1" missing report) + + Figure 9 - Reporting a Gap using SACK + + The maximum number of Gap Ack Blocks that can be reported within a + single SACK chunk is limited by the current path MTU. When a single + SACK can not cover all the Gap Ack Blocks needed to be reported due + to the MTU limitation, the endpoint MUST send only one SACK, + reporting the Gap Ack Blocks from the lowest to highest TSNs, within + the size limit set by the MTU, and leave the remaining highest TSN + numbers unacknowledged. 
+ +6.8 Adler-32 Checksum Calculation + + When sending an SCTP packet, the endpoint MUST strengthen the data + integrity of the transmission by including the Adler-32 checksum + value calculated on the packet, as described below. + + After the packet is constructed (containing the SCTP common header + and one or more control or DATA chunks), the transmitter shall: + + 1) Fill in the proper Verification Tag in the SCTP common header and + initialize the checksum field to 0's. + + 2) Calculate the Adler-32 checksum of the whole packet, including the + SCTP common header and all the chunks. Refer to appendix B for + details of the Adler-32 algorithm. And, + + 3) Put the resultant value into the checksum field in the common + header, and leave the rest of the bits unchanged. + + When an SCTP packet is received, the receiver MUST first check the + Adler-32 checksum: + + 1) Store the received Adler-32 checksum value aside, + + + +Stewart, et al. Standards Track [Page 82] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 2) Replace the 32 bits of the checksum field in the received SCTP + packet with all '0's and calculate an Adler-32 checksum value of + the whole received packet. And, + + 3) Verify that the calculated Adler-32 checksum is the same as the + received Adler-32 checksum. If not, the receiver MUST treat the + packet as an invalid SCTP packet. + + The default procedure for handling invalid SCTP packets is to + silently discard them. + +6.9 Fragmentation and Reassembly + + An endpoint MAY support fragmentation when sending DATA chunks, but + MUST support reassembly when receiving DATA chunks. If an endpoint + supports fragmentation, it MUST fragment a user message if the size + of the user message to be sent causes the outbound SCTP packet size + to exceed the current MTU. 
If an implementation does not support + fragmentation of outbound user messages, the endpoint must return an + error to its upper layer and not attempt to send the user message. + + IMPLEMENTATION NOTE: In this error case, the Send primitive + discussed in Section 10.1 would need to return an error to the upper + layer. + + If its peer is multi-homed, the endpoint shall choose a size no + larger than the association Path MTU. The association Path MTU is + the smallest Path MTU of all destination addresses. + + Note: Once a message is fragmented it cannot be re-fragmented. + Instead if the PMTU has been reduced, then IP fragmentation must be + used. Please see Section 7.3 for details of PMTU discovery. + + When determining when to fragment, the SCTP implementation MUST take + into account the SCTP packet header as well as the DATA chunk + header(s). The implementation MUST also take into account the space + required for a SACK chunk if bundling a SACK chunk with the DATA + chunk. + + Fragmentation takes the following steps: + + 1) The data sender MUST break the user message into a series of DATA + chunks such that each chunk plus SCTP overhead fits into an IP + datagram smaller than or equal to the association Path MTU. + + 2) The transmitter MUST then assign, in sequence, a separate TSN to + each of the DATA chunks in the series. The transmitter assigns + the same SSN to each of the DATA chunks. If the user indicates + + + +Stewart, et al. Standards Track [Page 83] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + that the user message is to be delivered using unordered delivery, + then the U flag of each DATA chunk of the user message MUST be set + to 1. + + 3) The transmitter MUST also set the B/E bits of the first DATA chunk + in the series to '10', the B/E bits of the last DATA chunk in the + series to '01', and the B/E bits of all other DATA chunks in the + series to '00'. 
+ + An endpoint MUST recognize fragmented DATA chunks by examining the + B/E bits in each of the received DATA chunks, and queue the + fragmented DATA chunks for re-assembly. Once the user message is + reassembled, SCTP shall pass the re-assembled user message to the + specific stream for possible re-ordering and final dispatching. + + Note: If the data receiver runs out of buffer space while still + waiting for more fragments to complete the re-assembly of the + message, it should dispatch part of its inbound message through a + partial delivery API (see Section 10), freeing some of its receive + buffer space so that the rest of the message may be received. + +6.10 Bundling + + An endpoint bundles chunks by simply including multiple chunks in one + outbound SCTP packet. The total size of the resultant IP datagram, + including the SCTP packet and IP headers, MUST be less than or equal to + the current Path MTU. + + If its peer endpoint is multi-homed, the sending endpoint shall + choose a size no larger than the latest MTU of the current primary + path. + + When bundling control chunks with DATA chunks, an endpoint MUST place + control chunks first in the outbound SCTP packet. The transmitter + MUST transmit DATA chunks within an SCTP packet in increasing order of + TSN. + + Note: Since control chunks must be placed first in a packet and + since DATA chunks must be transmitted before SHUTDOWN or SHUTDOWN ACK + chunks, DATA chunks cannot be bundled with SHUTDOWN or SHUTDOWN ACK + chunks. + + Partial chunks MUST NOT be placed in an SCTP packet. + + + + + + + + +Stewart, et al. Standards Track [Page 84] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + An endpoint MUST process received chunks in their order in the + packet. The receiver uses the chunk length field to determine the end + of a chunk and beginning of the next chunk taking account of the fact + that all chunks end on a 4 byte boundary. 
If the receiver detects a + partial chunk, it MUST drop the chunk. + + An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN COMPLETE with + any other chunks. + +7. Congestion control + + Congestion control is one of the basic functions in SCTP. For some + applications, it may be likely that adequate resources will be + allocated to SCTP traffic to assure prompt delivery of time-critical + data - thus it would appear to be unlikely, during normal operations, + that transmissions encounter severe congestion conditions. However + SCTP must operate under adverse operational conditions, which can + develop upon partial network failures or unexpected traffic surges. + In such situations SCTP must follow correct congestion control steps + to recover from congestion quickly in order to get data delivered as + soon as possible. In the absence of network congestion, these + preventive congestion control algorithms should show no impact on the + protocol performance. + + IMPLEMENTATION NOTE: As far as its specific performance requirements + are met, an implementation is always allowed to adopt a more + conservative congestion control algorithm than the one defined below. + + The congestion control algorithms used by SCTP are based on + [RFC2581]. This section describes how the algorithms defined in + RFC2581 are adapted for use in SCTP. We first list differences in + protocol designs between TCP and SCTP, and then describe SCTP's + congestion control scheme. The description will use the same + terminology as in TCP congestion control whenever appropriate. + + SCTP congestion control is always applied to the entire association, + and not to individual streams. + +7.1 SCTP Differences from TCP Congestion control + + Gap Ack Blocks in the SCTP SACK carry the same semantic meaning as + the TCP SACK. TCP considers the information carried in the SACK as + advisory information only. SCTP considers the information carried in + the Gap Ack Blocks in the SACK chunk as advisory. 
In SCTP, any DATA + chunk that has been acknowledged by SACK, including DATA that arrived + at the receiving end out of order, is not considered fully delivered + until the Cumulative TSN Ack Point passes the TSN of the DATA chunk + (i.e., the DATA chunk has been acknowledged by the Cumulative TSN Ack + + + +Stewart, et al. Standards Track [Page 85] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + field in the SACK). Consequently, the value of cwnd controls the + amount of outstanding data, rather than (as in the case of non-SACK + TCP) the upper bound between the highest acknowledged sequence number + and the latest DATA chunk that can be sent within the congestion + window. SCTP SACK leads to different implementations of fast- + retransmit and fast-recovery than non-SACK TCP. As an example see + [FALL96]. + + The biggest difference between SCTP and TCP, however, is multi- + homing. SCTP is designed to establish robust communication + associations between two endpoints each of which may be reachable by + more than one transport address. Potentially different addresses may + lead to different data paths between the two endpoints, thus ideally + one may need a separate set of congestion control parameters for each + of the paths. The treatment here of congestion control for multi- + homed receivers is new with SCTP and may require refinement in the + future. The current algorithms make the following assumptions: + + o The sender usually uses the same destination address until being + instructed by the upper layer otherwise; however, SCTP may change + to an alternate destination in the event an address is marked + inactive (see Section 8.2). Also, SCTP may retransmit to a + different transport address than the original transmission. + + o The sender keeps a separate congestion control parameter set for + each of the destination addresses it can send to (not each + source-destination pair but for each destination). 
The parameters + should decay if the address is not used for a long enough time + period. + + o For each of the destination addresses, an endpoint does slow-start + upon the first transmission to that address. + + Note: TCP guarantees in-sequence delivery of data to its upper-layer + protocol within a single TCP session. This means that when TCP + notices a gap in the received sequence number, it waits until the gap + is filled before delivering the data that was received with sequence + numbers higher than that of the missing data. On the other hand, + SCTP can deliver data to its upper-layer protocol even if there is a + gap in TSN if the Stream Sequence Numbers are in sequence for a + particular stream (i.e., the missing DATA chunks are for a different + stream) or if unordered delivery is indicated. Although this does + not affect cwnd, it might affect rwnd calculation. + + + + + + + + +Stewart, et al. Standards Track [Page 86] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +7.2 SCTP Slow-Start and Congestion Avoidance + + The slow start and congestion avoidance algorithms MUST be used by an + endpoint to control the amount of data being injected into the + network. The congestion control in SCTP is employed in regard to the + association, not to an individual stream. In some situations it may + be beneficial for an SCTP sender to be more conservative than the + algorithms allow; however, an SCTP sender MUST NOT be more aggressive + than the following algorithms allow. + + Like TCP, an SCTP endpoint uses the following three control variables + to regulate its transmission rate. + + o Receiver advertised window size (rwnd, in bytes), which is set by + the receiver based on its available buffer space for incoming + packets. + + Note: This variable is kept on the entire association. + + o Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. 
+ + Note: This variable is maintained on a per-destination address + basis. + + o Slow-start threshold (ssthresh, in bytes), which is used by the + sender to distinguish slow start and congestion avoidance phases. + + Note: This variable is maintained on a per-destination address + basis. + + SCTP also requires one additional control variable, + partial_bytes_acked, which is used during congestion avoidance phase + to facilitate cwnd adjustment. + + Unlike TCP, an SCTP sender MUST keep a set of these control variables + cwnd, ssthresh and partial_bytes_acked for EACH destination address + of its peer (when its peer is multi-homed). Only one rwnd is kept + for the whole association (no matter if the peer is multi-homed or + has a single address). + +7.2.1 Slow-Start + + Beginning data transmission into a network with unknown conditions or + after a sufficiently long idle period requires SCTP to probe the + network to determine the available capacity. The slow start + algorithm is used for this purpose at the beginning of a transfer, or + after repairing loss detected by the retransmission timer. + + + +Stewart, et al. Standards Track [Page 87] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o The initial cwnd before DATA transmission or after a sufficiently + long idle period MUST be <= 2*MTU. + + o The initial cwnd after a retransmission timeout MUST be no more + than 1*MTU. + + o The initial value of ssthresh MAY be arbitrarily high (for + example, implementations MAY use the size of the receiver + advertised window). + + o Whenever cwnd is greater than zero, the endpoint is allowed to + have cwnd bytes of data outstanding on that transport address. + + o When cwnd is less than or equal to ssthresh an SCTP endpoint MUST + use the slow start algorithm to increase cwnd (assuming the + current congestion window is being fully utilized). 
If an + incoming SACK advances the Cumulative TSN Ack Point, cwnd MUST be + increased by at most the lesser of 1) the total size of the + previously outstanding DATA chunk(s) acknowledged, and 2) the + destination's path MTU. This protects against the ACK-Splitting + attack outlined in [SAVAGE99]. + + In instances where its peer endpoint is multi-homed, if an endpoint + receives a SACK that advances its Cumulative TSN Ack Point, then it + should update its cwnd (or cwnds) apportioned to the destination + addresses to which it transmitted the acknowledged data. However if + the received SACK does not advance the Cumulative TSN Ack Point, the + endpoint MUST NOT adjust the cwnd of any of the destination + addresses. + + Because an endpoint's cwnd is not tied to its Cumulative TSN Ack + Point, as duplicate SACKs come in, even though they may not advance + the Cumulative TSN Ack Point an endpoint can still use them to clock + out new data. That is, the data newly acknowledged by the SACK + diminishes the amount of data now in flight to less than cwnd; and so + the current, unchanged value of cwnd now allows new data to be sent. + On the other hand, the increase of cwnd must be tied to the + Cumulative TSN Ack Point advancement as specified above. Otherwise + the duplicate SACKs will not only clock out new data, but also will + adversely clock out more new data than what has just left the + network, during a time of possible congestion. + + o When the endpoint does not transmit data on a given transport + address, the cwnd of the transport address should be adjusted to + max(cwnd/2, 2*MTU) per RTO. + + + + + + +Stewart, et al. Standards Track [Page 88] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +7.2.2 Congestion Avoidance + + When cwnd is greater than ssthresh, cwnd should be incremented by + 1*MTU per RTT if the sender has cwnd or more bytes of data + outstanding for the corresponding transport address. 
+ + In practice an implementation can achieve this goal in the following + way: + + o partial_bytes_acked is initialized to 0. + + o Whenever cwnd is greater than ssthresh, upon each SACK arrival + that advances the Cumulative TSN Ack Point, increase + partial_bytes_acked by the total number of bytes of all new chunks + acknowledged in that SACK including chunks acknowledged by the new + Cumulative TSN Ack and by Gap Ack Blocks. + + o When partial_bytes_acked is equal to or greater than cwnd and + before the arrival of the SACK the sender had cwnd or more bytes + of data outstanding (i.e., before arrival of the SACK, flightsize + was greater than or equal to cwnd), increase cwnd by MTU, and + reset partial_bytes_acked to (partial_bytes_acked - cwnd). + + o Same as in the slow start, when the sender does not transmit DATA + on a given transport address, the cwnd of the transport address + should be adjusted to max(cwnd / 2, 2*MTU) per RTO. + + o When all of the data transmitted by the sender has been + acknowledged by the receiver, partial_bytes_acked is initialized + to 0. + +7.2.3 Congestion Control + + Upon detection of packet losses from SACK (see Section 7.2.4), an + endpoint should do the following: + + ssthresh = max(cwnd/2, 2*MTU) + cwnd = ssthresh + + Basically, a packet loss causes cwnd to be cut in half. + + When the T3-rtx timer expires on an address, SCTP should perform slow + start by: + + ssthresh = max(cwnd/2, 2*MTU) + cwnd = 1*MTU + + + + + +Stewart, et al. Standards Track [Page 89] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + and assure that no more than one SCTP packet will be in flight for + that address until the endpoint receives acknowledgement for + successful delivery of data to that address. + +7.2.4 Fast Retransmit on Gap Reports + + In the absence of data loss, an endpoint performs delayed + acknowledgement. 
However, whenever an endpoint notices a hole in the + arriving TSN sequence, it SHOULD start sending a SACK back every time + a packet arrives carrying data until the hole is filled. + + Whenever an endpoint receives a SACK that indicates some TSN(s) + missing, it SHOULD wait for 3 further miss indications (via + subsequent SACK's) on the same TSN(s) before taking action with + regard to Fast Retransmit. + + When the TSN(s) is reported as missing in the fourth consecutive + SACK, the data sender shall: + + 1) Mark the missing DATA chunk(s) for retransmission, + + 2) Adjust the ssthresh and cwnd of the destination address(es) to + which the missing DATA chunks were last sent, according to the + formula described in Section 7.2.3. + + 3) Determine how many of the earliest (i.e., lowest TSN) DATA chunks + marked for retransmission will fit into a single packet, subject + to constraint of the path MTU of the destination transport address + to which the packet is being sent. Call this value K. Retransmit + those K DATA chunks in a single packet. + + 4) Restart T3-rtx timer only if the last SACK acknowledged the lowest + outstanding TSN number sent to that address, or the endpoint is + retransmitting the first outstanding DATA chunk sent to that + address. + + Note: Before the above adjustments, if the received SACK also + acknowledges new DATA chunks and advances the Cumulative TSN Ack + Point, the cwnd adjustment rules defined in Sections 7.2.1 and 7.2.2 + must be applied first. + + A straightforward implementation of the above keeps a counter for + each TSN hole reported by a SACK. The counter increments for each + consecutive SACK reporting the TSN hole. After reaching 4 and + starting the fast retransmit procedure, the counter resets to 0. + + + + + + +Stewart, et al. 
Standards Track [Page 90] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Because cwnd in SCTP indirectly bounds the number of outstanding + TSN's, the effect of TCP fast-recovery is achieved automatically with + no adjustment to the congestion control window size. + +7.3 Path MTU Discovery + + [RFC1191] specifies "Path MTU Discovery", whereby an endpoint + maintains an estimate of the maximum transmission unit (MTU) along a + given Internet path and refrains from sending packets along that path + which exceed the MTU, other than occasional attempts to probe for a + change in the Path MTU (PMTU). RFC 1191 is thorough in its + discussion of the MTU discovery mechanism and strategies for + determining the current end-to-end MTU setting as well as detecting + changes in this value. [RFC1981] specifies the same mechanisms for + IPv6. An SCTP sender using IPv6 MUST use Path MTU Discovery unless + all packets are less than the minimum IPv6 MTU [RFC2460]. + + An endpoint SHOULD apply these techniques, and SHOULD do so on a + per-destination-address basis. + + There are 5 ways in which SCTP differs from the description in RFC + 1191 of applying MTU discovery to TCP: + + 1) SCTP associations can span multiple addresses. An endpoint MUST + maintain separate MTU estimates for each destination address of + its peer. + + 2) Elsewhere in this document, when the term "MTU" is discussed, it + refers to the MTU associated with the destination address + corresponding to the context of the discussion. + + 3) Unlike TCP, SCTP does not have a notion of "Maximum Segment Size". + Accordingly, the MTU for each destination address SHOULD be + initialized to a value no larger than the link MTU for the local + interface to which packets for that remote destination address + will be routed.
+ + 4) Since data transmission in SCTP is naturally structured in terms + of TSNs rather than bytes (as is the case for TCP), the discussion + in Section 6.5 of RFC 1191 applies: When retransmitting an IP + datagram to a remote address for which the IP datagram appears too + large for the path MTU to that address, the IP datagram SHOULD be + retransmitted without the DF bit set, allowing it to possibly be + fragmented. Transmissions of new IP datagrams MUST have DF set. + + + + + + + +Stewart, et al. Standards Track [Page 91] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5) The sender should track an association PMTU which will be the + smallest PMTU discovered for all of the peer's destination + addresses. When fragmenting messages into multiple parts this + association PMTU should be used to calculate the size of each + fragment. This will allow retransmissions to be seamlessly sent + to an alternate address without encountering IP fragmentation. + + Other than these differences, the discussion of TCP's use of MTU + discovery in RFCs 1191 and 1981 applies to SCTP on a per- + destination-address basis. + + Note: For IPv6 destination addresses the DF bit does not exist, + instead the IP datagram must be fragmented as described in [RFC2460]. + +8. Fault Management + +8.1 Endpoint Failure Detection + + An endpoint shall keep a counter on the total number of consecutive + retransmissions to its peer (including retransmissions to all the + destination transport addresses of the peer if it is multi-homed). + If the value of this counter exceeds the limit indicated in the + protocol parameter 'Association.Max.Retrans', the endpoint shall + consider the peer endpoint unreachable and shall stop transmitting + any more data to it (and thus the association enters the CLOSED + state). In addition, the endpoint shall report the failure to the + upper layer, and optionally report back all outstanding user data + remaining in its outbound queue. 
The association is automatically + closed when the peer endpoint becomes unreachable. + + The counter shall be reset each time a DATA chunk sent to that peer + endpoint is acknowledged (by the reception of a SACK), or a + HEARTBEAT ACK is received from the peer endpoint. + +8.2 Path Failure Detection + + When its peer endpoint is multi-homed, an endpoint should keep an + error counter for each of the destination transport addresses of the + peer endpoint. + + Each time the T3-rtx timer expires on any address, or when a + HEARTBEAT sent to an idle address is not acknowledged within an RTO, + the error counter of that destination address will be incremented. + When the value in the error counter exceeds the protocol parameter + 'Path.Max.Retrans' of that destination address, the endpoint should + mark the destination transport address as inactive, and a + notification SHOULD be sent to the upper layer. + + + + +Stewart, et al. Standards Track [Page 92] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + When an outstanding TSN is acknowledged or a HEARTBEAT sent to that + address is acknowledged with a HEARTBEAT ACK, the endpoint shall + clear the error counter of the destination transport address to which + the DATA chunk was last sent (or HEARTBEAT was sent). When the peer + endpoint is multi-homed and the last chunk sent to it was a + retransmission to an alternate address, there exists an ambiguity as + to whether or not the acknowledgement should be credited to the + address of the last chunk sent. However, this ambiguity does not + seem to bear any significant consequence to SCTP behavior. If this + ambiguity is undesirable, the transmitter may choose not to clear the + error counter if the last chunk sent was a retransmission.
+ + Note: When configuring the SCTP endpoint, the user should avoid + having the value of 'Association.Max.Retrans' larger than the + summation of the 'Path.Max.Retrans' of all the destination addresses + for the remote endpoint. Otherwise, all the destination addresses + may become inactive while the endpoint still considers the peer + endpoint reachable. When this condition occurs, how the SCTP chooses + to function is implementation specific. + + When the primary path is marked inactive (due to excessive + retransmissions, for instance), the sender MAY automatically transmit + new packets to an alternate destination address if one exists and is + active. If more than one alternate address is active when the + primary path is marked inactive only ONE transport address SHOULD be + chosen and used as the new destination transport address. + +8.3 Path Heartbeat + + By default, an SCTP endpoint shall monitor the reachability of the + idle destination transport address(es) of its peer by sending a + HEARTBEAT chunk periodically to the destination transport + address(es). + + A destination transport address is considered "idle" if no new chunk + which can be used for updating path RTT (usually including first + transmission DATA, INIT, COOKIE ECHO, HEARTBEAT etc.) and no + HEARTBEAT has been sent to it within the current heartbeat period of + that address. This applies to both active and inactive destination + addresses. + + The upper layer can optionally initiate the following functions: + + A) Disable heartbeat on a specific destination transport address of a + given association, + + B) Change the HB.interval, + + + + +Stewart, et al. Standards Track [Page 93] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + C) Re-enable heartbeat on a specific destination transport address of + a given association, and, + + D) Request an on-demand HEARTBEAT on a specific destination transport + address of a given association. 
+ + The endpoint should increment the respective error counter of the + destination transport address each time a HEARTBEAT is sent to that + address and not acknowledged within one RTO. + + When the value of this counter reaches the protocol parameter + 'Path.Max.Retrans', the endpoint should mark the corresponding + destination address as inactive if it is not so marked, and may also + optionally report to the upper layer the change of reachability of + this destination address. After this, the endpoint should continue + HEARTBEAT on this destination address but should stop increasing the + counter. + + The sender of the HEARTBEAT chunk should include in the Heartbeat + Information field of the chunk the current time when the packet is + sent out and the destination address to which the packet is sent. + + IMPLEMENTATION NOTE: An alternative implementation of the heartbeat + mechanism that can be used is to increment the error counter variable + every time a HEARTBEAT is sent to a destination. Whenever a + HEARTBEAT ACK arrives, the sender SHOULD clear the error counter of + the destination that the HEARTBEAT was sent to. This in effect would + clear the previously stroked error (and any other error counts as + well). + + The receiver of the HEARTBEAT should immediately respond with a + HEARTBEAT ACK that contains the Heartbeat Information field copied + from the received HEARTBEAT chunk. + + Upon the receipt of the HEARTBEAT ACK, the sender of the HEARTBEAT + should clear the error counter of the destination transport address + to which the HEARTBEAT was sent, and mark the destination transport + address as active if it is not so marked. The endpoint may + optionally report to the upper layer when an inactive destination + address is marked as active due to the reception of the latest + HEARTBEAT ACK. The receiver of the HEARTBEAT ACK must also clear the + association overall error count as well (as defined in section 8.1).
+ + The receiver of the HEARTBEAT ACK should also perform an RTT + measurement for that destination transport address using the time + value carried in the HEARTBEAT ACK chunk. + + + + + +Stewart, et al. Standards Track [Page 94] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + On an idle destination address that is allowed to heartbeat, a + HEARTBEAT chunk is RECOMMENDED to be sent once per RTO of that + destination address plus the protocol parameter 'HB.interval', with + jittering of +/- 50%, and exponential back-off of the RTO if the + previous HEARTBEAT is unanswered. + + A primitive is provided for the SCTP user to change the HB.interval + and turn on or off the heartbeat on a given destination address. The + heartbeat interval set by the SCTP user is added to the RTO of that + destination (including any exponential backoff). Only one heartbeat + should be sent each time the heartbeat timer expires (if multiple + destinations are idle). It is an implementation decision on how to + choose which of the candidate idle destinations to heartbeat to (if + more than one destination is idle). + + Note: When tuning the heartbeat interval, there is a side effect that + SHOULD be taken into account. When this value is increased, i.e. + the HEARTBEAT takes longer, the detection of lost ABORT messages + takes longer as well. If a peer endpoint ABORTs the association for + any reason and the ABORT chunk is lost, the local endpoint will only + discover the lost ABORT by sending a DATA chunk or HEARTBEAT chunk + (thus causing the peer to send another ABORT). This must be + considered when tuning the HEARTBEAT timer. If the HEARTBEAT is + disabled, only sending DATA to the association will discover a lost + ABORT from the peer.
+ +8.4 Handle "Out of the blue" Packets + + An SCTP packet is called an "out of the blue" (OOTB) packet if it is + correctly formed, i.e., passed the receiver's Adler-32 check (see + Section 6.8), but the receiver is not able to identify the + association to which this packet belongs. + + The receiver of an OOTB packet MUST do the following: + + 1) If the OOTB packet is to or from a non-unicast address, silently + discard the packet. Otherwise, + + 2) If the OOTB packet contains an ABORT chunk, the receiver MUST + silently discard the OOTB packet and take no further action. + Otherwise, + + 3) If the packet contains an INIT chunk with a Verification Tag set + to '0', process it as described in Section 5.1. Otherwise, + + 4) If the packet contains a COOKIE ECHO in the first chunk, process + it as described in Section 5.1. Otherwise, + + + + +Stewart, et al. Standards Track [Page 95] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + 5) If the packet contains a SHUTDOWN ACK chunk, the receiver should + respond to the sender of the OOTB packet with a SHUTDOWN COMPLETE. + When sending the SHUTDOWN COMPLETE, the receiver of the OOTB + packet must fill in the Verification Tag field of the outbound + packet with the Verification Tag received in the SHUTDOWN ACK and + set the T-bit in the Chunk Flags to indicate that no TCB was + found. Otherwise, + + 6) If the packet contains a SHUTDOWN COMPLETE chunk, the receiver + should silently discard the packet and take no further action. + Otherwise, + + 7) If the packet contains a "Stale cookie" ERROR or a COOKIE ACK the + SCTP Packet should be silently discarded. Otherwise, + + 8) The receiver should respond to the sender of the OOTB packet with + an ABORT. 
When sending the ABORT, the receiver of the OOTB packet + MUST fill in the Verification Tag field of the outbound packet + with the value found in the Verification Tag field of the OOTB + packet and set the T-bit in the Chunk Flags to indicate that no + TCB was found. After sending this ABORT, the receiver of the OOTB + packet shall discard the OOTB packet and take no further action. + +8.5 Verification Tag + + The Verification Tag rules defined in this section apply when sending + or receiving SCTP packets which do not contain an INIT, SHUTDOWN + COMPLETE, COOKIE ECHO (see Section 5.1), ABORT or SHUTDOWN ACK chunk. + The rules for sending and receiving SCTP packets containing one of + these chunk types are discussed separately in Section 8.5.1. + + When sending an SCTP packet, the endpoint MUST fill in the + Verification Tag field of the outbound packet with the tag value in + the Initiate Tag parameter of the INIT or INIT ACK received from its + peer. + + When receiving an SCTP packet, the endpoint MUST ensure that the + value in the Verification Tag field of the received SCTP packet + matches its own Tag. If the received Verification Tag value does not + match the receiver's own tag value, the receiver shall silently + discard the packet and shall not process it any further except for + those cases listed in Section 8.5.1 below. + + + + + + + + + +Stewart, et al. Standards Track [Page 96] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +8.5.1 Exceptions in Verification Tag Rules + + A) Rules for packet carrying INIT: + + - The sender MUST set the Verification Tag of the packet to 0. + + - When an endpoint receives an SCTP packet with the Verification + Tag set to 0, it should verify that the packet contains only an + INIT chunk. Otherwise, the receiver MUST silently discard the + packet. 
+ + B) Rules for packet carrying ABORT: + + - The endpoint shall always fill in the Verification Tag field of + the outbound packet with the destination endpoint's tag value + if it is known. + + - If the ABORT is sent in response to an OOTB packet, the + endpoint MUST follow the procedure described in Section 8.4. + + - The receiver MUST accept the packet if the Verification Tag + matches either its own tag, OR the tag of its peer. Otherwise, + the receiver MUST silently discard the packet and take no + further action. + + C) Rules for packet carrying SHUTDOWN COMPLETE: + + - When sending a SHUTDOWN COMPLETE, if the receiver of the + SHUTDOWN ACK has a TCB then the destination endpoint's tag MUST + be used. Only where no TCB exists should the sender use the + Verification Tag from the SHUTDOWN ACK. + + - The receiver of a SHUTDOWN COMPLETE shall accept the packet if + the Verification Tag field of the packet matches its own tag OR + it is set to its peer's tag and the T bit is set in the Chunk + Flags. Otherwise, the receiver MUST silently discard the packet + and take no further action. An endpoint MUST ignore the + SHUTDOWN COMPLETE if it is not in the SHUTDOWN-ACK-SENT state. + + D) Rules for packet carrying a COOKIE ECHO + + - When sending a COOKIE ECHO, the endpoint MUST use the value of + the Initial Tag received in the INIT ACK. + + - The receiver of a COOKIE ECHO follows the procedures in Section + 5. + + + + + +Stewart, et al. Standards Track [Page 97] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + E) Rules for packet carrying a SHUTDOWN ACK + + - If the receiver is in COOKIE-ECHOED or COOKIE-WAIT state the + procedures in section 8.4 SHOULD be followed, in other words it + should be treated as an Out Of The Blue packet. + +9. Termination of Association + + An endpoint should terminate its association when it exits from + service. An association can be terminated by either abort or + shutdown. 
An abort of an association is abortive by definition in + that any data pending on either end of the association is discarded + and not delivered to the peer. A shutdown of an association is + considered a graceful close where all data in queue by either + endpoint is delivered to the respective peers. However, in the case + of a shutdown, SCTP does not support a half-open state (like TCP) + wherein one side may continue sending data while the other end is + closed. When either endpoint performs a shutdown, the association on + each peer will stop accepting new data from its user and only deliver + data in queue at the time of sending or receiving the SHUTDOWN chunk. + +9.1 Abort of an Association + + When an endpoint decides to abort an existing association, it shall + send an ABORT chunk to its peer endpoint. The sender MUST fill in + the peer's Verification Tag in the outbound packet and MUST NOT + bundle any DATA chunk with the ABORT. + + An endpoint MUST NOT respond to any received packet that contains an + ABORT chunk (also see Section 8.4). + + An endpoint receiving an ABORT shall apply the special Verification + Tag check rules described in Section 8.5.1. + + After checking the Verification Tag, the receiving endpoint shall + remove the association from its record, and shall report the + termination to its upper layer. + +9.2 Shutdown of an Association + + Using the SHUTDOWN primitive (see Section 10.1), the upper layer of + an endpoint in an association can gracefully close the association. + This will allow all outstanding DATA chunks from the peer of the + shutdown initiator to be delivered before the association terminates. + + Upon receipt of the SHUTDOWN primitive from its upper layer, the + endpoint enters SHUTDOWN-PENDING state and remains there until all + outstanding data has been acknowledged by its peer. The endpoint + + + +Stewart, et al. 
Standards Track [Page 98] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + accepts no new data from its upper layer, but retransmits data to the + far end if necessary to fill gaps. + + Once all its outstanding data has been acknowledged, the endpoint + shall send a SHUTDOWN chunk to its peer including in the Cumulative + TSN Ack field the last sequential TSN it has received from the peer. + It shall then start the T2-shutdown timer and enter the SHUTDOWN-SENT + state. If the timer expires, the endpoint must re-send the SHUTDOWN + with the updated last sequential TSN received from its peer. + + The rules in Section 6.3 MUST be followed to determine the proper + timer value for T2-shutdown. To indicate any gaps in TSN, the + endpoint may also bundle a SACK with the SHUTDOWN chunk in the same + SCTP packet. + + An endpoint should limit the number of retransmissions of the + SHUTDOWN chunk to the protocol parameter 'Association.Max.Retrans'. + If this threshold is exceeded the endpoint should destroy the TCB and + MUST report the peer endpoint unreachable to the upper layer (and + thus the association enters the CLOSED state). The reception of any + packet from its peer (i.e. as the peer sends all of its queued DATA + chunks) should clear the endpoint's retransmission count and restart + the T2-shutdown timer, giving its peer ample opportunity to transmit + all of its queued DATA chunks that have not yet been sent. + + Upon the reception of the SHUTDOWN, the peer endpoint shall + + - enter the SHUTDOWN-RECEIVED state, + + - stop accepting new data from its SCTP user, + + - verify, by checking the Cumulative TSN Ack field of the chunk, + that all its outstanding DATA chunks have been received by the + SHUTDOWN sender. + + Once an endpoint has reached the SHUTDOWN-RECEIVED state it MUST NOT + send a SHUTDOWN in response to a ULP request, and should discard + subsequent SHUTDOWN chunks.
+ + If there are still outstanding DATA chunks left, the SHUTDOWN + receiver shall continue to follow normal data transmission procedures + defined in Section 6 until all outstanding DATA chunks are + acknowledged; however, the SHUTDOWN receiver MUST NOT accept new data + from its SCTP user. + + While in SHUTDOWN-SENT state, the SHUTDOWN sender MUST immediately + respond to each received packet containing one or more DATA chunk(s) + with a SACK, a SHUTDOWN chunk, and restart the T2-shutdown timer. If + + + +Stewart, et al. Standards Track [Page 99] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + it has no more outstanding DATA chunks, the SHUTDOWN receiver shall + send a SHUTDOWN ACK and start a T2-shutdown timer of its own, + entering the SHUTDOWN-ACK-SENT state. If the timer expires, the + endpoint must re-send the SHUTDOWN ACK. + + The sender of the SHUTDOWN ACK should limit the number of + retransmissions of the SHUTDOWN ACK chunk to the protocol parameter + 'Association.Max.Retrans'. If this threshold is exceeded the endpoint + should destroy the TCB and may report the peer endpoint unreachable + to the upper layer (and thus the association enters the CLOSED + state). + + Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall stop + the T2-shutdown timer, send a SHUTDOWN COMPLETE chunk to its peer, + and remove all record of the association. + + Upon reception of the SHUTDOWN COMPLETE chunk the endpoint will + verify that it is in SHUTDOWN-ACK-SENT state; if it is not, the chunk + should be discarded. If the endpoint is in the SHUTDOWN-ACK-SENT + state the endpoint should stop the T2-shutdown timer and remove all + knowledge of the association (and thus the association enters the + CLOSED state). + + An endpoint SHOULD assure that all its outstanding DATA chunks have + been acknowledged before initiating the shutdown procedure.
+ + An endpoint should reject any new data request from its upper layer + if it is in SHUTDOWN-PENDING, SHUTDOWN-SENT, SHUTDOWN-RECEIVED, or + SHUTDOWN-ACK-SENT state. + + If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT + chunk (e.g., if the SHUTDOWN COMPLETE was lost) with source and + destination transport addresses (either in the IP addresses or in the + INIT chunk) that belong to this association, it should discard the + INIT chunk and retransmit the SHUTDOWN ACK chunk. + + Note: Receipt of an INIT with the same source and destination IP + addresses as used in transport addresses assigned to an endpoint but + with a different port number indicates the initialization of a + separate association. + + The sender of the INIT or COOKIE ECHO should respond to the receipt + of a SHUTDOWN-ACK with a stand-alone SHUTDOWN COMPLETE in an SCTP + packet with the Verification Tag field of its common header set to + the same tag that was received in the SHUTDOWN ACK packet. This is + considered an Out of the Blue packet as defined in Section 8.4. The + sender of the INIT lets T1-init continue running and remains in the + + + + +Stewart, et al. Standards Track [Page 100] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + COOKIE-WAIT or COOKIE-ECHOED state. Normal T1-init timer expiration + will cause the INIT or COOKIE chunk to be retransmitted and thus + start a new association. + + If a SHUTDOWN is received in COOKIE WAIT or COOKIE ECHOED states the + SHUTDOWN chunk SHOULD be silently discarded. + + If an endpoint is in SHUTDOWN-SENT state and receives a SHUTDOWN + chunk from its peer, the endpoint shall respond immediately with a + SHUTDOWN ACK to its peer, and move into a SHUTDOWN-ACK-SENT state + restarting its T2-shutdown timer. + + If an endpoint is in the SHUTDOWN-ACK-SENT state and receives a + SHUTDOWN ACK, it shall stop the T2-shutdown timer, send a SHUTDOWN + COMPLETE chunk to its peer, and remove all record of the association. 
+ +10. Interface with Upper Layer + + The Upper Layer Protocols (ULP) shall request for services by passing + primitives to SCTP and shall receive notifications from SCTP for + various events. + + The primitives and notifications described in this section should be + used as a guideline for implementing SCTP. The following functional + description of ULP interface primitives is shown for illustrative + purposes. Different SCTP implementations may have different ULP + interfaces. However, all SCTPs must provide a certain minimum set of + services to guarantee that all SCTP implementations can support the + same protocol hierarchy. + +10.1 ULP-to-SCTP + + The following sections functionally characterize a ULP/SCTP + interface. The notation used is similar to most procedure or + function calls in high level languages. + + The ULP primitives described below specify the basic functions the + SCTP must perform to support inter-process communication. Individual + implementations must define their own exact format, and may provide + combinations or subsets of the basic functions in single calls. + + A) Initialize + + Format: INITIALIZE ([local port], [local eligible address list]) -> + local SCTP instance name + + + + + + +Stewart, et al. Standards Track [Page 101] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + This primitive allows SCTP to initialize its internal data structures + and allocate necessary resources for setting up its operation + environment. Once SCTP is initialized, ULP can communicate directly + with other endpoints without re-invoking this primitive. + + SCTP will return a local SCTP instance name to the ULP. + + Mandatory attributes: + + None. + + Optional attributes: + + The following types of attributes may be passed along with the + primitive: + + o local port - SCTP port number, if ULP wants it to be specified; + + o local eligible address list - An address list that the local SCTP + endpoint should bind. 
By default, if an address list is not + included, all IP addresses assigned to the host should be used by + the local endpoint. + + IMPLEMENTATION NOTE: If this optional attribute is supported by an + implementation, it will be the responsibility of the implementation + to enforce that the IP source address field of any SCTP packets sent + out by this endpoint contains one of the IP addresses indicated in + the local eligible address list. + + B) Associate + + Format: ASSOCIATE(local SCTP instance name, destination transport addr, + outbound stream count) + -> association id [,destination transport addr list] [,outbound stream + count] + + This primitive allows the upper layer to initiate an association to a + specific peer endpoint. + + The peer endpoint shall be specified by one of the transport + addresses which defines the endpoint (see Section 1.4). If the local + SCTP instance has not been initialized, the ASSOCIATE is considered + an error. + + An association id, which is a local handle to the SCTP association, + will be returned on successful establishment of the association. If + SCTP is not able to open an SCTP association with the peer endpoint, + an error is returned. + + + +Stewart, et al. Standards Track [Page 102] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Other association parameters may be returned, including the complete + destination transport addresses of the peer as well as the outbound + stream count of the local endpoint. One of the transport address + from the returned destination addresses will be selected by the local + endpoint as default primary path for sending SCTP packets to this + peer. The returned "destination transport addr list" can be used by + the ULP to change the default primary path or to force sending a + packet to a specific transport address. 
+ + IMPLEMENTATION NOTE: If ASSOCIATE primitive is implemented as a + blocking function call, the ASSOCIATE primitive can return + association parameters in addition to the association id upon + successful establishment. If ASSOCIATE primitive is implemented as a + non-blocking call, only the association id shall be returned and + association parameters shall be passed using the COMMUNICATION UP + notification. + + Mandatory attributes: + + o local SCTP instance name - obtained from the INITIALIZE operation. + + o destination transport addr - specified as one of the transport + addresses of the peer endpoint with which the association is to be + established. + + o outbound stream count - the number of outbound streams the ULP + would like to open towards this peer endpoint. + + Optional attributes: + + None. + + C) Shutdown + + Format: SHUTDOWN(association id) + -> result + + Gracefully closes an association. Any locally queued user data will + be delivered to the peer. The association will be terminated only + after the peer acknowledges all the SCTP packets sent. A success + code will be returned on successful termination of the association. + If attempting to terminate the association results in a failure, an + error code shall be returned. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + + + +Stewart, et al. Standards Track [Page 103] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Optional attributes: + + None. + + D) Abort + + Format: ABORT(association id [, cause code]) + -> result + + Ungracefully closes an association. Any locally queued user data + will be discarded and an ABORT chunk is sent to the peer. A success + code will be returned on successful abortion of the association. If + attempting to abort the association results in a failure, an error + code shall be returned. 
+ + Mandatory attributes: + + o association id - local handle to the SCTP association + + Optional attributes: + + o cause code - reason of the abort to be passed to the peer. + + None. + + E) Send + + Format: SEND(association id, buffer address, byte count [,context] + [,stream id] [,life time] [,destination transport address] + [,unorder flag] [,no-bundle flag] [,payload protocol-id] ) + -> result + + This is the main method to send user data via SCTP. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o buffer address - the location where the user message to be + transmitted is stored; + + o byte count - The size of the user data in number of bytes; + + Optional attributes: + + o context - an optional 32 bit integer that will be carried in the + sending failure notification to the ULP if the transportation of + this User Message fails. + + + +Stewart, et al. Standards Track [Page 104] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o stream id - to indicate which stream to send the data on. If not + specified, stream 0 will be used. + + o life time - specifies the life time of the user data. The user + data will not be sent by SCTP after the life time expires. This + parameter can be used to avoid efforts to transmit stale user + messages. SCTP notifies the ULP if the data cannot be initiated + to transport (i.e. sent to the destination via SCTP's send + primitive) within the life time variable. However, the user data + will be transmitted if SCTP has attempted to transmit a chunk + before the life time expired. + + IMPLEMENTATION NOTE: In order to better support the data lifetime + option, the transmitter may hold back the assigning of the TSN number + to an outbound DATA chunk to the last moment. And, for + implementation simplicity, once a TSN number has been assigned the + sender should consider the send of this DATA chunk as committed, + overriding any lifetime option attached to the DATA chunk. 
+ + o destination transport address - specified as one of the + destination transport addresses of the peer endpoint to which this + packet should be sent. Whenever possible, SCTP should use this + destination transport address for sending the packets, instead of + the current primary path. + + o unorder flag - this flag, if present, indicates that the user + would like the data delivered in an unordered fashion to the peer + (i.e., the U flag is set to 1 on all DATA chunks carrying this + message). + + o no-bundle flag - instructs SCTP not to bundle this user data with + other outbound DATA chunks. SCTP MAY still bundle even when this + flag is present, when faced with network congestion. + + o payload protocol-id - A 32 bit unsigned integer that is to be + passed to the peer indicating the type of payload protocol data + being transmitted. This value is passed as opaque data by SCTP. + + F) Set Primary + + Format: SETPRIMARY(association id, destination transport address, + [source transport address] ) + -> result + + Instructs the local SCTP to use the specified destination transport + address as primary path for sending packets. + + + + + +Stewart, et al. Standards Track [Page 105] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + The result of attempting this operation shall be returned. If the + specified destination transport address is not present in the + "destination transport address list" returned earlier in an associate + command or communication up notification, an error shall be returned. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - specified as one of the transport + addresses of the peer endpoint, which should be used as primary + address for sending packets. This overrides the current primary + address information maintained by the local SCTP endpoint. 
+ + Optional attributes: + + o source transport address - optionally, some implementations may + allow you to set the default source address placed in all outgoing + IP datagrams. + + G) Receive + + Format: RECEIVE(association id, buffer address, buffer size + [,stream id]) + -> byte count [,transport address] [,stream id] [,stream sequence + number] [,partial flag] [,delivery number] [,payload protocol-id] + + This primitive shall read the first user message in the SCTP in-queue + into the buffer specified by ULP, if there is one available. The + size of the message read, in bytes, will be returned. It may, + depending on the specific implementation, also return other + information such as the sender's address, the stream id on which it + is received, whether there are more messages available for retrieval, + etc. For ordered messages, their stream sequence number may also be + returned. + + Depending upon the implementation, if this primitive is invoked when + no message is available the implementation should return an + indication of this condition or should block the invoking process + until data does become available. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o buffer address - the memory location indicated by the ULP to store + the received message. + + + + +Stewart, et al. Standards Track [Page 106] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - to indicate which stream to receive the data on. + + o stream sequence number - the stream sequence number assigned by + the sending SCTP peer. + + o partial flag - if this returned flag is set to 1, then this + Receive contains a partial delivery of the whole message. When + this flag is set, the stream id and stream sequence number MUST + accompany this receive. 
When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - A 32 bit unsigned integer that is received + from the peer indicating the type of payload protocol of the + received data. This value is passed as opaque data by SCTP. + + H) Status + + Format: STATUS(association id) + -> status data + + This primitive should return a data block containing the following + information: + association connection state, + destination transport address list, + destination transport address reachability states, + current receiver window size, + current congestion window sizes, + number of unacknowledged DATA chunks, + number of DATA chunks pending receipt, + primary path, + most recent SRTT on primary path, + RTO on primary path, + SRTT and RTO on other destination addresses, etc. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + Optional attributes: + + None. + + + + + +Stewart, et al. Standards Track [Page 107] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + I) Change Heartbeat + + Format: CHANGEHEARTBEAT(association id, destination transport address, + new state [,interval]) + -> result + + Instructs the local endpoint to enable or disable heartbeat on the + specified destination transport address. + + The result of attempting this operation shall be returned. + + Note: Even when enabled, heartbeat will not take place if the + destination transport address is not idle. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - specified as one of the transport + addresses of the peer endpoint. + + o new state - the new state of heartbeat for this destination + transport address (either enabled or disabled). + + Optional attributes: + + o interval - if present, indicates the frequency of the heartbeat if + this is to enable heartbeat on a destination transport address. 
+ This value is added to the RTO of the destination transport + address. This value, if present, affects all destinations. + + J) Request HeartBeat + + Format: REQUESTHEARTBEAT(association id, destination transport + address) + -> result + + Instructs the local endpoint to perform a HeartBeat on the specified + destination transport address of the given association. The returned + result should indicate whether the transmission of the HEARTBEAT + chunk to the destination address is successful. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which a heartbeat should be issued. + + + +Stewart, et al. Standards Track [Page 108] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + K) Get SRTT Report + + Format: GETSRTTREPORT(association id, destination transport address) + -> srtt result + + Instructs the local SCTP to report the current SRTT measurement on + the specified destination transport address of the given association. + The returned result can be an integer containing the most recent SRTT + in milliseconds. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which the SRTT measurement is to be reported. + + L) Set Failure Threshold + + Format: SETFAILURETHRESHOLD(association id, destination transport + address, failure threshold) + -> result + + This primitive allows the local SCTP to customize the reachability + failure detection threshold 'Path.Max.Retrans' for the specified + destination address. + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o destination transport address - the transport address of the + association on which the failure detection threshold is to be set. + + o failure threshold - the new value of 'Path.Max.Retrans' for the + destination address.
+ + M) Set Protocol Parameters + + Format: SETPROTOCOLPARAMETERS(association id, [,destination transport + address,] protocol parameter list) + -> result + + This primitive allows the local SCTP to customize the protocol + parameters. + + + + + + +Stewart, et al. Standards Track [Page 109] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Mandatory attributes: + + o association id - local handle to the SCTP association + + o protocol parameter list - The specific names and values of the + protocol parameters (e.g., Association.Max.Retrans [see Section + 14]) that the SCTP user wishes to customize. + + Optional attributes: + + o destination transport address - some of the protocol parameters + may be set on a per destination transport address basis. + + N) Receive unsent message + + Format: RECEIVE_UNSENT(data retrieval id, buffer address, buffer size + [,stream id] [, stream sequence number] [,partial flag] + [,payload protocol-id]) + + o data retrieval id - The identification passed to the ULP in the + failure notification. + + o buffer address - the memory location indicated by the ULP to store + the received message. + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - this is a return value that is set to indicate + which stream the data was sent to. + + o stream sequence number - this value is returned indicating + the stream sequence number that was associated with the message. + + o partial flag - if this returned flag is set to 1, then this + message is a partial delivery of the whole message. When + this flag is set, the stream id and stream sequence number MUST + accompany this receive. When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - The 32 bit unsigned integer that was sent to + be sent to the peer indicating the type of payload protocol of the + received data. 
+ + + + + + +Stewart, et al. Standards Track [Page 110] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + O) Receive unacknowledged message + + Format: RECEIVE_UNACKED(data retrieval id, buffer address, buffer size, + [,stream id] [, stream sequence number] [,partial flag] + [,payload protocol-id]) + + o data retrieval id - The identification passed to the ULP in the + failure notification. + + o buffer address - the memory location indicated by the ULP to store + the received message. + + o buffer size - the maximum size of data to be received, in bytes. + + Optional attributes: + + o stream id - this is a return value that is set to indicate which + stream the data was sent to. + + o stream sequence number - this value is returned indicating the + stream sequence number that was associated with the message. + + o partial flag - if this returned flag is set to 1, then this + message is a partial delivery of the whole message. When this + flag is set, the stream id and stream sequence number MUST + accompany this receive. When this flag is set to 0, it indicates + that no more deliveries will be received for this stream sequence + number. + + o payload protocol-id - The 32 bit unsigned integer that was sent to + be sent to the peer indicating the type of payload protocol of the + received data. + + P) Destroy SCTP instance + + Format: DESTROY(local SCTP instance name) + + o local SCTP instance name - this is the value that was passed to + the application in the initialize primitive and it indicates which + SCTP instance to be destroyed. + +10.2 SCTP-to-ULP + + It is assumed that the operating system or application environment + provides a means for the SCTP to asynchronously signal the ULP + process. When SCTP does signal an ULP process, certain information + is passed to the ULP. + + + + +Stewart, et al. 
Standards Track [Page 111] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + IMPLEMENTATION NOTE: In some cases this may be done through a + separate socket or error channel. + + A) DATA ARRIVE notification + + SCTP shall invoke this notification on the ULP when a user message is + successfully received and ready for retrieval. + + The following may optionally be passed with the notification: + + o association id - local handle to the SCTP association + + o stream id - to indicate which stream the data is received on. + + B) SEND FAILURE notification + + If a message cannot be delivered, SCTP shall invoke this notification + on the ULP. + + The following may optionally be passed with the notification: + + o association id - local handle to the SCTP association + + o data retrieval id - an identification used to retrieve unsent and + unacknowledged data. + + o cause code - indicating the reason of the failure, e.g., size too + large, message life-time expiration, etc. + + o context - optional information associated with this message (see E + in Section 10.1). + + C) NETWORK STATUS CHANGE notification + + When a destination transport address is marked inactive (e.g., when + SCTP detects a failure), or marked active (e.g., when SCTP detects a + recovery), SCTP shall invoke this notification on the ULP. + + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o destination transport address - This indicates the destination + transport address of the peer endpoint affected by the change; + + o new-status - This indicates the new status. + + + + + +Stewart, et al. Standards Track [Page 112] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + D) COMMUNICATION UP notification + + This notification is used when SCTP becomes ready to send or receive + user messages, or when a lost communication to an endpoint is + restored.
+ + IMPLEMENTATION NOTE: If ASSOCIATE primitive is implemented as a + blocking function call, the association parameters are returned as a + result of the ASSOCIATE primitive itself. In that case, + COMMUNICATION UP notification is optional at the association + initiator's side. + + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o status - This indicates what type of event has occurred + + o destination transport address list - the complete set of transport + addresses of the peer + + o outbound stream count - the maximum number of streams allowed to + be used in this association by the ULP + + o inbound stream count - the number of streams the peer endpoint has + requested with this association (this may not be the same number + as 'outbound stream count'). + + E) COMMUNICATION LOST notification + + When SCTP loses communication to an endpoint completely (e.g., via + Heartbeats) or detects that the endpoint has performed an abort + operation, it shall invoke this notification on the ULP. + + The following shall be passed with the notification: + + o association id - local handle to the SCTP association + + o status - This indicates what type of event has occurred; The status + may indicate a failure OR a normal termination event + occurred in response to a shutdown or abort request. + + The following may be passed with the notification: + + o data retrieval id - an identification used to retrieve unsent and + unacknowledged data. + + o last-acked - the TSN last acked by that peer endpoint; + + + +Stewart, et al. Standards Track [Page 113] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + o last-sent - the TSN last sent to that peer endpoint; + + F) COMMUNICATION ERROR notification + + When SCTP receives an ERROR chunk from its peer and decides to notify + its ULP, it can invoke this notification on the ULP. 
+ + The following can be passed with the notification: + + o association id - local handle to the SCTP association + + o error info - this indicates the type of error and optionally some + additional information received through the ERROR chunk. + + G) RESTART notification + + When SCTP detects that the peer has restarted, it may send this + notification to its ULP. + + The following can be passed with the notification: + + o association id - local handle to the SCTP association + + H) SHUTDOWN COMPLETE notification + + When SCTP completes the shutdown procedures (section 9.2) this + notification is passed to the upper layer. + + The following can be passed with the notification: + + o association id - local handle to the SCTP association + +11. Security Considerations + +11.1 Security Objectives + + As a common transport protocol designed to reliably carry time- + sensitive user messages, such as billing or signaling messages for + telephony services, between two networked endpoints, SCTP has the + following security objectives. + + - availability of reliable and timely data transport services + - integrity of the user-to-user information carried by SCTP + + + + + + + + +Stewart, et al. Standards Track [Page 114] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +11.2 SCTP Responses To Potential Threats + + SCTP may potentially be used in a wide variety of risk situations. + It is important for operator(s) of systems running SCTP to analyze + their particular situations and decide on the appropriate counter- + measures. + + Operators of systems running SCTP should consult [RFC2196] for + guidance in securing their site. + +11.2.1 Countering Insider Attacks + + The principles of [RFC2196] should be applied to minimize the risk of + theft of information or sabotage by insiders. Such procedures + include publication of security policies, control of access at the + physical, software, and network levels, and separation of services. 
+ +11.2.2 Protecting against Data Corruption in the Network + + Where the risk of undetected errors in datagrams delivered by the + lower layer transport services is considered to be too great, + additional integrity protection is required. If this additional + protection were provided in the application-layer, the SCTP header + would remain vulnerable to deliberate integrity attacks. While the + existing SCTP mechanisms for detection of packet replays are + considered sufficient for normal operation, stronger protections are + needed to protect SCTP when the operating environment contains + significant risk of deliberate attacks from a sophisticated + adversary. + + In order to promote software code-reuse, to avoid re-inventing the + wheel, and to avoid gratuitous complexity to SCTP, the IP + Authentication Header [RFC2402] SHOULD be used when the threat + environment requires stronger integrity protections, but does not + require confidentiality. + + A widely implemented BSD Sockets API extension exists for + applications to request IP security services, such as AH or ESP from + an operating system kernel. Applications can use such an API to + request AH whenever AH use is appropriate. + +11.2.3 Protecting Confidentiality + + In most cases, the risk of breach of confidentiality applies to the + signaling data payload, not to the SCTP or lower-layer protocol + overheads. If that is true, encryption of the SCTP user data only + might be considered. As with the supplementary checksum service, + user data encryption MAY be performed by the SCTP user application. + + + +Stewart, et al. Standards Track [Page 115] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Alternately, the user application MAY use an implementation-specific + API to request that the IP Encapsulating Security Payload (ESP) + [RFC2406] be used to provide confidentiality and integrity. 
+ + Particularly for mobile users, the requirement for confidentiality + might include the masking of IP addresses and ports. In this case + ESP SHOULD be used instead of application-level confidentiality. If + ESP is used to protect confidentiality of SCTP traffic, an ESP + cryptographic transform that includes cryptographic integrity + protection MUST be used, because if there is a confidentiality threat + there will also be a strong integrity threat. + + Whenever ESP is in use, application-level encryption is not generally + required. + + Regardless of where confidentiality is provided, the ISAKMP [RFC2408] + and the Internet Key Exchange (IKE) [RFC2409] SHOULD be used for key + management. + + Operators should consult [RFC2401] for more information on the + security services available at and immediately above the Internet + Protocol layer. + +11.2.4 Protecting against Blind Denial of Service Attacks + + A blind attack is one where the attacker is unable to intercept or + otherwise see the content of data flows passing to and from the + target SCTP node. Blind denial of service attacks may take the form + of flooding, masquerade, or improper monopolization of services. + + Flooding + + The objective of flooding is to cause loss of service and incorrect + behavior at target systems through resource exhaustion, interference + with legitimate transactions, and exploitation of buffer-related + software bugs. Flooding may be directed either at the SCTP node or + at resources in the intervening IP Access Links or the Internet. + Where the latter entities are the target, flooding will manifest + itself as loss of network services, including potentially the breach + of any firewalls in place. + + In general, protection against flooding begins at the equipment + design level, where it includes measures such as: + + - avoiding commitment of limited resources before determining that + the request for service is legitimate + + + + + +Stewart, et al. 
Standards Track [Page 116] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + - giving priority to completion of processing in progress over the + acceptance of new work + + - identification and removal of duplicate or stale queued requests + for service. + + - not responding to unexpected packets sent to non-unicast + addresses. + + Network equipment should be capable of generating an alarm and log if + a suspicious increase in traffic occurs. The log should provide + information such as the identity of the incoming link and source + address(es) used which will help the network or SCTP system operator + to take protective measures. Procedures should be in place for the + operator to act on such alarms if a clear pattern of abuse emerges. + + The design of SCTP is resistant to flooding attacks, particularly in + its use of a four-way start-up handshake, its use of a cookie to + defer commitment of resources at the responding SCTP node until the + handshake is completed, and its use of a Verification Tag to prevent + insertion of extraneous packets into the flow of an established + association. + + The IP Authentication Header and Encapsulating Security Payload might + be useful in reducing the risk of certain kinds of denial of service + attacks. + + The use of the Host Name feature in the INIT chunk could be used to + flood a target DNS server. A large backlog of DNS queries, resolving + the Host Name received in the INIT chunk to IP addresses, could be + accomplished by sending INITs to multiple hosts in a given domain. + In addition, an attacker could use the Host Name feature in an + indirect attack on a third party by sending large numbers of INITs to + random hosts containing the host name of the target. In addition to + the strain on DNS resources, this could also result in large numbers + of INIT ACKs being sent to the target.
One method to protect against + this type of attack is to verify that the IP addresses received from + DNS include the source IP address of the original INIT. If the list + of IP addresses received from DNS does not include the source IP + address of the INIT, the endpoint MAY silently discard the INIT. + This last option will not protect against the attack against the DNS. + + + + + + + + + + +Stewart, et al. Standards Track [Page 117] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Blind Masquerade + + Masquerade can be used to deny service in several ways: + + - by tying up resources at the target SCTP node to which the + impersonated node has limited access. For example, the target + node may by policy permit a maximum of one SCTP association with + the impersonated SCTP node. The masquerading attacker may attempt + to establish an association purporting to come from the + impersonated node so that the latter cannot do so when it requires + it. + + - by deliberately allowing the impersonation to be detected, thereby + provoking counter-measures which cause the impersonated node to be + locked out of the target SCTP node. + + - by interfering with an established association by inserting + extraneous content such as a SHUTDOWN request. + + SCTP reduces the risk of blind masquerade attacks through IP spoofing + by use of the four-way startup handshake. Man-in-the-middle + masquerade attacks are discussed in Section 11.3 below. Because the + initial exchange is memoryless, no lockout mechanism is triggered by + blind masquerade attacks. In addition, the INIT ACK containing the + State Cookie is transmitted back to the IP address from which it + received the INIT. Thus the attacker would not receive the INIT ACK + containing the State Cookie. SCTP protects against insertion of + extraneous packets into the flow of an established association by use + of the Verification Tag. 
+ + Logging of received INIT requests and abnormalities such as + unexpected INIT ACKs might be considered as a way to detect patterns + of hostile activity. However, the potential usefulness of such + logging must be weighed against the increased SCTP startup processing + it implies, rendering the SCTP node more vulnerable to flooding + attacks. Logging is pointless without the establishment of operating + procedures to review and analyze the logs on a routine basis. + + Improper Monopolization of Services + + Attacks under this heading are performed openly and legitimately by + the attacker. They are directed against fellow users of the target + SCTP node or of the shared resources between the attacker and the + target node. Possible attacks include the opening of a large number + of associations between the attacker's node and the target, or + transfer of large volumes of information within a legitimately- + established association. + + + + +Stewart, et al. Standards Track [Page 118] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Policy limits should be placed on the number of associations per + adjoining SCTP node. SCTP user applications should be capable of + detecting large volumes of illegitimate or "no-op" messages within a + given association and either logging or terminating the association + as a result, based on local policy. + +11.3 Protection against Fraud and Repudiation + + The objective of fraud is to obtain services without authorization + and specifically without paying for them. In order to achieve this + objective, the attacker must induce the SCTP user application at the + target SCTP node to provide the desired service while accepting + invalid billing data or failing to collect it. Repudiation is a + related problem, since it may occur as a deliberate act of fraud or + simply because the repudiating party kept inadequate records of + service received. 
+ + Potential fraudulent attacks include interception and misuse of + authorizing information such as credit card numbers, blind masquerade + and replay, and man-in-the-middle attacks which modify the packets + passing through a target SCTP association in real time. + + The interception attack is countered by the confidentiality measures + discussed in Section 11.2.3 above. + + Section 11.2.4 describes how SCTP is resistant to blind masquerade + attacks, as a result of the four-way startup handshake and the + Verification Tag. The Verification Tag and TSN together are + protections against blind replay attacks, where the replay is into an + existing association. + + However, SCTP does not protect against man-in-the-middle attacks + where the attacker is able to intercept and alter the packets sent + and received in an association. For example, the INIT ACK will have + sufficient information sent on the wire for an adversary in the + middle to hijack an existing SCTP association. Where a significant + possibility of such attacks is seen to exist, or where possible + repudiation is an issue, the use of the IPSEC AH service is + recommended to ensure both the integrity and the authenticity of the + SCTP packets passed. + + SCTP also provides no protection against attacks originating at or + beyond the SCTP node and taking place within the context of an + existing association. Prevention of such attacks should be covered + by appropriate security policies at the host site, as discussed in + Section 11.2.1. + + + + + +Stewart, et al. Standards Track [Page 119] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +12. Recommended Transmission Control Block (TCB) Parameters + + This section details a recommended set of parameters that should be + contained within the TCB for an implementation. This section is for + illustrative purposes and should not be deemed as requirements on an + implementation or as an exhaustive list of all parameters inside an + SCTP TCB.
Each implementation may need its own additional parameters + for optimization. + +12.1 Parameters necessary for the SCTP instance + + Associations: A list of current associations and mappings to the data + consumers for each association. This may be in the + form of a hash table or other implementation dependent + structure. The data consumers may be process + identification information such as file descriptors, + named pipe pointer, or table pointers dependent on how + SCTP is implemented. + + Secret Key: A secret key used by this endpoint to compute the MAC. + This SHOULD be a cryptographic quality random number + with a sufficient length. Discussion in [RFC1750] can + be helpful in selection of the key. + + Address List: The list of IP addresses that this instance has bound. + This information is passed to one's peer(s) in INIT and + INIT ACK chunks. + + SCTP Port: The local SCTP port number the endpoint is bound to. + +12.2 Parameters necessary per association (i.e. the TCB) + + Peer : Tag value to be sent in every packet and is received + Verification: in the INIT or INIT ACK chunk. + Tag : + + My : Tag expected in every inbound packet and sent in the + Verification: INIT or INIT ACK chunk. + Tag : + + State : A state variable indicating what state the association + : is in, i.e. COOKIE-WAIT, COOKIE-ECHOED, ESTABLISHED, + : SHUTDOWN-PENDING, SHUTDOWN-SENT, SHUTDOWN-RECEIVED, + : SHUTDOWN-ACK-SENT. + + Note: No "CLOSED" state is illustrated since if an + association is "CLOSED" its TCB SHOULD be removed. + + + + +Stewart, et al. Standards Track [Page 120] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Peer : A list of SCTP transport addresses that the peer is + Transport : bound to. This information is derived from the INIT or + Address : INIT ACK and is used to associate an inbound packet + List : with a given association. Normally this information is + : hashed or keyed for quick lookup and access of the TCB.
+ + Primary : This is the current primary destination transport + Path : address of the peer endpoint. It may also specify a + : source transport address on this endpoint. + + Overall : The overall association error count. + Error Count : + + Overall : The threshold for this association that if the Overall + Error : Error Count reaches will cause this association to be + Threshold : torn down. + + Peer Rwnd : Current calculated value of the peer's rwnd. + + Next TSN : The next TSN number to be assigned to a new DATA chunk. + : This is sent in the INIT or INIT ACK chunk to the peer + : and incremented each time a DATA chunk is assigned a + : TSN (normally just prior to transmit or during + : fragmentation). + + Last Rcvd : This is the last TSN received in sequence. This value + TSN : is set initially by taking the peer's Initial TSN, + : received in the INIT or INIT ACK chunk, and + : subtracting one from it. + + Mapping : An array of bits or bytes indicating which out of + Array : order TSN's have been received (relative to the + : Last Rcvd TSN). If no gaps exist, i.e. no out of order + : packets have been received, this array will be set to + : all zero. This structure may be in the form of a + : circular buffer or bit array. + + Ack State : This flag indicates if the next received packet + : is to be responded to with a SACK. This is initialized + : to 0. When a packet is received it is incremented. + : If this value reaches 2 or more, a SACK is sent and the + : value is reset to 0. Note: This is used only when no + : DATA chunks are received out of order. When DATA chunks + : are out of order, SACK's are not delayed (see Section + : 6). + + + + + + +Stewart, et al. Standards Track [Page 121] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Inbound : An array of structures to track the inbound streams. + Streams : Normally including the next sequence number expected + : and possibly the stream number. 
+ + Outbound : An array of structures to track the outbound streams. + Streams : Normally including the next sequence number to + : be sent on the stream. + + Reasm Queue : A re-assembly queue. + + Local : The list of local IP addresses bound in to this + Transport : association. + Address : + List : + + Association : The smallest PMTU discovered for all of the + PMTU : peer's transport addresses. + +12.3 Per Transport Address Data + + For each destination transport address in the peer's address list + derived from the INIT or INIT ACK chunk, a number of data elements + needs to be maintained including: + + Error count : The current error count for this destination. + + Error : Current error threshold for this destination i.e. + Threshold : what value marks the destination down if Error count + : reaches this value. + + cwnd : The current congestion window. + + ssthresh : The current ssthresh value. + + RTO : The current retransmission timeout value. + + SRTT : The current smoothed round trip time. + + RTTVAR : The current RTT variation. + + partial : The tracking method for increase of cwnd when in + bytes acked : congestion avoidance mode (see Section 7.2.2) + + state : The current state of this destination, i.e. DOWN, UP, + : ALLOW-HB, NO-HEARTBEAT, etc. + + PMTU : The current known path MTU. + + + + +Stewart, et al. Standards Track [Page 122] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + Per : A timer used by each destination. + Destination : + Timer : + + RTO-Pending : A flag used to track if one of the DATA chunks sent to + this address is currently being used to compute a + RTT. If this flag is 0, the next DATA chunk sent to this + destination should be used to compute a RTT and this + flag should be set. Every time the RTT calculation + completes (i.e. the DATA chunk is SACK'd) clear this + flag. + + last-time : The time this destination was last sent to. This can be + used : used to determine if a HEARTBEAT is needed.
+ +12.4 General Parameters Needed + + Out Queue : A queue of outbound DATA chunks. + + In Queue : A queue of inbound DATA chunks. + +13. IANA Considerations + + This protocol will require port reservation like TCP for the use of + "well known" servers within the Internet. All current TCP ports + shall be automatically reserved in the SCTP port address space. New + requests should follow IANA's current mechanisms for TCP. + + This protocol may also be extended through IANA in three ways: + + -- through definition of additional chunk types, + -- through definition of additional parameter types, or + -- through definition of additional cause codes within + ERROR chunks + + In the case where a particular ULP using SCTP desires to have its own + ports, the ULP should be responsible for registering with IANA for + getting its ports assigned. + +13.1 IETF-defined Chunk Extension + + The definition and use of new chunk types is an integral part of + SCTP. Thus, new chunk types are assigned by IANA through an IETF + Consensus action as defined in [RFC2434]. + + The documentation for a new chunk code type must include the + following information: + + + + +Stewart, et al. Standards Track [Page 123] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + a) A long and short name for the new chunk type; + + b) A detailed description of the structure of the chunk, which MUST + conform to the basic structure defined in Section 3.2; + + c) A detailed definition and description of intended use of each + field within the chunk, including the chunk flags if any; + + d) A detailed procedural description of the use of the new chunk type + within the operation of the protocol. + + The last chunk type (255) is reserved for future extension if + necessary. + +13.2 IETF-defined Chunk Parameter Extension + + The assignment of new chunk parameter type codes is done through an + IETF Consensus action as defined in [RFC2434]. 
Documentation of the + chunk parameter MUST contain the following information: + + a) Name of the parameter type. + + b) Detailed description of the structure of the parameter field. + This structure MUST conform to the general type-length-value + format described in Section 3.2.1. + + c) Detailed definition of each component of the parameter value. + + d) Detailed description of the intended use of this parameter type, + and an indication of whether and under what circumstances multiple + instances of this parameter type may be found within the same + chunk. + +13.3 IETF-defined Additional Error Causes + + Additional cause codes may be allocated in the range 11 to 65535 + through a Specification Required action as defined in [RFC2434]. + Provided documentation must include the following information: + + a) Name of the error condition. + + b) Detailed description of the conditions under which an SCTP + endpoint should issue an ERROR (or ABORT) with this cause code. + + c) Expected action by the SCTP endpoint which receives an ERROR (or + ABORT) chunk containing this cause code. + + + + + +Stewart, et al. Standards Track [Page 124] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + d) Detailed description of the structure and content of data fields + which accompany this cause code. + + The initial word (32 bits) of a cause code parameter MUST conform to + the format shown in Section 3.3.10, i.e.: + + -- first two bytes contain the cause code value + -- last two bytes contain length of the Cause Parameter. + +13.4 Payload Protocol Identifiers + + Except for value 0 which is reserved by SCTP to indicate an + unspecified payload protocol identifier in a DATA chunk, SCTP will + not be responsible for standardizing or verifying any payload + protocol identifiers; SCTP simply receives the identifier from the + upper layer and carries it with the corresponding payload data. 
+ + The upper layer, i.e., the SCTP user, SHOULD standardize any specific + protocol identifier with IANA if it is so desired. The use of any + specific payload protocol identifier is out of the scope of SCTP. + +14. Suggested SCTP Protocol Parameter Values + + The following protocol parameters are RECOMMENDED: + + RTO.Initial - 3 seconds + RTO.Min - 1 second + RTO.Max - 60 seconds + RTO.Alpha - 1/8 + RTO.Beta - 1/4 + Valid.Cookie.Life - 60 seconds + Association.Max.Retrans - 10 attempts + Path.Max.Retrans - 5 attempts (per destination address) + Max.Init.Retransmits - 8 attempts + HB.interval - 30 seconds + + IMPLEMENTATION NOTE: The SCTP implementation may allow ULP to + customize some of these protocol parameters (see Section 10). + + Note: RTO.Min SHOULD be set as recommended above. + + + + + + + + + + + +Stewart, et al. Standards Track [Page 125] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +15. Acknowledgements + + The authors wish to thank Mark Allman, R.J. Atkinson, Richard Band, + Scott Bradner, Steve Bellovin, Peter Butler, Ram Dantu, R. + Ezhirpavai, Mike Fisk, Sally Floyd, Atsushi Fukumoto, Matt Holdrege, + Henry Houh, Christian Huitema, Gary Lehecka, Jonathan Lee, David + Lehmann, John Loughney, Daniel Luan, Barry Nagelberg, Thomas Narten, + Erik Nordmark, Lyndon Ong, Shyamal Prasad, Kelvin Porter, Heinz + Prantner, Jarno Rajahalme, Raymond E. Reeves, Renee Revis, Ivan Arias + Rodriguez, A. Sankar, Greg Sidebottom, Brian Wyld, La Monte Yarroll, + and many others for their invaluable comments. + +16. Authors' Addresses + + Randall R. Stewart + 24 Burning Bush Trail. + Crystal Lake, IL 60012 + USA + + Phone: +1-815-477-2127 + EMail: rrs@cisco.com + + + Qiaobing Xie + Motorola, Inc. + 1501 W. Shure Drive, #2309 + Arlington Heights, IL 60004 + USA + + Phone: +1-847-632-3028 + EMail: qxie1@email.mot.com + + + Ken Morneault + Cisco Systems Inc. + 13615 Dulles Technology Drive + Herndon, VA. 
20171 + USA + + Phone: +1-703-484-3323 + EMail: kmorneau@cisco.com + + + + + + + + + + +Stewart, et al. Postel, "Assigned Numbers", STD 2, RFC + 1700, October 1994. + + [RFC1981] McCann, J., Deering, S. and J. Mogul, "Path MTU Discovery + for IP version 6", RFC 1981, August 1996. + + + + +Stewart, et al. Standards Track [Page 128] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + [RFC1982] Elz, R. and R. Bush, "Serial Number Arithmetic", RFC 1982, + August 1996. + + [RFC2026] Bradner, S., "The Internet Standards Process -- Revision + 3", BCP 9, RFC 2026, October 1996. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2401] Kent, S. and R. Atkinson, "Security Architecture for the + Internet Protocol", RFC 2401, November 1998. + + [RFC2402] Kent, S. and R. Atkinson, "IP Authentication Header", RFC + 2402, November 1998. + + [RFC2406] Kent, S. and R. Atkinson, "IP Encapsulating Security + Payload (ESP)", RFC 2406, November 1998. + + [RFC2408] Maughan, D., Schertler, M., Schneider, M. and J. Turner, + "Internet Security Association and Key Management + Protocol", RFC 2408, November 1998. + + [RFC2409] Harkins, D. and D. Carrel, "The Internet Key Exchange + (IKE)", RFC 2409, November 1998. + + [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October 1998. + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + + [RFC2581] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion + Control", RFC 2581, April 1999. + +18. Bibliography + + [ALLMAN99] Allman, M. and Paxson, V., "On Estimating End-to-End + Network Path Properties", Proc. SIGCOMM'99, 1999. + + [FALL96] Fall, K. and Floyd, S., Simulation-based Comparisons of + Tahoe, Reno, and SACK TCP, Computer Communications Review, + V. 26 N. 3, July 1996, pp. 5-21. + + [RFC1750] Eastlake, D. 
To indicate that an endpoint is ECN capable + an endpoint SHOULD add to the INIT and or INIT ACK chunk the TLV + reserved for ECN. This TLV contains no parameters, and thus has the + following format: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Parameter Type = 32768 | Parameter Length = 4 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + ECN-Echo: + + RFC 2481 details a specific bit for a receiver to send back in its + TCP acknowledgements to notify the sender of the Congestion + Experienced (CE) bit having arrived from the network. For SCTP this + same indication is made by including the ECNE chunk. This chunk + contains one data element, i.e. the lowest TSN associated with the IP + datagram marked with the CE bit, and looks as follows: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type=12 | Flags=00000000| Chunk Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Lowest TSN Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Note: The ECNE is considered a Control chunk. + + + + + +Stewart, et al. Standards Track [Page 131] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + CWR: + + RFC 2481 details a specific bit for a sender to send in the header of + its next outbound TCP segment to indicate to its peer that it has + reduced its congestion window. This is termed the CWR bit. For + SCTP the same indication is made by including the CWR chunk. + This chunk contains one data element, i.e. the TSN number that + was sent in the ECNE chunk. This element represents the lowest + TSN number in the datagram that was originally marked with the + CE bit. 
+ + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Chunk Type=13 | Flags=00000000| Chunk Length = 8 | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Lowest TSN Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Note: The CWR is considered a Control chunk. + +Appendix B Adler 32 bit checksum calculation + + The Adler-32 checksum calculation given in this appendix is copied from + [RFC1950]. + + Adler-32 is composed of two sums accumulated per byte: s1 is the sum + of all bytes, s2 is the sum of all s1 values. Both sums are done + modulo 65521. s1 is initialized to 1, s2 to zero. The Adler-32 + checksum is stored as s2*65536 + s1 in network byte order. + + The following C code computes the Adler-32 checksum of a data buffer. + It is written for clarity, not for speed. The sample code is in the + ANSI C programming language. Non C users may find it easier to read + with these hints: + + + + + + + + + + + + + + + + +Stewart, et al. Standards Track [Page 132] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + + & Bitwise AND operator. + >> Bitwise right shift operator. When applied to an + unsigned quantity, as here, right shift inserts zero bit(s) + at the left. + << Bitwise left shift operator. Left shift inserts zero + bit(s) at the right. + ++ "n++" increments the variable n. + % modulo operator: a % b is the remainder of a divided by b. + #define BASE 65521 /* largest prime smaller than 65536 */ + /* + Update a running Adler-32 checksum with the bytes buf[0..len-1] + and return the updated checksum. The Adler-32 checksum should be + initialized to 1.
+ + Usage example: + + unsigned long adler = 1L; + + while (read_buffer(buffer, length) != EOF) { + adler = update_adler32(adler, buffer, length); + } + if (adler != original_adler) error(); + */ + unsigned long update_adler32(unsigned long adler, + unsigned char *buf, int len) + { + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int n; + + for (n = 0; n < len; n++) { + s1 = (s1 + buf[n]) % BASE; + s2 = (s2 + s1) % BASE; + } + return (s2 << 16) + s1; + } + + /* Return the adler32 of the bytes buf[0..len-1] */ + unsigned long adler32(unsigned char *buf, int len) + { + return update_adler32(1L, buf, len); + } + + + + + + + + + +Stewart, et al. Standards Track [Page 133] + +RFC 2960 Stream Control Transmission Protocol October 2000 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2000). Some router + implementations also allow equal-cost multipath usage with RIP and + other routing protocols. The effect of multipath routing on a + forwarder is that the forwarder potentially has several next-hops for + any given destination and must use some method to choose which next- + hop should be used for a given data packet. + +1. Introduction + + Various routing protocols, including OSPF and ISIS, explicitly allow + "Equal-Cost Multipath" routing. Some router implementations also + allow equal-cost multipath usage with RIP and other routing + protocols. Using equal-cost multipath means that if multiple equal- + cost routes to the same destination exist, they can be discovered and + used to provide load balancing among redundant paths. + + The effect of multipath routing on a forwarder is that the forwarder + potentially has several next-hops for any given destination and must + use some method to choose which next-hop should be used for a given + data packet. This memo summarizes current practices, problems, and + solutions. 
+ + + + + + + +Thaler & Hopps Informational [Page 1] + +RFC 2991 Multipath Issues November 2000 + + +2. Concerns + + Several router implementations allow multipath forwarding. This is + sometimes done naively via round-robin, where each packet matching a + given destination route is forwarded using the subsequent next-hop, + in a round-robin fashion. This does provide a form of load + balancing, but there are several problems with approaches such as + round-robin or random: + + Variable Path MTU + Since each of the redundant paths may have a different MTU, + this means that the overall path MTU can change on a packet- + by-packet basis, negating the usefulness of path MTU discovery. + + Variable Latencies + Since each of the redundant paths may have a different latency + involved, having packets take separate paths can cause packets + to always arrive out of order, increasing delivery latency and + buffering requirements. + + Packet reordering causes TCP to believe that loss has taken + place when packets with higher sequence numbers arrive before + an earlier one. When three or more packets are received before + a "late" packet, TCP enters a mode called "fast-retransmit" [6] + which consumes extra bandwidth (which could potentially cause + more loss, decreasing throughput) as it attempts to + unnecessarily retransmit the delayed packet(s). Hence, + reordering can be detrimental to network performance. + + Debugging + Common debugging utilities such as ping and traceroute are much + less reliable in the presence of multiple paths and may even + present completely wrong results. + + In multicast routing, the problem with multiple paths is that + multicast routing protocols prevent loops and duplicates by + constructing a single tree to all receivers of the same group + address. Multicast routing protocols deployed today (DVMRP, PIM-DM, + PIM-SM) [2] construct shortest-path trees rooted at either the + source, or another router known as a Core or Rendezvous Point. 
+ Hence, the way they ensure that duplicates will not arise is that a + given tree must use only a single next-hop towards the root of the + tree. + + + + + + + + +Thaler & Hopps Informational [Page 2] + +RFC 2991 Multipath Issues November 2000 + + +3. Requirements + + In the remainder of this document, we will use the term "flow" to + represent the granularity at which the router keeps state (if at all) + for classes of traffic. The exact definition of a flow may depend on + the actual implementation. For example, a flow might be identified + solely by destination address, or it might be identified by (source + address, destination address, protocol id) triplet. Hence "flow" is + not necessarily synonymous with the term "microflow" as used in RFC + 2474 [7], which also includes port numbers. Indeed, including + transport-layer information in the next-hop selection process can + actually be problematic. For example, if packets are fragmented, the + transport-layer information may not be available in every packet. + Furthermore, having the choice of path depend on transport-layer + fields may negate the benefit of caching information such as MTU for + use in subsequent connections between the same endpoints. + + All of the problems outlined in the previous section arise when + packets in the same unicast or multicast "flow" are split among + multiple paths. The natural solution is therefore to ensure that + packets for the same flow always use the same path. + + Two additional features are desirable: + + Minimal disruption + When multipath is used, meaning that multiple routes contribute + valid next-hops, the chances are higher of routes being added + and deleted from consideration than when only the "best" route + is used (in which case metric changes in alternate routes have + no effect on traffic paths). 
Since a higher number of routes + may actually be used for forwarding when multipath is in use, + the potential for packet reordering and packet loss due to + route flaps can be much greater than when not using multipath. + Hence, it is desirable to minimize the number of active flows + affected by the addition or deletion of another next-hop. + + Fast implementation + The amount of additional computation required to forward a + packet should be small. For example, when doing round-robin, + this computation might consist of incrementing (modulo the + number of next-hops) a next-hop index. + +4. Solutions + + We now provide three possible methods for improving the performance + of multipath and then discuss their applicability to unicast and + multicast forwarding. + + + + +Thaler & Hopps Informational [Page 3] + +RFC 2991 Multipath Issues November 2000 + + + Modulo-N Hash + To select a next-hop from the list of N next-hops, the router + performs a modulo-N hash over the packet header fields that + identify a flow. This has the advantage of being fast, at the + expense of (N-1)/N of all flows changing paths whenever a + next-hop is added or removed. + + Hash-Threshold + The router first selects a key by performing a hash over the + packet header fields that identify the flow. The N next-hops + have been assigned unique regions in the hash function's output + space. By comparing the hash value against region boundaries + the router can determine which region the hash value belongs to + and thus which next-hop to use. This method has the advantage + of only affecting flows near the region boundaries (or + thresholds) when next-hops are added or removed. For ECMP + hash-threshold's lookup can be done with a simple division + (hash_value / fixed_region_size). When a next-hop is added or + removed, between 1/4 and 1/2 of all flows change paths. An + analysis of this method can be found in [3]. 
+ + Highest Random Weight (HRW) + The router computes a key for EACH next-hop by performing a + hash over the packet header fields that identify the flow, as + well as over the address of the next-hop. The router then + chooses the next-hop with the highest resulting key value [4]. + This has the advantage of minimizing the number of flows + affected by a next-hop addition or deletion (only 1/N of them), + but is approximately N times as expensive as a modulo-N hash. + + The applicability of these three alternatives depends on (at least) + two factors: whether the forwarder maintains per-flow state, and how + precious CPU is to a multipath forwarder. + + Some routers may maintain per-flow state for reasons other than for + supporting multipath. For example, routers typically keep per-flow + state for multicast flows so that they can maintain the list of + interfaces to which packets in the flow should be copied. + + If per-flow state is maintained in a multipath forwarder, then + computation of the next-hop can be done by the router at state + creation time. This entails no additional computations at packet + forwarding time compared with normal forwarding to a single next-hop, + since the next-hop is precomputed. In this case, any method can be + used, including round-robin, random, modulo-N, hash-threshold or HRW. + Hash functions such as modulo-N, hash-threshold and HRW are better if + the forwarder state may be deleted for any reason during the lifetime + of a flow since subsequent next-hop computations by the router will + + + +Thaler & Hopps Informational [Page 4] + +RFC 2991 Multipath Issues November 2000 + + + always select the same path. This also improves the usefulness of + debugging utilities such as traceroute. Finally, to maximize the + stability of paths (and hence the usefulness of traceroute, etc.), + the use of HRW is recommended over the other methods mentioned + herein. 
+ + If per-flow state is not maintained by the forwarder, then using + multiple next-hops requires that the next-hop be calculated at packet + arrival time. When CPU is more precious than stability of flow + paths, hash-threshold is recommended over the other methods mentioned + herein. + +4.1. Unicast Forwarding + + Depending on the implementation, unicast forwarding may or may not + keep per-flow state. We recommend that where forwarder + implementations keep flow state, routers should use HRW at state + creation time (and next-hop deletion time) to select the next-hop, + and that forwarders without per-flow state use hash-threshold. + +4.2. Multicast Forwarding + + Today's multicast forwarding engines use a cache of forwarding + entries indexed by group (or group prefix) and source (or source + prefix). This means that today's multicast forwarders always keep + per-flow state, although for some multicast routing protocols, the + "flow" may be fairly coarse (e.g., traffic from all sources to the + same destination). Since per-flow state is kept by the forwarder, it + is recommended that the router always use HRW to select the next-hop. + + Routers using explicit-joining protocols such as PIM-SM [5] should + thus use the multipath information when determining to which neighbor + a join message should be sent. For example, when multiple next-hops + exist for a given Rendezvous Point (RP) toward which a (*,G) Join + should be sent, it is recommended that HRW be used to select the + next-hop to use for each group. + +5. Applicability + + The algorithms discussed above (except round-robin) all rely on some + form of hash function. Equal flow distribution is achieved when the + hash function is uniformly distributed. Since the commonly used hash + functions only become uniformly distributed when the number of inputs + is relatively large, these algorithms are more applicable to routers + used to route many flows, than in, for example, a small business + setting.
+ + + + + +Thaler & Hopps Informational [Page 5] + +RFC 2991 Multipath Issues November 2000 + + +6. Redundant Parallel Links + + A related problem occurs when multiple parallel links are used + between the same pair of routers. A common solution is to bundle the + two links together into a "super"-link which is then used for routing. + For multicast forwarding, this results in the two links being reduced + to a single next-hop (over the combined link) which can be used to + prevent duplicates. When a unicast or multicast packet is queued to + the combined link, some method, such as those discussed earlier, is + still required to determine the physical link on which to transmit + the packet. If the parallel links are identical, then most of the + concerns discussed in this document are avoided with the combined + link. The exception is packet reordering, which can still occur with + round-robin, adversely affecting TCP. + +7. Security Considerations + + This document discusses issues with various methods of choosing a + next-hop from among multiple valid next-hops. As such, it does not + directly impact the security of the Internet infrastructure or its + applications. + + One issue that is worth mentioning, however, is that when next-hop + selection is predictable, an attacker can synthesize traffic that + will all hash the same, making it possible to launch a denial-of- + service attack that overloads a particular path. Since a special + case of this is when the same (single) next-hop is always selected, + such an attack is easiest when multipath is not being used. + Introducing multipath routing can make such an attack more difficult; + the more unpredictable the hash is, the harder it becomes to conduct + a denial-of-service attack against any single link. + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 6] + +RFC 2991 Multipath Issues November 2000 + + +8. References + + [1] Moy, J., "OSPF Version 2", STD 54, RFC 2328, April 1998.
+ + [2] Maufer, T., "Deploying IP Multicast in the Enterprise", + Prentice-Hall, 1998. + + [3] Hopps, C., "Analysis of an Equal-Cost Multi-Path Algorithm", RFC + 2992, November 2000. + + [4] Thaler, D., and C.V. Ravishankar, "Using Name-Based Mappings to + Increase Hit Rates", IEEE/ACM Transactions on Networking, + February 1998. + + [5] Estrin, D., Farinacci, D., Helmy, A., Thaler, D., Deering, S., + Handley, M., Jacobson, V., Liu, C., Sharma, P. and L. Wei, + "Protocol Independent Multicast-Sparse Mode (PIM-SM): Protocol + Specification", RFC 2362, June 1998. + + [6] Allman, M., Paxson, V. and W. Stevens, "TCP Congestion Control", + RFC 2581, April 1999. + + [7] Nichols, K., Blake, S., Baker, F. and D. Black., "Definition of + the Differentiated Services Field (DS Field) in the IPv4 and + IPv6 Headers", RFC 2474, December 1998. + + + + + + + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 7] + +RFC 2991 Multipath Issues November 2000 + + +9. Authors' Addresses + + Dave Thaler + Microsoft + One Microsoft Way + Redmond, WA 98052 + + Phone: +1 425 703 8835 + EMail: dthaler@dthaler.microsoft.com + + + Christian E. Hopps + NextHop Technologies, Inc. + 517 W. William Street + Ann Arbor, MI 48103-4943 + U.S.A + + Phone: +1 734 936 0291 + EMail: chopps@nexthop.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Thaler & Hopps Informational [Page 8] + +RFC 2991 Multipath Issues November 2000 + + +10. Full Copyright Statement + + Copyright (C) The Internet Society (2000). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. 
Berners-Lee +Request for Comments: 3986 W3C/MIT +STD: 66 R. Fielding +Updates: 1738 Day Software +Obsoletes: 2732, 2396, 1808 L. Masinter +Category: Standards Track Adobe Systems + January 2005 + + + Uniform Resource Identifier (URI): Generic Syntax + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + A Uniform Resource Identifier (URI) is a compact sequence of + characters that identifies an abstract or physical resource. This + specification defines the generic URI syntax and a process for + resolving URI references that might be in relative form, along with + guidelines and security considerations for the use of URIs on the + Internet. The URI syntax defines a grammar that is a superset of all + valid URIs, allowing an implementation to parse the common components + of a URI reference without knowing the scheme-specific requirements + of every possible identifier. This specification does not define a + generative grammar for URIs; that task is performed by the individual + specifications of each URI scheme. + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 1] + +RFC 3986 URI Generic Syntax January 2005 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 1.1. Overview of URIs . . . . . . . . . . . . . . . . . . . . 4 + 1.1.1. Generic Syntax . . . . . . . . . . . . . . . . . 6 + 1.1.2. Examples . . . . . . . . . . . . . . . . . . . . 7 + 1.1.3. URI, URL, and URN . . . . . . . . . . . . . . . 7 + 1.2. Design Considerations . . . . . . . . . . . . . . . . . 8 + 1.2.1. Transcription . . . . . . . . . . . . 
. . . . . 8 + 1.2.2. Separating Identification from Interaction . . . 9 + 1.2.3. Hierarchical Identifiers . . . . . . . . . . . . 10 + 1.3. Syntax Notation . . . . . . . . . . . . . . . . . . . . 11 + 2. Characters . . . . . . . . . . . . . . . . . . . . . . . . . . 11 + 2.1. Percent-Encoding . . . . . . . . . . . . . . . . . . . . 12 + 2.2. Reserved Characters . . . . . . . . . . . . . . . . . . 12 + 2.3. Unreserved Characters . . . . . . . . . . . . . . . . . 13 + 2.4. When to Encode or Decode . . . . . . . . . . . . . . . . 14 + 2.5. Identifying Data . . . . . . . . . . . . . . . . . . . . 14 + 3. Syntax Components . . . . . . . . . . . . . . . . . . . . . . 16 + 3.1. Scheme . . . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2. Authority . . . . . . . . . . . . . . . . . . . . . . . 17 + 3.2.1. User Information . . . . . . . . . . . . . . . . 18 + 3.2.2. Host . . . . . . . . . . . . . . . . . . . . . . 18 + 3.2.3. Port . . . . . . . . . . . . . . . . . . . . . . 22 + 3.3. Path . . . . . . . . . . . . . . . . . . . . . . . . . . 22 + 3.4. Query . . . . . . . . . . . . . . . . . . . . . . . . . 23 + 3.5. Fragment . . . . . . . . . . . . . . . . . . . . . . . . 24 + 4. Usage . . . . . . . . . . . . . . . . . . . . . . . . . . . . 25 + 4.1. URI Reference . . . . . . . . . . . . . . . . . . . . . 25 + 4.2. Relative Reference . . . . . . . . . . . . . . . . . . . 26 + 4.3. Absolute URI . . . . . . . . . . . . . . . . . . . . . . 27 + 4.4. Same-Document Reference . . . . . . . . . . . . . . . . 27 + 4.5. Suffix Reference . . . . . . . . . . . . . . . . . . . . 27 + 5. Reference Resolution . . . . . . . . . . . . . . . . . . . . . 28 + 5.1. Establishing a Base URI . . . . . . . . . . . . . . . . 28 + 5.1.1. Base URI Embedded in Content . . . . . . . . . . 29 + 5.1.2. Base URI from the Encapsulating Entity . . . . . 29 + 5.1.3. Base URI from the Retrieval URI . . . . . . . . 30 + 5.1.4. Default Base URI . . . . . . . . . . . . . . . . 30 + 5.2. 
Relative Resolution . . . . . . . . . . . . . . . . . . 30 + 5.2.1. Pre-parse the Base URI . . . . . . . . . . . . . 31 + 5.2.2. Transform References . . . . . . . . . . . . . . 31 + 5.2.3. Merge Paths . . . . . . . . . . . . . . . . . . 32 + 5.2.4. Remove Dot Segments . . . . . . . . . . . . . . 33 + 5.3. Component Recomposition . . . . . . . . . . . . . . . . 35 + 5.4. Reference Resolution Examples . . . . . . . . . . . . . 35 + 5.4.1. Normal Examples . . . . . . . . . . . . . . . . 36 + 5.4.2. Abnormal Examples . . . . . . . . . . . . . . . 36 + + + +Berners-Lee, et al. Standards Track [Page 2] + +RFC 3986 URI Generic Syntax January 2005 + + + 6. Normalization and Comparison . . . . . . . . . . . . . . . . . 38 + 6.1. Equivalence . . . . . . . . . . . . . . . . . . . . . . 38 + 6.2. Comparison Ladder . . . . . . . . . . . . . . . . . . . 39 + 6.2.1. Simple String Comparison . . . . . . . . . . . . 39 + 6.2.2. Syntax-Based Normalization . . . . . . . . . . . 40 + 6.2.3. Scheme-Based Normalization . . . . . . . . . . . 41 + 6.2.4. Protocol-Based Normalization . . . . . . . . . . 42 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 7.1. Reliability and Consistency . . . . . . . . . . . . . . 43 + 7.2. Malicious Construction . . . . . . . . . . . . . . . . . 43 + 7.3. Back-End Transcoding . . . . . . . . . . . . . . . . . . 44 + 7.4. Rare IP Address Formats . . . . . . . . . . . . . . . . 45 + 7.5. Sensitive Information . . . . . . . . . . . . . . . . . 45 + 7.6. Semantic Attacks . . . . . . . . . . . . . . . . . . . . 45 + 8. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 46 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 46 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 46 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 46 + 10.2. Informative References . . . . . . . . . . . . . . . . . 47 + A. Collected ABNF for URI . . . . . . . . . . . . . . . . . . . . 49 + B. 
Parsing a URI Reference with a Regular Expression . . . . . . 50 + C. Delimiting a URI in Context . . . . . . . . . . . . . . . . . 51 + D. Changes from RFC 2396 . . . . . . . . . . . . . . . . . . . . 53 + D.1. Additions . . . . . . . . . . . . . . . . . . . . . . . 53 + D.2. Modifications . . . . . . . . . . . . . . . . . . . . . 53 + Index . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 56 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 60 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 61 + + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 3] + +RFC 3986 URI Generic Syntax January 2005 + + +1. Introduction + + A Uniform Resource Identifier (URI) provides a simple and extensible + means for identifying a resource. This specification of URI syntax + and semantics is derived from concepts introduced by the World Wide + Web global information initiative, whose use of these identifiers + dates from 1990 and is described in "Universal Resource Identifiers + in WWW" [RFC1630]. The syntax is designed to meet the + recommendations laid out in "Functional Recommendations for Internet + Resource Locators" [RFC1736] and "Functional Requirements for Uniform + Resource Names" [RFC1737]. + + This document obsoletes [RFC2396], which merged "Uniform Resource + Locators" [RFC1738] and "Relative Uniform Resource Locators" + [RFC1808] in order to define a single, generic syntax for all URIs. + It obsoletes [RFC2732], which introduced syntax for an IPv6 address. + It excludes portions of RFC 1738 that defined the specific syntax of + individual URI schemes; those portions will be updated as separate + documents. The process for registration of new URI schemes is + defined separately by [BCP35]. Advice for designers of new URI + schemes can be found in [RFC2718]. All significant changes from RFC + 2396 are noted in Appendix D. 
+ + This specification uses the terms "character" and "coded character + set" in accordance with the definitions provided in [BCP19], and + "character encoding" in place of what [BCP19] refers to as a + "charset". + +1.1. Overview of URIs + + URIs are characterized as follows: + + Uniform + + Uniformity provides several benefits. It allows different types + of resource identifiers to be used in the same context, even when + the mechanisms used to access those resources may differ. It + allows uniform semantic interpretation of common syntactic + conventions across different types of resource identifiers. It + allows introduction of new types of resource identifiers without + interfering with the way that existing identifiers are used. It + allows the identifiers to be reused in many different contexts, + thus permitting new applications or protocols to leverage a pre- + existing, large, and widely used set of resource identifiers. + + + + + + + +Berners-Lee, et al. Standards Track [Page 4] + +RFC 3986 URI Generic Syntax January 2005 + + + Resource + + This specification does not limit the scope of what might be a + resource; rather, the term "resource" is used in a general sense + for whatever might be identified by a URI. Familiar examples + include an electronic document, an image, a source of information + with a consistent purpose (e.g., "today's weather report for Los + Angeles"), a service (e.g., an HTTP-to-SMS gateway), and a + collection of other resources. A resource is not necessarily + accessible via the Internet; e.g., human beings, corporations, and + bound books in a library can also be resources. Likewise, + abstract concepts can be resources, such as the operators and + operands of a mathematical equation, the types of a relationship + (e.g., "parent" or "employee"), or numeric values (e.g., zero, + one, and infinity). 
+ + Identifier + + An identifier embodies the information required to distinguish + what is being identified from all other things within its scope of + identification. Our use of the terms "identify" and "identifying" + refer to this purpose of distinguishing one resource from all + other resources, regardless of how that purpose is accomplished + (e.g., by name, address, or context). These terms should not be + mistaken as an assumption that an identifier defines or embodies + the identity of what is referenced, though that may be the case + for some identifiers. Nor should it be assumed that a system + using URIs will access the resource identified: in many cases, + URIs are used to denote resources without any intention that they + be accessed. Likewise, the "one" resource identified might not be + singular in nature (e.g., a resource might be a named set or a + mapping that varies over time). + + A URI is an identifier consisting of a sequence of characters + matching the syntax rule named <URI> in Section 3. It enables + uniform identification of resources via a separately defined + extensible set of naming schemes (Section 3.1). How that + identification is accomplished, assigned, or enabled is delegated to + each scheme specification. + + This specification does not place any limits on the nature of a + resource, the reasons why an application might seek to refer to a + resource, or the kinds of systems that might use URIs for the sake of + identifying resources. This specification does not require that a + URI persists in identifying the same resource over time, though that + is a common goal of all URI schemes. Nevertheless, nothing in this + + + + + +Berners-Lee, et al. Standards Track [Page 5] + +RFC 3986 URI Generic Syntax January 2005 + + + specification prevents an application from limiting itself to + particular types of resources, or to a subset of URIs that maintains + characteristics desired by that application. 
+ + URIs have a global scope and are interpreted consistently regardless + of context, though the result of that interpretation may be in + relation to the end-user's context. For example, "http://localhost/" + has the same interpretation for every user of that reference, even + though the network interface corresponding to "localhost" may be + different for each end-user: interpretation is independent of access. + However, an action made on the basis of that reference will take + place in relation to the end-user's context, which implies that an + action intended to refer to a globally unique thing must use a URI + that distinguishes that resource from all other things. URIs that + identify in relation to the end-user's local context should only be + used when the context itself is a defining aspect of the resource, + such as when an on-line help manual refers to a file on the end- + user's file system (e.g., "file:///etc/hosts"). + +1.1.1. Generic Syntax + + Each URI begins with a scheme name, as defined in Section 3.1, that + refers to a specification for assigning identifiers within that + scheme. As such, the URI syntax is a federated and extensible naming + system wherein each scheme's specification may further restrict the + syntax and semantics of identifiers using that scheme. + + This specification defines those elements of the URI syntax that are + required of all URI schemes or are common to many URI schemes. It + thus defines the syntax and semantics needed to implement a scheme- + independent parsing mechanism for URI references, by which the + scheme-dependent handling of a URI can be postponed until the + scheme-dependent semantics are needed. Likewise, protocols and data + formats that make use of URI references can refer to this + specification as a definition for the range of syntax allowed for all + URIs, including those schemes that have yet to be defined. 
This + decouples the evolution of identification schemes from the evolution + of protocols, data formats, and implementations that make use of + URIs. + + A parser of the generic URI syntax can parse any URI reference into + its major components. Once the scheme is determined, further + scheme-specific parsing can be performed on the components. In other + words, the URI generic syntax is a superset of the syntax of all URI + schemes. + + + + + + +Berners-Lee, et al. Standards Track [Page 6] + +RFC 3986 URI Generic Syntax January 2005 + + +1.1.2. Examples + + The following example URIs illustrate several URI schemes and + variations in their common syntax components: + + ftp://ftp.is.co.za/rfc/rfc1808.txt + + http://www.ietf.org/rfc/rfc2396.txt + + ldap://[2001:db8::7]/c=GB?objectClass?one + + mailto:John.Doe@example.com + + news:comp.infosystems.www.servers.unix + + tel:+1-816-555-1212 + + telnet://192.0.2.16:80/ + + urn:oasis:names:specification:docbook:dtd:xml:4.1.2 + + +1.1.3. URI, URL, and URN + + A URI can be further classified as a locator, a name, or both. The + term "Uniform Resource Locator" (URL) refers to the subset of URIs + that, in addition to identifying a resource, provide a means of + locating the resource by describing its primary access mechanism + (e.g., its network "location"). The term "Uniform Resource Name" + (URN) has been used historically to refer to both URIs under the + "urn" scheme [RFC2141], which are required to remain globally unique + and persistent even when the resource ceases to exist or becomes + unavailable, and to any other URI with the properties of a name. + + An individual scheme does not have to be classified as being just one + of "name" or "locator". Instances of URIs from any given scheme may + have the characteristics of names or locators or both, often + depending on the persistence and care in the assignment of + identifiers by the naming authority, rather than on any quality of + the scheme. 
Future specifications and related documentation should + use the general term "URI" rather than the more restrictive terms + "URL" and "URN" [RFC3305]. + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 7] + +RFC 3986 URI Generic Syntax January 2005 + + +1.2. Design Considerations + +1.2.1. Transcription + + The URI syntax has been designed with global transcription as one of + its main considerations. A URI is a sequence of characters from a + very limited set: the letters of the basic Latin alphabet, digits, + and a few special characters. A URI may be represented in a variety + of ways; e.g., ink on paper, pixels on a screen, or a sequence of + character encoding octets. The interpretation of a URI depends only + on the characters used and not on how those characters are + represented in a network protocol. + + The goal of transcription can be described by a simple scenario. + Imagine two colleagues, Sam and Kim, sitting in a pub at an + international conference and exchanging research ideas. Sam asks Kim + for a location to get more information, so Kim writes the URI for the + research site on a napkin. Upon returning home, Sam takes out the + napkin and types the URI into a computer, which then retrieves the + information to which Kim referred. + + There are several design considerations revealed by the scenario: + + o A URI is a sequence of characters that is not always represented + as a sequence of octets. + + o A URI might be transcribed from a non-network source and thus + should consist of characters that are most likely able to be + entered into a computer, within the constraints imposed by + keyboards (and related input devices) across languages and + locales. + + o A URI often has to be remembered by people, and it is easier for + people to remember a URI when it consists of meaningful or + familiar components. + + These design considerations are not always in alignment. 
For + example, it is often the case that the most meaningful name for a URI + component would require characters that cannot be typed into some + systems. The ability to transcribe a resource identifier from one + medium to another has been considered more important than having a + URI consist of the most meaningful of components. + + In local or regional contexts and with improving technology, users + might benefit from being able to use a wider range of characters; + such use is not defined by this specification. Percent-encoded + octets (Section 2.1) may be used within a URI to represent characters + outside the range of the US-ASCII coded character set if this + + + +Berners-Lee, et al. Standards Track [Page 8] + +RFC 3986 URI Generic Syntax January 2005 + + + representation is allowed by the scheme or by the protocol element in + which the URI is referenced. Such a definition should specify the + character encoding used to map those characters to octets prior to + being percent-encoded for the URI. + +1.2.2. Separating Identification from Interaction + + A common misunderstanding of URIs is that they are only used to refer + to accessible resources. The URI itself only provides + identification; access to the resource is neither guaranteed nor + implied by the presence of a URI. Instead, any operation associated + with a URI reference is defined by the protocol element, data format + attribute, or natural language text in which it appears. + + Given a URI, a system may attempt to perform a variety of operations + on the resource, as might be characterized by words such as "access", + "update", "replace", or "find attributes". Such operations are + defined by the protocols that make use of URIs, not by this + specification. However, we do use a few general terms for describing + common operations on URIs. 
URI "resolution" is the process of + determining an access mechanism and the appropriate parameters + necessary to dereference a URI; this resolution may require several + iterations. To use that access mechanism to perform an action on the + URI's resource is to "dereference" the URI. + + When URIs are used within information retrieval systems to identify + sources of information, the most common form of URI dereference is + "retrieval": making use of a URI in order to retrieve a + representation of its associated resource. A "representation" is a + sequence of octets, along with representation metadata describing + those octets, that constitutes a record of the state of the resource + at the time when the representation is generated. Retrieval is + achieved by a process that might include using the URI as a cache key + to check for a locally cached representation, resolution of the URI + to determine an appropriate access mechanism (if any), and + dereference of the URI for the sake of applying a retrieval + operation. Depending on the protocols used to perform the retrieval, + additional information might be supplied about the resource (resource + metadata) and its relation to other resources. + + URI references in information retrieval systems are designed to be + late-binding: the result of an access is generally determined when it + is accessed and may vary over time or due to other aspects of the + interaction. These references are created in order to be used in the + future: what is being identified is not some specific result that was + obtained in the past, but rather some characteristic that is expected + to be true for future results. In such cases, the resource referred + to by the URI is actually a sameness of characteristics as observed + + + +Berners-Lee, et al. Standards Track [Page 9] + +RFC 3986 URI Generic Syntax January 2005 + + + over time, perhaps elucidated by additional comments or assertions + made by the resource provider. 
+ + Although many URI schemes are named after protocols, this does not + imply that use of these URIs will result in access to the resource + via the named protocol. URIs are often used simply for the sake of + identification. Even when a URI is used to retrieve a representation + of a resource, that access might be through gateways, proxies, + caches, and name resolution services that are independent of the + protocol associated with the scheme name. The resolution of some + URIs may require the use of more than one protocol (e.g., both DNS + and HTTP are typically used to access an "http" URI's origin server + when a representation isn't found in a local cache). + +1.2.3. Hierarchical Identifiers + + The URI syntax is organized hierarchically, with components listed in + order of decreasing significance from left to right. For some URI + schemes, the visible hierarchy is limited to the scheme itself: + everything after the scheme component delimiter (":") is considered + opaque to URI processing. Other URI schemes make the hierarchy + explicit and visible to generic parsing algorithms. + + The generic syntax uses the slash ("/"), question mark ("?"), and + number sign ("#") characters to delimit components that are + significant to the generic parser's hierarchical interpretation of an + identifier. In addition to aiding the readability of such + identifiers through the consistent use of familiar syntax, this + uniform representation of hierarchy across naming schemes allows + scheme-independent references to be made relative to that hierarchy. + + It is often the case that a group or "tree" of documents has been + constructed to serve a common purpose, wherein the vast majority of + URI references in these documents point to resources within the tree + rather than outside it. Similarly, documents located at a particular + site are much more likely to refer to other resources at that site + than to resources at remote sites. 
Relative referencing of URIs + allows document trees to be partially independent of their location + and access scheme. For instance, it is possible for a single set of + hypertext documents to be simultaneously accessible and traversable + via each of the "file", "http", and "ftp" schemes if the documents + refer to each other with relative references. Furthermore, such + document trees can be moved, as a whole, without changing any of the + relative references. + + A relative reference (Section 4.2) refers to a resource by describing + the difference within a hierarchical name space between the reference + context and the target URI. The reference resolution algorithm, + + + +Berners-Lee, et al. Standards Track [Page 10] + +RFC 3986 URI Generic Syntax January 2005 + + + presented in Section 5, defines how such a reference is transformed + to the target URI. As relative references can only be used within + the context of a hierarchical URI, designers of new URI schemes + should use a syntax consistent with the generic syntax's hierarchical + components unless there are compelling reasons to forbid relative + referencing within that scheme. + + NOTE: Previous specifications used the terms "partial URI" and + "relative URI" to denote a relative reference to a URI. As some + readers misunderstood those terms to mean that relative URIs are a + subset of URIs rather than a method of referencing URIs, this + specification simply refers to them as relative references. + + All URI references are parsed by generic syntax parsers when used. + However, because hierarchical processing has no effect on an absolute + URI used in a reference unless it contains one or more dot-segments + (complete path segments of "." or "..", as described in Section 3.3), + URI scheme specifications can define opaque identifiers by + disallowing use of slash characters, question mark characters, and + the URIs "scheme:." and "scheme:..". + +1.3. 
Syntax Notation + + This specification uses the Augmented Backus-Naur Form (ABNF) + notation of [RFC2234], including the following core ABNF syntax rules + defined by that specification: ALPHA (letters), CR (carriage return), + DIGIT (decimal digits), DQUOTE (double quote), HEXDIG (hexadecimal + digits), LF (line feed), and SP (space). The complete URI syntax is + collected in Appendix A. + +2. Characters + + The URI syntax provides a method of encoding data, presumably for the + sake of identifying a resource, as a sequence of characters. The URI + characters are, in turn, frequently encoded as octets for transport + or presentation. This specification does not mandate any particular + character encoding for mapping between URI characters and the octets + used to store or transmit those characters. When a URI appears in a + protocol element, the character encoding is defined by that protocol; + without such a definition, a URI is assumed to be in the same + character encoding as the surrounding text. + + The ABNF notation defines its terminal values to be non-negative + integers (codepoints) based on the US-ASCII coded character set + [ASCII]. Because a URI is a sequence of characters, we must invert + that relation in order to understand the URI syntax. Therefore, the + + + + + +Berners-Lee, et al. Standards Track [Page 11] + +RFC 3986 URI Generic Syntax January 2005 + + + integer values used by the ABNF must be mapped back to their + corresponding characters via US-ASCII in order to complete the syntax + rules. + + A URI is composed from a limited set of characters consisting of + digits, letters, and a few graphic symbols. A reserved subset of + those characters may be used to delimit syntax components within a + URI while the remaining characters, including both the unreserved set + and those reserved characters not acting as delimiters, define each + component's identifying data. + +2.1. 
Percent-Encoding + + A percent-encoding mechanism is used to represent a data octet in a + component when that octet's corresponding character is outside the + allowed set or is being used as a delimiter of, or within, the + component. A percent-encoded octet is encoded as a character + triplet, consisting of the percent character "%" followed by the two + hexadecimal digits representing that octet's numeric value. For + example, "%20" is the percent-encoding for the binary octet + "00100000" (ABNF: %x20), which in US-ASCII corresponds to the space + character (SP). Section 2.4 describes when percent-encoding and + decoding is applied. + + pct-encoded = "%" HEXDIG HEXDIG + + The uppercase hexadecimal digits 'A' through 'F' are equivalent to + the lowercase digits 'a' through 'f', respectively. If two URIs + differ only in the case of hexadecimal digits used in percent-encoded + octets, they are equivalent. For consistency, URI producers and + normalizers should use uppercase hexadecimal digits for all percent- + encodings. + +2.2. Reserved Characters + + URIs include components and subcomponents that are delimited by + characters in the "reserved" set. These characters are called + "reserved" because they may (or may not) be defined as delimiters by + the generic syntax, by each scheme-specific syntax, or by the + implementation-specific syntax of a URI's dereferencing algorithm. + If data for a URI component would conflict with a reserved + character's purpose as a delimiter, then the conflicting data must be + percent-encoded before the URI is formed. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 12] + +RFC 3986 URI Generic Syntax January 2005 + + + reserved = gen-delims / sub-delims + + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + + sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + The purpose of reserved characters is to provide a set of delimiting + characters that are distinguishable from other data within a URI. + URIs that differ in the replacement of a reserved character with its + corresponding percent-encoded octet are not equivalent. Percent- + encoding a reserved character, or decoding a percent-encoded octet + that corresponds to a reserved character, will change how the URI is + interpreted by most applications. Thus, characters in the reserved + set are protected from normalization and are therefore safe to be + used by scheme-specific and producer-specific algorithms for + delimiting data subcomponents within a URI. + + A subset of the reserved characters (gen-delims) is used as + delimiters of the generic URI components described in Section 3. A + component's ABNF syntax rule will not use the reserved or gen-delims + rule names directly; instead, each syntax rule lists the characters + allowed within that component (i.e., not delimiting it), and any of + those characters that are also in the reserved set are "reserved" for + use as subcomponent delimiters within the component. Only the most + common subcomponents are defined by this specification; other + subcomponents may be defined by a URI scheme's specification, or by + the implementation-specific syntax of a URI's dereferencing + algorithm, provided that such subcomponents are delimited by + characters in the reserved set allowed within that component. + + URI producing applications should percent-encode data octets that + correspond to characters in the reserved set unless these characters + are specifically allowed by the URI scheme to represent data in that + component. If a reserved character is found in a URI component and + no delimiting role is known for that character, then it must be + interpreted as representing the data octet corresponding to that + character's encoding in US-ASCII. + +2.3. 
Unreserved Characters + + Characters that are allowed in a URI but do not have a reserved + purpose are called unreserved. These include uppercase and lowercase + letters, decimal digits, hyphen, period, underscore, and tilde. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + + + + + +Berners-Lee, et al. Standards Track [Page 13] + +RFC 3986 URI Generic Syntax January 2005 + + + URIs that differ in the replacement of an unreserved character with + its corresponding percent-encoded US-ASCII octet are equivalent: they + identify the same resource. However, URI comparison implementations + do not always perform normalization prior to comparison (see Section + 6). For consistency, percent-encoded octets in the ranges of ALPHA + (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), + underscore (%5F), or tilde (%7E) should not be created by URI + producers and, when found in a URI, should be decoded to their + corresponding unreserved characters by URI normalizers. + +2.4. When to Encode or Decode + + Under normal circumstances, the only time when octets within a URI + are percent-encoded is during the process of producing the URI from + its component parts. This is when an implementation determines which + of the reserved characters are to be used as subcomponent delimiters + and which can be safely used as data. Once produced, a URI is always + in its percent-encoded form. + + When a URI is dereferenced, the components and subcomponents + significant to the scheme-specific dereferencing process (if any) + must be parsed and separated before the percent-encoded octets within + those components can be safely decoded, as otherwise the data may be + mistaken for component delimiters. The only exception is for + percent-encoded octets corresponding to characters in the unreserved + set, which can be decoded at any time. 
For example, the octet + corresponding to the tilde ("~") character is often encoded as "%7E" + by older URI processing implementations; the "%7E" can be replaced by + "~" without changing its interpretation. + + Because the percent ("%") character serves as the indicator for + percent-encoded octets, it must be percent-encoded as "%25" for that + octet to be used as data within a URI. Implementations must not + percent-encode or decode the same string more than once, as decoding + an already decoded string might lead to misinterpreting a percent + data octet as the beginning of a percent-encoding, or vice versa in + the case of percent-encoding an already percent-encoded string. + +2.5. Identifying Data + + URI characters provide identifying data for each of the URI + components, serving as an external interface for identification + between systems. Although the presence and nature of the URI + production interface is hidden from clients that use its URIs (and is + thus beyond the scope of the interoperability requirements defined by + this specification), it is a frequent source of confusion and errors + in the interpretation of URI character issues. Implementers have to + be aware that there are multiple character encodings involved in the + + + +Berners-Lee, et al. Standards Track [Page 14] + +RFC 3986 URI Generic Syntax January 2005 + + + production and transmission of URIs: local name and data encoding, + public interface encoding, URI character encoding, data format + encoding, and protocol encoding. + + Local names, such as file system names, are stored with a local + character encoding. URI producing applications (e.g., origin + servers) will typically use the local encoding as the basis for + producing meaningful names. 
The URI producer will transform the + local encoding to one that is suitable for a public interface and + then transform the public interface encoding into the restricted set + of URI characters (reserved, unreserved, and percent-encodings). + Those characters are, in turn, encoded as octets to be used as a + reference within a data format (e.g., a document charset), and such + data formats are often subsequently encoded for transmission over + Internet protocols. + + For most systems, an unreserved character appearing within a URI + component is interpreted as representing the data octet corresponding + to that character's encoding in US-ASCII. Consumers of URIs assume + that the letter "X" corresponds to the octet "01011000", and even + when that assumption is incorrect, there is no harm in making it. A + system that internally provides identifiers in the form of a + different character encoding, such as EBCDIC, will generally perform + character translation of textual identifiers to UTF-8 [STD63] (or + some other superset of the US-ASCII character encoding) at an + internal interface, thereby providing more meaningful identifiers + than those resulting from simply percent-encoding the original + octets. + + For example, consider an information service that provides data, + stored locally using an EBCDIC-based file system, to clients on the + Internet through an HTTP server. When an author creates a file with + the name "Laguna Beach" on that file system, the "http" URI + corresponding to that resource is expected to contain the meaningful + string "Laguna%20Beach". If, however, that server produces URIs by + using an overly simplistic raw octet mapping, then the result would + be a URI containing "%D3%81%87%A4%95%81@%C2%85%81%83%88". An + internal transcoding interface fixes this problem by transcoding the + local name to a superset of US-ASCII prior to producing the URI. 
+ Naturally, proper interpretation of an incoming URI on such an + interface requires that percent-encoded octets be decoded (e.g., + "%20" to SP) before the reverse transcoding is applied to obtain the + local name. + + In some cases, the internal interface between a URI component and the + identifying data that it has been crafted to represent is much less + direct than a character encoding translation. For example, portions + of a URI might reflect a query on non-ASCII data, or numeric + + + +Berners-Lee, et al. Standards Track [Page 15] + +RFC 3986 URI Generic Syntax January 2005 + + + coordinates on a map. Likewise, a URI scheme may define components + with additional encoding requirements that are applied prior to + forming the component and producing the URI. + + When a new URI scheme defines a component that represents textual + data consisting of characters from the Universal Character Set [UCS], + the data should first be encoded as octets according to the UTF-8 + character encoding [STD63]; then only those octets that do not + correspond to characters in the unreserved set should be percent- + encoded. For example, the character A would be represented as "A", + the character LATIN CAPITAL LETTER A WITH GRAVE would be represented + as "%C3%80", and the character KATAKANA LETTER A would be represented + as "%E3%82%A2". + +3. Syntax Components + + The generic URI syntax consists of a hierarchical sequence of + components referred to as the scheme, authority, path, query, and + fragment. + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + The scheme and path components are required, though the path may be + empty (no characters). When authority is present, the path must + either be empty or begin with a slash ("/") character. When + authority is not present, the path cannot begin with two slash + characters ("//"). 
These restrictions result in five different ABNF + rules for a path (Section 3.3), only one of which will match any + given URI reference. + + The following are two example URIs and their component parts: + + foo://example.com:8042/over/there?name=ferret#nose + \_/ \______________/\_________/ \_________/ \__/ + | | | | | + scheme authority path query fragment + | _____________________|__ + / \ / \ + urn:example:animal:ferret:nose + + + + + + + +Berners-Lee, et al. Standards Track [Page 16] + +RFC 3986 URI Generic Syntax January 2005 + + +3.1. Scheme + + Each URI begins with a scheme name that refers to a specification for + assigning identifiers within that scheme. As such, the URI syntax is + a federated and extensible naming system wherein each scheme's + specification may further restrict the syntax and semantics of + identifiers using that scheme. + + Scheme names consist of a sequence of characters beginning with a + letter and followed by any combination of letters, digits, plus + ("+"), period ("."), or hyphen ("-"). Although schemes are case- + insensitive, the canonical form is lowercase and documents that + specify schemes must do so with lowercase letters. An implementation + should accept uppercase letters as equivalent to lowercase in scheme + names (e.g., allow "HTTP" as well as "http") for the sake of + robustness but should only produce lowercase scheme names for + consistency. + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + Individual schemes are not specified by this document. The process + for registration of new URI schemes is defined separately by [BCP35]. + The scheme registry maintains the mapping between scheme names and + their specifications. Advice for designers of new URI schemes can be + found in [RFC2718]. URI scheme specifications must define their own + syntax so that all strings matching their scheme-specific syntax will + also match the <absolute-URI> grammar, as described in Section 4.3. 
+ + When presented with a URI that violates one or more scheme-specific + restrictions, the scheme-specific resolution process should flag the + reference as an error rather than ignore the unused parts; doing so + reduces the number of equivalent URIs and helps detect abuses of the + generic syntax, which might indicate that the URI has been + constructed to mislead the user (Section 7.6). + +3.2. Authority + + Many URI schemes include a hierarchical element for a naming + authority so that governance of the name space defined by the + remainder of the URI is delegated to that authority (which may, in + turn, delegate it further). The generic syntax provides a common + means for distinguishing an authority based on a registered name or + server address, along with optional port and user information. + + The authority component is preceded by a double slash ("//") and is + terminated by the next slash ("/"), question mark ("?"), or number + sign ("#") character, or by the end of the URI. + + + + +Berners-Lee, et al. Standards Track [Page 17] + +RFC 3986 URI Generic Syntax January 2005 + + + authority = [ userinfo "@" ] host [ ":" port ] + + URI producers and normalizers should omit the ":" delimiter that + separates host from port if the port component is empty. Some + schemes do not allow the userinfo and/or port subcomponents. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. Non- + validating parsers (those that merely separate a URI reference into + its major components) will often ignore the subcomponent structure of + authority, treating it as an opaque string from the double-slash to + the first terminating delimiter, until such time as the URI is + dereferenced. + +3.2.1. User Information + + The userinfo subcomponent may consist of a user name and, optionally, + scheme-specific information about how to gain authorization to access + the resource. 
The user information, if present, is followed by a + commercial at-sign ("@") that delimits it from the host. + + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + Use of the format "user:password" in the userinfo field is + deprecated. Applications should not render as clear text any data + after the first colon (":") character found within a userinfo + subcomponent unless the data after the colon is the empty string + (indicating no password). Applications may choose to ignore or + reject such data when it is received as part of a reference and + should reject the storage of such data in unencrypted form. The + passing of authentication information in clear text has proven to be + a security risk in almost every case where it has been used. + + Applications that render a URI for the sake of user feedback, such as + in graphical hypertext browsing, should render userinfo in a way that + is distinguished from the rest of a URI, when feasible. Such + rendering will assist the user in cases where the userinfo has been + misleadingly crafted to look like a trusted domain name + (Section 7.6). + +3.2.2. Host + + The host subcomponent of authority is identified by an IP literal + encapsulated within square brackets, an IPv4 address in dotted- + decimal form, or a registered name. The host subcomponent is case- + insensitive. The presence of a host subcomponent within a URI does + not imply that the scheme requires access to the given host on the + Internet. In many cases, the host syntax is used only for the sake + + + +Berners-Lee, et al. Standards Track [Page 18] + +RFC 3986 URI Generic Syntax January 2005 + + + of reusing the existing registration process created and deployed for + DNS, thus obtaining a globally unique name without the cost of + deploying another registry. However, such use comes with its own + costs: domain name ownership may change over time for reasons not + anticipated by the URI producer. 
In other cases, the data within the + host component identifies a registered name that has nothing to do + with an Internet host. We use the name "host" for the ABNF rule + because that is its most common purpose, not its only purpose. + + host = IP-literal / IPv4address / reg-name + + The syntax rule for host is ambiguous because it does not completely + distinguish between an IPv4address and a reg-name. In order to + disambiguate the syntax, we apply the "first-match-wins" algorithm: + If host matches the rule for IPv4address, then it should be + considered an IPv4 address literal and not a reg-name. Although host + is case-insensitive, producers and normalizers should use lowercase + for registered names and hexadecimal addresses for the sake of + uniformity, while only using uppercase letters for percent-encodings. + + A host identified by an Internet Protocol literal address, version 6 + [RFC3513] or later, is distinguished by enclosing the IP literal + within square brackets ("[" and "]"). This is the only place where + square bracket characters are allowed in the URI syntax. In + anticipation of future, as-yet-undefined IP literal address formats, + an implementation may use an optional version flag to indicate such a + format explicitly rather than rely on heuristic determination. + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + The version flag does not indicate the IP version; rather, it + indicates future versions of the literal format. As such, + implementations must not provide the version flag for the existing + IPv4 and IPv6 literal address forms described below. If a URI + containing an IP-literal that starts with "v" (case-insensitive), + indicating that the version flag is present, is dereferenced by an + application that does not know the meaning of that version flag, then + the application should return an appropriate error for "address + mechanism not supported". 
+ + A host identified by an IPv6 literal address is represented inside + the square brackets without a preceding version flag. The ABNF + provided here is a translation of the text definition of an IPv6 + literal address provided in [RFC3513]. This syntax does not support + IPv6 scoped addressing zone identifiers. + + + + +Berners-Lee, et al. Standards Track [Page 19] + +RFC 3986 URI Generic Syntax January 2005 + + + A 128-bit IPv6 address is divided into eight 16-bit pieces. Each + piece is represented numerically in case-insensitive hexadecimal, + using one to four hexadecimal digits (leading zeroes are permitted). + The eight encoded pieces are given most-significant first, separated + by colon characters. Optionally, the least-significant two pieces + may instead be represented in IPv4 address textual format. A + sequence of one or more consecutive zero-valued 16-bit pieces within + the address may be elided, omitting all their digits and leaving + exactly two consecutive colons in their place to mark the elision. + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + ls32 = ( h16 ":" h16 ) / IPv4address + ; least-significant 32 bits of address + + h16 = 1*4HEXDIG + ; 16 bits of address represented in hexadecimal + + A host identified by an IPv4 literal address is represented in + dotted-decimal notation (a sequence of four decimal numbers in the + range 0 to 255, separated by "."), as described in [RFC1123] by + reference to [RFC0952]. Note that other forms of dotted notation may + be interpreted on some platforms, as described in Section 7.4, but + only the dotted-decimal form of four octets is allowed by this + grammar. + + IPv4address = dec-octet "." dec-octet "." 
dec-octet "." dec-octet + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + A host identified by a registered name is a sequence of characters + usually intended for lookup within a locally defined host or service + name registry, though the URI's scheme-specific semantics may require + that a specific registry (or fixed name table) be used instead. The + most common name registry mechanism is the Domain Name System (DNS). + A registered name intended for lookup in the DNS uses the syntax + + + +Berners-Lee, et al. Standards Track [Page 20] + +RFC 3986 URI Generic Syntax January 2005 + + + defined in Section 3.5 of [RFC1034] and Section 2.1 of [RFC1123]. + Such a name consists of a sequence of domain labels separated by ".", + each domain label starting and ending with an alphanumeric character + and possibly also containing "-" characters. The rightmost domain + label of a fully qualified domain name in DNS may be followed by a + single "." and should be if it is necessary to distinguish between + the complete domain name and some local domain. + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + If the URI scheme defines a default for host, then that default + applies when the host subcomponent is undefined or when the + registered name is empty (zero length). For example, the "file" URI + scheme is defined so that no authority, an empty host, and + "localhost" all mean the end-user's machine, whereas the "http" + scheme considers a missing authority or empty host invalid. + + This specification does not mandate a particular registered name + lookup technology and therefore does not restrict the syntax of reg- + name beyond what is necessary for interoperability. 
Instead, it + delegates the issue of registered name syntax conformance to the + operating system of each application performing URI resolution, and + that operating system decides what it will allow for the purpose of + host identification. A URI resolution implementation might use DNS, + host tables, yellow pages, NetInfo, WINS, or any other system for + lookup of registered names. However, a globally scoped naming + system, such as DNS fully qualified domain names, is necessary for + URIs intended to have global scope. URI producers should use names + that conform to the DNS syntax, even when use of DNS is not + immediately apparent, and should limit these names to no more than + 255 characters in length. + + The reg-name syntax allows percent-encoded octets in order to + represent non-ASCII registered names in a uniform way that is + independent of the underlying name resolution technology. Non-ASCII + characters must first be encoded according to UTF-8 [STD63], and then + each octet of the corresponding UTF-8 sequence must be percent- + encoded to be represented as URI characters. URI producing + applications must not use percent-encoding in host unless it is used + to represent a UTF-8 character sequence. When a non-ASCII registered + name represents an internationalized domain name intended for + resolution via the DNS, the name must be transformed to the IDNA + encoding [RFC3490] prior to name lookup. URI producers should + provide these registered names in the IDNA encoding, rather than a + percent-encoding, if they wish to maximize interoperability with + legacy URI resolvers. + + + + + +Berners-Lee, et al. Standards Track [Page 21] + +RFC 3986 URI Generic Syntax January 2005 + + +3.2.3. Port + + The port subcomponent of authority is designated by an optional port + number in decimal following the host and delimited from it by a + single colon (":") character. + + port = *DIGIT + + A scheme may define a default port. 
For example, the "http" scheme + defines a default port of "80", corresponding to its reserved TCP + port number. The type of port designated by the port number (e.g., + TCP, UDP, SCTP) is defined by the URI scheme. URI producers and + normalizers should omit the port component and its ":" delimiter if + port is empty or if its value would be the same as that of the + scheme's default. + +3.3. Path + + The path component contains data, usually organized in hierarchical + form, that, along with data in the non-hierarchical query component + (Section 3.4), serves to identify a resource within the scope of the + URI's scheme and naming authority (if any). The path is terminated + by the first question mark ("?") or number sign ("#") character, or + by the end of the URI. + + If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. If a URI + does not contain an authority component, then the path cannot begin + with two slash characters ("//"). In addition, a URI reference + (Section 4.1) may be a relative-path reference, in which case the + first path segment cannot contain a colon (":") character. The ABNF + requires five separate rules to disambiguate these cases, only one of + which will match the path substring within a given URI reference. We + use the generic term "path component" to describe the URI substring + matched by the parser to one of these rules. + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + + + +Berners-Lee, et al. 
Standards Track [Page 22] + +RFC 3986 URI Generic Syntax January 2005 + + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + A path consists of a sequence of path segments separated by a slash + ("/") character. A path is always defined for a URI, though the + defined path may be empty (zero length). Use of the slash character + to indicate hierarchy is only required when a URI will be used as the + context for relative references. For example, the URI + <mailto:fred@example.com> has a path of "fred@example.com", whereas + the URI <foo://info.example.com?fred> has an empty path. + + The path segments "." and "..", also known as dot-segments, are + defined for relative reference within the path name hierarchy. They + are intended for use at the beginning of a relative-path reference + (Section 4.2) to indicate relative position within the hierarchical + tree of names. This is similar to their role within some operating + systems' file directory structures to indicate the current directory + and parent directory, respectively. However, unlike in a file + system, these dot-segments are only interpreted within the URI path + hierarchy and are removed as part of the resolution process (Section + 5.2). + + Aside from dot-segments in hierarchical paths, a path segment is + considered opaque by the generic syntax. URI producing applications + often use the reserved characters allowed in a segment to delimit + scheme-specific or dereference-handler-specific subcomponents. For + example, the semicolon (";") and equals ("=") reserved characters are + often used to delimit parameters and parameter values applicable to + that segment. The comma (",") reserved character is often used for + similar purposes. 
For example, one URI producer might use a segment + such as "name;v=1.1" to indicate a reference to version 1.1 of + "name", whereas another might use a segment such as "name,1.1" to + indicate the same. Parameter types may be defined by scheme-specific + semantics, but in most cases the syntax of a parameter is specific to + the implementation of the URI's dereferencing algorithm. + +3.4. Query + + The query component contains non-hierarchical data that, along with + data in the path component (Section 3.3), serves to identify a + resource within the scope of the URI's scheme and naming authority + (if any). The query component is indicated by the first question + mark ("?") character and terminated by a number sign ("#") character + or by the end of the URI. + + + +Berners-Lee, et al. Standards Track [Page 23] + +RFC 3986 URI Generic Syntax January 2005 + + + query = *( pchar / "/" / "?" ) + + The characters slash ("/") and question mark ("?") may represent data + within the query component. Beware that some older, erroneous + implementations may not handle such data correctly when it is used as + the base URI for relative references (Section 5.1), apparently + because they fail to distinguish query data from path data when + looking for hierarchical separators. However, as query components + are often used to carry identifying information in the form of + "key=value" pairs and one frequently used value is a reference to + another URI, it is sometimes better for usability to avoid percent- + encoding those characters. + +3.5. Fragment + + The fragment identifier component of a URI allows indirect + identification of a secondary resource by reference to a primary + resource and additional identifying information. The identified + secondary resource may be some portion or subset of the primary + resource, some view on representations of the primary resource, or + some other resource defined or described by those representations. 
A + fragment identifier component is indicated by the presence of a + number sign ("#") character and terminated by the end of the URI. + + fragment = *( pchar / "/" / "?" ) + + The semantics of a fragment identifier are defined by the set of + representations that might result from a retrieval action on the + primary resource. The fragment's format and resolution is therefore + dependent on the media type [RFC2046] of a potentially retrieved + representation, even though such a retrieval is only performed if the + URI is dereferenced. If no such representation exists, then the + semantics of the fragment are considered unknown and are effectively + unconstrained. Fragment identifier semantics are independent of the + URI scheme and thus cannot be redefined by scheme specifications. + + Individual media types may define their own restrictions on or + structures within the fragment identifier syntax for specifying + different types of subsets, views, or external references that are + identifiable as secondary resources by that media type. If the + primary resource has multiple representations, as is often the case + for resources whose representation is selected based on attributes of + the retrieval request (a.k.a., content negotiation), then whatever is + identified by the fragment should be consistent across all of those + representations. Each representation should either define the + fragment so that it corresponds to the same secondary resource, + regardless of how it is represented, or should leave the fragment + undefined (i.e., not found). + + + +Berners-Lee, et al. Standards Track [Page 24] + +RFC 3986 URI Generic Syntax January 2005 + + + As with any URI, use of a fragment identifier component does not + imply that a retrieval action will take place. A URI with a fragment + identifier may be used to refer to the secondary resource without any + implication that the primary resource is accessible or will ever be + accessed. 
+ + Fragment identifiers have a special role in information retrieval + systems as the primary form of client-side indirect referencing, + allowing an author to specifically identify aspects of an existing + resource that are only indirectly provided by the resource owner. As + such, the fragment identifier is not used in the scheme-specific + processing of a URI; instead, the fragment identifier is separated + from the rest of the URI prior to a dereference, and thus the + identifying information within the fragment itself is dereferenced + solely by the user agent, regardless of the URI scheme. Although + this separate handling is often perceived to be a loss of + information, particularly for accurate redirection of references as + resources move over time, it also serves to prevent information + providers from denying reference authors the right to refer to + information within a resource selectively. Indirect referencing also + provides additional flexibility and extensibility to systems that use + URIs, as new media types are easier to define and deploy than new + schemes of identification. + + The characters slash ("/") and question mark ("?") are allowed to + represent data within the fragment identifier. Beware that some + older, erroneous implementations may not handle this data correctly + when it is used as the base URI for relative references (Section + 5.1). + +4. Usage + + When applications make reference to a URI, they do not always use the + full form of reference defined by the "URI" syntax rule. To save + space and take advantage of hierarchical locality, many Internet + protocol elements and media type formats allow an abbreviation of a + URI, whereas others restrict the syntax to a particular form of URI. + We define the most common forms of reference syntax in this + specification because they impact and depend upon the design of the + generic syntax, requiring a uniform parsing algorithm in order to be + interpreted consistently. + +4.1. 
URI Reference + + URI-reference is used to denote the most common usage of a resource + identifier. + + URI-reference = URI / relative-ref + + + +Berners-Lee, et al. Standards Track [Page 25] + +RFC 3986 URI Generic Syntax January 2005 + + + A URI-reference is either a URI or a relative reference. If the + URI-reference's prefix does not match the syntax of a scheme followed + by its colon separator, then the URI-reference is a relative + reference. + + A URI-reference is typically parsed first into the five URI + components, in order to determine what components are present and + whether the reference is relative. Then, each component is parsed + for its subparts and their validation. The ABNF of URI-reference, + along with the "first-match-wins" disambiguation rule, is sufficient + to define a validating parser for the generic syntax. Readers + familiar with regular expressions should see Appendix B for an + example of a non-validating URI-reference parser that will take any + given string and extract the URI components. + +4.2. Relative Reference + + A relative reference takes advantage of the hierarchical syntax + (Section 1.2.3) to express a URI reference relative to the name space + of another hierarchical URI. + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + The URI referred to by a relative reference, also known as the target + URI, is obtained by applying the reference resolution algorithm of + Section 5. + + A relative reference that begins with two slash characters is termed + a network-path reference; such references are rarely used. A + relative reference that begins with a single slash character is + termed an absolute-path reference. A relative reference that does + not begin with a slash character is termed a relative-path reference. 
+ + A path segment that contains a colon character (e.g., "this:that") + cannot be used as the first segment of a relative-path reference, as + it would be mistaken for a scheme name. Such a segment must be + preceded by a dot-segment (e.g., "./this:that") to make a relative- + path reference. + + + + + + + + +Berners-Lee, et al. Standards Track [Page 26] + +RFC 3986 URI Generic Syntax January 2005 + + +4.3. Absolute URI + + Some protocol elements allow only the absolute form of a URI without + a fragment identifier. For example, defining a base URI for later + use by relative references calls for an absolute-URI syntax rule that + does not allow a fragment. + + absolute-URI = scheme ":" hier-part [ "?" query ] + + URI scheme specifications must define their own syntax so that all + strings matching their scheme-specific syntax will also match the + <absolute-URI> grammar. Scheme specifications will not define + fragment identifier syntax or usage, regardless of its applicability + to resources identifiable via that scheme, as fragment identification + is orthogonal to scheme definition. However, scheme specifications + are encouraged to include a wide range of examples, including + examples that show use of the scheme's URIs with fragment identifiers + when such usage is appropriate. + +4.4. Same-Document Reference + + When a URI reference refers to a URI that is, aside from its fragment + component (if any), identical to the base URI (Section 5.1), that + reference is called a "same-document" reference. The most frequent + examples of same-document references are relative references that are + empty or include only the number sign ("#") separator followed by a + fragment identifier. + + When a same-document reference is dereferenced for a retrieval + action, the target of that reference is defined to be within the same + entity (representation, document, or message) as the reference; + therefore, a dereference should not result in a new retrieval action. 
+ + Normalization of the base and target URIs prior to their comparison, + as described in Sections 6.2.2 and 6.2.3, is allowed but rarely + performed in practice. Normalization may increase the set of same- + document references, which may be of benefit to some caching + applications. As such, reference authors should not assume that a + slightly different, though equivalent, reference URI will (or will + not) be interpreted as a same-document reference by any given + application. + +4.5. Suffix Reference + + The URI syntax is designed for unambiguous reference to resources and + extensibility via the URI scheme. However, as URI identification and + usage have become commonplace, traditional media (television, radio, + newspapers, billboards, etc.) have increasingly used a suffix of the + + + +Berners-Lee, et al. Standards Track [Page 27] + +RFC 3986 URI Generic Syntax January 2005 + + + URI as a reference, consisting of only the authority and path + portions of the URI, such as + + www.w3.org/Addressing/ + + or simply a DNS registered name on its own. Such references are + primarily intended for human interpretation rather than for machines, + with the assumption that context-based heuristics are sufficient to + complete the URI (e.g., most registered names beginning with "www" + are likely to have a URI prefix of "http://"). Although there is no + standard set of heuristics for disambiguating a URI suffix, many + client implementations allow them to be entered by the user and + heuristically resolved. + + Although this practice of using suffix references is common, it + should be avoided whenever possible and should never be used in + situations where long-term references are expected. The heuristics + noted above will change over time, particularly when a new URI scheme + becomes popular, and are often incorrect when used out of context. + Furthermore, they can lead to security issues along the lines of + those described in [RFC1535]. 
+ + As a URI suffix has the same syntax as a relative-path reference, a + suffix reference cannot be used in contexts where a relative + reference is expected. As a result, suffix references are limited to + places where there is no defined base URI, such as dialog boxes and + off-line advertisements. + +5. Reference Resolution + + This section defines the process of resolving a URI reference within + a context that allows relative references so that the result is a + string matching the <URI> syntax rule of Section 3. + +5.1. Establishing a Base URI + + The term "relative" implies that a "base URI" exists against which + the relative reference is applied. Aside from fragment-only + references (Section 4.4), relative references are only usable when a + base URI is known. A base URI must be established by the parser + prior to parsing URI references that might be relative. A base URI + must conform to the <absolute-URI> syntax rule (Section 4.3). If the + base URI is obtained from a URI reference, then that reference must + be converted to absolute form and stripped of any fragment component + prior to its use as a base URI. + + + + + + +Berners-Lee, et al. Standards Track [Page 28] + +RFC 3986 URI Generic Syntax January 2005 + + + The base URI of a reference can be established in one of four ways, + discussed below in order of precedence. The order of precedence can + be thought of in terms of layers, where the innermost defined base + URI has the highest precedence. This can be visualized graphically + as follows: + + .----------------------------------------------------------. + | .----------------------------------------------------. | + | | .----------------------------------------------. | | + | | | .----------------------------------------. | | | + | | | | .----------------------------------. 
| | | | + | | | | | <relative-reference> | | | | | + | | | | `----------------------------------' | | | | + | | | | (5.1.1) Base URI embedded in content | | | | + | | | `----------------------------------------' | | | + | | | (5.1.2) Base URI of the encapsulating entity | | | + | | | (message, representation, or none) | | | + | | `----------------------------------------------' | | + | | (5.1.3) URI used to retrieve the entity | | + | `----------------------------------------------------' | + | (5.1.4) Default Base URI (application-dependent) | + `----------------------------------------------------------' + +5.1.1. Base URI Embedded in Content + + Within certain media types, a base URI for relative references can be + embedded within the content itself so that it can be readily obtained + by a parser. This can be useful for descriptive documents, such as + tables of contents, which may be transmitted to others through + protocols other than their usual retrieval context (e.g., email or + USENET news). + + It is beyond the scope of this specification to specify how, for each + media type, a base URI can be embedded. The appropriate syntax, when + available, is described by the data format specification associated + with each media type. + +5.1.2. Base URI from the Encapsulating Entity + + If no base URI is embedded, the base URI is defined by the + representation's retrieval context. For a document that is enclosed + within another entity, such as a message or archive, the retrieval + context is that entity. Thus, the default base URI of a + representation is the base URI of the entity in which the + representation is encapsulated. + + + + + + +Berners-Lee, et al. Standards Track [Page 29] + +RFC 3986 URI Generic Syntax January 2005 + + + A mechanism for embedding a base URI within MIME container types + (e.g., the message and multipart types) is defined by MHTML + [RFC2557]. 
Protocols that do not use the MIME message header syntax, + but that do allow some form of tagged metadata to be included within + messages, may define their own syntax for defining a base URI as part + of a message. + +5.1.3. Base URI from the Retrieval URI + + If no base URI is embedded and the representation is not encapsulated + within some other entity, then, if a URI was used to retrieve the + representation, that URI shall be considered the base URI. Note that + if the retrieval was the result of a redirected request, the last URI + used (i.e., the URI that resulted in the actual retrieval of the + representation) is the base URI. + +5.1.4. Default Base URI + + If none of the conditions described above apply, then the base URI is + defined by the context of the application. As this definition is + necessarily application-dependent, failing to define a base URI by + using one of the other methods may result in the same content being + interpreted differently by different types of applications. + + A sender of a representation containing relative references is + responsible for ensuring that a base URI for those references can be + established. Aside from fragment-only references, relative + references can only be used reliably in situations where the base URI + is well defined. + +5.2. Relative Resolution + + This section describes an algorithm for converting a URI reference + that might be relative to a given base URI into the parsed components + of the reference's target. The components can then be recomposed, as + described in Section 5.3, to form the target URI. This algorithm + provides definitive results that can be used to test the output of + other implementations. Applications may implement relative reference + resolution by using some other algorithm, provided that the results + match what would be given by this one. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 30] + +RFC 3986 URI Generic Syntax January 2005 + + +5.2.1. 
Pre-parse the Base URI + + The base URI (Base) is established according to the procedure of + Section 5.1 and parsed into the five main components described in + Section 3. Note that only the scheme component is required to be + present in a base URI; the other components may be empty or + undefined. A component is undefined if its associated delimiter does + not appear in the URI reference; the path component is never + undefined, though it may be empty. + + Normalization of the base URI, as described in Sections 6.2.2 and + 6.2.3, is optional. A URI reference must be transformed to its + target URI before it can be normalized. + +5.2.2. Transform References + + For each URI reference (R), the following pseudocode describes an + algorithm for transforming R into its target URI (T): + + -- The URI reference is parsed into the five URI components + -- + (R.scheme, R.authority, R.path, R.query, R.fragment) = parse(R); + + -- A non-strict parser may ignore a scheme in the reference + -- if it is identical to the base URI's scheme. + -- + if ((not strict) and (R.scheme == Base.scheme)) then + undefine(R.scheme); + endif; + + + + + + + + + + + + + + + + + + + + + + +Berners-Lee, et al. 
Standards Track [Page 31] + +RFC 3986 URI Generic Syntax January 2005 + + + if defined(R.scheme) then + T.scheme = R.scheme; + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if defined(R.authority) then + T.authority = R.authority; + T.path = remove_dot_segments(R.path); + T.query = R.query; + else + if (R.path == "") then + T.path = Base.path; + if defined(R.query) then + T.query = R.query; + else + T.query = Base.query; + endif; + else + if (R.path starts-with "/") then + T.path = remove_dot_segments(R.path); + else + T.path = merge(Base.path, R.path); + T.path = remove_dot_segments(T.path); + endif; + T.query = R.query; + endif; + T.authority = Base.authority; + endif; + T.scheme = Base.scheme; + endif; + + T.fragment = R.fragment; + +5.2.3. Merge Paths + + The pseudocode above refers to a "merge" routine for merging a + relative-path reference with the path of the base URI. This is + accomplished as follows: + + o If the base URI has a defined authority component and an empty + path, then return a string consisting of "/" concatenated with the + reference's path; otherwise, + + + + + + + + +Berners-Lee, et al. Standards Track [Page 32] + +RFC 3986 URI Generic Syntax January 2005 + + + o return a string consisting of the reference's path component + appended to all but the last segment of the base URI's path (i.e., + excluding any characters after the right-most "/" in the base URI + path, or excluding the entire base URI path if it does not contain + any "/" characters). + +5.2.4. Remove Dot Segments + + The pseudocode also refers to a "remove_dot_segments" routine for + interpreting and removing the special "." and ".." complete path + segments from a referenced path. This is done after the path is + extracted from a reference, whether or not the path was relative, in + order to remove any invalid or extraneous dot-segments prior to + forming the target URI. 
Although there are many ways to accomplish + this removal process, we describe a simple method using two string + buffers. + + 1. The input buffer is initialized with the now-appended path + components and the output buffer is initialized to the empty + string. + + 2. While the input buffer is not empty, loop as follows: + + A. If the input buffer begins with a prefix of "../" or "./", + then remove that prefix from the input buffer; otherwise, + + B. if the input buffer begins with a prefix of "/./" or "/.", + where "." is a complete path segment, then replace that + prefix with "/" in the input buffer; otherwise, + + C. if the input buffer begins with a prefix of "/../" or "/..", + where ".." is a complete path segment, then replace that + prefix with "/" in the input buffer and remove the last + segment and its preceding "/" (if any) from the output + buffer; otherwise, + + D. if the input buffer consists only of "." or "..", then remove + that from the input buffer; otherwise, + + E. move the first path segment in the input buffer to the end of + the output buffer, including the initial "/" character (if + any) and any subsequent characters up to, but not including, + the next "/" character or the end of the input buffer. + + 3. Finally, the output buffer is returned as the result of + remove_dot_segments. + + + + + +Berners-Lee, et al. Standards Track [Page 33] + +RFC 3986 URI Generic Syntax January 2005 + + + Note that dot-segments are intended for use in URI references to + express an identifier relative to the hierarchy of names in the base + URI. The remove_dot_segments algorithm respects that hierarchy by + removing extra dot-segments rather than treat them as an error or + leaving them to be misinterpreted by dereference implementations. + + The following illustrates how the above steps are applied for two + examples of merged paths, showing the state of the two buffers after + each step. 
+ + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : /a/b/c/./../../g + 2E: /a /b/c/./../../g + 2E: /a/b /c/./../../g + 2E: /a/b/c /./../../g + 2B: /a/b/c /../../g + 2C: /a/b /../g + 2C: /a /g + 2E: /a/g + + STEP OUTPUT BUFFER INPUT BUFFER + + 1 : mid/content=5/../6 + 2E: mid /content=5/../6 + 2E: mid/content=5 /../6 + 2C: mid /6 + 2E: mid/6 + + Some applications may find it more efficient to implement the + remove_dot_segments algorithm by using two segment stacks rather than + strings. + + Note: Beware that some older, erroneous implementations will fail + to separate a reference's query component from its path component + prior to merging the base and reference paths, resulting in an + interoperability failure if the query component contains the + strings "/../" or "/./". + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 34] + +RFC 3986 URI Generic Syntax January 2005 + + +5.3. Component Recomposition + + Parsed URI components can be recomposed to obtain the corresponding + URI reference string. Using pseudocode, this would be: + + result = "" + + if defined(scheme) then + append scheme to result; + append ":" to result; + endif; + + if defined(authority) then + append "//" to result; + append authority to result; + endif; + + append path to result; + + if defined(query) then + append "?" to result; + append query to result; + endif; + + if defined(fragment) then + append "#" to result; + append fragment to result; + endif; + + return result; + + Note that we are careful to preserve the distinction between a + component that is undefined, meaning that its separator was not + present in the reference, and a component that is empty, meaning that + the separator was present and was immediately followed by the next + component separator or the end of the reference. + +5.4. Reference Resolution Examples + + Within a representation with a well defined base URI of + + http://a/b/c/d;p?q + + a relative reference is transformed to its target URI as follows. 
+ + + + + + + +Berners-Lee, et al. Standards Track [Page 35] + +RFC 3986 URI Generic Syntax January 2005 + + +5.4.1. Normal Examples + + "g:h" = "g:h" + "g" = "http://a/b/c/g" + "./g" = "http://a/b/c/g" + "g/" = "http://a/b/c/g/" + "/g" = "http://a/g" + "//g" = "http://g" + "?y" = "http://a/b/c/d;p?y" + "g?y" = "http://a/b/c/g?y" + "#s" = "http://a/b/c/d;p?q#s" + "g#s" = "http://a/b/c/g#s" + "g?y#s" = "http://a/b/c/g?y#s" + ";x" = "http://a/b/c/;x" + "g;x" = "http://a/b/c/g;x" + "g;x?y#s" = "http://a/b/c/g;x?y#s" + "" = "http://a/b/c/d;p?q" + "." = "http://a/b/c/" + "./" = "http://a/b/c/" + ".." = "http://a/b/" + "../" = "http://a/b/" + "../g" = "http://a/b/g" + "../.." = "http://a/" + "../../" = "http://a/" + "../../g" = "http://a/g" + +5.4.2. Abnormal Examples + + Although the following abnormal examples are unlikely to occur in + normal practice, all URI parsers should be capable of resolving them + consistently. Each example uses the same base as that above. + + Parsers must be careful in handling cases where there are more ".." + segments in a relative-path reference than there are hierarchical + levels in the base URI's path. Note that the ".." syntax cannot be + used to change the authority component of a URI. + + "../../../g" = "http://a/g" + "../../../../g" = "http://a/g" + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 36] + +RFC 3986 URI Generic Syntax January 2005 + + + Similarly, parsers must remove the dot-segments "." and ".." when + they are complete components of a path, but not when they are only + part of a segment. + + "/./g" = "http://a/g" + "/../g" = "http://a/g" + "g." = "http://a/b/c/g." + ".g" = "http://a/b/c/.g" + "g.." = "http://a/b/c/g.." + "..g" = "http://a/b/c/..g" + + Less likely are cases where the relative reference uses unnecessary + or nonsensical forms of the "." and ".." complete path segments. + + "./../g" = "http://a/b/g" + "./g/." 
= "http://a/b/c/g/" + "g/./h" = "http://a/b/c/g/h" + "g/../h" = "http://a/b/c/h" + "g;x=1/./y" = "http://a/b/c/g;x=1/y" + "g;x=1/../y" = "http://a/b/c/y" + + Some applications fail to separate the reference's query and/or + fragment components from the path component before merging it with + the base path and removing dot-segments. This error is rarely + noticed, as typical usage of a fragment never includes the hierarchy + ("/") character and the query component is not normally used within + relative references. + + "g?y/./x" = "http://a/b/c/g?y/./x" + "g?y/../x" = "http://a/b/c/g?y/../x" + "g#s/./x" = "http://a/b/c/g#s/./x" + "g#s/../x" = "http://a/b/c/g#s/../x" + + Some parsers allow the scheme name to be present in a relative + reference if it is the same as the base URI scheme. This is + considered to be a loophole in prior specifications of partial URI + [RFC1630]. Its use should be avoided but is allowed for backward + compatibility. + + "http:g" = "http:g" ; for strict parsers + / "http://a/b/c/g" ; for backward compatibility + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 37] + +RFC 3986 URI Generic Syntax January 2005 + + +6. Normalization and Comparison + + One of the most common operations on URIs is simple comparison: + determining whether two URIs are equivalent without using the URIs to + access their respective resource(s). A comparison is performed every + time a response cache is accessed, a browser checks its history to + color a link, or an XML parser processes tags within a namespace. + Extensive normalization prior to comparison of URIs is often used by + spiders and indexing engines to prune a search space or to reduce + duplication of request actions and response storage. + + URI comparison is performed for some particular purpose. 
Protocols + or implementations that compare URIs for different purposes will + often be subject to differing design trade-offs in regards to how + much effort should be spent in reducing aliased identifiers. This + section describes various methods that may be used to compare URIs, + the trade-offs between them, and the types of applications that might + use them. + +6.1. Equivalence + + Because URIs exist to identify resources, presumably they should be + considered equivalent when they identify the same resource. However, + this definition of equivalence is not of much practical use, as there + is no way for an implementation to compare two resources unless it + has full knowledge or control of them. For this reason, + determination of equivalence or difference of URIs is based on string + comparison, perhaps augmented by reference to additional rules + provided by URI scheme definitions. We use the terms "different" and + "equivalent" to describe the possible outcomes of such comparisons, + but there are many application-dependent versions of equivalence. + + Even though it is possible to determine that two URIs are equivalent, + URI comparison is not sufficient to determine whether two URIs + identify different resources. For example, an owner of two different + domain names could decide to serve the same resource from both, + resulting in two different URIs. Therefore, comparison methods are + designed to minimize false negatives while strictly avoiding false + positives. + + In testing for equivalence, applications should not directly compare + relative references; the references should be converted to their + respective target URIs before comparison. When URIs are compared to + select (or avoid) a network action, such as retrieval of a + representation, fragment components (if any) should be excluded from + the comparison. + + + + + +Berners-Lee, et al. Standards Track [Page 38] + +RFC 3986 URI Generic Syntax January 2005 + + +6.2. 
Comparison Ladder + + A variety of methods are used in practice to test URI equivalence. + These methods fall into a range, distinguished by the amount of + processing required and the degree to which the probability of false + negatives is reduced. As noted above, false negatives cannot be + eliminated. In practice, their probability can be reduced, but this + reduction requires more processing and is not cost-effective for all + applications. + + If this range of comparison practices is considered as a ladder, the + following discussion will climb the ladder, starting with practices + that are cheap but have a relatively higher chance of producing false + negatives, and proceeding to those that have higher computational + cost and lower risk of false negatives. + +6.2.1. Simple String Comparison + + If two URIs, when considered as character strings, are identical, + then it is safe to conclude that they are equivalent. This type of + equivalence test has very low computational cost and is in wide use + in a variety of applications, particularly in the domain of parsing. + + Testing strings for equivalence requires some basic precautions. + This procedure is often referred to as "bit-for-bit" or + "byte-for-byte" comparison, which is potentially misleading. Testing + strings for equality is normally based on pair comparison of the + characters that make up the strings, starting from the first and + proceeding until both strings are exhausted and all characters are + found to be equal, until a pair of characters compares unequal, or + until one of the strings is exhausted before the other. + + This character comparison requires that each pair of characters be + put in comparable form. For example, should one URI be stored in a + byte array in EBCDIC encoding and the second in a Java String object + (UTF-16), bit-for-bit comparisons applied naively will produce + errors. 
It is better to speak of equality on a character-for- + character basis rather than on a byte-for-byte or bit-for-bit basis. + In practical terms, character-by-character comparisons should be done + codepoint-by-codepoint after conversion to a common character + encoding. + + False negatives are caused by the production and use of URI aliases. + Unnecessary aliases can be reduced, regardless of the comparison + method, by consistently providing URI references in an already- + normalized form (i.e., a form identical to what would be produced + after normalization is applied, as described below). + + + + +Berners-Lee, et al. Standards Track [Page 39] + +RFC 3986 URI Generic Syntax January 2005 + + + Protocols and data formats often limit some URI comparisons to simple + string comparison, based on the theory that people and + implementations will, in their own best interest, be consistent in + providing URI references, or at least consistent enough to negate any + efficiency that might be obtained from further normalization. + +6.2.2. Syntax-Based Normalization + + Implementations may use logic based on the definitions provided by + this specification to reduce the probability of false negatives. + This processing is moderately higher in cost than character-for- + character string comparison. For example, an application using this + approach could reasonably consider the following two URIs equivalent: + + example://a/b/c/%7Bfoo%7D + eXAMPLE://a/./b/../b/%63/%7bfoo%7d + + Web user agents, such as browsers, typically apply this type of URI + normalization when determining whether a cached response is + available. Syntax-based normalization includes such techniques as + case normalization, percent-encoding normalization, and removal of + dot-segments. 
+ + Case Normalization + + For all URIs, the hexadecimal digits within a percent-encoding + triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore + should be normalized to use uppercase letters for the digits A-F. + + When a URI uses components of the generic syntax, the component + syntax equivalence rules always apply; namely, that the scheme and + host are case-insensitive and therefore should be normalized to + lowercase. For example, the URI <HTTP://www.EXAMPLE.com/> is + equivalent to <http://www.example.com/>. The other generic syntax + components are assumed to be case-sensitive unless specifically + defined otherwise by the scheme (see Section 6.2.3). + + Percent-Encoding Normalization + + The percent-encoding mechanism (Section 2.1) is a frequent source of + variance among otherwise identical URIs. In addition to the case + normalization issue noted above, some URI producers percent-encode + octets that do not require percent-encoding, resulting in URIs that + are equivalent to their non-encoded counterparts. These URIs should + be normalized by decoding any percent-encoded octet that corresponds + to an unreserved character, as described in Section 2.3. + + + + + +Berners-Lee, et al. Standards Track [Page 40] + +RFC 3986 URI Generic Syntax January 2005 + + + Path Segment Normalization + + The complete path segments "." and ".." are intended only for use + within relative references (Section 4.1) and are removed as part of + the reference resolution process (Section 5.2). However, some + deployed implementations incorrectly assume that reference resolution + is not necessary when the reference is already a URI and thus fail to + remove dot-segments when they occur in non-relative paths. URI + normalizers should remove dot-segments by applying the + remove_dot_segments algorithm to the path, as described in + Section 5.2.4. + +6.2.3. Scheme-Based Normalization + + The syntax and semantics of URIs vary from scheme to scheme, as + described by the defining specification for each scheme. 
+ Implementations may use scheme-specific rules, at further processing + cost, to reduce the probability of false negatives. For example, + because the "http" scheme makes use of an authority component, has a + default port of "80", and defines an empty path to be equivalent to + "/", the following four URIs are equivalent: + + http://example.com + http://example.com/ + http://example.com:/ + http://example.com:80/ + + In general, a URI that uses the generic syntax for authority with an + empty path should be normalized to a path of "/". Likewise, an + explicit ":port", for which the port is empty or the default for the + scheme, is equivalent to one where the port and its ":" delimiter are + elided and thus should be removed by scheme-based normalization. For + example, the second URI above is the normal form for the "http" + scheme. + + Another case where normalization varies by scheme is in the handling + of an empty authority component or empty host subcomponent. For many + scheme specifications, an empty authority or host is considered an + error; for others, it is considered equivalent to "localhost" or the + end-user's host. When a scheme defines a default for authority and a + URI reference to that default is desired, the reference should be + normalized to an empty authority for the sake of uniformity, brevity, + and internationalization. If, however, either the userinfo or port + subcomponents are non-empty, then the host should be given explicitly + even if it matches the default. + + Normalization should not remove delimiters when their associated + component is empty unless licensed to do so by the scheme + + + +Berners-Lee, et al. Standards Track [Page 41] + +RFC 3986 URI Generic Syntax January 2005 + + + specification. For example, the URI "http://example.com/?" cannot be + assumed to be equivalent to any of the examples above. 
Likewise, the + presence or absence of delimiters within a userinfo subcomponent is + usually significant to its interpretation. The fragment component is + not subject to any scheme-based normalization; thus, two URIs that + differ only by the suffix "#" are considered different regardless of + the scheme. + + Some schemes define additional subcomponents that consist of case- + insensitive data, giving an implicit license to normalizers to + convert this data to a common case (e.g., all lowercase). For + example, URI schemes that define a subcomponent of path to contain an + Internet hostname, such as the "mailto" URI scheme, cause that + subcomponent to be case-insensitive and thus subject to case + normalization (e.g., "mailto:Joe@Example.COM" is equivalent to + "mailto:Joe@example.com", even though the generic syntax considers + the path component to be case-sensitive). + + Other scheme-specific normalizations are possible. + +6.2.4. Protocol-Based Normalization + + Substantial effort to reduce the incidence of false negatives is + often cost-effective for web spiders. Therefore, they implement even + more aggressive techniques in URI comparison. For example, if they + observe that a URI such as + + http://example.com/data + + redirects to a URI differing only in the trailing slash + + http://example.com/data/ + + they will likely regard the two as equivalent in the future. This + kind of technique is only appropriate when equivalence is clearly + indicated by both the result of accessing the resources and the + common conventions of their scheme's dereference algorithm (in this + case, use of redirection by HTTP origin servers to avoid problems + with relative references). + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 42] + +RFC 3986 URI Generic Syntax January 2005 + + +7. Security Considerations + + A URI does not in itself pose a security threat. 
However, as URIs + are often used to provide a compact set of instructions for access to + network resources, care must be taken to properly interpret the data + within a URI, to prevent that data from causing unintended access, + and to avoid including data that should not be revealed in plain + text. + +7.1. Reliability and Consistency + + There is no guarantee that once a URI has been used to retrieve + information, the same information will be retrievable by that URI in + the future. Nor is there any guarantee that the information + retrievable via that URI in the future will be observably similar to + that retrieved in the past. The URI syntax does not constrain how a + given scheme or authority apportions its namespace or maintains it + over time. Such guarantees can only be obtained from the person(s) + controlling that namespace and the resource in question. A specific + URI scheme may define additional semantics, such as name persistence, + if those semantics are required of all naming authorities for that + scheme. + +7.2. Malicious Construction + + It is sometimes possible to construct a URI so that an attempt to + perform a seemingly harmless, idempotent operation, such as the + retrieval of a representation, will in fact cause a possibly damaging + remote operation. The unsafe URI is typically constructed by + specifying a port number other than that reserved for the network + protocol in question. The client unwittingly contacts a site running + a different protocol service, and data within the URI contains + instructions that, when interpreted according to this other protocol, + cause an unexpected operation. A frequent example of such abuse has + been the use of a protocol-based scheme with a port component of + "25", thereby fooling user agent software into sending an unintended + or impersonating message via an SMTP server. 
+ + Applications should prevent dereference of a URI that specifies a TCP + port number within the "well-known port" range (0 - 1023) unless the + protocol being used to dereference that URI is compatible with the + protocol expected on that well-known port. Although IANA maintains a + registry of well-known ports, applications should make such + restrictions user-configurable to avoid preventing the deployment of + new services. + + + + + + +Berners-Lee, et al. Standards Track [Page 43] + +RFC 3986 URI Generic Syntax January 2005 + + + When a URI contains percent-encoded octets that match the delimiters + for a given resolution or dereference protocol (for example, CR and + LF characters for the TELNET protocol), these percent-encodings must + not be decoded before transmission across that protocol. Transfer of + the percent-encoding, which might violate the protocol, is less + harmful than allowing decoded octets to be interpreted as additional + operations or parameters, perhaps triggering an unexpected and + possibly harmful remote operation. + +7.3. Back-End Transcoding + + When a URI is dereferenced, the data within it is often parsed by + both the user agent and one or more servers. In HTTP, for example, a + typical user agent will parse a URI into its five major components, + access the authority's server, and send it the data within the + authority, path, and query components. A typical server will take + that information, parse the path into segments and the query into + key/value pairs, and then invoke implementation-specific handlers to + respond to the request. As a result, a common security concern for + server implementations that handle a URI, either as a whole or split + into separate components, is proper interpretation of the octet data + represented by the characters and percent-encodings within that URI. + + Percent-encoded octets must be decoded at some point during the + dereference process. 
Applications must split the URI into its + components and subcomponents prior to decoding the octets, as + otherwise the decoded octets might be mistaken for delimiters. + Security checks of the data within a URI should be applied after + decoding the octets. Note, however, that the "%00" percent-encoding + (NUL) may require special handling and should be rejected if the + application is not expecting to receive raw data within a component. + + Special care should be taken when the URI path interpretation process + involves the use of a back-end file system or related system + functions. File systems typically assign an operational meaning to + special characters, such as the "/", "\", ":", "[", and "]" + characters, and to special device names like ".", "..", "...", "aux", + "lpt", etc. In some cases, merely testing for the existence of such + a name will cause the operating system to pause or invoke unrelated + system calls, leading to significant security concerns regarding + denial of service and unintended data transfer. It would be + impossible for this specification to list all such significant + characters and device names. Implementers should research the + reserved names and characters for the types of storage device that + may be attached to their applications and restrict the use of data + obtained from URI components accordingly. + + + + + +Berners-Lee, et al. Standards Track [Page 44] + +RFC 3986 URI Generic Syntax January 2005 + + +7.4. Rare IP Address Formats + + Although the URI syntax for IPv4address only allows the common + dotted-decimal form of IPv4 address literal, many implementations + that process URIs make use of platform-dependent system routines, + such as gethostbyname() and inet_aton(), to translate the string + literal to an actual IP address. Unfortunately, such system routines + often allow and process a much larger set of formats than those + described in Section 3.2.2. 
+ + For example, many implementations allow dotted forms of three + numbers, wherein the last part is interpreted as a 16-bit quantity + and placed in the right-most two bytes of the network address (e.g., + a Class B network). Likewise, a dotted form of two numbers means + that the last part is interpreted as a 24-bit quantity and placed in + the right-most three bytes of the network address (Class A), and a + single number (without dots) is interpreted as a 32-bit quantity and + stored directly in the network address. Adding further to the + confusion, some implementations allow each dotted part to be + interpreted as decimal, octal, or hexadecimal, as specified in the C + language (i.e., a leading 0x or 0X implies hexadecimal; a leading 0 + implies octal; otherwise, the number is interpreted as decimal). + + These additional IP address formats are not allowed in the URI syntax + due to differences between platform implementations. However, they + can become a security concern if an application attempts to filter + access to resources based on the IP address in string literal format. + If this filtering is performed, literals should be converted to + numeric form and filtered based on the numeric value, and not on a + prefix or suffix of the string form. + +7.5. Sensitive Information + + URI producers should not provide a URI that contains a username or + password that is intended to be secret. URIs are frequently + displayed by browsers, stored in clear text bookmarks, and logged by + user agent history and intermediary applications (proxies). A + password appearing within the userinfo component is deprecated and + should be considered an error (or simply ignored) except in those + rare cases where the 'password' parameter is intended to be public. + +7.6. 
Semantic Attacks + + Because the userinfo subcomponent is rarely used and appears before + the host in the authority component, it can be used to construct a + URI intended to mislead a human user by appearing to identify one + (trusted) naming authority while actually identifying a different + authority hidden behind the noise. For example + + + +Berners-Lee, et al. Standards Track [Page 45] + +RFC 3986 URI Generic Syntax January 2005 + + + ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm + + might lead a human user to assume that the host is 'cnn.example.com', + whereas it is actually '10.0.0.1'. Note that a misleading userinfo + subcomponent could be much longer than the example above. + + A misleading URI, such as that above, is an attack on the user's + preconceived notions about the meaning of a URI rather than an attack + on the software itself. User agents may be able to reduce the impact + of such attacks by distinguishing the various components of the URI + when they are rendered, such as by using a different color or tone to + render userinfo if any is present, though there is no panacea. More + information on URI-based semantic attacks can be found in [Siedzik]. + +8. IANA Considerations + + URI scheme names, as defined by <scheme> in Section 3.1, form a + registered namespace that is managed by IANA according to the + procedures defined in [BCP35]. No IANA actions are required by this + document. + +9. Acknowledgements + + This specification is derived from RFC 2396 [RFC2396], RFC 1808 + [RFC1808], and RFC 1738 [RFC1738]; the acknowledgements in those + documents still apply. It also incorporates the update (with + corrections) for IPv6 literals in the host syntax, as defined by + Robert M. Hinden, Brian E. Carpenter, and Larry Masinter in + [RFC2732]. In addition, contributions by Gisle Aas, Reese Anschultz, + Daniel Barclay, Tim Bray, Mike Brown, Rob Cameron, Jeremy Carroll, + Dan Connolly, Adam M. 
Costello, John Cowan, Jason Diamond, Martin + Duerst, Stefan Eissing, Clive D.W. Feather, Al Gilman, Tony Hammond, + Elliotte Harold, Pat Hayes, Henry Holtzman, Ian B. Jacobs, Michael + Kay, John C. Klensin, Graham Klyne, Dan Kohn, Bruce Lilly, Andrew + Main, Dave McAlpin, Ira McDonald, Michael Mealling, Ray Merkert, + Stephen Pollei, Julian Reschke, Tomas Rokicki, Miles Sabin, Kai + Schaetzl, Mark Thomson, Ronald Tschalaer, Norm Walsh, Marc Warne, + Stuart Williams, and Henry Zongaro are gratefully acknowledged. + +10. References + +10.1. Normative References + + [ASCII] American National Standards Institute, "Coded Character + Set -- 7-bit American Standard Code for Information + Interchange", ANSI X3.4, 1986. + + + + + +Berners-Lee, et al. Standards Track [Page 46] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [STD63] Yergeau, F., "UTF-8, a transformation format of + ISO 10646", STD 63, RFC 3629, November 2003. + + [UCS] International Organization for Standardization, + "Information Technology - Universal Multiple-Octet Coded + Character Set (UCS)", ISO/IEC 10646:2003, December 2003. + +10.2. Informative References + + [BCP19] Freed, N. and J. Postel, "IANA Charset Registration + Procedures", BCP 19, RFC 2978, October 2000. + + [BCP35] Petke, R. and I. King, "Registration Procedures for URL + Scheme Names", BCP 35, RFC 2717, November 1999. + + [RFC0952] Harrenstien, K., Stahl, M., and E. Feinler, "DoD Internet + host table specification", RFC 952, October 1985. + + [RFC1034] Mockapetris, P., "Domain names - concepts and facilities", + STD 13, RFC 1034, November 1987. + + [RFC1123] Braden, R., "Requirements for Internet Hosts - Application + and Support", STD 3, RFC 1123, October 1989. + + [RFC1535] Gavron, E., "A Security Problem and Proposed Correction + With Widely Deployed DNS Software", RFC 1535, + October 1993. 
+ + [RFC1630] Berners-Lee, T., "Universal Resource Identifiers in WWW: A + Unifying Syntax for the Expression of Names and Addresses + of Objects on the Network as used in the World-Wide Web", + RFC 1630, June 1994. + + [RFC1736] Kunze, J., "Functional Recommendations for Internet + Resource Locators", RFC 1736, February 1995. + + [RFC1737] Sollins, K. and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, December 1994. + + [RFC1738] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, December 1994. + + [RFC1808] Fielding, R., "Relative Uniform Resource Locators", + RFC 1808, June 1995. + + + + +Berners-Lee, et al. Standards Track [Page 47] + +RFC 3986 URI Generic Syntax January 2005 + + + [RFC2046] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, + November 1996. + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [RFC2396] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifiers (URI): Generic Syntax", RFC 2396, + August 1998. + + [RFC2518] Goland, Y., Whitehead, E., Faizi, A., Carter, S., and D. + Jensen, "HTTP Extensions for Distributed Authoring -- + WEBDAV", RFC 2518, February 1999. + + [RFC2557] Palme, J., Hopmann, A., and N. Shelness, "MIME + Encapsulation of Aggregate Documents, such as HTML + (MHTML)", RFC 2557, March 1999. + + [RFC2718] Masinter, L., Alvestrand, H., Zigmond, D., and R. Petke, + "Guidelines for new URL Schemes", RFC 2718, November 1999. + + [RFC2732] Hinden, R., Carpenter, B., and L. Masinter, "Format for + Literal IPv6 Addresses in URL's", RFC 2732, December 1999. + + [RFC3305] Mealling, M. and R. Denenberg, "Report from the Joint + W3C/IETF URI Planning Interest Group: Uniform Resource + Identifiers (URIs), URLs, and Uniform Resource Names + (URNs): Clarifications and Recommendations", RFC 3305, + August 2002. + + [RFC3490] Faltstrom, P., Hoffman, P., and A. 
Costello, + "Internationalizing Domain Names in Applications (IDNA)", + RFC 3490, March 2003. + + [RFC3513] Hinden, R. and S. Deering, "Internet Protocol Version 6 + (IPv6) Addressing Architecture", RFC 3513, April 2003. + + [Siedzik] Siedzik, R., "Semantic Attacks: What's in a URL?", + April 2001, <http://www.giac.org/practical/gsec/Richard_Siedzik_GSEC.pdf>. + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 48] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix A. Collected ABNF for URI + + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + + hier-part = "//" authority path-abempty + / path-absolute + / path-rootless + / path-empty + + URI-reference = URI / relative-ref + + absolute-URI = scheme ":" hier-part [ "?" query ] + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + authority = [ userinfo "@" ] host [ ":" port ] + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + host = IP-literal / IPv4address / reg-name + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + + + + + + + +Berners-Lee, et al. 
Standards Track [Page 49] + +RFC 3986 URI Generic Syntax January 2005 + + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + path = path-abempty ; begins with "/" or is empty + / path-absolute ; begins with "/" but not "//" + / path-noscheme ; begins with a non-colon segment + / path-rootless ; begins with a segment + / path-empty ; zero characters + + path-abempty = *( "/" segment ) + path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-noscheme = segment-nz-nc *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) + path-empty = 0<pchar> + + segment = *pchar + segment-nz = 1*pchar + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + + query = *( pchar / "/" / "?" ) + + fragment = *( pchar / "/" / "?" ) + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + +Appendix B. Parsing a URI Reference with a Regular Expression + + As the "first-match-wins" algorithm is identical to the "greedy" + disambiguation method used by POSIX regular expressions, it is + natural and commonplace to use a regular expression for parsing the + potential five components of a URI reference. + + The following line is the regular expression for breaking-down a + well-formed URI reference into its components. + + + +Berners-Lee, et al. Standards Track [Page 50] + +RFC 3986 URI Generic Syntax January 2005 + + + ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
+ 12 3 4 5 6 7 8 9 + + The numbers in the second line above are only to assist readability; + they indicate the reference points for each subexpression (i.e., each + paired parenthesis). We refer to the value matched for subexpression + <n> as $<n>. For example, matching the above expression to + + http://www.ics.uci.edu/pub/ietf/uri/#Related + + results in the following subexpression matches: + + $1 = http: + $2 = http + $3 = //www.ics.uci.edu + $4 = www.ics.uci.edu + $5 = /pub/ietf/uri/ + $6 = <undefined> + $7 = <undefined> + $8 = #Related + $9 = Related + + where <undefined> indicates that the component is not present, as is + the case for the query component in the above example. Therefore, we + can determine the value of the five components as + + scheme = $2 + authority = $4 + path = $5 + query = $7 + fragment = $9 + + Going in the opposite direction, we can recreate a URI reference from + its components by using the algorithm of Section 5.3. + +Appendix C. Delimiting a URI in Context + + URIs are often transmitted through formats that do not provide a + clear context for their interpretation. For example, there are many + occasions when a URI is included in plain text; examples include text + sent in email, USENET news, and on printed paper. In such cases, it + is important to be able to delimit the URI from the rest of the text, + and in particular from punctuation marks that might be mistaken for + part of the URI. + + In practice, URIs are delimited in a variety of ways, but usually + within double-quotes "http://example.com/", angle brackets + <http://example.com/>, or just by using whitespace: + + + +Berners-Lee, et al. Standards Track [Page 51] + +RFC 3986 URI Generic Syntax January 2005 + + + http://example.com/ + + These wrappers do not form part of the URI. + + In some cases, extra whitespace (spaces, line-breaks, tabs, etc.) may + have to be added to break a long URI across lines. The whitespace + should be ignored when the URI is extracted. + + No whitespace should be introduced after a hyphen ("-") character. 
+ Because some typesetters and printers may (erroneously) introduce a + hyphen at the end of line when breaking it, the interpreter of a URI + containing a line break immediately after a hyphen should ignore all + whitespace around the line break and should be aware that the hyphen + may or may not actually be part of the URI. + + Using <> angle brackets around each URI is especially recommended as + a delimiting style for a reference that contains embedded whitespace. + + The prefix "URL:" (with or without a trailing space) was formerly + recommended as a way to help distinguish a URI from other bracketed + designators, though it is not commonly used in practice and is no + longer recommended. + + For robustness, software that accepts user-typed URI should attempt + to recognize and strip both delimiters and embedded whitespace. + + For example, the text + + Yes, Jim, I found it under "http://www.w3.org/Addressing/", + but you can probably pick it up from <ftp://foo.example.com/rfc/>. Note the warning in <http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING>. + + contains the URI references + + http://www.w3.org/Addressing/ + ftp://foo.example.com/rfc/ + http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 52] + +RFC 3986 URI Generic Syntax January 2005 + + +Appendix D. Changes from RFC 2396 + +D.1. Additions + + An ABNF rule for URI has been introduced to correspond to one common + usage of the term: an absolute URI with optional fragment. + + IPv6 (and later) literals have been added to the list of possible + identifiers for the host portion of an authority component, as + described by [RFC2732], with the addition of "[" and "]" to the + reserved set and a version flag to anticipate future versions of IP + literals. Square brackets are now specified as reserved within the + authority component and are not allowed outside their use as + delimiters for an IP literal within host. 
In order to make this + change without changing the technical definition of the path, query, + and fragment components, those rules were redefined to directly + specify the characters allowed. + + As [RFC2732] defers to [RFC3513] for definition of an IPv6 literal + address, which, unfortunately, lacks an ABNF description of + IPv6address, we created a new ABNF rule for IPv6address that matches + the text representations defined by Section 2.2 of [RFC3513]. + Likewise, the definition of IPv4address has been improved in order to + limit each decimal octet to the range 0-255. + + Section 6, on URI normalization and comparison, has been completely + rewritten and extended by using input from Tim Bray and discussion + within the W3C Technical Architecture Group. + +D.2. Modifications + + The ad-hoc BNF syntax of RFC 2396 has been replaced with the ABNF of + [RFC2234]. This change required all rule names that formerly + included underscore characters to be renamed with a dash instead. In + addition, a number of syntax rules have been eliminated or simplified + to make the overall grammar more comprehensible. Specifications that + refer to the obsolete grammar rules may be understood by replacing + those rules according to the following table: + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 53] + +RFC 3986 URI Generic Syntax January 2005 + + + +----------------+--------------------------------------------------+ + | obsolete rule | translation | + +----------------+--------------------------------------------------+ + | absoluteURI | absolute-URI | + | relativeURI | relative-part [ "?" query ] | + | hier_part | ( "//" authority path-abempty / | + | | path-absolute ) [ "?" query ] | + | | | + | opaque_part | path-rootless [ "?" 
query ] | + | net_path | "//" authority path-abempty | + | abs_path | path-absolute | + | rel_path | path-rootless | + | rel_segment | segment-nz-nc | + | reg_name | reg-name | + | server | authority | + | hostport | host [ ":" port ] | + | hostname | reg-name | + | path_segments | path-abempty | + | param | *<pchar excluding ";"> | + | | | + | uric | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," / "/" | + | | | + | uric_no_slash | unreserved / pct-encoded / ";" / "?" / ":" | + | | / "@" / "&" / "=" / "+" / "$" / "," | + | | | + | mark | "-" / "_" / "." / "!" / "~" / "*" / "'" | + | | / "(" / ")" | + | | | + | escaped | pct-encoded | + | hex | HEXDIG | + | alphanum | ALPHA / DIGIT | + +----------------+--------------------------------------------------+ + + Use of the above obsolete rules for the definition of scheme-specific + syntax is deprecated. + + Section 2, on characters, has been rewritten to explain what + characters are reserved, when they are reserved, and why they are + reserved, even when they are not used as delimiters by the generic + syntax. The mark characters that are typically unsafe to decode, + including the exclamation mark ("!"), asterisk ("*"), single-quote + ("'"), and open and close parentheses ("(" and ")"), have been moved + to the reserved set in order to clarify the distinction between + reserved and unreserved and, hopefully, to answer the most common + question of scheme designers. Likewise, the section on + percent-encoded characters has been rewritten, and URI normalizers + are now given license to decode any percent-encoded octets + + + +Berners-Lee, et al. Standards Track [Page 54] + +RFC 3986 URI Generic Syntax January 2005 + + + corresponding to unreserved characters. In general, the terms + "escaped" and "unescaped" have been replaced with "percent-encoded" + and "decoded", respectively, to reduce confusion with other forms of + escape mechanisms. 
+ + The ABNF for URI and URI-reference has been redesigned to make them + more friendly to LALR parsers and to reduce complexity. As a result, + the layout form of syntax description has been removed, along with + the uric, uric_no_slash, opaque_part, net_path, abs_path, rel_path, + path_segments, rel_segment, and mark rules. All references to + "opaque" URIs have been replaced with a better description of how the + path component may be opaque to hierarchy. The relativeURI rule has + been replaced with relative-ref to avoid unnecessary confusion over + whether they are a subset of URI. The ambiguity regarding the + parsing of URI-reference as a URI or a relative-ref with a colon in + the first segment has been eliminated through the use of five + separate path matching rules. + + The fragment identifier has been moved back into the section on + generic syntax components and within the URI and relative-ref rules, + though it remains excluded from absolute-URI. The number sign ("#") + character has been moved back to the reserved set as a result of + reintegrating the fragment syntax. + + The ABNF has been corrected to allow the path component to be empty. + This also allows an absolute-URI to consist of nothing after the + "scheme:", as is present in practice with the "dav:" namespace + [RFC2518] and with the "about:" scheme used internally by many WWW + browser implementations. The ambiguity regarding the boundary + between authority and path has been eliminated through the use of + five separate path matching rules. + + Registry-based naming authorities that use the generic syntax are now + defined within the host rule. This change allows current + implementations, where whatever name provided is simply fed to the + local name resolution mechanism, to be consistent with the + specification. It also removes the need to re-specify DNS name + formats here. 
Furthermore, it allows the host component to contain + percent-encoded octets, which is necessary to enable + internationalized domain names to be provided in URIs, processed in + their native character encodings at the application layers above URI + processing, and passed to an IDNA library as a registered name in the + UTF-8 character encoding. The server, hostport, hostname, + domainlabel, toplabel, and alphanum rules have been removed. + + The resolving relative references algorithm of [RFC2396] has been + rewritten with pseudocode for this revision to improve clarity and + fix the following issues: + + + +Berners-Lee, et al. Standards Track [Page 55] + +RFC 3986 URI Generic Syntax January 2005 + + + o [RFC2396] section 5.2, step 6a, failed to account for a base URI + with no path. + + o Restored the behavior of [RFC1808] where, if the reference + contains an empty path and a defined query component, the target + URI inherits the base URI's path component. + + o The determination of whether a URI reference is a same-document + reference has been decoupled from the URI parser, simplifying the + URI processing interface within applications in a way consistent + with the internal architecture of deployed URI processing + implementations. The determination is now based on comparison to + the base URI after transforming a reference to absolute form, + rather than on the format of the reference itself. This change + may result in more references being considered "same-document" + under this specification than there would be under the rules given + in RFC 2396, especially when normalization is used to reduce + aliases. However, it does not change the status of existing + same-document references. + + o Separated the path merge routine into two routines: merge, for + describing combination of the base URI path with a relative-path + reference, and remove_dot_segments, for describing how to remove + the special "." and ".." segments from a composed path. 
The + remove_dot_segments algorithm is now applied to all URI reference + paths in order to match common implementations and to improve the + normalization of URIs in practice. This change only impacts the + parsing of abnormal references and same-scheme references wherein + the base URI has a non-hierarchical path. + +Index + + A + ABNF 11 + absolute 27 + absolute-path 26 + absolute-URI 27 + access 9 + authority 17, 18 + + B + base URI 28 + + C + character encoding 4 + character 4 + characters 8, 11 + coded character set 4 + + + +Berners-Lee, et al. Standards Track [Page 56] + +RFC 3986 URI Generic Syntax January 2005 + + + D + dec-octet 20 + dereference 9 + dot-segments 23 + + F + fragment 16, 24 + + G + gen-delims 13 + generic syntax 6 + + H + h16 20 + hier-part 16 + hierarchical 10 + host 18 + + I + identifier 5 + IP-literal 19 + IPv4 20 + IPv4address 19, 20 + IPv6 19 + IPv6address 19, 20 + IPvFuture 19 + + L + locator 7 + ls32 20 + + M + merge 32 + + N + name 7 + network-path 26 + + P + path 16, 22, 26 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + path-abempty 16, 22, 26 + path-absolute 16, 22, 26 + path-empty 16, 22, 26 + + + +Berners-Lee, et al. Standards Track [Page 57] + +RFC 3986 URI Generic Syntax January 2005 + + + path-rootless 16, 22 + pchar 23 + pct-encoded 12 + percent-encoding 12 + port 22 + + Q + query 16, 23 + + R + reg-name 21 + registered name 20 + relative 10, 28 + relative-path 26 + relative-ref 26 + remove_dot_segments 33 + representation 9 + reserved 12 + resolution 9, 28 + resource 5 + retrieval 9 + + S + same-document 27 + sameness 9 + scheme 16, 17 + segment 22, 23 + segment-nz 23 + segment-nz-nc 23 + sub-delims 13 + suffix 27 + + T + transcription 8 + + U + uniform 4 + unreserved 13 + URI grammar + absolute-URI 27 + ALPHA 11 + authority 18 + CR 11 + dec-octet 20 + DIGIT 11 + DQUOTE 11 + fragment 24 + gen-delims 13 + + + +Berners-Lee, et al. 
Standards Track [Page 58] + +RFC 3986 URI Generic Syntax January 2005 + + + h16 20 + HEXDIG 11 + hier-part 16 + host 19 + IP-literal 19 + IPv4address 20 + IPv6address 20 + IPvFuture 19 + LF 11 + ls32 20 + OCTET 11 + path 22 + path-abempty 22 + path-absolute 22 + path-empty 22 + path-noscheme 22 + path-rootless 22 + pchar 23 + pct-encoded 12 + port 22 + query 24 + reg-name 21 + relative-ref 26 + reserved 13 + scheme 17 + segment 23 + segment-nz 23 + segment-nz-nc 23 + SP 11 + sub-delims 13 + unreserved 13 + URI 16 + URI-reference 25 + userinfo 18 + URI 16 + URI-reference 25 + URL 7 + URN 7 + userinfo 18 + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 59] + +RFC 3986 URI Generic Syntax January 2005 + + +Authors' Addresses + + Tim Berners-Lee + World Wide Web Consortium + Massachusetts Institute of Technology + 77 Massachusetts Avenue + Cambridge, MA 02139 + USA + + Phone: +1-617-253-5702 + Fax: +1-617-258-5999 + EMail: timbl@w3.org + URI: http://www.w3.org/People/Berners-Lee/ + + + Roy T. Fielding + Day Software + 5251 California Ave., Suite 110 + Irvine, CA 92617 + USA + + Phone: +1-949-679-2960 + Fax: +1-949-679-2972 + EMail: fielding@gbiv.com + URI: http://roy.gbiv.com/ + + + Larry Masinter + Adobe Systems Incorporated + 345 Park Ave + San Jose, CA 95110 + USA + + Phone: +1-408-536-3024 + EMail: LMM@acm.org + URI: http://larry.masinter.net/ + + + + + + + + + + + + + + + +Berners-Lee, et al. Standards Track [Page 60] + +RFC 3986 URI Generic Syntax January 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. 
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the IETF's procedures with respect to rights in IETF Documents can + be found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + +Berners-Lee, et al. 
Standards Track [Page 61] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt new file mode 100644 index 0000000..f0b1513 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc3987.txt @@ -0,0 +1,2579 @@ + + + + + + +Network Working Group M. Duerst +Request for Comments: 3987 W3C +Category: Standards Track M. Suignard + Microsoft Corporation + January 2005 + + + Internationalized Resource Identifiers (IRIs) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + This document defines a new protocol element, the Internationalized + Resource Identifier (IRI), as a complement to the Uniform Resource + Identifier (URI). An IRI is a sequence of characters from the + Universal Character Set (Unicode/ISO 10646). A mapping from IRIs to + URIs is defined, which means that IRIs can be used instead of URIs, + where appropriate, to identify resources. + + The approach of defining a new protocol element was chosen instead of + extending or changing the definition of URIs. This was done in order + to allow a clear distinction and to avoid incompatibilities with + existing software. Guidelines are provided for the use and + deployment of IRIs in various protocols, formats, and software + components that currently deal with URIs. + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 1.1. Overview and Motivation . . . . . . . . . . . . . . . . 3 + 1.2. Applicability . . . . . . . . . . . . . . . . . . . . . 3 + 1.3. Definitions . . . . . . . . . . . . . . . . . . . . . . 4 + 1.4. Notation . . 
. . . . . . . . . . . . . . . . . . . . . . 5 + 2. IRI Syntax . . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 2.1. Summary of IRI Syntax . . . . . . . . . . . . . . . . . 6 + 2.2. ABNF for IRI References and IRIs . . . . . . . . . . . . 7 + + + + +Duerst & Suignard Standards Track [Page 1] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + 3. Relationship between IRIs and URIs . . . . . . . . . . . . . . 10 + 3.1. Mapping of IRIs to URIs . . . . . . . . . . . . . . . . 10 + 3.2. Converting URIs to IRIs . . . . . . . . . . . . . . . . 14 + 3.2.1. Examples . . . . . . . . . . . . . . . . . . . . 15 + 4. Bidirectional IRIs for Right-to-Left Languages. . . . . . . . 16 + 4.1. Logical Storage and Visual Presentation . . . . . . . . 17 + 4.2. Bidi IRI Structure . . . . . . . . . . . . . . . . . . . 18 + 4.3. Input of Bidi IRIs . . . . . . . . . . . . . . . . . . . 19 + 4.4. Examples . . . . . . . . . . . . . . . . . . . . . . . . 19 + 5. Normalization and Comparison . . . . . . . . . . . . . . . . . 21 + 5.1. Equivalence . . . . . . . . . . . . . . . . . . . . . . 22 + 5.2. Preparation for Comparison . . . . . . . . . . . . . . . 22 + 5.3. Comparison Ladder . . . . . . . . . . . . . . . . . . . 23 + 5.3.1. Simple String Comparison . . . . . . . . . . . . 23 + 5.3.2. Syntax-Based Normalization . . . . . . . . . . . 24 + 5.3.3. Scheme-Based Normalization . . . . . . . . . . . 27 + 5.3.4. Protocol-Based Normalization . . . . . . . . . . 28 + 6. Use of IRIs . . . . . . . . . . . . . . . . . . . . . . . . . 29 + 6.1. Limitations on UCS Characters Allowed in IRIs . . . . . 29 + 6.2. Software Interfaces and Protocols . . . . . . . . . . . 29 + 6.3. Format of URIs and IRIs in Documents and Protocols . . . 30 + 6.4. Use of UTF-8 for Encoding Original Characters .. . . . . 30 + 6.5. Relative IRI References . . . . . . . . . . . . . . . . 32 + 7. URI/IRI Processing Guidelines (informative) . . . . . . . . . 32 + 7.1. URI/IRI Software Interfaces . . . . . 
. . . . . . . . . 32 + 7.2. URI/IRI Entry . . . . . . . . . . . . . . . . . . . . . 33 + 7.3. URI/IRI Transfer between Applications . . . . . . . . . 33 + 7.4. URI/IRI Generation . . . . . . . . . . . . . . . . . . . 34 + 7.5. URI/IRI Selection . . . . . . . . . . . . . . . . . . . 34 + 7.6. Display of URIs/IRIs . . . . . . . . . . . . . . . . . . 35 + 7.7. Interpretation of URIs and IRIs . . . . . . . . . . . . 36 + 7.8. Upgrading Strategy . . . . . . . . . . . . . . . . . . . 36 + 8. Security Considerations . . . . . . . . . . . . . . . . . . . 37 + 9. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . . 39 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 40 + 10.1. Normative References . . . . . . . . . . . . . . . . . . 40 + 10.2. Informative References . . . . . . . . . . . . . . . . . 41 + A. Design Alternatives . . . . . . . . . . . . . . . . . . . . . 44 + A.1. New Scheme(s) . . . . . . . . . . . . . . . . . . . . . 44 + A.2. Character Encodings Other Than UTF-8 . . . . . . . . . . 44 + A.3. New Encoding Convention . . . . . . . . . . . . . . . . 44 + A.4. Indicating Character Encodings in the URI/IRI . . . . . 45 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . . 45 + Full Copyright Statement . . . . . . . . . . . . . . . . . . . . . 46 + + + + + + + +Duerst & Suignard Standards Track [Page 2] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +1. Introduction + +1.1. Overview and Motivation + + A Uniform Resource Identifier (URI) is defined in [RFC3986] as a + sequence of characters chosen from a limited subset of the repertoire + of US-ASCII [ASCII] characters. + + The characters in URIs are frequently used for representing words of + natural languages. This usage has many advantages: Such URIs are + easier to memorize, easier to interpret, easier to transcribe, easier + to create, and easier to guess. 
For most languages other than + English, however, the natural script uses characters other than A - + Z. For many people, handling Latin characters is as difficult as + handling the characters of other scripts is for those who use only + the Latin alphabet. Many languages with non-Latin scripts are + transcribed with Latin letters. These transcriptions are now often + used in URIs, but they introduce additional ambiguities. + + The infrastructure for the appropriate handling of characters from + local scripts is now widely deployed in local versions of operating + system and application software. Software that can handle a wide + variety of scripts and languages at the same time is increasingly + common. Also, increasing numbers of protocols and formats can carry + a wide range of characters. + + This document defines a new protocol element called Internationalized + Resource Identifier (IRI) by extending the syntax of URIs to a much + wider repertoire of characters. It also defines "internationalized" + versions corresponding to other constructs from [RFC3986], such as + URI references. The syntax of IRIs is defined in section 2, and the + relationship between IRIs and URIs in section 3. + + Using characters outside of A - Z in IRIs brings some difficulties. + Section 4 discusses the special case of bidirectional IRIs, section 5 + various forms of equivalence between IRIs, and section 6 the use of + IRIs in different situations. Section 7 gives additional informative + guidelines, and section 8 security considerations. + +1.2. Applicability + + IRIs are designed to be compatible with recommendations for new URI + schemes [RFC2718]. The compatibility is provided by specifying a + well-defined and deterministic mapping from the IRI character + sequence to the functionally equivalent URI character sequence. 
+ Practical use of IRIs (or IRI references) in place of URIs (or URI + references) depends on the following conditions being met: + + + + +Duerst & Suignard Standards Track [Page 3] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + a. A protocol or format element should be explicitly designated to + be able to carry IRIs. The intent is not to introduce IRIs into + contexts that are not defined to accept them. For example, XML + schema [XMLSchema] has an explicit type "anyURI" that includes + IRIs and IRI references. Therefore, IRIs and IRI references can + be in attributes and elements of type "anyURI". On the other + hand, in the HTTP protocol [RFC2616], the Request URI is defined + as a URI, which means that direct use of IRIs is not allowed in + HTTP requests. + + b. The protocol or format carrying the IRIs should have a mechanism + to represent the wide range of characters used in IRIs, either + natively or by some protocol- or format-specific escaping + mechanism (for example, numeric character references in [XML1]). + + c. The URI corresponding to the IRI in question has to encode + original characters into octets using UTF-8. For new URI + schemes, this is recommended in [RFC2718]. It can apply to a + whole scheme (e.g., IMAP URLs [RFC2192] and POP URLs [RFC2384], + or the URN syntax [RFC2141]). It can apply to a specific part of + a URI, such as the fragment identifier (e.g., [XPointer]). It + can apply to a specific URI or part(s) thereof. For details, + please see section 6.4. + +1.3. Definitions + + The following definitions are used in this document; they follow the + terms in [RFC2130], [RFC2277], and [ISO10646]. + + character: A member of a set of elements used for the organization, + control, or representation of data. For example, "LATIN CAPITAL + LETTER A" names a character. + + octet: An ordered sequence of eight bits considered as a unit. + + character repertoire: A set of characters (in the mathematical + sense). 
+ + sequence of characters: A sequence of characters (one after another). + + sequence of octets: A sequence of octets (one after another). + + character encoding: A method of representing a sequence of characters + as a sequence of octets (maybe with variants). Also, a method of + (unambiguously) converting a sequence of octets into a sequence of + characters. + + + + + +Duerst & Suignard Standards Track [Page 4] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + charset: The name of a parameter or attribute used to identify a + character encoding. + + UCS: Universal Character Set. The coded character set defined by + ISO/IEC 10646 [ISO10646] and the Unicode Standard [UNIV4]. + + IRI reference: Denotes the common usage of an Internationalized + Resource Identifier. An IRI reference may be absolute or + relative. However, the "IRI" that results from such a reference + only includes absolute IRIs; any relative IRI references are + resolved to their absolute form. Note that in [RFC2396] URIs did + not include fragment identifiers, but in [RFC3986] fragment + identifiers are part of URIs. + + running text: Human text (paragraphs, sentences, phrases) with syntax + according to orthographic conventions of a natural language, as + opposed to syntax defined for ease of processing by machines + (e.g., markup, programming languages). + + protocol element: Any portion of a message that affects processing of + that message by the protocol in question. + + presentation element: A presentation form corresponding to a protocol + element; for example, using a wider range of characters. + + create (a URI or IRI): With respect to URIs and IRIs, the term is + used for the initial creation. This may be the initial creation + of a resource with a certain identifier, or the initial exposition + of a resource under a particular identifier. 
+ + generate (a URI or IRI): With respect to URIs and IRIs, the term is + used when the IRI is generated by derivation from other + information. + +1.4. Notation + + RFCs and Internet Drafts currently do not allow any characters + outside the US-ASCII repertoire. Therefore, this document uses + various special notations to denote such characters in examples. + + In text, characters outside US-ASCII are sometimes referenced by + using a prefix of 'U+', followed by four to six hexadecimal digits. + + To represent characters outside US-ASCII in examples, this document + uses two notations: 'XML Notation' and 'Bidi Notation'. + + + + + + +Duerst & Suignard Standards Track [Page 5] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + XML Notation uses a leading '&#x', a trailing ';', and the + hexadecimal number of the character in the UCS in between. For + example, &#x44F; stands for CYRILLIC CAPITAL LETTER YA. In this + notation, an actual '&' is denoted by '&amp;'. + + Bidi Notation is used for bidirectional examples: Lowercase letters + stand for Latin letters or other letters that are written left to + right, whereas uppercase letters represent Arabic or Hebrew letters + that are written right to left. + + To denote actual octets in examples (as opposed to percent-encoded + octets), the two hex digits denoting the octet are enclosed in "<" + and ">". For example, the octet often denoted as 0xc9 is denoted + here as <c9>. + + In this document, the key words "MUST", "MUST NOT", "REQUIRED", + "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", + and "OPTIONAL" are to be interpreted as described in [RFC2119]. + +2. IRI Syntax + + This section defines the syntax of Internationalized Resource + Identifiers (IRIs). + + As with URIs, an IRI is defined as a sequence of characters, not as a + sequence of octets. This definition accommodates the fact that IRIs + may be written on paper or read over the radio as well as stored or + transmitted digitally.
The same IRI may be represented as different + sequences of octets in different protocols or documents if these + protocols or documents use different character encodings (and/or + transfer encodings). Using the same character encoding as the + containing protocol or document ensures that the characters in the + IRI can be handled (e.g., searched, converted, displayed) in the same + way as the rest of the protocol or document. + +2.1. Summary of IRI Syntax + + IRIs are defined similarly to URIs in [RFC3986], but the class of + unreserved characters is extended by adding the characters of the UCS + (Universal Character Set, [ISO10646]) beyond U+007F, subject to the + limitations given in the syntax rules below and in section 6.1. + + Otherwise, the syntax and use of components and reserved characters + is the same as that in [RFC3986]. All the operations defined in + [RFC3986], such as the resolution of relative references, can be + applied to IRIs by IRI-processing software in exactly the same way as + they are for URIs by URI-processing software. + + + + +Duerst & Suignard Standards Track [Page 6] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Characters outside the US-ASCII repertoire are not reserved and + therefore MUST NOT be used for syntactical purposes, such as to + delimit components in newly defined schemes. For example, U+00A2, + CENT SIGN, is not allowed as a delimiter in IRIs, because it is in + the 'iunreserved' category. This is similar to the fact that it is + not possible to use '-' as a delimiter in URIs, because it is in the + 'unreserved' category. + +2.2. ABNF for IRI References and IRIs + + Although it might be possible to define IRI references and IRIs + merely by their transformation to URI references and URIs, they can + also be accepted and processed directly. Therefore, an ABNF + definition for IRI references (which are the most general concept and + the start of the grammar) and IRIs is given here. 
The syntax of this + ABNF is described in [RFC2234]. Character numbers are taken from the + UCS, without implying any actual binary encoding. Terminals in the + ABNF are characters, not bytes. + + The following grammar closely follows the URI grammar in [RFC3986], + except that the range of unreserved characters is expanded to include + UCS characters, with the restriction that private UCS characters can + occur only in query parts. The grammar is split into two parts: + Rules that differ from [RFC3986] because of the above-mentioned + expansion, and rules that are the same as those in [RFC3986]. For + rules that are different than those in [RFC3986], the names of the + non-terminals have been changed as follows. If the non-terminal + contains 'URI', this has been changed to 'IRI'. Otherwise, an 'i' + has been prefixed. + + The following rules are different from those in [RFC3986]: + + IRI = scheme ":" ihier-part [ "?" iquery ] + [ "#" ifragment ] + + ihier-part = "//" iauthority ipath-abempty + / ipath-absolute + / ipath-rootless + / ipath-empty + + IRI-reference = IRI / irelative-ref + + absolute-IRI = scheme ":" ihier-part [ "?" iquery ] + + irelative-ref = irelative-part [ "?" 
iquery ] [ "#" ifragment ] + + irelative-part = "//" iauthority ipath-abempty + / ipath-absolute + + + +Duerst & Suignard Standards Track [Page 7] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + / ipath-noscheme + / ipath-empty + + iauthority = [ iuserinfo "@" ] ihost [ ":" port ] + iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) + ihost = IP-literal / IPv4address / ireg-name + + ireg-name = *( iunreserved / pct-encoded / sub-delims ) + + ipath = ipath-abempty ; begins with "/" or is empty + / ipath-absolute ; begins with "/" but not "//" + / ipath-noscheme ; begins with a non-colon segment + / ipath-rootless ; begins with a segment + / ipath-empty ; zero characters + + ipath-abempty = *( "/" isegment ) + ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] + ipath-noscheme = isegment-nz-nc *( "/" isegment ) + ipath-rootless = isegment-nz *( "/" isegment ) + ipath-empty = 0<ipchar> + + isegment = *ipchar + isegment-nz = 1*ipchar + isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims + / "@" ) + ; non-zero-length segment without any colon ":" + + ipchar = iunreserved / pct-encoded / sub-delims / ":" + / "@" + + iquery = *( ipchar / iprivate / "/" / "?" ) + + ifragment = *( ipchar / "/" / "?" ) + + iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar + + ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF + / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD + / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD + / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD + / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD + / %xD0000-DFFFD / %xE1000-EFFFD + + iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD + + Some productions are ambiguous. The "first-match-wins" (a.k.a. + "greedy") algorithm applies. For details, see [RFC3986].
+ + + + +Duerst & Suignard Standards Track [Page 8] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The following rules are the same as those in [RFC3986]: + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + port = *DIGIT + + IP-literal = "[" ( IPv6address / IPvFuture ) "]" + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + + IPv6address = 6( h16 ":" ) ls32 + / "::" 5( h16 ":" ) ls32 + / [ h16 ] "::" 4( h16 ":" ) ls32 + / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + / [ *4( h16 ":" ) h16 ] "::" ls32 + / [ *5( h16 ":" ) h16 ] "::" h16 + / [ *6( h16 ":" ) h16 ] "::" + + h16 = 1*4HEXDIG + ls32 = ( h16 ":" h16 ) / IPv4address + + IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + + dec-octet = DIGIT ; 0-9 + / %x31-39 DIGIT ; 10-99 + / "1" 2DIGIT ; 100-199 + / "2" %x30-34 DIGIT ; 200-249 + / "25" %x30-35 ; 250-255 + + pct-encoded = "%" HEXDIG HEXDIG + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + + This syntax does not support IPv6 scoped addressing zone identifiers. + + + + + + + + + + + +Duerst & Suignard Standards Track [Page 9] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +3. Relationship between IRIs and URIs + + IRIs are meant to replace URIs in identifying resources for + protocols, formats, and software components that use a UCS-based + character repertoire. These protocols and components may never need + to use URIs directly, especially when the resource identifier is used + simply for identification purposes. 
However, when the resource + identifier is used for resource retrieval, it is in many cases + necessary to determine the associated URI, because currently most + retrieval mechanisms are only defined for URIs. In this case, IRIs + can serve as presentation elements for URI protocol elements. An + example would be an address bar in a Web user agent. (Additional + rationale is given in section 3.1.) + +3.1. Mapping of IRIs to URIs + + This section defines how to map an IRI to a URI. Everything in this + section also applies to IRI references and URI references, as well as + to components thereof (for example, fragment identifiers). + + This mapping has two purposes: + + Syntaxical. Many URI schemes and components define additional + syntactical restrictions not captured in section 2.2. + Scheme-specific restrictions are applied to IRIs by converting + IRIs to URIs and checking the URIs against the scheme-specific + restrictions. + + Interpretational. URIs identify resources in various ways. IRIs also + identify resources. When the IRI is used solely for + identification purposes, it is not necessary to map the IRI to a + URI (see section 5). However, when an IRI is used for resource + retrieval, the resource that the IRI locates is the same as the + one located by the URI obtained after converting the IRI according + to the procedure defined here. This means that there is no need + to define resolution separately on the IRI level. + + Applications MUST map IRIs to URIs by using the following two steps. + + Step 1. Generate a UCS character sequence from the original IRI + format. This step has the following three variants, + depending on the form of the input: + + a. If the IRI is written on paper, read aloud, or otherwise + represented as a sequence of characters independent of + any character encoding, represent the IRI as a sequence + of characters from the UCS normalized according to + Normalization Form C (NFC, [UTR15]). 
+ + +Duerst & Suignard Standards Track [Page 10] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + b. If the IRI is in some digital representation (e.g., an + octet stream) in some known non-Unicode character + encoding, convert the IRI to a sequence of characters + from the UCS normalized according to NFC. + + c. If the IRI is in a Unicode-based character encoding (for + example, UTF-8 or UTF-16), do not normalize (see section + 5.3.2.2 for details). Apply step 2 directly to the + encoded Unicode character sequence. + + Step 2. For each character in 'ucschar' or 'iprivate', apply steps + 2.1 through 2.3 below. + + 2.1. Convert the character to a sequence of one or more octets + using UTF-8 [RFC3629]. + + 2.2. Convert each octet to %HH, where HH is the hexadecimal + notation of the octet value. Note that this is identical + to the percent-encoding mechanism in section 2.1 of + [RFC3986]. To reduce variability, the hexadecimal notation + SHOULD use uppercase letters. + + 2.3. Replace the original character with the resulting character + sequence (i.e., a sequence of %HH triplets). + + The above mapping from IRIs to URIs produces URIs fully conforming to + [RFC3986]. The mapping is also an identity transformation for URIs + and is idempotent; applying the mapping a second time will not + change anything. Every URI is by definition an IRI. + + Systems accepting IRIs MAY convert the ireg-name component of an IRI + as follows (before step 2 above) for schemes known to use domain + names in ireg-name, if the scheme definition does not allow + percent-encoding for ireg-name: + + Replace the ireg-name part of the IRI by the part converted using the + ToASCII operation specified in section 4.1 of [RFC3490] on each + dot-separated label, and by using U+002E (FULL STOP) as a label + separator, with the flag UseSTD3ASCIIRules set to TRUE, and with the + flag AllowUnassigned set to FALSE for creating IRIs and set to TRUE + otherwise.
+ + + + + + + + + + +Duerst & Suignard Standards Track [Page 11] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The ToASCII operation may fail, but this would mean that the IRI + cannot be resolved. This conversion SHOULD be used when the goal is + to maximize interoperability with legacy URI resolvers. For example, + the IRI + + "http://r&#xE9;sum&#xE9;.example.org" + + may be converted to + + "http://xn--rsum-bpad.example.org" + + instead of + + "http://r%C3%A9sum%C3%A9.example.org". + + An IRI with a scheme that is known to use domain names in ireg-name, + but where the scheme definition does not allow percent-encoding for + ireg-name, meets scheme-specific restrictions if either the + straightforward conversion or the conversion using the ToASCII + operation on ireg-name result in an URI that meets the scheme- + specific restrictions. + + Such an IRI resolves to the URI obtained after converting the IRI and + uses the ToASCII operation on ireg-name. Implementations do not have + to do this conversion as long as they produce the same result. + + Note: The difference between variants b and c in step 1 (using + normalization with NFC, versus not using any normalization) + accounts for the fact that in many non-Unicode character + encodings, some text cannot be represented directly. For example, + the word "Vietnam" is natively written "Vi&#x1EC7;t Nam" + (containing a LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW) + in NFC, but a direct transcoding from the windows-1258 character + encoding leads to "Vi&#xEA;&#x323;t Nam" (containing a LATIN SMALL + LETTER E WITH CIRCUMFLEX followed by a COMBINING DOT BELOW). + Direct transcoding of other 8-bit encodings of Vietnamese may lead + to other representations. + + Note: The uniform treatment of the whole IRI in step 2 is important + to make processing independent of URI scheme. See [Gettys] for an + in-depth discussion.
+ + Note: In practice, whether the general mapping (steps 1 and 2) or the + ToASCII operation of [RFC3490] is used for ireg-name will not be + noticed if mapping from IRI to URI and resolution is tightly + integrated (e.g., carried out in the same user agent). But + + + + + +Duerst & Suignard Standards Track [Page 12] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + conversion using [RFC3490] may be able to better deal with + backwards compatibility issues in case mapping and resolution are + separated, as in the case of using an HTTP proxy. + + Note: Internationalized Domain Names may be contained in parts of an + IRI other than the ireg-name part. It is the responsibility of + scheme-specific implementations (if the Internationalized Domain + Name is part of the scheme syntax) or of server-side + implementations (if the Internationalized Domain Name is part of + 'iquery') to apply the necessary conversions at the appropriate + point. Example: Trying to validate the Web page at + http://r&#xE9;sum&#xE9;.example.org would lead to an IRI of + http://validator.w3.org/check?uri=http%3A%2F%2Fr&#xE9;sum&#xE9;. + example.org, which would convert to a URI of + http://validator.w3.org/check?uri=http%3A%2F%2Fr%C3%A9sum%C3%A9. + example.org. The server side implementation would be responsible + for making the necessary conversions to be able to retrieve the + Web page. + + Systems accepting IRIs MAY also deal with the printable characters in + US-ASCII that are not allowed in URIs, namely "<", ">", '"', space, + "{", "}", "|", "\", "^", and "`", in step 2 above. If these + characters are found but are not converted, then the conversion + SHOULD fail. Please note that the number sign ("#"), the percent + sign ("%"), and the square bracket characters ("[", "]") are not part + of the above list and MUST NOT be converted.
Protocols and formats + that have used earlier definitions of IRIs including these characters + MAY require percent-encoding of these characters as a preprocessing + step to extract the actual IRI from a given field. This + preprocessing MAY also be used by applications allowing the user to + enter an IRI. + + Note: In this process (in step 2.3), characters allowed in URI + references and existing percent-encoded sequences are not encoded + further. (This mapping is similar to, but different from, the + encoding applied when arbitrary content is included in some part + of a URI.) For example, an IRI of + "http://www.example.org/red%09ros&#xE9;#red" (in XML notation) is + converted to + "http://www.example.org/red%09ros%C3%A9#red", not to something + like + "http%3A%2F%2Fwww.example.org%2Fred%2509ros%C3%A9%23red". + + Note: Some older software transcoding to UTF-8 may produce illegal + output for some input, in particular for characters outside the + BMP (Basic Multilingual Plane). As an example, for the IRI with + non-BMP characters (in XML Notation): + "http://example.com/&#x10300;&#x10301;&#x10302;" + + + +Duerst & Suignard Standards Track [Page 13] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + which contains the first three letters of the Old Italic alphabet, + the correct conversion to a URI is + "http://example.com/%F0%90%8C%80%F0%90%8C%81%F0%90%8C%82" + +3.2. Converting URIs to IRIs + + In some situations, converting a URI into an equivalent IRI may be + desirable. This section gives a procedure for this conversion. The + conversion described in this section will always result in an IRI + that maps back to the URI used as an input for the conversion (except + for potential case differences in percent-encoding and for potential + percent-encoded unreserved characters). However, the IRI resulting + from this conversion may not be exactly the same as the original IRI + (if there ever was one).
+ + URI-to-IRI conversion removes percent-encodings, but not all + percent-encodings can be eliminated. There are several reasons for + this: + + 1. Some percent-encodings are necessary to distinguish percent- + encoded and unencoded uses of reserved characters. + + 2. Some percent-encodings cannot be interpreted as sequences of + UTF-8 octets. + + (Note: The octet patterns of UTF-8 are highly regular. + Therefore, there is a very high probability, but no guarantee, + that percent-encodings that can be interpreted as sequences of + UTF-8 octets actually originated from UTF-8. For a detailed + discussion, see [Duerst97].) + + 3. The conversion may result in a character that is not appropriate + in an IRI. See sections 2.2, 4.1, and 6.1 for further details. + + Conversion from a URI to an IRI is done by using the following steps + (or any other algorithm that produces the same result): + + 1. Represent the URI as a sequence of octets in US-ASCII. + + 2. Convert all percent-encodings ("%" followed by two hexadecimal + digits) to the corresponding octets, except those corresponding + to "%", characters in "reserved", and characters in US-ASCII not + allowed in URIs. + + 3. Re-percent-encode any octet produced in step 2 that is not part + of a strictly legal UTF-8 octet sequence. + + + + + +Duerst & Suignard Standards Track [Page 14] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + 4. Re-percent-encode all octets produced in step 3 that in UTF-8 + represent characters that are not appropriate according to + sections 2.2, 4.1, and 6.1. + + 5. Interpret the resulting octet sequence as a sequence of characters + encoded in UTF-8. + + This procedure will convert as many percent-encoded characters as + possible to characters in an IRI. Because there are some choices + when step 4 is applied (see section 6.1), results may vary. 
+ + Conversions from URIs to IRIs MUST NOT use any character encoding + other than UTF-8 in steps 3 and 4, even if it might be possible to + guess from the context that another character encoding than UTF-8 was + used in the URI. For example, the URI + "http://www.example.org/r%E9sum%E9.html" might with some guessing be + interpreted to contain two e-acute characters encoded as iso-8859-1. + It must not be converted to an IRI containing these e-acute + characters. Otherwise, in the future the IRI will be mapped to + "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different + URI from "http://www.example.org/r%E9sum%E9.html". + +3.2.1. Examples + + This section shows various examples of converting URIs to IRIs. Each + example shows the result after each of the steps 1 through 5 is + applied. XML Notation is used for the final result. Octets are + denoted by "<" followed by two hexadecimal digits followed by ">". + + The following example contains the sequence "%C3%BC", which is a + strictly legal UTF-8 sequence, and which is converted into the actual + character U+00FC, LATIN SMALL LETTER U WITH DIAERESIS (also known as + u-umlaut). + + 1. http://www.example.org/D%C3%BCrst + + 2. http://www.example.org/D<c3><bc>rst + + 3. http://www.example.org/D<c3><bc>rst + + 4. http://www.example.org/D<c3><bc>rst + + 5. http://www.example.org/D&#xFC;rst + + The following example contains the sequence "%FC", which might + represent U+00FC, LATIN SMALL LETTER U WITH DIAERESIS, in the + iso-8859-1 character encoding. (It might represent other characters + in other character encodings. For example, the octet <fc> in + + + +Duerst & Suignard Standards Track [Page 15] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + iso-8859-5 represents U+045C, CYRILLIC SMALL LETTER KJE.) Because + <fc> is not part of a strictly legal UTF-8 sequence, it is + re-percent-encoded in step 3. + + 1. http://www.example.org/D%FCrst + + 2. http://www.example.org/D<fc>rst + + 3. http://www.example.org/D%FCrst + + 4.
http://www.example.org/D%FCrst + + 5. http://www.example.org/D%FCrst + + The following example contains "%e2%80%ae", which is the percent- + encoded UTF-8 character encoding of U+202E, RIGHT-TO-LEFT OVERRIDE. + Section 4.1 forbids the direct use of this character in an IRI. + Therefore, the corresponding octets are re-percent-encoded in step 4. + This example shows that the case (upper- or lowercase) of letters + used in percent-encodings may not be preserved. The example also + contains a punycode-encoded domain name label (xn--99zt52a), which is + not converted. + + 1. http://xn--99zt52a.example.org/%e2%80%ae + + 2. http://xn--99zt52a.example.org/<e2><80><ae> + + 3. http://xn--99zt52a.example.org/<e2><80><ae> + + 4. http://xn--99zt52a.example.org/%E2%80%AE + + 5. http://xn--99zt52a.example.org/%E2%80%AE + + Implementations with scheme-specific knowledge MAY convert + punycode-encoded domain name labels to the corresponding characters + by using the ToUnicode procedure. Thus, for the example above, the + label "xn--99zt52a" may be converted to U+7D0D U+8C46 (Japanese + Natto), leading to the overall IRI of + "http://&#x7D0D;&#x8C46;.example.org/%E2%80%AE". + +4. Bidirectional IRIs for Right-to-Left Languages + + Some UCS characters, such as those used in the Arabic and Hebrew + scripts, have an inherent right-to-left (rtl) writing direction. + IRIs containing these characters (called bidirectional IRIs or Bidi + IRIs) require additional attention because of the non-trivial + + + + + +Duerst & Suignard Standards Track [Page 16] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + relation between logical representation (used for digital + representation and for reading/spelling) and visual representation + (used for display/printing). + + Because of the complex interaction between the logical + representation, the visual representation, and the syntax of a Bidi + IRI, a balance is needed between various requirements. The main + requirements are + + 1.
user-predictable conversion between visual and logical + representation; + + 2. the ability to include a wide range of characters in various + parts of the IRI; and + + 3. minor or no changes or restrictions for implementations. + +4.1. Logical Storage and Visual Presentation + + When stored or transmitted in digital representation, bidirectional + IRIs MUST be in full logical order and MUST conform to the IRI syntax + rules (which includes the rules relevant to their scheme). This + ensures that bidirectional IRIs can be processed in the same way as + other IRIs. + + Bidirectional IRIs MUST be rendered by using the Unicode + Bidirectional Algorithm [UNIV4], [UNI9]. Bidirectional IRIs MUST be + rendered in the same way as they would be if they were in a + left-to-right embedding; i.e., as if they were preceded by U+202A, + LEFT-TO-RIGHT EMBEDDING (LRE), and followed by U+202C, POP + DIRECTIONAL FORMATTING (PDF). Setting the embedding direction can + also be done in a higher-level protocol (e.g., the dir='ltr' + attribute in HTML). + + There is no requirement to use the above embedding if the display is + still the same without the embedding. For example, a bidirectional + IRI in a text with left-to-right base directionality (such as used + for English or Cyrillic) that is preceded and followed by whitespace + and strong left-to-right characters does not need an embedding. + Also, a bidirectional relative IRI reference that only contains + strong right-to-left characters and weak characters and that starts + and ends with a strong right-to-left character and appears in a text + with right-to-left base directionality (such as used for Arabic or + Hebrew) and is preceded and followed by whitespace and strong + characters does not need an embedding. 
+ + + + + + +Duerst & Suignard Standards Track [Page 17] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + In some other cases, using U+200E, LEFT-TO-RIGHT MARK (LRM), may be + sufficient to force the correct display behavior. However, the + details of the Unicode Bidirectional algorithm are not always easy to + understand. Implementers are strongly advised to err on the side of + caution and to use embedding in all cases where they are not + completely sure that the display behavior is unaffected without the + embedding. + + The Unicode Bidirectional Algorithm ([UNI9], section 4.3) permits + higher-level protocols to influence bidirectional rendering. Such + changes by higher-level protocols MUST NOT be used if they change the + rendering of IRIs. + + The bidirectional formatting characters that may be used before or + after the IRI to ensure correct display are not themselves part of + the IRI. IRIs MUST NOT contain bidirectional formatting characters + (LRM, RLM, LRE, RLE, LRO, RLO, and PDF). They affect the visual + rendering of the IRI but do not appear themselves. It would + therefore not be possible to input an IRI with such characters + correctly. + +4.2. Bidi IRI Structure + + The Unicode Bidirectional Algorithm is designed mainly for running + text. To make sure that it does not affect the rendering of + bidirectional IRIs too much, some restrictions on bidirectional IRIs + are necessary. These restrictions are given in terms of delimiters + (structural characters, mostly punctuation such as "@", ".", ":", and + "/") and components (usually consisting mostly of letters and + digits). + + The following syntax rules from section 2.2 correspond to components + for the purpose of Bidi behavior: iuserinfo, ireg-name, isegment, + isegment-nz, isegment-nz-nc, ireg-name, iquery, and ifragment. 
+ + Specifications that define the syntax of any of the above components + MAY divide them further and define smaller parts to be components + according to this document. As an example, the restrictions of + [RFC3490] on bidirectional domain names correspond to treating each + label of a domain name as a component for schemes with ireg-name as a + domain name. Even where the components are not defined formally, it + may be helpful to think about some syntax in terms of components and + to apply the relevant restrictions. For example, for the usual + name/value syntax in query parts, it is convenient to treat each name + and each value as a component. As another example, the extensions in + a resource name can be treated as separate components. + + + + + +Duerst & Suignard Standards Track [Page 18] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + For each component, the following restrictions apply: + + 1. A component SHOULD NOT use both right-to-left and left-to-right + characters. + + 2. A component using right-to-left characters SHOULD start and end + with right-to-left characters. + + The above restrictions are given as shoulds, rather than as musts. + For IRIs that are never presented visually, they are not relevant. + However, for IRIs in general, they are very important to ensure + consistent conversion between visual presentation and logical + representation, in both directions. + + Note: In some components, the above restrictions may actually be + strictly enforced. For example, [RFC3490] requires that these + restrictions apply to the labels of a host name for those schemes + where ireg-name is a host name. In some other components (for + example, path components) following these restrictions may not be + too difficult. For other components, such as parts of the query + part, it may be very difficult to enforce the restrictions because + the values of query parameters may be arbitrary character + sequences. 
+ + If the above restrictions cannot be satisfied otherwise, the affected + component can always be mapped to URI notation as described in + section 3.1. Please note that the whole component has to be mapped + (see also Example 9 below). + +4.3. Input of Bidi IRIs + + Bidi input methods MUST generate Bidi IRIs in logical order while + rendering them according to section 4.1. During input, rendering + SHOULD be updated after every new character is input to avoid end- + user confusion. + +4.4. Examples + + This section gives examples of bidirectional IRIs, in Bidi Notation. + It shows legal IRIs with the relationship between logical and visual + representation and explains how certain phenomena in this + relationship may look strange to somebody not familiar with + bidirectional behavior, but familiar to users of Arabic and Hebrew. + It also shows what happens if the restrictions given in section 4.2 + are not followed. The examples below can be seen at [BidiEx], in + Arabic, Hebrew, and Bidi Notation variants. + + + + + +Duerst & Suignard Standards Track [Page 19] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + To read the bidi text in the examples, read the visual representation + from left to right until you encounter a block of rtl text. Read the + rtl block (including slashes and other special characters) from right + to left, then continue at the next unread ltr character. + + Example 1: A single component with rtl characters is inverted: + Logical representation: "http://ab.CDEFGH.ij/kl/mn/op.html" + Visual representation: "http://ab.HGFEDC.ij/kl/mn/op.html" + Components can be read one by one, and each component can be read in + its natural direction. 
+ + Example 2: More than one consecutive component with rtl characters is + inverted as a whole: + Logical representation: "http://ab.CDE.FGH/ij/kl/mn/op.html" + Visual representation: "http://ab.HGF.EDC/ij/kl/mn/op.html" + A sequence of rtl components is read rtl, in the same way as a + sequence of rtl words is read rtl in a bidi text. + + Example 3: All components of an IRI (except for the scheme) are rtl. + All rtl components are inverted overall: + Logical representation: "http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV" + Visual representation: "http://VU#TS=RQ;PO=NM?LK/JI/HG/FE.DC.BA" + The whole IRI (except the scheme) is read rtl. Delimiters between + rtl components stay between the respective components; delimiters + between ltr and rtl components don't move. + + Example 4: Each of several sequences of rtl components is inverted on + its own: + Logical representation: "http://AB.CD.ef/gh/IJ/KL.html" + Visual representation: "http://DC.BA.ef/gh/LK/JI.html" + Each sequence of rtl components is read rtl, in the same way as each + sequence of rtl words in an ltr text is read rtl. + + Example 5: Example 2, applied to components of different kinds: + Logical representation: "http://ab.cd.EF/GH/ij/kl.html" + Visual representation: "http://ab.cd.HG/FE/ij/kl.html" + The inversion of the domain name label and the path component may be + unexpected, but it is consistent with other bidi behavior. For + reassurance that the domain component really is "ab.cd.EF", it may be + helpful to read aloud the visual representation following the bidi + algorithm. After "http://ab.cd." one reads the RTL block + "E-F-slash-G-H", which corresponds to the logical representation. + + Example 6: Same as Example 5, with more rtl components: + Logical representation: "http://ab.CD.EF/GH/IJ/kl.html" + Visual representation: "http://ab.JI/HG/FE.DC/kl.html" + The inversion of the domain name labels and the path components may + be easier to identify because the delimiters also move. 
+ + + +Duerst & Suignard Standards Track [Page 20] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Example 7: A single rtl component includes digits: + Logical representation: "http://ab.CDE123FGH.ij/kl/mn/op.html" + Visual representation: "http://ab.HGF123EDC.ij/kl/mn/op.html" + Numbers are written ltr in all cases but are treated as an additional + embedding inside a run of rtl characters. This is completely + consistent with usual bidirectional text. + + Example 8 (not allowed): Numbers are at the start or end of an rtl + component: + Logical representation: "http://ab.cd.ef/GH1/2IJ/KL.html" + Visual representation: "http://ab.cd.ef/LK/JI1/2HG.html" + The sequence "1/2" is interpreted by the bidi algorithm as a + fraction, fragmenting the components and leading to confusion. There + are other characters that are interpreted in a special way close to + numbers; in particular, "+", "-", "#", "$", "%", ",", ".", and ":". + + Example 9 (not allowed): The numbers in the previous example are + percent-encoded: + Logical representation: "http://ab.cd.ef/GH%31/%32IJ/KL.html", + Visual representation (Hebrew): "http://ab.cd.ef/%31HG/LK/JI%32.html" + Visual representation (Arabic): "http://ab.cd.ef/31%HG/%LK/JI32.html" + Depending on whether the uppercase letters represent Arabic or + Hebrew, the visual representation is different. + + Example 10 (allowed but not recommended): + Logical representation: "http://ab.CDEFGH.123/kl/mn/op.html" + Visual representation: "http://ab.123.HGFEDC/kl/mn/op.html" + Components consisting of only numbers are allowed (it would be rather + difficult to prohibit them), but these may interact with adjacent RTL + components in ways that are not easy to predict. + +5. Normalization and Comparison + + Note: The structure and much of the material for this section is + taken from section 6 of [RFC3986]; the differences are due to the + specifics of IRIs. 
+ + One of the most common operations on IRIs is simple comparison: + Determining whether two IRIs are equivalent without using the IRIs or + the mapped URIs to access their respective resource(s). A comparison + is performed whenever a response cache is accessed, a browser checks + its history to color a link, or an XML parser processes tags within a + namespace. Extensive normalization prior to comparison of IRIs may + be used by spiders and indexing engines to prune a search space or + reduce duplication of request actions and response storage. + + + + + + +Duerst & Suignard Standards Track [Page 21] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + IRI comparison is performed for some particular purpose. Protocols + or implementations that compare IRIs for different purposes will + often be subject to differing design trade-offs in regards to how + much effort should be spent in reducing aliased identifiers. This + section describes various methods that may be used to compare IRIs, + the trade-offs between them, and the types of applications that might + use them. + +5.1. Equivalence + + Because IRIs exist to identify resources, presumably they should be + considered equivalent when they identify the same resource. However, + this definition of equivalence is not of much practical use, as there + is no way for an implementation to compare two resources unless it + has full knowledge or control of them. For this reason, determination + of equivalence or difference of IRIs is based on string comparison, + perhaps augmented by reference to additional rules provided by URI + scheme definitions. We use the terms "different" and "equivalent" to + describe the possible outcomes of such comparisons, but there are + many application-dependent versions of equivalence. + + Even though it is possible to determine that two IRIs are equivalent, + IRI comparison is not sufficient to determine whether two IRIs + identify different resources. 
For example, an owner of two different + domain names could decide to serve the same resource from both, + resulting in two different IRIs. Therefore, comparison methods are + designed to minimize false negatives while strictly avoiding false + positives. + + In testing for equivalence, applications should not directly compare + relative references; the references should be converted to their + respective target IRIs before comparison. When IRIs are compared to + select (or avoid) a network action, such as retrieval of a + representation, fragment components (if any) should be excluded from + the comparison. + + Applications using IRIs as identity tokens with no relationship to a + protocol MUST use the Simple String Comparison (see section 5.3.1). + All other applications MUST select one of the comparison practices + from the Comparison Ladder (see section 5.3 or, after IRI-to-URI + conversion, select one of the comparison practices from the URI + comparison ladder in [RFC3986], section 6.2). + +5.2. Preparation for Comparison + + Any kind of IRI comparison REQUIRES that all escapings or encodings + in the protocol or format that carries an IRI are resolved. This is + usually done when the protocol or format is parsed. Examples of such + + + +Duerst & Suignard Standards Track [Page 22] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + escapings or encodings are entities and numeric character references + in [HTML4] and [XML1]. As an example, + "http://example.org/ros&eacute;" (in HTML), + "http://example.org/ros&#233;" (in HTML or XML), and + "http://example.org/ros&#xE9;" (in HTML or XML) are all resolved into + what is denoted in this document (see section 1.4) as + "http://example.org/ros&#xE9;" (the "&#xE9;" here standing for the + actual e-acute character, to compensate for the fact that this + document cannot contain non-ASCII characters). 
+ + Similar considerations apply to encodings such as Transfer Codings in + HTTP (see [RFC2616]) and Content Transfer Encodings in MIME + ([RFC2045]), although in these cases, the encoding is based not on + characters but on octets, and additional care is required to make + sure that characters, and not just arbitrary octets, are compared + (see section 5.3.1). + +5.3. Comparison Ladder + + In practice, a variety of methods are used, to test IRI equivalence. + These methods fall into a range distinguished by the amount of + processing required and the degree to which the probability of false + negatives is reduced. As noted above, false negatives cannot be + eliminated. In practice, their probability can be reduced, but this + reduction requires more processing and is not cost-effective for all + applications. + + If this range of comparison practices is considered as a ladder, the + following discussion will climb the ladder, starting with practices + that are cheap but have a relatively higher chance of producing false + negatives, and proceeding to those that have higher computational + cost and lower risk of false negatives. + +5.3.1. Simple String Comparison + + If two IRIs, when considered as character strings, are identical, + then it is safe to conclude that they are equivalent. This type of + equivalence test has very low computational cost and is in wide use + in a variety of applications, particularly in the domain of parsing. + It is also used when a definitive answer to the question of IRI + equivalence is needed that is independent of the scheme used and that + can be calculated quickly and without accessing a network. An + example of such a case is XML Namespaces ([XMLNamespace]). + + Testing strings for equivalence requires some basic precautions. This + procedure is often referred to as "bit-for-bit" or "byte-for-byte" + comparison, which is potentially misleading. 
Testing strings for + equality is normally based on pair comparison of the characters that + + + +Duerst & Suignard Standards Track [Page 23] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + make up the strings, starting from the first and proceeding until + both strings are exhausted and all characters are found to be equal, + until a pair of characters compares unequal, or until one of the + strings is exhausted before the other. + + This character comparison requires that each pair of characters be + put in comparable encoding form. For example, should one IRI be + stored in a byte array in UTF-8 encoding form and the second in a + UTF-16 encoding form, bit-for-bit comparisons applied naively will + produce errors. It is better to speak of equality on a + character-for-character rather than on a byte-for-byte or bit-for-bit + basis. In practical terms, character-by-character comparisons should + be done codepoint by codepoint after conversion to a common character + encoding form. When comparing character by character, the comparison + function MUST NOT map IRIs to URIs, because such a mapping would + create additional spurious equivalences. It follows that an IRI + SHOULD NOT be modified when being transported if there is any chance + that this IRI might be used as an identifier. + + False negatives are caused by the production and use of IRI aliases. + Unnecessary aliases can be reduced, regardless of the comparison + method, by consistently providing IRI references in an already + normalized form (i.e., a form identical to what would be produced + after normalization is applied, as described below). Protocols and + data formats often limit some IRI comparisons to simple string + comparison, based on the theory that people and implementations will, + in their own best interest, be consistent in providing IRI + references, or at least be consistent enough to negate any efficiency + that might be obtained from further normalization. 
+ +5.3.2. Syntax-Based Normalization + + Implementations may use logic based on the definitions provided by + this specification to reduce the probability of false negatives. This + processing is moderately higher in cost than character-for-character + string comparison. For example, an application using this approach + could reasonably consider the following two IRIs equivalent: + + example://a/b/c/%7Bfoo%7D/rosé + eXAMPLE://a/./b/../b/%63/%7bfoo%7d/ros%C3%A9 + + Web user agents, such as browsers, typically apply this type of IRI + normalization when determining whether a cached response is + available. Syntax-based normalization includes such techniques as + case normalization, character normalization, percent-encoding + normalization, and removal of dot-segments. + + + + + +Duerst & Suignard Standards Track [Page 24] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Case Normalization + + For all IRIs, the hexadecimal digits within a percent-encoding + triplet (e.g., "%3a" versus "%3A") are case-insensitive and therefore + should be normalized to use uppercase letters for the digits A - F. + + When an IRI uses components of the generic syntax, the component + syntax equivalence rules always apply; namely, that the scheme and + US-ASCII only host are case insensitive and therefore should be + normalized to lowercase. For example, the URI + "HTTP://www.EXAMPLE.com/" is equivalent to "http://www.example.com/". + Case equivalence for non-ASCII characters in IRI components that are + IDNs are discussed in section 5.3.3. The other generic syntax + components are assumed to be case sensitive unless specifically + defined otherwise by the scheme. + + Creating schemes that allow case-insensitive syntax components + containing non-ASCII characters should be avoided. Case normalization + of non-ASCII characters can be culturally dependent and is always a + complex operation. 
The only exception concerns non-ASCII host names + for which the character normalization includes a mapping step derived + from case folding. + + Character Normalization + + The Unicode Standard [UNIV4] defines various equivalences between + sequences of characters for various purposes. Unicode Standard Annex + #15 [UTR15] defines various Normalization Forms for these + equivalences, in particular Normalization Form C (NFC, Canonical + Decomposition, followed by Canonical Composition) and Normalization + Form KC (NFKC, Compatibility Decomposition, followed by Canonical + Composition). + + Equivalence of IRIs MUST rely on the assumption that IRIs are + appropriately pre-character-normalized rather than apply character + normalization when comparing two IRIs. The exceptions are conversion + from a non-digital form, and conversion from a non-UCS-based + character encoding to a UCS-based character encoding. In these cases, + NFC or a normalizing transcoder using NFC MUST be used for + interoperability. To avoid false negatives and problems with + transcoding, IRIs SHOULD be created by using NFC. Using NFKC may + avoid even more problems; for example, by choosing half-width Latin + letters instead of full-width ones, and full-width instead of + half-width Katakana. + + As an example, "http://www.example.org/r&#xE9;sum&#xE9;.html" (in XML + Notation) is in NFC. On the other hand, + "http://www.example.org/re&#x301;sume&#x301;.html" is not in NFC. + + + +Duerst & Suignard Standards Track [Page 25] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + The former uses precombined e-acute characters, and the latter uses + "e" characters followed by combining acute accents. Both usages are + defined as canonically equivalent in [UNIV4]. + + Note: Because it is unknown how a particular sequence of characters + is being treated with respect to character normalization, it would + be inappropriate to allow third parties to normalize an IRI + arbitrarily. 
This does not contradict the recommendation that + when a resource is created, its IRI should be as character + normalized as possible (i.e., NFC or even NFKC). This is similar + to the uppercase/lowercase problems. Some parts of a URI are case + insensitive (domain name). For others, it is unclear whether they + are case sensitive, case insensitive, or something in between + (e.g., case sensitive, but with a multiple choice selection if the + wrong case is used, instead of a direct negative result). The + best recipe is that the creator use a reasonable capitalization + and, when transferring the URI, capitalization never be changed. + + Various IRI schemes may allow the usage of Internationalized Domain + Names (IDN) [RFC3490] either in the ireg-name part or elsewhere. + Character Normalization also applies to IDNs, as discussed in section + 5.3.3. + + Percent-Encoding Normalization + + The percent-encoding mechanism (section 2.1 of [RFC3986]) is a + frequent source of variance among otherwise identical IRIs. In + addition to the case normalization issue noted above, some IRI + producers percent-encode octets that do not require percent-encoding, + resulting in IRIs that are equivalent to their non encoded + counterparts. These IRIs should be normalized by decoding any + percent-encoded octet sequence that corresponds to an unreserved + character, as described in section 2.3 of [RFC3986]. + + For actual resolution, differences in percent-encoding (except for + the percent-encoding of reserved characters) MUST always result in + the same resource. For example, "http://example.org/~user", + "http://example.org/%7euser", and "http://example.org/%7Euser", must + resolve to the same resource. 
+ + If this kind of equivalence is to be tested, the percent-encoding of + both IRIs to be compared has to be aligned; for example, by + converting both IRIs to URIs (see section 3.1), eliminating escape + differences in the resulting URIs, and making sure that the case of + the hexadecimal characters in the percent-encoding is always the same + (preferably uppercase). If the IRI is to be passed to another + + + + + +Duerst & Suignard Standards Track [Page 26] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + application or used further in some other way, its original form MUST + be preserved. The conversion described here should be performed only + for local comparison. + + Path Segment Normalization + + The complete path segments "." and ".." are intended only for use + within relative references (section 4.1 of [RFC3986]) and are removed + as part of the reference resolution process (section 5.2 of + [RFC3986]). However, some implementations may incorrectly assume + that reference resolution is not necessary when the reference is + already an IRI, and thus fail to remove dot-segments when they occur + in non-relative paths. IRI normalizers should remove dot-segments by + applying the remove_dot_segments algorithm to the path, as described + in section 5.2.4 of [RFC3986]. + +5.3.3. Scheme-Based Normalization + + The syntax and semantics of IRIs vary from scheme to scheme, as + described by the defining specification for each scheme. + Implementations may use scheme-specific rules, at further processing + cost, to reduce the probability of false negatives. 
For example, + because the "http" scheme makes use of an authority component, has a + default port of "80", and defines an empty path to be equivalent to + "/", the following four IRIs are equivalent: + + http://example.com + http://example.com/ + http://example.com:/ + http://example.com:80/ + + In general, an IRI that uses the generic syntax for authority with an + empty path should be normalized to a path of "/". Likewise, an + explicit ":port", for which the port is empty or the default for the + scheme, is equivalent to one where the port and its ":" delimiter are + elided and thus should be removed by scheme-based normalization. For + example, the second IRI above is the normal form for the "http" + scheme. + + Another case where normalization varies by scheme is in the handling + of an empty authority component or empty host subcomponent. For many + scheme specifications, an empty authority or host is considered an + error; for others, it is considered equivalent to "localhost" or the + end-user's host. When a scheme defines a default for authority and + an IRI reference to that default is desired, the reference should be + normalized to an empty authority for the sake of uniformity, brevity, + + + + + +Duerst & Suignard Standards Track [Page 27] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + and internationalization. If, however, either the userinfo or port + subcomponents are non-empty, then the host should be given explicitly + even if it matches the default. + + Normalization should not remove delimiters when their associated + component is empty unless it is licensed to do so by the scheme + specification. For example, the IRI "http://example.com/?" cannot be + assumed to be equivalent to any of the examples above. Likewise, the + presence or absence of delimiters within a userinfo subcomponent is + usually significant to its interpretation. 
The fragment component is + not subject to any scheme-based normalization; thus, two IRIs that + differ only by the suffix "#" are considered different regardless of + the scheme. + + Some IRI schemes may allow the usage of Internationalized Domain + Names (IDN) [RFC3490] either in their ireg-name part or elsewhere. + When in use in IRIs, those names SHOULD be validated by using the + ToASCII operation defined in [RFC3490], with the flags + "UseSTD3ASCIIRules" and "AllowUnassigned". An IRI containing an + invalid IDN cannot successfully be resolved. Validated IDN + components of IRIs SHOULD be character normalized by using the + Nameprep process [RFC3491]; however, for legibility purposes, they + SHOULD NOT be converted into ASCII Compatible Encoding (ACE). + + Scheme-based normalization may also consider IDN components and their + conversions to punycode as equivalent. As an example, + "http://résumé.example.org" may be considered equivalent to + "http://xn--rsum-bpad.example.org". + + Other scheme-specific normalizations are possible. + +5.3.4. Protocol-Based Normalization + + Substantial effort to reduce the incidence of false negatives is + often cost-effective for web spiders. Consequently, they implement + even more aggressive techniques in IRI comparison. For example, if + they observe that an IRI such as + + http://example.com/data + + redirects to an IRI differing only in the trailing slash + + http://example.com/data/ + + they will likely regard the two as equivalent in the future. This + kind of technique is only appropriate when equivalence is clearly + indicated by both the result of accessing the resources and the + + + + +Duerst & Suignard Standards Track [Page 28] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + common conventions of their scheme's dereference algorithm (in this + case, use of redirection by HTTP origin servers to avoid problems + with relative references). + +6. Use of IRIs + +6.1. 
Limitations on UCS Characters Allowed in IRIs + + This section discusses limitations on characters and character + sequences usable for IRIs beyond those given in section 2.2 and + section 4.1. The considerations in this section are relevant when + IRIs are created and when URIs are converted to IRIs. + + a. The repertoire of characters allowed in each IRI component is + limited by the definition of that component. For example, the + definition of the scheme component does not allow characters + beyond US-ASCII. + + (Note: In accordance with URI practice, generic IRI software + cannot and should not check for such limitations.) + + b. The UCS contains many areas of characters for which there are + strong visual look-alikes. Because of the likelihood of + transcription errors, these also should be avoided. This + includes the full-width equivalents of Latin characters, + half-width Katakana characters for Japanese, and many others. It + also includes many look-alikes of "space", "delims", and + "unwise", characters excluded in [RFC3491]. + + Additional information is available from [UNIXML]. [UNIXML] is + written in the context of running text rather than in that of + identifiers. Nevertheless, it discusses many of the categories of + characters not appropriate for IRIs. + +6.2. Software Interfaces and Protocols + + Although an IRI is defined as a sequence of characters, software + interfaces for URIs typically function on sequences of octets or + other kinds of code units. Thus, software interfaces and protocols + MUST define which character encoding is used. + + Intermediate software interfaces between IRI-capable components and + URI-only components MUST map the IRIs per section 3.1, when + transferring from IRI-capable to URI-only components. This mapping + SHOULD be applied as late as possible. It SHOULD NOT be applied + between components that are known to be able to handle IRIs. 
+ + + + + +Duerst & Suignard Standards Track [Page 29] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +6.3. Format of URIs and IRIs in Documents and Protocols + + Document formats that transport URIs may have to be upgraded to allow + the transport of IRIs. In cases where the document as a whole has a + native character encoding, IRIs MUST also be encoded in this + character encoding and converted accordingly by a parser or + interpreter. IRI characters not expressible in the native character + encoding SHOULD be escaped by using the escaping conventions of the + document format if such conventions are available. Alternatively, + they MAY be percent-encoded according to section 3.1. For example, in + HTML or XML, numeric character references SHOULD be used. If a + document as a whole has a native character encoding and that + character encoding is not UTF-8, then IRIs MUST NOT be placed into + the document in the UTF-8 character encoding. + + Note: Some formats already accommodate IRIs, although they use + different terminology. HTML 4.0 [HTML4] defines the conversion from + IRIs to URIs as error-avoiding behavior. XML 1.0 [XML1], XLink + [XLink], XML Schema [XMLSchema], and specifications based upon them + allow IRIs. Also, it is expected that all relevant new W3C formats + and protocols will be required to handle IRIs [CharMod]. + +6.4. Use of UTF-8 for Encoding Original Characters + + This section discusses details and gives examples for point c) in + section 1.2. To be able to use IRIs, the URI corresponding to the + IRI in question has to encode original characters into octets by + using UTF-8. This can be specified for all URIs of a URI scheme or + can apply to individual URIs for schemes that do not specify how to + encode original characters. It can apply to the whole URI, or only + to some part. For background information on encoding characters into + URIs, see also section 2.5 of [RFC3986]. 
+ + For new URI schemes, using UTF-8 is recommended in [RFC2718]. + Examples where UTF-8 is already used are the URN syntax [RFC2141], + IMAP URLs [RFC2192], and POP URLs [RFC2384]. On the other hand, + because the HTTP URL scheme does not specify how to encode original + characters, only some HTTP URLs can have corresponding but different + IRIs. + + For example, for a document with a URI of + "http://www.example.org/r%C3%A9sum%C3%A9.html", it is possible to + construct a corresponding IRI (in XML notation, see section 1.4): + "http://www.example.org/r&#xE9;sum&#xE9;.html" ("&#xE9;" stands for + the e-acute character, and "%C3%A9" is the UTF-8 encoded and + percent-encoded representation of that character). On the other + hand, for a document with a URI of + + + + +Duerst & Suignard Standards Track [Page 30] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + "http://www.example.org/r%E9sum%E9.html", the percent-encoding octets + cannot be converted to actual characters in an IRI, as the + percent-encoding is not based on UTF-8. + + This means that for most URI schemes, there is no need to upgrade + their scheme definition in order for them to work with IRIs. The + main case where upgrading makes sense is when a scheme definition, or + a particular component of a scheme, is strictly limited to the use of + US-ASCII characters with no provision to include non-ASCII + characters/octets via percent-encoding, or if a scheme definition + currently uses highly scheme-specific provisions for the encoding of + non-ASCII characters. An example of this is the mailto: scheme + [RFC2368]. + + This specification does not upgrade any scheme specifications in any + way; this has to be done separately. Also, note that there is no + such thing as an "IRI scheme"; all IRIs use URI schemes, and all URI + schemes can be used with IRIs, even though in some cases only by + using URIs directly as IRIs, without any conversion. 
+ + URI schemes can impose restrictions on the syntax of scheme-specific + URIs; i.e., URIs that are admissible under the generic URI syntax + [RFC3986] may not be admissible due to narrower syntactic constraints + imposed by a URI scheme specification. URI scheme definitions cannot + broaden the syntactic restrictions of the generic URI syntax; + otherwise, it would be possible to generate URIs that satisfied the + scheme-specific syntactic constraints without satisfying the + syntactic constraints of the generic URI syntax. However, additional + syntactic constraints imposed by URI scheme specifications are + applicable to IRI, as the corresponding URI resulting from the + mapping defined in section 3.1 MUST be a valid URI under the + syntactic restrictions of generic URI syntax and any narrower + restrictions imposed by the corresponding URI scheme specification. + + The requirement for the use of UTF-8 applies to all parts of a URI + (with the potential exception of the ireg-name part; see section + 3.1). However, it is possible that the capability of IRIs to + represent a wide range of characters directly is used just in some + parts of the IRI (or IRI reference). The other parts of the IRI may + only contain US-ASCII characters, or they may not be based on UTF-8. + They may be based on another character encoding, or they may directly + encode raw binary data (see also [RFC2397]). + + For example, it is possible to have a URI reference of + "http://www.example.org/r%E9sum%E9.xml#r%C3%A9sum%C3%A9", where the + document name is encoded in iso-8859-1 based on server settings, but + where the fragment identifier is encoded in UTF-8 according to + + + + +Duerst & Suignard Standards Track [Page 31] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [XPointer]. The IRI corresponding to the above URI would be (in XML + notation) + "http://www.example.org/r%E9sum%E9.xml#r&#xE9;sum&#xE9;". + + Similar considerations apply to query parts. 
The functionality of + IRIs (namely, to be able to include non-ASCII characters) can only be + used if the query part is encoded in UTF-8. + +6.5. Relative IRI References + + Processing of relative IRI references against a base is handled + straightforwardly; the algorithms of [RFC3986] can be applied + directly, treating the characters additionally allowed in IRI + references in the same way that unreserved characters are in URI + references. + +7. URI/IRI Processing Guidelines (Informative) + + This informative section provides guidelines for supporting IRIs in + the same software components and operations that currently process + URIs: Software interfaces that handle URIs, software that allows + users to enter URIs, software that creates or generates URIs, + software that displays URIs, formats and protocols that transport + URIs, and software that interprets URIs. These may all require + modification before functioning properly with IRIs. The + considerations in this section also apply to URI references and IRI + references. + +7.1. URI/IRI Software Interfaces + + Software interfaces that handle URIs, such as URI-handling APIs and + protocols transferring URIs, need interfaces and protocol elements + that are designed to carry IRIs. + + In case the current handling in an API or protocol is based on + US-ASCII, UTF-8 is recommended as the character encoding for IRIs, as + it is compatible with US-ASCII, is in accordance with the + recommendations of [RFC2277], and makes converting to URIs easy. In + any case, the API or protocol definition must clearly define the + character encoding to be used. + + The transfer from URI-only to IRI-capable components requires no + mapping, although the conversion described in section 3.2 above may + be performed. It is preferable not to perform this inverse + conversion when there is a chance that this cannot be done correctly. 
+ + + + + + +Duerst & Suignard Standards Track [Page 32] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +7.2. URI/IRI Entry + + Some components allow users to enter URIs into the system by typing + or dictation, for example. This software must be updated to allow + for IRI entry. + + A person viewing a visual representation of an IRI (as a sequence of + glyphs, in some order, in some visual display) or hearing an IRI will + use an entry method for characters in the user's language to input + the IRI. Depending on the script and the input method used, this may + be a more or less complicated process. + + The process of IRI entry must ensure, as much as possible, that the + restrictions defined in section 2.2 are met. This may be done by + choosing appropriate input methods or variants/settings thereof, by + appropriately converting the characters being input, by eliminating + characters that cannot be converted, and/or by issuing a warning or + error message to the user. + + As an example of variant settings, input method editors for East + Asian Languages usually allow the input of Latin letters and related + characters in full-width or half-width versions. For IRI input, the + input method editor should be set so that it produces half-width + Latin letters and punctuation and full-width Katakana. + + An input field primarily or solely used for the input of URIs/IRIs + may allow the user to view an IRI as it is mapped to a URI. Places + where the input of IRIs is frequent may provide the possibility for + viewing an IRI as mapped to a URI. This will help users when some of + the software they use does not yet accept IRIs. + + An IRI input component interfacing to components that handle URIs, + but not IRIs, must map the IRI to a URI before passing it to these + components. + + For the input of IRIs with right-to-left characters, please see + section 4.3. + +7.3. 
URI/IRI Transfer between Applications + + Many applications, particularly mail user agents, try to detect URIs + appearing in plain text. For this, they use some heuristics based on + URI syntax. They then allow the user to click on such URIs and + retrieve the corresponding resource in an appropriate (usually + scheme-dependent) application. + + + + + + +Duerst & Suignard Standards Track [Page 33] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Such applications have to be upgraded to use the IRI syntax as a base + for heuristics. In particular, a non-ASCII character should not be + taken as the indication of the end of an IRI. Such applications also + have to make sure that they correctly convert the detected IRI from + the character encoding of the document or application where the IRI + appears to the character encoding used by the system-wide IRI + invocation mechanism, or to a URI (according to section 3.1) if the + system-wide invocation mechanism only accepts URIs. + + The clipboard is another frequently used way to transfer URIs and + IRIs from one application to another. On most platforms, the + clipboard is able to store and transfer text in many languages and + scripts. Correctly used, the clipboard transfers characters, not + bytes, which will do the right thing with IRIs. + +7.4. URI/IRI Generation + + Systems that offer resources through the Internet, where those + resources have logical names, sometimes automatically generate URIs + for the resources they offer. For example, some HTTP servers can + generate a directory listing for a file directory and then respond to + the generated URIs with the files. + + Many legacy character encodings are in use in various file systems. + Many currently deployed systems do not transform the local character + representation of the underlying system before generating URIs. 
+ + For maximum interoperability, systems that generate resource + identifiers should make the appropriate transformations. For + example, if a file system contains a file named + "résumé.html", a server should expose this as + "r%C3%A9sum%C3%A9.html" in a URI, which allows use of + "résumé.html" in an IRI, even if locally the file name is + kept in a character encoding other than UTF-8. + + This recommendation particularly applies to HTTP servers. For FTP + servers, similar considerations apply; see [RFC2640]. + +7.5. URI/IRI Selection + + In some cases, resource owners and publishers have control over the + IRIs used to identify their resources. This control is mostly + executed by controlling the resource names, such as file names, + directly. + + + + + + + +Duerst & Suignard Standards Track [Page 34] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + In these cases, it is recommended to avoid choosing IRIs that are + easily confused. For example, for US-ASCII, the lower-case ell ("l") + is easily confused with the digit one ("1"), and the upper-case oh + ("O") is easily confused with the digit zero ("0"). Publishers + should avoid confusing users with "br0ken" or "1ame" identifiers. + + Outside the US-ASCII repertoire, there are many more opportunities + for confusion; a complete set of guidelines is too lengthy to include + here. As long as names are limited to characters from a single + script, native writers of a given script or language will know best + when ambiguities can appear, and how they can be avoided. What may + look ambiguous to a stranger may be completely obvious to the average + native user. On the other hand, in some cases, the UCS contains + variants for compatibility reasons; for example, for typographic + purposes. These should be avoided wherever possible. Although there + may be exceptions, newly created resource names should generally be + in NFKC [UTR15] (which means that they are also in NFC). 
+ + As an example, the UCS contains the "fi" ligature at U+FB01 for + compatibility reasons. Wherever possible, IRIs should use the two + letters "f" and "i" rather than the "fi" ligature. An example where + the latter may be used is in the query part of an IRI for an explicit + search for a word written containing the "fi" ligature. + + In certain cases, there is a chance that characters from different + scripts look the same. The best known example is the similarity of + the Latin "A", the Greek "Alpha", and the Cyrillic "A". To avoid + such cases, only IRIs should be created where all the characters in a + single component are used together in a given language. This usually + means that all of these characters will be from the same script, but + there are languages that mix characters from different scripts (such + as Japanese). This is similar to the heuristics used to distinguish + between letters and numbers in the examples above. Also, for Latin, + Greek, and Cyrillic, using lowercase letters results in fewer + ambiguities than using uppercase letters would. + +7.6. Display of URIs/IRIs + + In situations where the rendering software is not expected to display + non-ASCII parts of the IRI correctly using the available layout and + font resources, these parts should be percent-encoded before being + displayed. + + For display of Bidi IRIs, please see section 4.1. + + + + + + + +Duerst & Suignard Standards Track [Page 35] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +7.7. Interpretation of URIs and IRIs + + Software that interprets IRIs as the names of local resources should + accept IRIs in multiple forms and convert and match them with the + appropriate local resource names. + + First, multiple representations include both IRIs in the native + character encoding of the protocol and also their URI counterparts. + + Second, it may include URIs constructed based on character encodings + other than UTF-8. 
These URIs may be produced by user agents that do + not conform to this specification and that use legacy character + encodings to convert non-ASCII characters to URIs. Whether this is + necessary, and what character encodings to cover, depends on a number + of factors, such as the legacy character encodings used locally and + the distribution of various versions of user agents. For example, + software for Japanese may accept URIs in Shift_JIS and/or EUC-JP in + addition to UTF-8. + + Third, it may include additional mappings to be more user-friendly + and robust against transmission errors. These would be similar to + how some servers currently treat URIs as case insensitive or perform + additional matching to account for spelling errors. For characters + beyond the US-ASCII repertoire, this may, for example, include + ignoring the accents on received IRIs or resource names. Please note + that such mappings, including case mappings, are language dependent. + + It can be difficult to identify a resource unambiguously if too many + mappings are taken into consideration. However, percent-encoded and + not percent-encoded parts of IRIs can always be clearly + distinguished. Also, the regularity of UTF-8 (see [Duerst97]) makes + the potential for collisions lower than it may seem at first. + +7.8. Upgrading Strategy + + Where this recommendation places further constraints on software for + which many instances are already deployed, it is important to + introduce upgrades carefully and to be aware of the various + interdependencies. + + If IRIs cannot be interpreted correctly, they should not be created, + generated, or transported. This suggests that upgrading URI + interpreting software to accept IRIs should have highest priority. + + On the other hand, a single IRI is interpreted only by a single or + very few interpreters that are known in advance, although it may be + entered and transported very widely. 
+ + + + +Duerst & Suignard Standards Track [Page 36] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Therefore, IRIs benefit most from a broad upgrade of software to be + able to enter and transport IRIs. However, before an individual IRI + is published, care should be taken to upgrade the corresponding + interpreting software in order to cover the forms expected to be + received by various versions of entry and transport software. + + The upgrade of generating software to generate IRIs instead of using + a local character encoding should happen only after the service is + upgraded to accept IRIs. Similarly, IRIs should only be generated + when the service accepts IRIs and the intervening infrastructure and + protocol is known to transport them safely. + + Software converting from URIs to IRIs for display should be upgraded + only after upgraded entry software has been widely deployed to the + population that will see the displayed result. + + Where there is a free choice of character encodings, it is often + possible to reduce the effort and dependencies for upgrading to IRIs + by using UTF-8 rather than another encoding. For example, when a new + file-based Web server is set up, using UTF-8 as the character + encoding for file names will make the transition to IRIs easier. + Likewise, when a new Web form is set up using UTF-8 as the character + encoding of the form page, the returned query URIs will use UTF-8 as + the character encoding (unless the user, for whatever reason, changes + the character encoding) and will therefore be compatible with IRIs. + + These recommendations, when taken together, will allow for the + extension from URIs to IRIs in order to handle characters other than + US-ASCII while minimizing interoperability problems. For + considerations regarding the upgrade of URI scheme definitions, see + section 6.4. + +8. Security Considerations + + The security considerations discussed in [RFC3986] also apply to + IRIs. 
In addition, the following issues require particular care for + IRIs. + + Incorrect encoding or decoding can lead to security problems. In + particular, some UTF-8 decoders do not check against overlong byte + sequences. As an example, a "/" is encoded with the byte 0x2F both + in UTF-8 and in US-ASCII, but some UTF-8 decoders also wrongly + interpret the sequence 0xC0 0xAF as a "/". A sequence such as + + + + + + + + +Duerst & Suignard Standards Track [Page 37] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + "%C0%AF.." may pass some security tests and then be interpreted as + "/.." in a path if UTF-8 decoders are fault-tolerant, if conversion + and checking are not done in the right order, and/or if reserved + characters and unreserved characters are not clearly distinguished. + + There are various ways in which "spoofing" can occur with IRIs. + "Spoofing" means that somebody may add a resource name that looks the + same or similar to the user, but that points to a different resource. + The added resource may pretend to be the real resource by looking + very similar but may contain all kinds of changes that may be + difficult to spot and that can cause all kinds of problems. Most + spoofing possibilities for IRIs are extensions of those for URIs. + + Spoofing can occur for various reasons. First, a user's + normalization expectations or actual normalization when entering an + IRI or transcoding an IRI from a legacy character encoding do not + match the normalization used on the server side. Conceptually, this + is no different from the problems surrounding the use of + case-insensitive web servers. For example, a popular web page with a + mixed-case name ("http://big.example.com/PopularPage.html") might be + "spoofed" by someone who is able to create + "http://big.example.com/popularpage.html". However, the use of + unnormalized character sequences, and of additional mappings for user + convenience, may increase the chance for spoofing. 
Protocols and + servers that allow the creation of resources with names that are not + normalized are particularly vulnerable to such attacks. This is an + inherent security problem of the relevant protocol, server, or + resource and is not specific to IRIs, but it is mentioned here for + completeness. + + Spoofing can occur in various IRI components, such as the domain name + part or a path part. For considerations specific to the domain name + part, see [RFC3491]. For the path part, administrators of sites that + allow independent users to create resources in the same sub area may + have to be careful to check for spoofing. + + Spoofing can occur because in the UCS many characters look very + similar. Details are discussed in Section 7.5. Again, this is very + similar to spoofing possibilities on US-ASCII, e.g., using "br0ken" + or "1ame" URIs. + + Spoofing can occur when URIs with percent-encodings based on various + character encodings are accepted to deal with older user agents. In + some cases, particularly for Latin-based resource names, this is + usually easy to detect because UTF-8-encoded names, when interpreted + and viewed as legacy character encodings, produce mostly garbage. + + + + + +Duerst & Suignard Standards Track [Page 38] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + When concurrently used character encodings have a similar structure + but there are no characters that have exactly the same encoding, + detection is more difficult. + + Spoofing can occur with bidirectional IRIs, if the restrictions in + section 4.2 are not followed. The same visual representation may be + interpreted as different logical representations, and vice versa. It + is also very important that a correct Unicode bidirectional + implementation be used. + +9. Acknowledgements + + We would like to thank Larry Masinter for his work as coauthor of + many earlier versions of this document (draft-masinter-url-i18n-xx). 
+ + The discussion on the issue addressed here started a long time ago. + There was a thread in the HTML working group in August 1995 (under + the topic of "Globalizing URIs") and in the www-international mailing + list in July 1996 (under the topic of "Internationalization and + URLs"), and there were ad-hoc meetings at the Unicode conferences in + September 1995 and September 1997. + + Many thanks go to Francois Yergeau, Matitiahu Allouche, Roy Fielding, + Tim Berners-Lee, Mark Davis, M.T. Carrasco Benitez, James Clark, Tim + Bray, Chris Wendt, Yaron Goland, Andrea Vine, Misha Wolf, Leslie + Daigle, Ted Hardie, Bill Fenner, Margaret Wasserman, Russ Housley, + Makoto MURATA, Steven Atkin, Ryan Stansifer, Tex Texin, Graham Klyne, + Bjoern Hoehrmann, Chris Lilley, Ian Jacobs, Adam Costello, Dan + Oscarson, Elliotte Rusty Harold, Mike J. Brown, Roy Badami, Jonathan + Rosenne, Asmus Freytag, Simon Josefsson, Carlos Viegas Damasio, Chris + Haynes, Walter Underwood, and many others for help with understanding + the issues and possible solutions, and with getting the details + right. + + This document is a product of the Internationalization Working Group + (I18N WG) of the World Wide Web Consortium (W3C). Thanks to the + members of the W3C I18N Working Group and Interest Group for their + contributions and their work on [CharMod]. Thanks also go to the + members of many other W3C Working Groups for adopting IRIs, and to + the members of the Montreal IAB Workshop on Internationalization and + Localization for their review. + + + + + + + + + + +Duerst & Suignard Standards Track [Page 39] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +10. References + +10.1. Normative References + + [ASCII] American National Standards Institute, "Coded + Character Set -- 7-bit American Standard Code for + Information Interchange", ANSI X3.4, 1986. 
+ + [ISO10646] International Organization for Standardization, + "ISO/IEC 10646:2003: Information Technology - + Universal Multiple-Octet Coded Character Set (UCS)", + ISO Standard 10646, December 2003. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [RFC3490] Faltstrom, P., Hoffman, P., and A. Costello, + "Internationalizing Domain Names in Applications + (IDNA)", RFC 3490, March 2003. + + [RFC3491] Hoffman, P. and M. Blanchet, "Nameprep: A Stringprep + Profile for Internationalized Domain Names (IDN)", RFC + 3491, March 2003. + + [RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO + 10646", STD 63, RFC 3629, November 2003. + + [RFC3986] Berners-Lee, T., Fielding, R., and L. Masinter, + "Uniform Resource Identifier (URI): Generic Syntax", + STD 66, RFC 3986, January 2005. + + [UNI9] Davis, M., "The Bidirectional Algorithm", Unicode + Standard Annex #9, March 2004, + . + + [UNIV4] The Unicode Consortium, "The Unicode Standard, Version + 4.0.1, defined by: The Unicode Standard, Version 4.0 + (Reading, MA, Addison-Wesley, 2003. ISBN + 0-321-18578-1), as amended by Unicode 4.0.1 + (http://www.unicode.org/versions/Unicode4.0.1/)", + March 2004. + + + + + + + +Duerst & Suignard Standards Track [Page 40] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [UTR15] Davis, M. and M. Duerst, "Unicode Normalization + Forms", Unicode Standard Annex #15, April 2003, + . + +10.2. Informative References + + [BidiEx] "Examples of bidirectional IRIs", + . + + [CharMod] Duerst, M., Yergeau, F., Ishida, R., Wolf, M., and T. + Texin, "Character Model for the World Wide Web: + Resource Identifiers", World Wide Web Consortium + Candidate Recommendation, November 2004, + . + + [Duerst97] Duerst, M., "The Properties and Promises of UTF-8", + Proc. 
11th International Unicode Conference, San Jose + , September 1997, + . + + [Gettys] Gettys, J., "URI Model Consequences", + . + + [HTML4] Raggett, D., Le Hors, A., and I. Jacobs, "HTML 4.01 + Specification", World Wide Web Consortium + Recommendation, December 1999, + . + + [RFC2045] Freed, N. and N. Borenstein, "Multipurpose Internet + Mail Extensions (MIME) Part One: Format of Internet + Message Bodies", RFC 2045, November 1996. + + [RFC2130] Weider, C., Preston, C., Simonsen, K., Alvestrand, H., + Atkinson, R., Crispin, M., and P. Svanberg, "The + Report of the IAB Character Set Workshop held 29 + February - 1 March, 1996", RFC 2130, April 1997. + + [RFC2141] Moats, R., "URN Syntax", RFC 2141, May 1997. + + [RFC2192] Newman, C., "IMAP URL Scheme", RFC 2192, September + 1997. + + [RFC2277] Alvestrand, H., "IETF Policy on Character Sets and + Languages", BCP 18, RFC 2277, January 1998. + + + +Duerst & Suignard Standards Track [Page 41] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [RFC2368] Hoffman, P., Masinter, L., and J. Zawinski, "The + mailto URL scheme", RFC 2368, July 1998. + + [RFC2384] Gellens, R., "POP URL Scheme", RFC 2384, August 1998. + + [RFC2396] Berners-Lee, T., Fielding, R., and L. Masinter, + "Uniform Resource Identifiers (URI): Generic Syntax", + RFC 2396, August 1998. + + [RFC2397] Masinter, L., "The "data" URL scheme", RFC 2397, + August 1998. + + [RFC2616] Fielding, R., Gettys, J., Mogul, J., Frystyk, H., + Masinter, L., Leach, P., and T. Berners-Lee, + "Hypertext Transfer Protocol -- HTTP/1.1", RFC 2616, + June 1999. + + [RFC2640] Curtin, B., "Internationalization of the File Transfer + Protocol", RFC 2640, July 1999. + + [RFC2718] Masinter, L., Alvestrand, H., Zigmond, D., and R. + Petke, "Guidelines for new URL Schemes", RFC 2718, + November 1999. + + [UNIXML] Duerst, M. and A. Freytag, "Unicode in XML and other + Markup Languages", Unicode Technical Report #20, World + Wide Web Consortium Note, June 2003, + . 
+ + [XLink] DeRose, S., Maler, E., and D. Orchard, "XML Linking + Language (XLink) Version 1.0", World Wide Web + Consortium Recommendation, June 2001, + . + + [XML1] Bray, T., Paoli, J., Sperberg-McQueen, C., Maler, E., + and F. Yergeau, "Extensible Markup Language (XML) 1.0 + (Third Edition)", World Wide Web Consortium + Recommendation, February 2004, + . + + [XMLNamespace] Bray, T., Hollander, D., and A. Layman, "Namespaces in + XML", World Wide Web Consortium Recommendation, + January 1999, . + + [XMLSchema] Biron, P. and A. Malhotra, "XML Schema Part 2: + Datatypes", World Wide Web Consortium Recommendation, + May 2001, . + + + + +Duerst & Suignard Standards Track [Page 42] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + [XPointer] Grosso, P., Maler, E., Marsh, J. and N. Walsh, + "XPointer Framework", World Wide Web Consortium + Recommendation, March 2003, + . + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Duerst & Suignard Standards Track [Page 43] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +Appendix A. Design Alternatives + + This section shortly summarizes major design alternatives and the + reasons for why they were not chosen. + +Appendix A.1. New Scheme(s) + + Introducing new schemes (for example, httpi:, ftpi:,...) or a new + metascheme (e.g., i:, leading to URI/IRI prefixes such as i:http:, + i:ftp:,...) was proposed to make IRI-to-URI conversion scheme + dependent or to distinguish between percent-encodings resulting from + IRI-to-URI conversion and percent-encodings from legacy character + encodings. + + New schemes are not needed to distinguish URIs from true IRIs (i.e., + IRIs that contain non-ASCII characters). The benefit of being able + to detect the origin of percent-encodings is marginal, as UTF-8 can + be detected with very high reliability. 
Deploying new schemes is + extremely hard, so not requiring new schemes for IRIs makes + deployment of IRIs vastly easier. Making conversion scheme dependent + is highly inadvisable and would be encouraged by separate schemes for + IRIs. Using a uniform convention for conversion from IRIs to URIs + makes IRI implementation orthogonal to the introduction of actual new + schemes. + +Appendix A.2. Character Encodings Other Than UTF-8 + + At an early stage, UTF-7 was considered as an alternative to UTF-8 + when IRIs are converted to URIs. UTF-7 would not have needed + percent-encoding and in most cases would have been shorter than + percent-encoded UTF-8. + + Using UTF-8 avoids a double layering and overloading of the use of + the "+" character. UTF-8 is fully compatible with US-ASCII and has + therefore been recommended by the IETF, and is being used widely. + + UTF-7 has never been used much and is now clearly being discouraged. + Requiring implementations to convert from UTF-8 to UTF-7 and back + would be an additional implementation burden. + +Appendix A.3. New Encoding Convention + + Instead of using the existing percent-encoding convention of URIs, + which is based on octets, the idea was to create a new encoding + convention; for example, to use "%u" to introduce UCS code points. + + + + + + +Duerst & Suignard Standards Track [Page 44] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + + Using the existing octet-based percent-encoding mechanism does not + need an upgrade of the URI syntax and does not need corresponding + server upgrades. + +Appendix A.4. Indicating Character Encodings in the URI/IRI + + Some proposals suggested indicating the character encodings used in + an URI or IRI with some new syntactic convention in the URI itself, + similar to the "charset" parameter for e-mails and Web pages. 
As an + example, the label in square brackets in + "http://www.example.org/ros[iso-8859-1]&#xE9;" indicated that the + following "&#xE9;" had to be interpreted as iso-8859-1. + + If UTF-8 is used exclusively, an upgrade to the URI syntax is not + needed. It avoids potentially multiple labels that have to be copied + correctly in all cases, even on the side of a bus or on a napkin, + leading to usability problems (and being prohibitively annoying). + Exclusively using UTF-8 also reduces transcoding errors and + confusion. + +Authors' Addresses + + Martin Duerst (Note: Please write "Duerst" with u-umlaut wherever + possible, for example as "Dürst" in XML and + HTML.) + World Wide Web Consortium + 5322 Endo + Fujisawa, Kanagawa 252-8520 + Japan + + Phone: +81 466 49 1170 + Fax: +81 466 49 1171 + EMail: duerst@w3.org + URI: http://www.w3.org/People/D%C3%BCrst/ + (Note: This is the percent-encoded form of an IRI.) + + + Michel Suignard + Microsoft Corporation + One Microsoft Way + Redmond, WA 98052 + U.S.A. + + Phone: +1 425 882-8080 + EMail: michelsu@microsoft.com + URI: http://www.suignard.com + + + + + +Duerst & Suignard Standards Track [Page 45] + +RFC 3987 Internationalized Resource Identifiers January 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the IETF's procedures with respect to rights in IETF Documents can + be found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + +Duerst & Suignard Standards Track [Page 46] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt new file mode 100644 index 0000000..6a4964c --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4088.txt @@ -0,0 +1,1011 @@ + + + + + + +Network Working Group D. Black +Request for Comments: 4088 EMC Corporation +Category: Standards Track K. McCloghrie + Cisco Systems + J. 
Schoenwaelder + International University Bremen + June 2005 + + + Uniform Resource Identifier (URI) Scheme for the + Simple Network Management Protocol (SNMP) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2005). + +Abstract + + The Simple Network Management Protocol (SNMP) and the Internet + Standard Management Framework are widely used for the management of + communication devices, creating a need to specify SNMP access + (including access to SNMP MIB object instances) from non-SNMP + management environments. For example, when out-of-band IP management + is used via a separate management interface (e.g., for a device that + does not support in-band IP access), a uniform way to indicate how to + contact the device for management is needed. Uniform Resource + Identifiers (URIs) fit this need well, as they allow a single text + string to indicate a management access communication endpoint for a + wide variety of IP-based protocols. + + This document defines a URI scheme so that SNMP can be designated as + the protocol used for management. The scheme also allows a URI to + designate one or more MIB object instances. + + + + + + + + + + +Black, et al. Standards Track [Page 1] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Table of Contents + + 1. Introduction.................................................. 2 + 2. Usage......................................................... 3 + 3. Syntax of an SNMP URI......................................... 4 + 3.1. Relative Reference Considerations........................ 5 + 4. Semantics and Operations...................................... 6 + 4.1. 
SNMP Service URIs........................................ 6 + 4.2. SNMP Object URIs......................................... 7 + 4.2.1. SNMP Object URI Data Access....................... 8 + 4.3. OID Groups in SNMP URIs.................................. 10 + 4.4. Interoperability Considerations.......................... 10 + 5. Examples...................................................... 11 + 6. Security Considerations....................................... 12 + 6.1. SNMP URI to SNMP Gateway Security Considerations......... 13 + 7. IANA Considerations........................................... 14 + 8. Normative References.......................................... 14 + 9. Informative References........................................ 15 + 10. Acknowledgements............................................. 16 + Appendix A. Registration Template................................ 17 + +1. Introduction + + SNMP and the Internet-Standard Management Framework were originally + devised to manage IP devices via in-band means, in which management + access is primarily via the same interface(s) used to send and + receive IP traffic. SNMP's wide adoption has resulted in its use for + managing communication devices that do not support in-band IP access + (e.g., Fibre Channel devices); a separate out-of-band IP interface is + often used for management. URIs provide a convenient way to locate + that interface and specify the protocol to be used for management; + one possible scenario is for an in-band query to return a URI that + indicates how the device is managed. This document specifies a URI + scheme to permit SNMP (including a specific SNMP context) to be + designated as the management protocol by such a URI. This scheme + also allows a URI to refer to specific object instances within an + SNMP MIB. + + For a detailed overview of the documents that describe the current + Internet-Standard Management Framework, please refer to Section 7 of + [RFC3410]. 
+ + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC2119]. + + + + + + +Black, et al. Standards Track [Page 2] + +RFC 4088 URI Scheme for SNMP June 2005 + + +2. Usage + + There are two major classes of SNMP URI usage: configuration and + gateways between SNMP and other protocols that use SNMP URIs. + + An SNMP URI used for configuration indicates the location of + management information as part of the configuration of an application + containing an SNMP manager. The URI can be obtained from a + configuration file or may be provided by a managed device (see + Section 1 for an example). Management information is exchanged + between the SNMP manager and agent, but it does not flow beyond the + manager, as shown in the following diagram: + + *********** SNMP-Request ********* + * *================>* * + URI ---------->* Manager * * Agent * + * *<================* * + *********** SNMP-Response ********* + ^ + | + Other Config Info ------------+ + + Additional configuration information (e.g., a security secret or key) + may be provided via an interface other than that used for the URI. + For example, when a managed device provides an SNMP URI in an + unprotected fashion, that device should not provide a secret or key + required to use the URI. The secret or key should instead be pre- + configured in or pre-authorized to the manager; see Section 6. + + For gateway usage, clients employ SNMP URIs to request management + information via an SNMP URI to SNMP gateway (also called an SNMP + gateway in this document). 
The SNMP manager within the SNMP gateway + accesses the management information and returns it to the requesting + client, as shown in the following diagram: + + SNMP gateway + ********** URI *********** SNMP-Request ********* + * *===========>* *================>* * + * Client * * Manager * * Agent * + * *<===========* *<================* * + ********** Info *********** SNMP-Response ********* + ^ + | + Other Config Info ------------+ + + Additional configuration information (e.g., security secrets or keys) + may be provided via an interface other than that used for the URI. + For example, some types of security information, including secrets + + + +Black, et al. Standards Track [Page 3] + +RFC 4088 URI Scheme for SNMP June 2005 + + + and keys, should be pre-configured in or pre-authorized to the + manager rather than be provided by the client; see Section 6. + +3. Syntax of an SNMP URI + + An SNMP URI has the following ABNF [RFC2234] syntax, based on the + ABNF syntax rules for userinfo, host, port, and (path) segment in + [RFC3986] and the ABNF syntax rule for HEXDIG in [RFC2234]: + + snmp-uri = "snmp://" snmp-authority [ context [ oids ]] + + snmp-authority = [ securityName "@" ] host [ ":" port ] + securityName = userinfo ; SNMP securityName + + context = "/" contextName [ ";" contextEngineID ] + contextName = segment ; SNMP contextName + contextEngineID = 1*(HEXDIG HEXDIG) ; SNMP contextEngineID + + oids = "/" ( oid / oid-group ) [ suffix ] + oid-group = "(" oid *( "," oid ) ")" + oid = < as specified by [RFC 3061] > + suffix = "+" / ".*" + + The userinfo and (path) segment ABNF rules are reused for syntax + only. In contrast, host and port have both the syntax and semantics + specified in [RFC3986]. See [RFC3411] for the semantics of + securityName, contextEngineID, and contextName. 
+ + The snmp-authority syntax matches the URI authority syntax in Section + 3.2 of [RFC3986], with the additional restriction that the userinfo + component of an authority (when present) MUST be an SNMP + securityName. If the securityName is empty or not given, the entity + making use of an SNMP URI is expected to know what SNMP securityName + to use if one is required. Inclusion of authentication information + (e.g., passwords) in URIs has been deprecated (see Section 3.2.1 of + [RFC3986]), so any secret or key required for SNMP access must be + provided via other means that may be out-of-band with respect to + communication of the URI. If the port is empty or not given, port + 161 is assumed. + + If the contextName is empty or not given, the zero-length string ("") + is assumed, as it is the default SNMP context. An SNMP + contextEngineID is a variable-format binary element that is usually + discovered by an SNMP manager. An SNMP URI encodes a contextEngineID + as hexadecimal digits corresponding to a sequence of bytes. If the + contextEngineID is empty or not given, the context engine is to be + discovered by querying the SNMP agent at the specified host and port; + see Section 4.1 below. The contextEngineID component of the URI + + + +Black, et al. Standards Track [Page 4] + +RFC 4088 URI Scheme for SNMP June 2005 + + + SHOULD be present if more than one context engine at the designated + host and port supports the designated context. + + An SNMP URI that designates the default SNMP context ("") MAY end + with the "/" character that introduces the contextName component. An + SNMP URI MUST NOT end with the "/" character that introduces an oid + or oid-group component, as the empty string is not a valid OID for + SNMP. 
+ + The encoding rules specified in [RFC3986] MUST be used for SNMP URIs, + including the use of percent encoding ("%" followed by two hex + digits) as needed to represent characters defined as reserved in + [RFC3986] and any characters not allowed in a URI. SNMP permits any + UTF-8 character to be used in a securityName or contextName; all + multi-byte UTF-8 characters in an SNMP URI MUST be percent encoded as + specified in Sections 2.1 and 2.5 of [RFC3986]. These requirements + are a consequence of reusing the ABNF syntax rules for userinfo and + segment from [RFC3986]. + + SNMP URIs will generally be short enough to avoid implementation + string-length limits (e.g., that may occur at 255 characters). Such + limits may be a concern for large OID groups; relative references to + URIs (see Section 4.2 of [RFC3986]) may provide an alternative in + some circumstances. + + Use of IP addresses in SNMP URIs is acceptable in situations where + dependence on availability of DNS service is undesirable or must be + avoided; otherwise, IP addresses should not be used (see [RFC1900] + for further explanation). + +3.1. Relative Reference Considerations + + Use of the SNMP default context (zero-length string) within an SNMP + URI can result in a second instance of "//" in the URI, such as the + following: + + snmp://<host>//<oid> + + This is allowed by [RFC3986] syntax; if a URI parser does not handle + the second "//" correctly, the parser is broken and needs to be + fixed. This example is important because use of the SNMP default + context in SNMP URIs is expected to be common. + + On the other hand, the second occurrence of "//" in an absolute SNMP + URI affects usage of relative references to that URI (see Section 4.2 + of [RFC3986]) because a "//" at the start of a relative reference + always introduces a URI authority component (host plus optional + userinfo and/or port; see [RFC3986]). Specifically, a relative + + + +Black, et al.
Standards Track [Page 5] + +RFC 4088 URI Scheme for SNMP June 2005 + + + reference of the form //<oid> will not work, because the "//" will + cause <oid> to be parsed as a URI authority, resulting in a syntax + error when the parser fails to find a host in <oid>. To avoid this + problem, relative references that start with "//" but do not contain + a URI authority component MUST NOT be used. Functionality equivalent + to any such forbidden relative reference can be obtained by prefixing + "." or ".." to the forbidden relative reference (e.g., ..//<oid>). + The prefix to use depends on the base URI. + +4. Semantics and Operations + + An SNMP URI that does not include any OIDs is called an SNMP service + URI because it designates a communication endpoint for access to SNMP + management service. An SNMP URI that includes one or more OIDs is + called an SNMP object URI because it designates one or more object + instances in an SNMP MIB. The expected means of using an SNMP URI is + to employ an SNMP manager to access the SNMP context designated by + the URI via the SNMP agent at the host and port designated by the + URI. + +4.1. SNMP Service URIs + + An SNMP service URI does not designate a data object, but rather an + SNMP context to be accessed by a service; the telnet URI scheme + [RFC1738] is another example of URIs that designate service access. + If the contextName in the URI is empty or not given, "" (the zero- + length string) is assumed, as it is the default SNMP context. + + If a contextEngineID is given in an SNMP service URI, the context + engine that it designates is to be used. If the contextEngineID is + empty or not given in the URI, the context engine is to be + discovered; the context engine to be used is the one that supports + the context designated by the URI. The contextEngineID component of + the URI SHOULD be present if more than one context engine at the + designated host and port supports the designated context.
+ + Many common uses of SNMP URIs are expected to omit (i.e., default) + the contextEngineID because they do not involve SNMP proxy agents, + which are the most common reason for multiple SNMP context engines to + exist at a single host and port. Specifically, when an SNMP agent is + local to the network interface that it manages, the agent will + usually have only one context engine, in which case it is safe to + omit the contextEngineID component of an SNMP URI. In addition, many + SNMP agents that are local to a network interface support only the + default SNMP context (zero-length string). + + + + + + +Black, et al. Standards Track [Page 6] + +RFC 4088 URI Scheme for SNMP June 2005 + + +4.2. SNMP Object URIs + + An SNMP object URI contains one or more OIDs. The URI is used by + first separating the OID or OID group (including its preceding slash + plus any parentheses and suffix) and then processing the resulting + SNMP service URI as specified in Section 4.1 (above) to determine the + SNMP context to be accessed. The OID or OID group is then used to + generate SNMP operations directed to that SNMP context. + + The semantics of an SNMP object URI depend on whether the OID or OID + group has a suffix and what that suffix is. There are three possible + formats; in each case, the MIB object instances are designated within + the SNMP context specified by the service URI portion of the SNMP + object URI. The semantics of an SNMP object URI that contains a + single OID are as follows: + + (1) An OID without a suffix designates the MIB object instance + named by the OID. + + (2) An OID with a "+" suffix designates the lexically next MIB + object instance following the OID. + + (3) An OID with a ".*" suffix designates the set of MIB object + instances for which the OID is a strict lexical prefix; this + does not include the MIB object instance named by the OID. + + An OID group in an SNMP URI consists of a set of OIDs in parentheses. 
+ In each case, the OID group semantics are the extension of the single + OID semantics to each OID in the group (e.g., a URI with a "+" suffix + designates the set of MIB object instances consisting of the + lexically next instance for each OID in the OID group). + + When there is a choice among URI formats to designate the same MIB + object instance or instances, the above list is in order of + preference (no suffix is most preferable), as it runs from most + precise to least precise. This is because an OID without a suffix + precisely designates an object instance, whereas a "+" suffix + designates the next object instance, which may change, and the ".*" + suffix could designate multiple object instances. Multiple + syntactically distinct SNMP URIs SHOULD NOT be used to designate the + same MIB object instance or set of instances, as this may cause + unexpected results in URI-based systems that use string comparison to + test URIs for equality. + + SNMP object URIs designate the data to be accessed, as opposed to the + specific SNMP operations to be used for access; Section 4.2.1 + provides examples of how SNMP operations can be used to access data + for SNMP object URIs. Nonetheless, any applicable SNMP operation, + + + +Black, et al. Standards Track [Page 7] + +RFC 4088 URI Scheme for SNMP June 2005 + + + including GetBulk, MAY be used to access data for all or part of one + or more SNMP object URIs (e.g., via use of multiple variable bindings + in a single operation); it is not necessary to use the specific + operations described in Section 4.2.1 as long as the results + (returned variable bindings or error) could have been obtained by + following Section 4.2.1's descriptions. The use of relative + references that do not change the contextName (i.e., ./<oid>) should + be viewed as a hint that optimization of SNMP access across multiple + SNMP URIs may be possible.
+ + An SNMP object URI MAY also be used to specify a MIB object instance + or instances to be written; this causes generation of an SNMP Set + operation instead of a Get. The "+" and ".*" suffixes MUST NOT be + used in this case; any attempt to do so is an error that MUST NOT + generate any SNMP Set operations. Values to be written to the MIB + object instance or instances are not specified within an SNMP object + URI. + + SNMP object URIs designate data in SNMP MIBs and hence do not provide + the means to generate all possible SNMP protocol operations. For + example, data access for an SNMP object URI cannot directly generate + either Snmpv2-Trap or InformRequest notifications, although side + effects of data access could cause such notifications (depending on + the MIB). In addition, whether and how GetBulk is used for an SNMP + object URI with a ".*" suffix is implementation specific. + +4.2.1. SNMP Object URI Data Access + + Data access based on an SNMP object URI returns an SNMP variable + binding for each MIB object instance designated by the URI, or an + SNMP error if the operation fails. An SNMP variable binding binds a + variable name (OID) to a value or an SNMP exception (see [RFC3416]). + The SNMP operation or operations needed to access data designated by + an SNMP object URI depend on the OID or OID group suffix or absence + thereof. The following descriptions are not the only method of + performing data access for an SNMP object URI; any suitable SNMP + operations may be used as long as the results (returned variable + bindings or error) are functionally equivalent. + + (1) For an OID or OID group without a suffix, an SNMP Get + operation is generated using each OID as a variable binding + name. If an SNMP error occurs, that error is the result of + URI data access; otherwise, the returned variable binding or + bindings are the result of URI data access. 
Note that any + returned variable binding may contain an SNMP "noSuchObject" + or "noSuchInstance" exception. + + + + + +Black, et al. Standards Track [Page 8] + +RFC 4088 URI Scheme for SNMP June 2005 + + + (2) For an OID or OID group with a "+" suffix, an SNMP GetNext + operation is generated using each OID as a variable binding + name. If an SNMP error occurs, that error is the result of + URI data access; otherwise, the returned variable binding or + bindings are the result of URI data access. Note that any + returned variable binding may contain an SNMP "endOfMibView" + exception. + + (3) For an OID or OID group with a ".*" suffix, an SNMP GetNext + operation is initially generated using each OID as a variable + binding name. If the result is an SNMP error, that error is + the result of URI data access. If all returned variable + bindings contain either a) an OID for which the corresponding + URI OID is not a lexical prefix or b) an SNMP "endOfMibView" + exception, then the returned variable bindings are the result + of URI data access. + + Otherwise, the results of the GetNext operation are saved, and + another SNMP GetNext operation is generated using the newly + returned OIDs as variable binding names. This is repeated + (save the results and generate a GetNext with newly returned + OIDs as variable binding names) until all the returned + variable bindings from a GetNext contain either a) an OID for + which the corresponding URI OID is not a lexical prefix or b) + an SNMP "endOfMibView" exception. The results from all of the + GetNext operations are combined to become the overall result + of URI data access; this may include variable bindings whose + OID is not a lexical extension of the corresponding URI OID. + If the OID subtrees (set of OIDs for which a specific URI OID + is a lexical prefix) are not the same size for all OIDs in the + OID group, the largest subtree determines when this iteration + ends. 
SNMP GetBulk operations MAY be used to optimize this + iterated access. + + Whenever a returned variable binding contains an OID for which + the corresponding URI OID is not a lexical prefix or an SNMP + "endOfMibView" exception, iteration of that element of the OID + group MAY cease, reducing the number of variable bindings used + in subsequent GetNext operations. In this case, the results + of URI data access for the SNMP URI will not consist entirely + of OID-group-sized sets of variable bindings. Even if this + does not occur, the last variable binding returned for each + member of the OID group will generally contain an SNMP + "endOfMibView" exception or an OID for which the corresponding + URI OID is not a lexical prefix. + + + + + + +Black, et al. Standards Track [Page 9] + +RFC 4088 URI Scheme for SNMP June 2005 + + +4.3. OID Groups in SNMP URIs + + Parenthesized OID groups in SNMP URIs are intended to support MIB + object instances for which access via a single SNMP operation is + required to ensure consistent results. Therefore, the OIDs within an + OID group in an SNMP URI SHOULD be accessed by a single SNMP + operation containing a variable binding corresponding to each OID in + the group. A specific example involves the InetAddress and + InetAddressType textual conventions defined in [RFC4001], for which + the format of an InetAddress instance is specified by an associated + InetAddressType instance. If two such associated instances are read + via separate SNMP operations, the resulting values could be + inconsistent (e.g., due to an intervening Set), causing the + InetAddress value to be interpreted incorrectly. + + This single operation requirement ("SHOULD") also applies to each OID + group resulting from iterated access for an SNMP URI with a ".*" + suffix. 
When members of an SNMP URI OID group differ in the number + of OIDs for which each is a lexical prefix, this iteration may + overrun by returning numerous variable bindings for which the + corresponding OID in the OID group is not a lexical prefix. Such + overrun can be avoided by using relative references within the same + context (i.e., ./<oid>.* ) when it is not important to access + multiple MIB object instances in a single SNMP operation. + +4.4. Interoperability Considerations + + This document defines a transport-independent "snmp" scheme that is + intended to accommodate SNMP transports other than UDP. UDP is the + default transport for access to information specified by an SNMP URI + for backward compatibility with existing usage, but other transports + MAY be used. If more than one transport can be used (e.g., SNMP over + TCP [RFC3430] in addition to SNMP over UDP), the information or SNMP + service access designated by an SNMP URI SHOULD NOT depend on which + transport is used (for SNMP over TCP, this is implied by Section 2 of + [RFC3430]). + + An SNMP URI designates use of SNMPv3 as specified by [RFC3416], + [RFC3417], and related documents, but older versions of SNMP MAY be + used in accordance with [RFC3584] when usage of such older versions + is unavoidable. For SNMPv1 and SNMPv2c, the securityName, + contextName, and contextEngineID elements of an SNMP URI are mapped + to/from the community name, as described in [RFC3584]. When the + community name is kept secret as a weak form of authentication, this + mapping should be configured so that these three elements do not + reveal information about the community name. If this is not done, + then any SNMP URI component that would disclose significant + information about a secret community name SHOULD be omitted. Note + + + +Black, et al.
Standards Track [Page 10] + +RFC 4088 URI Scheme for SNMP June 2005 + + + that some community names contain reserved characters (e.g., "@") + that require percent encoding when they are used in an SNMP URI. + SNMP versions (e.g., v3) have been omitted from the SNMP URI scheme + to permit use of older versions of SNMP, as well as any possible + future successor to SNMPv3. + +5. Examples + + snmp://example.com + + This example designates the default SNMP context at the SNMP agent at + port 161 of host example.com . + + snmp://tester5@example.com:8161 + + This example designates the default SNMP context at the SNMP agent at + port 8161 of host example.com and indicates that the SNMP + securityName "tester5" is to be used to access that agent. A + possible reason to use a non-standard port is for testing a new + version of SNMP agent code. + + snmp://example.com/bridge1 + + This example designates the "bridge1" SNMP context at example.com. + Because the contextEngineID component of the URI is omitted, there + SHOULD be at most one SNMP context engine at example.com that + supports the "bridge1" context. + + snmp://example.com/bridge1;800002b804616263 + + This example designates the "bridge1" context at snmp.example.com via + the SNMP context engine 800002b804616263 (string representation of a + hexadecimal value). This avoids ambiguity if any other context + engine supports a "bridge1" context. The above two examples are + based on the figure in Section 3.3 of [RFC3411]. + + snmp://example.com//1.3.6.1.2.1.1.3.0 + snmp://example.com//1.3.6.1.2.1.1.3+ + snmp://example.com//1.3.6.1.2.1.1.3.* + + These three examples all designate the sysUpTime.0 object instance in + the SNMPv2-MIB or RFC1213-MIB for the default SNMP context ("") at + example.com as sysUpTime.0 is: + + a) designated directly by OID 1.3.6.1.2.1.1.3.0, + + b) the lexically next MIB object instance after the OID + 1.3.6.1.2.1.1.3, and + + + +Black, et al.
Standards Track [Page 11] + +RFC 4088 URI Scheme for SNMP June 2005 + + + c) the only MIB object instance whose OID has 1.3.6.1.2.1.1.3 as a + lexical prefix. + + These three examples are provided for illustrative purposes only, as + multiple syntactically distinct URIs SHOULD NOT be used to designate + the same MIB object instance, in order to avoid unexpected results in + URI-based systems that use string comparison to test URIs for + equality. + + snmp://example.com/bridge1/1.3.6.1.2.1.2.2.1.8.* + + This example designates the ifOperStatus column of the IF-MIB in the + bridge1 SNMP context at example.com. + + snmp://example.com//(1.3.6.1.2.1.2.2.1.7,1.3.6.1.2.1.2.2.1.8).* + + This example designates all (ifAdminStatus, ifOperStatus) pairs in + the IF-MIB in the default SNMP context at example.com. + +6. Security Considerations + + An intended use of this URI scheme is designation of the location of + management access to communication devices. Such location + information may be considered sensitive in some environments, making + it important to control access to this information and possibly even + to encrypt it when it is sent over the network. All uses of this URI + scheme should provide security mechanisms appropriate to the + environments in which such uses are likely to be deployed. + + The SNMP architecture includes control of access to management + information (see Section 4.3 of [RFC3411]). An SNMP URI does not + contain sufficient security information to obtain access in all + situations, as the SNMP URI syntax is incapable of encoding SNMP + securityModels, SNMP securityLevels, and credential or keying + information for SNMP securityNames. Other means are necessary to + provide such information; one possibility is out-of-band pre- + configuration of the SNMP manager, as shown in the diagrams in + Section 2. + + By itself, the presence of a securityName in an SNMP URI does not + authorize use of that securityName to access management information.
+ Instead, the SNMP manager SHOULD match the securityName in the URI to + an SNMP securityName and associated security information that have + been pre-configured for use by the manager. If an SNMP URI contains + a securityName that the SNMP manager is not provisioned to use, SNMP + operations for that URI SHOULD NOT be generated. + + + + + +Black, et al. Standards Track [Page 12] + +RFC 4088 URI Scheme for SNMP June 2005 + + + SNMP versions prior to SNMPv3 did not include adequate security. + Even if the network itself is secure (for example, via use of IPsec), + there is no control over who on the secure network is allowed to + access and GET/SET (read/change/create/delete) the objects in MIB + modules. It is RECOMMENDED that implementers consider the security + features provided by the SNMPv3 framework (see [RFC3410], Section 8, + for an overview), including full support for SNMPv3 cryptographic + mechanisms (for authentication and privacy). This is of additional + importance for MIB elements considered sensitive or vulnerable + because GETs have side effects. + + Further, deployment of SNMP versions prior to SNMPv3 is NOT + RECOMMENDED. Instead, it is RECOMMENDED to deploy SNMPv3 and to + enable cryptographic security. It is then a customer/operator + responsibility to ensure that the SNMP entity giving access to a MIB + module instance is properly configured to give access to the objects + only to those principals (users) that have legitimate rights to + indeed GET or SET (read/change/create/delete) them. + +6.1. SNMP URI to SNMP Gateway Security Considerations + + Additional security considerations apply to SNMP gateways that + generate SNMP operations for SNMP URIs and return the results to + clients (see Section 2) because management information is + communicated beyond the SNMP framework. In general, an SNMP gateway + should have some knowledge of the structure and function of the + management information that it accesses via SNMP. 
Among other + benefits, this allows an SNMP gateway to avoid SNMP access control + failures because the gateway can reject an SNMP URI that will cause + such failures before generating any SNMP operations. + + SNMP gateways SHOULD impose authorization or access-control checks on + all clients. If an SNMP gateway does not impose authorization or + access controls, the gateway MUST NOT automatically obtain or use + SNMP authentication material for arbitrary securityNames, as doing so + would defeat SNMP's access controls. Instead, all SNMP gateways + SHOULD authenticate each client and check the client's authorization + to use a securityName in an SNMP URI before using the securityName on + behalf of that client. + + An SNMP gateway is also responsible for ensuring that all of its + communication is appropriately secured. Specifically, an SNMP + gateway SHOULD ensure that communication of management information + with any client is protected to at least the SNMP securityLevel used + for the corresponding SNMP access (see Section 3.4.3 of [RFC3411] for + more information on securityLevel). If the client provides SNMP + security information, the SNMP gateway SHOULD authenticate the client + and SHOULD ensure that an authenticated cryptographic integrity check + + + +Black, et al. Standards Track [Page 13] + +RFC 4088 URI Scheme for SNMP June 2005 + + + is used for that communication to prevent modification of the + security information. In addition, if a client provides any key or + secret, the SNMP gateway SHOULD ensure that encryption is used in + addition to the integrity check for that communication to prevent + disclosure of keys or secrets. + + There are management objects defined in SNMP MIBs whose MAX-ACCESS is + read-write and/or read-create. Such objects may be considered + sensitive or vulnerable in some network environments. 
SNMP gateway + support for SNMP SET operations in a non-secure environment without + proper protection can have a negative effect on network operations. + The individual MIB module specifications, and especially their + security considerations, should be consulted for further information. + + Some readable objects in some MIB modules (i.e., objects with a MAX- + ACCESS other than not-accessible) may be considered sensitive or + vulnerable in some network environments. It is thus important to + control even GET access to these objects via an SNMP gateway and + possibly to even encrypt the values of these objects when they are + sent over the network. The individual MIB module specifications, and + especially their security considerations, should be consulted for + further information. This consideration also applies to objects for + which read operations have side effects. + +7. IANA Considerations + + The IANA has registered the URL registration template found in + Appendix A in accordance with [RFC2717]. + +8. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2234] Crocker, D. and P. Overell, "Augmented BNF for Syntax + Specifications: ABNF", RFC 2234, November 1997. + + [RFC3061] Mealling, M., "A URN Namespace of Object Identifiers", RFC + 3061, February 2001. + + [RFC3411] Harrington, D., Presuhn, R., and B. Wijnen, "An + Architecture for Describing Simple Network Management + Protocol (SNMP) Management Frameworks", STD 62, RFC 3411, + December 2002. + + [RFC3416] Presuhn, R., "Version 2 of the Protocol Operations for the + Simple Network Management Protocol (SNMP)", STD 62, RFC + 3416, December 2002. + + + +Black, et al. Standards Track [Page 14] + +RFC 4088 URI Scheme for SNMP June 2005 + + + [RFC3417] Presuhn, R., "Transport Mappings for the Simple Network + Management Protocol (SNMP)", STD 62, RFC 3417, December + 2002. 
+ + [RFC3584] Frye, R., Levi, D., Routhier, S., and B. Wijnen, + "Coexistence between Version 1, Version 2, and Version 3 of + the Internet-standard Network Management Framework", BCP + 74, RFC 3584, August 2003. + + [RFC3986] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", STD 66, RFC + 3986, January 2005. + +9. Informative References + + [RFC1738] Berners-Lee, T., Masinter, L., and M. McCahill, "Uniform + Resource Locators (URL)", RFC 1738, December 1994. + + [RFC1900] Carpenter, B. and Y. Rekhter, "Renumbering Needs Work", RFC + 1900, February 1996. + + [RFC2717] Petke, R. and I. King, "Registration Procedures for URL + Scheme Names", BCP 35, RFC 2717, November 1999. + + [RFC3410] Case, J., Mundy, R., Partain, D., and B. Stewart, + "Introduction and Applicability Statements for Internet- + Standard Management Framework", RFC 3410, December 2002. + + [RFC3430] Schoenwaelder, J., "Simple Network Management Protocol Over + Transmission Control Protocol Transport Mapping", RFC 3430, + December 2002. + + [RFC3617] Lear, E., "Uniform Resource Identifier (URI) Scheme and + Applicability Statement for the Trivial File Transfer + Protocol (TFTP)", RFC 3617, October 2003. + + [RFC4001] Daniele, M., Haberman, B., Routhier, S., and J. + Schoenwaelder, "Textual Conventions for Internet Network + Addresses", RFC 4001, February 2005. + + + + + + + + + + + + +Black, et al. Standards Track [Page 15] + +RFC 4088 URI Scheme for SNMP June 2005 + + +10. Acknowledgements + + Portions of this document were adapted from Eliot Lear's TFTP URI + scheme specification [RFC3617]. Portions of the security + considerations were adapted from the widely used security + considerations "boilerplate" for MIB modules. 
Comments from Ted + Hardie, Michael Mealling, Larry Masinter, Frank Strauss, Bert Wijnen, + Steve Bellovin, the mreview@ops.ietf.org mailing list and the + uri@w3c.org mailing list on earlier versions of this document have + resulted in significant improvements and are gratefully acknowledged. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Black, et al. Standards Track [Page 16] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Appendix A. Registration Template + + URL scheme name: snmp + URL scheme syntax: Section 3 + Character encoding considerations: Section 3 + Intended usage: Sections 1 and 2 + Applications and/or protocols which use this scheme: SNMP, all + versions, see [RFC3410] and [RFC3584]. Also SNMP over TCP, + see [RFC3430]. + Interoperability considerations: Section 4.4 + Security considerations: Section 6 + Relevant publications: See [RFC3410] for list. Also [RFC3430] + and [RFC3584]. + Contact: David L. Black, see below + Author/Change Controller: IESG + +Authors' Addresses + + David L. Black + EMC Corporation + 176 South Street + Hopkinton, MA 01748 + + Phone: +1 (508) 293-7953 + EMail: black_david@emc.com + + + Keith McCloghrie + Cisco Systems, Inc. + 170 West Tasman Drive + San Jose, CA USA 95134 + + Phone: +1 (408) 526-5260 + EMail: kzm@cisco.com + + + Juergen Schoenwaelder + International University Bremen + P.O. Box 750 561 + 28725 Bremen + Germany + + Phone: +49 421 200 3587 + EMail: j.schoenwaelder@iu-bremen.de + + + + + + + +Black, et al. Standards Track [Page 17] + +RFC 4088 URI Scheme for SNMP June 2005 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2005). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights.
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at ietf- + ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + +Black, et al. 
Standards Track [Page 18] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt new file mode 100644 index 0000000..73f4298 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4271.txt @@ -0,0 +1,5827 @@ + + + + + + +Network Working Group Y. Rekhter, Ed. +Request for Comments: 4271 T. Li, Ed. +Obsoletes: 1771 S. Hares, Ed. +Category: Standards Track January 2006 + + + A Border Gateway Protocol 4 (BGP-4) + +Status of This Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (2006). + +Abstract + + This document discusses the Border Gateway Protocol (BGP), which is + an inter-Autonomous System routing protocol. + + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity for this reachability from which routing loops may be + pruned, and, at the AS level, some policy decisions may be enforced. + + BGP-4 provides a set of mechanisms for supporting Classless Inter- + Domain Routing (CIDR). These mechanisms include support for + advertising a set of destinations as an IP prefix, and eliminating + the concept of network "class" within BGP. BGP-4 also introduces + mechanisms that allow aggregation of routes, including aggregation of + AS paths. + + This document obsoletes RFC 1771. + + + + + + + + + + +Rekhter, et al. 
Standards Track [Page 1] + +RFC 4271 BGP-4 January 2006 + + +Table of Contents + + 1. Introduction ....................................................4 + 1.1. Definition of Commonly Used Terms ..........................4 + 1.2. Specification of Requirements ..............................6 + 2. Acknowledgements ................................................6 + 3. Summary of Operation ............................................7 + 3.1. Routes: Advertisement and Storage ..........................9 + 3.2. Routing Information Base ..................................10 + 4. Message Formats ................................................11 + 4.1. Message Header Format .....................................12 + 4.2. OPEN Message Format .......................................13 + 4.3. UPDATE Message Format .....................................14 + 4.4. KEEPALIVE Message Format ..................................21 + 4.5. NOTIFICATION Message Format ...............................21 + 5. Path Attributes ................................................23 + 5.1. Path Attribute Usage ......................................25 + 5.1.1. ORIGIN .............................................25 + 5.1.2. AS_PATH ............................................25 + 5.1.3. NEXT_HOP ...........................................26 + 5.1.4. MULTI_EXIT_DISC ....................................28 + 5.1.5. LOCAL_PREF .........................................29 + 5.1.6. ATOMIC_AGGREGATE ...................................29 + 5.1.7. AGGREGATOR .........................................30 + 6. BGP Error Handling. ............................................30 + 6.1. Message Header Error Handling .............................31 + 6.2. OPEN Message Error Handling ...............................31 + 6.3. UPDATE Message Error Handling .............................32 + 6.4. NOTIFICATION Message Error Handling .......................34 + 6.5. 
Hold Timer Expired Error Handling .........................34 + 6.6. Finite State Machine Error Handling .......................35 + 6.7. Cease .....................................................35 + 6.8. BGP Connection Collision Detection ........................35 + 7. BGP Version Negotiation ........................................36 + 8. BGP Finite State Machine (FSM) .................................37 + 8.1. Events for the BGP FSM ....................................38 + 8.1.1. Optional Events Linked to Optional Session + Attributes .........................................38 + 8.1.2. Administrative Events ..............................42 + 8.1.3. Timer Events .......................................46 + 8.1.4. TCP Connection-Based Events ........................47 + 8.1.5. BGP Message-Based Events ...........................49 + 8.2. Description of FSM ........................................51 + 8.2.1. FSM Definition .....................................51 + Terms "active" and "passive" ..............52 + FSM and Collision Detection ...............52 + FSM and Optional Session Attributes .......52 + FSM Event Numbers .........................53 + + + +Rekhter, et al. Standards Track [Page 2] + +RFC 4271 BGP-4 January 2006 + + + FSM Actions that are Implementation + Dependent .................................53 + 8.2.2. Finite State Machine ...............................53 + 9. UPDATE Message Handling ........................................75 + 9.1. Decision Process ..........................................76 + 9.1.1. Phase 1: Calculation of Degree of Preference .......77 + 9.1.2. Phase 2: Route Selection ...........................77 + Route Resolvability Condition .............79 + Breaking Ties (Phase 2) ...................80 + 9.1.3. Phase 3: Route Dissemination .......................82 + 9.1.4. Overlapping Routes .................................83 + 9.2. Update-Send Process .......................................84 + 9.2.1. 
Controlling Routing Traffic Overhead ...............85 + Frequency of Route Advertisement ..........85 + Frequency of Route Origination ............85 + 9.2.2. Efficient Organization of Routing Information ......86 + Information Reduction .....................86 + Aggregating Routing Information ...........87 + 9.3. Route Selection Criteria ..................................89 + 9.4. Originating BGP routes ....................................89 + 10. BGP Timers ....................................................90 + Appendix A. Comparison with RFC 1771 .............................92 + Appendix B. Comparison with RFC 1267 .............................93 + Appendix C. Comparison with RFC 1163 .............................93 + Appendix D. Comparison with RFC 1105 .............................94 + Appendix E. TCP Options that May Be Used with BGP ................94 + Appendix F. Implementation Recommendations .......................95 + Appendix F.1. Multiple Networks Per Message .........95 + Appendix F.2. Reducing Route Flapping ...............96 + Appendix F.3. Path Attribute Ordering ...............96 + Appendix F.4. AS_SET Sorting ........................96 + Appendix F.5. Control Over Version Negotiation ......96 + Appendix F.6. Complex AS_PATH Aggregation ...........96 + Security Considerations ...........................................97 + IANA Considerations ...............................................99 + Normative References .............................................101 + Informative References ...........................................101 + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 3] + +RFC 4271 BGP-4 January 2006 + + +1. Introduction + + The Border Gateway Protocol (BGP) is an inter-Autonomous System + routing protocol. + + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. 
This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity for this reachability, from which routing loops may be + pruned and, at the AS level, some policy decisions may be enforced. + + BGP-4 provides a set of mechanisms for supporting Classless Inter- + Domain Routing (CIDR) [RFC1518, RFC1519]. These mechanisms include + support for advertising a set of destinations as an IP prefix and + eliminating the concept of network "class" within BGP. BGP-4 also + introduces mechanisms that allow aggregation of routes, including + aggregation of AS paths. + + Routing information exchanged via BGP supports only the destination- + based forwarding paradigm, which assumes that a router forwards a + packet based solely on the destination address carried in the IP + header of the packet. This, in turn, reflects the set of policy + decisions that can (and cannot) be enforced using BGP. BGP can + support only those policies conforming to the destination-based + forwarding paradigm. + +1.1. Definition of Commonly Used Terms + + This section provides definitions for terms that have a specific + meaning to the BGP protocol and that are used throughout the text. + + Adj-RIBs-In + The Adj-RIBs-In contains unprocessed routing information that has + been advertised to the local BGP speaker by its peers. + + Adj-RIBs-Out + The Adj-RIBs-Out contains the routes for advertisement to specific + peers by means of the local speaker's UPDATE messages. + + Autonomous System (AS) + The classic definition of an Autonomous System is a set of routers + under a single technical administration, using an interior gateway + protocol (IGP) and common metrics to determine how to route + packets within the AS, and using an inter-AS routing protocol to + determine how to route packets to other ASes.
Since this classic + definition was developed, it has become common for a single AS to + + + +Rekhter, et al. Standards Track [Page 4] + +RFC 4271 BGP-4 January 2006 + + + use several IGPs and, sometimes, several sets of metrics within an + AS. The use of the term Autonomous System stresses the fact that, + even when multiple IGPs and metrics are used, the administration + of an AS appears to other ASes to have a single coherent interior + routing plan, and presents a consistent picture of the + destinations that are reachable through it. + + BGP Identifier + A 4-octet unsigned integer that indicates the BGP Identifier of + the sender of BGP messages. A given BGP speaker sets the value of + its BGP Identifier to an IP address assigned to that BGP speaker. + The value of the BGP Identifier is determined upon startup and is + the same for every local interface and BGP peer. + + BGP speaker + A router that implements BGP. + + EBGP + External BGP (BGP connection between external peers). + + External peer + Peer that is in a different Autonomous System than the local + system. + + Feasible route + An advertised route that is available for use by the recipient. + + IBGP + Internal BGP (BGP connection between internal peers). + + Internal peer + Peer that is in the same Autonomous System as the local system. + + IGP + Interior Gateway Protocol - a routing protocol used to exchange + routing information among routers within a single Autonomous + System. + + Loc-RIB + The Loc-RIB contains the routes that have been selected by the + local BGP speaker's Decision Process. + + NLRI + Network Layer Reachability Information. + + Route + A unit of information that pairs a set of destinations with the + attributes of a path to those destinations. The set of + + + +Rekhter, et al. 
Standards Track [Page 5] + +RFC 4271 BGP-4 January 2006 + + + destinations are systems whose IP addresses are contained in one + IP address prefix carried in the Network Layer Reachability + Information (NLRI) field of an UPDATE message. The path is the + information reported in the path attributes field of the same + UPDATE message. + + RIB + Routing Information Base. + + Unfeasible route + A previously advertised feasible route that is no longer available + for use. + +1.2. Specification of Requirements + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [RFC2119]. + +2. Acknowledgements + + This document was originally published as [RFC1267] in October 1991, + jointly authored by Kirk Lougheed and Yakov Rekhter. + + We would like to express our thanks to Guy Almes, Len Bosack, and + Jeffrey C. Honig for their contributions to the earlier version + (BGP-1) of this document. + + We would like to specially acknowledge numerous contributions by + Dennis Ferguson to the earlier version of this document. + + We would like to explicitly thank Bob Braden for the review of the + earlier version (BGP-2) of this document, and for his constructive + and valuable comments. + + We would also like to thank Bob Hinden, Director for Routing of the + Internet Engineering Steering Group, and the team of reviewers he + assembled to review the earlier version (BGP-2) of this document. + This team, consisting of Deborah Estrin, Milo Medin, John Moy, Radia + Perlman, Martha Steenstrup, Mike St. Johns, and Paul Tsuchiya, acted + with a strong combination of toughness, professionalism, and + courtesy. + + Certain sections of the document borrowed heavily from IDRP + [IS10747], which is the OSI counterpart of BGP. 
For this, credit + should be given to the ANSI X3S3.3 group chaired by Lyman Chapin and + to Charles Kunzinger, who was the IDRP editor within that group. + + + + +Rekhter, et al. Standards Track [Page 6] + +RFC 4271 BGP-4 January 2006 + + + We would also like to thank Benjamin Abarbanel, Enke Chen, Edward + Crabbe, Mike Craren, Vincent Gillet, Eric Gray, Jeffrey Haas, Dimitry + Haskin, Stephen Kent, John Krawczyk, David LeRoy, Dan Massey, + Jonathan Natale, Dan Pei, Mathew Richardson, John Scudder, John + Stewart III, Dave Thaler, Paul Traina, Russ White, Curtis Villamizar, + and Alex Zinin for their comments. + + We would like to specially acknowledge Andrew Lange for his help in + preparing the final version of this document. + + Finally, we would like to thank all the members of the IDR Working + Group for their ideas and the support they have given to this + document. + +3. Summary of Operation + + The Border Gateway Protocol (BGP) is an inter-Autonomous System + routing protocol. It is built on experience gained with EGP (as + defined in [RFC904]) and EGP usage in the NSFNET Backbone (as + described in [RFC1092] and [RFC1093]). For more BGP-related + information, see [RFC1772], [RFC1930], [RFC1997], and [RFC2858]. + + The primary function of a BGP speaking system is to exchange network + reachability information with other BGP systems. This network + reachability information includes information on the list of + Autonomous Systems (ASes) that reachability information traverses. + This information is sufficient for constructing a graph of AS + connectivity, from which routing loops may be pruned, and, at the AS + level, some policy decisions may be enforced. + + In the context of this document, we assume that a BGP speaker + advertises to its peers only those routes that it uses itself (in + this context, a BGP speaker is said to "use" a BGP route if it is the + most preferred BGP route and is used in forwarding). 
All other cases + are outside the scope of this document. + + In the context of this document, the term "IP address" refers to an + IP Version 4 address [RFC791]. + + Routing information exchanged via BGP supports only the destination- + based forwarding paradigm, which assumes that a router forwards a + packet based solely on the destination address carried in the IP + header of the packet. This, in turn, reflects the set of policy + decisions that can (and cannot) be enforced using BGP. Note that + some policies cannot be supported by the destination-based forwarding + paradigm, and thus require techniques such as source routing (aka + explicit routing) to be enforced. Such policies cannot be enforced + using BGP either. For example, BGP does not enable one AS to send + + + +Rekhter, et al. Standards Track [Page 7] + +RFC 4271 BGP-4 January 2006 + + + traffic to a neighboring AS for forwarding to some destination + (reachable through but) beyond that neighboring AS, intending that + the traffic take a different route to that taken by the traffic + originating in the neighboring AS (for that same destination). On + the other hand, BGP can support any policy conforming to the + destination-based forwarding paradigm. + + BGP-4 provides a new set of mechanisms for supporting Classless + Inter-Domain Routing (CIDR) [RFC1518, RFC1519]. These mechanisms + include support for advertising a set of destinations as an IP prefix + and eliminating the concept of a network "class" within BGP. BGP-4 + also introduces mechanisms that allow aggregation of routes, + including aggregation of AS paths. + + This document uses the term `Autonomous System' (AS) throughout. The + classic definition of an Autonomous System is a set of routers under + a single technical administration, using an interior gateway protocol + (IGP) and common metrics to determine how to route packets within the + AS, and using an inter-AS routing protocol to determine how to route + packets to other ASes. 
Since this classic definition was developed, + it has become common for a single AS to use several IGPs and, + sometimes, several sets of metrics within an AS. The use of the term + Autonomous System stresses the fact that, even when multiple IGPs and + metrics are used, the administration of an AS appears to other ASes + to have a single coherent interior routing plan and presents a + consistent picture of the destinations that are reachable through it. + + BGP uses TCP [RFC793] as its transport protocol. This eliminates the + need to implement explicit update fragmentation, retransmission, + acknowledgement, and sequencing. BGP listens on TCP port 179. The + error notification mechanism used in BGP assumes that TCP supports a + "graceful" close (i.e., that all outstanding data will be delivered + before the connection is closed). + + A TCP connection is formed between two systems. They exchange + messages to open and confirm the connection parameters. + + The initial data flow is the portion of the BGP routing table that is + allowed by the export policy, called the Adj-RIBs-Out (see Section 3.2). + Incremental updates are sent as the routing tables change. BGP does + not require a periodic refresh of the routing table. To allow local + policy changes to have the correct effect without resetting any BGP + connections, a BGP speaker SHOULD either (a) retain the current + version of the routes advertised to it by all of its peers for the + duration of the connection, or (b) make use of the Route Refresh + extension [RFC2918]. + + + + + +Rekhter, et al. Standards Track [Page 8] + +RFC 4271 BGP-4 January 2006 + + + KEEPALIVE messages may be sent periodically to ensure that the + connection is live. NOTIFICATION messages are sent in response to + errors or special conditions. If a connection encounters an error + condition, a NOTIFICATION message is sent and the connection is + closed.
+ + A peer in a different AS is referred to as an external peer, while a + peer in the same AS is referred to as an internal peer. Internal BGP + and external BGP are commonly abbreviated as IBGP and EBGP. + + If a particular AS has multiple BGP speakers and is providing transit + service for other ASes, then care must be taken to ensure a + consistent view of routing within the AS. A consistent view of the + interior routes of the AS is provided by the IGP used within the AS. + For the purpose of this document, it is assumed that a consistent + view of the routes exterior to the AS is provided by having all BGP + speakers within the AS maintain IBGP with each other. + + This document specifies the base behavior of the BGP protocol. This + behavior can be, and is, modified by extension specifications. When + the protocol is extended, the new behavior is fully documented in the + extension specifications. + +3.1. Routes: Advertisement and Storage + + For the purpose of this protocol, a route is defined as a unit of + information that pairs a set of destinations with the attributes of a + path to those destinations. The set of destinations are systems + whose IP addresses are contained in one IP address prefix that is + carried in the Network Layer Reachability Information (NLRI) field of + an UPDATE message, and the path is the information reported in the + path attributes field of the same UPDATE message. + + Routes are advertised between BGP speakers in UPDATE messages. + Multiple routes that have the same path attributes can be advertised + in a single UPDATE message by including multiple prefixes in the NLRI + field of the UPDATE message. + + Routes are stored in the Routing Information Bases (RIBs): namely, + the Adj-RIBs-In, the Loc-RIB, and the Adj-RIBs-Out, as described in + Section 3.2. + + If a BGP speaker chooses to advertise a previously received route, it + MAY add to, or modify, the path attributes of the route before + advertising it to a peer. 
+ + + + + + +Rekhter, et al. Standards Track [Page 9] + +RFC 4271 BGP-4 January 2006 + + + BGP provides mechanisms by which a BGP speaker can inform its peers + that a previously advertised route is no longer available for use. + There are three methods by which a given BGP speaker can indicate + that a route has been withdrawn from service: + + a) the IP prefix that expresses the destination for a previously + advertised route can be advertised in the WITHDRAWN ROUTES + field in the UPDATE message, thus marking the associated route + as being no longer available for use, + + b) a replacement route with the same NLRI can be advertised, or + + c) the BGP speaker connection can be closed, which implicitly + removes all routes the pair of speakers had advertised to each + other from service. + + Changing the attribute(s) of a route is accomplished by advertising a + replacement route. The replacement route carries new (changed) + attributes and has the same address prefix as the original route. + +3.2. Routing Information Base + + The Routing Information Base (RIB) within a BGP speaker consists of + three distinct parts: + + a) Adj-RIBs-In: The Adj-RIBs-In stores routing information learned + from inbound UPDATE messages that were received from other BGP + speakers. Their contents represent routes that are available + as input to the Decision Process. + + b) Loc-RIB: The Loc-RIB contains the local routing information the + BGP speaker selected by applying its local policies to the + routing information contained in its Adj-RIBs-In. These are + the routes that will be used by the local BGP speaker. The + next hop for each of these routes MUST be resolvable via the + local BGP speaker's Routing Table. + + c) Adj-RIBs-Out: The Adj-RIBs-Out stores information the local BGP + speaker selected for advertisement to its peers. The routing + information stored in the Adj-RIBs-Out will be carried in the + local BGP speaker's UPDATE messages and advertised to its + peers. 
+ + In summary, the Adj-RIBs-In contains unprocessed routing information + that has been advertised to the local BGP speaker by its peers; the + Loc-RIB contains the routes that have been selected by the local BGP + + + + + +Rekhter, et al. Standards Track [Page 10] + +RFC 4271 BGP-4 January 2006 + + + speaker's Decision Process; and the Adj-RIBs-Out organizes the routes + for advertisement to specific peers (by means of the local speaker's + UPDATE messages). + + Although the conceptual model distinguishes between Adj-RIBs-In, + Loc-RIB, and Adj-RIBs-Out, this neither implies nor requires that an + implementation must maintain three separate copies of the routing + information. The choice of implementation (for example, 3 copies of + the information vs 1 copy with pointers) is not constrained by the + protocol. + + Routing information that the BGP speaker uses to forward packets (or + to construct the forwarding table used for packet forwarding) is + maintained in the Routing Table. The Routing Table accumulates + routes to directly connected networks, static routes, routes learned + from the IGP protocols, and routes learned from BGP. Whether a + specific BGP route should be installed in the Routing Table, and + whether a BGP route should override a route to the same destination + installed by another source, is a local policy decision, and is not + specified in this document. In addition to actual packet forwarding, + the Routing Table is used for resolution of the next-hop addresses + specified in BGP updates (see Section 5.1.3). + +4. Message Formats + + This section describes message formats used by BGP. + + BGP messages are sent over TCP connections. A message is processed + only after it is entirely received. The maximum message size is 4096 + octets. All implementations are required to support this maximum + message size. The smallest message that may be sent consists of a + BGP header without a data portion (19 octets). 
+ + All multi-octet fields are in network byte order. + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 11] + +RFC 4271 BGP-4 January 2006 + + +4.1. Message Header Format + + Each message has a fixed-size header. There may or may not be a data + portion following the header, depending on the message type. The + layout of these fields is shown below: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + + + + | | + + + + | Marker | + + + + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Length | Type | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Marker: + + This 16-octet field is included for compatibility; it MUST be + set to all ones. + + Length: + + This 2-octet unsigned integer indicates the total length of the + message, including the header in octets. Thus, it allows one + to locate the (Marker field of the) next message in the TCP + stream. The value of the Length field MUST always be at least + 19 and no greater than 4096, and MAY be further constrained, + depending on the message type. "padding" of extra data after + the message is not allowed. Therefore, the Length field MUST + have the smallest value required, given the rest of the + message. + + Type: + + This 1-octet unsigned integer indicates the type code of the + message. This document defines the following type codes: + + 1 - OPEN + 2 - UPDATE + 3 - NOTIFICATION + 4 - KEEPALIVE + + [RFC2918] defines one more type code. + + + +Rekhter, et al. Standards Track [Page 12] + +RFC 4271 BGP-4 January 2006 + + +4.2. OPEN Message Format + + After a TCP connection is established, the first message sent by each + side is an OPEN message. If the OPEN message is acceptable, a + KEEPALIVE message confirming the OPEN is sent back. 
+ + In addition to the fixed-size BGP header, the OPEN message contains + the following fields: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+ + | Version | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | My Autonomous System | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Hold Time | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | BGP Identifier | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Opt Parm Len | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + | Optional Parameters (variable) | + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Version: + + This 1-octet unsigned integer indicates the protocol version + number of the message. The current BGP version number is 4. + + My Autonomous System: + + This 2-octet unsigned integer indicates the Autonomous System + number of the sender. + + Hold Time: + + This 2-octet unsigned integer indicates the number of seconds + the sender proposes for the value of the Hold Timer. Upon + receipt of an OPEN message, a BGP speaker MUST calculate the + value of the Hold Timer by using the smaller of its configured + Hold Time and the Hold Time received in the OPEN message. The + Hold Time MUST be either zero or at least three seconds. An + implementation MAY reject connections on the basis of the Hold + + + + + +Rekhter, et al. Standards Track [Page 13] + +RFC 4271 BGP-4 January 2006 + + + Time. The calculated value indicates the maximum number of + seconds that may elapse between the receipt of successive + KEEPALIVE and/or UPDATE messages from the sender. + + BGP Identifier: + + This 4-octet unsigned integer indicates the BGP Identifier of + the sender. A given BGP speaker sets the value of its BGP + Identifier to an IP address that is assigned to that BGP + speaker. The value of the BGP Identifier is determined upon + startup and is the same for every local interface and BGP peer. 
+ + Optional Parameters Length: + + This 1-octet unsigned integer indicates the total length of the + Optional Parameters field in octets. If the value of this + field is zero, no Optional Parameters are present. + + Optional Parameters: + + This field contains a list of optional parameters, in which + each parameter is encoded as a triplet. + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-... + | Parm. Type | Parm. Length | Parameter Value (variable) + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-... + + Parameter Type is a one octet field that unambiguously + identifies individual parameters. Parameter Length is a one + octet field that contains the length of the Parameter Value + field in octets. Parameter Value is a variable length field + that is interpreted according to the value of the Parameter + Type field. + + [RFC3392] defines the Capabilities Optional Parameter. + + The minimum length of the OPEN message is 29 octets (including the + message header). + +4.3. UPDATE Message Format + + UPDATE messages are used to transfer routing information between BGP + peers. The information in the UPDATE message can be used to + construct a graph that describes the relationships of the various + Autonomous Systems. By applying rules to be discussed, routing + + + +Rekhter, et al. Standards Track [Page 14] + +RFC 4271 BGP-4 January 2006 + + + information loops and some other anomalies may be detected and + removed from inter-AS routing. + + An UPDATE message is used to advertise feasible routes that share + common path attributes to a peer, or to withdraw multiple unfeasible + routes from service (see 3.1). An UPDATE message MAY simultaneously + advertise a feasible route and withdraw multiple unfeasible routes + from service. 
The UPDATE message always includes the fixed-size BGP + header, and also includes the other fields, as shown below (note, + some of the shown fields may not be present in every UPDATE message): + + +-----------------------------------------------------+ + | Withdrawn Routes Length (2 octets) | + +-----------------------------------------------------+ + | Withdrawn Routes (variable) | + +-----------------------------------------------------+ + | Total Path Attribute Length (2 octets) | + +-----------------------------------------------------+ + | Path Attributes (variable) | + +-----------------------------------------------------+ + | Network Layer Reachability Information (variable) | + +-----------------------------------------------------+ + + Withdrawn Routes Length: + + This 2-octets unsigned integer indicates the total length of + the Withdrawn Routes field in octets. Its value allows the + length of the Network Layer Reachability Information field to + be determined, as specified below. + + A value of 0 indicates that no routes are being withdrawn from + service, and that the WITHDRAWN ROUTES field is not present in + this UPDATE message. + + Withdrawn Routes: + + This is a variable-length field that contains a list of IP + address prefixes for the routes that are being withdrawn from + service. Each IP address prefix is encoded as a 2-tuple of the + form <length, prefix>, whose fields are described below: + + +---------------------------+ + | Length (1 octet) | + +---------------------------+ + | Prefix (variable) | + +---------------------------+ + + + + + +Rekhter, et al. Standards Track [Page 15] + +RFC 4271 BGP-4 January 2006 + + + The use and the meaning of these fields are as follows: + + a) Length: + + The Length field indicates the length in bits of the IP + address prefix. A length of zero indicates a prefix that + matches all IP addresses (with prefix, itself, of zero + octets). 
+ + b) Prefix: + + The Prefix field contains an IP address prefix, followed by + the minimum number of trailing bits needed to make the end + of the field fall on an octet boundary. Note that the value + of trailing bits is irrelevant. + + Total Path Attribute Length: + + This 2-octet unsigned integer indicates the total length of the + Path Attributes field in octets. Its value allows the length + of the Network Layer Reachability field to be determined as + specified below. + + A value of 0 indicates that neither the Network Layer + Reachability Information field nor the Path Attribute field is + present in this UPDATE message. + + Path Attributes: + + A variable-length sequence of path attributes is present in + every UPDATE message, except for an UPDATE message that carries + only the withdrawn routes. Each path attribute is a triple + <attribute type, attribute length, attribute value> of variable + length. + + Attribute Type is a two-octet field that consists of the + Attribute Flags octet, followed by the Attribute Type Code + octet. + + 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Attr. Flags |Attr. Type Code| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The high-order bit (bit 0) of the Attribute Flags octet is the + Optional bit. It defines whether the attribute is optional (if + set to 1) or well-known (if set to 0). + + + +Rekhter, et al. Standards Track [Page 16] + +RFC 4271 BGP-4 January 2006 + + + The second high-order bit (bit 1) of the Attribute Flags octet + is the Transitive bit. It defines whether an optional + attribute is transitive (if set to 1) or non-transitive (if set + to 0). + + For well-known attributes, the Transitive bit MUST be set to 1. + (See Section 5 for a discussion of transitive attributes.) + + The third high-order bit (bit 2) of the Attribute Flags octet + is the Partial bit. It defines whether the information + contained in the optional transitive attribute is partial (if + set to 1) or complete (if set to 0). 
For well-known attributes + and for optional non-transitive attributes, the Partial bit + MUST be set to 0. + + The fourth high-order bit (bit 3) of the Attribute Flags octet + is the Extended Length bit. It defines whether the Attribute + Length is one octet (if set to 0) or two octets (if set to 1). + + The lower-order four bits of the Attribute Flags octet are + unused. They MUST be zero when sent and MUST be ignored when + received. + + The Attribute Type Code octet contains the Attribute Type Code. + Currently defined Attribute Type Codes are discussed in Section + 5. + + If the Extended Length bit of the Attribute Flags octet is set + to 0, the third octet of the Path Attribute contains the length + of the attribute data in octets. + + If the Extended Length bit of the Attribute Flags octet is set + to 1, the third and fourth octets of the path attribute contain + the length of the attribute data in octets. + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 17] + +RFC 4271 BGP-4 January 2006 + + + The remaining octets of the Path Attribute represent the + attribute value and are interpreted according to the Attribute + Flags and the Attribute Type Code. The supported Attribute + Type Codes, and their attribute values and uses are as follows: + + a) ORIGIN (Type Code 1): + + ORIGIN is a well-known mandatory attribute that defines the + origin of the path information. The data octet can assume + the following values: + + Value Meaning + + 0 IGP - Network Layer Reachability Information + is interior to the originating AS + + 1 EGP - Network Layer Reachability Information + learned via the EGP protocol [RFC904] + + 2 INCOMPLETE - Network Layer Reachability + Information learned by some other means + + Usage of this attribute is defined in 5.1.1. + + b) AS_PATH (Type Code 2): + + AS_PATH is a well-known mandatory attribute that is composed + of a sequence of AS path segments. Each AS path segment is + represented by a triple <path segment type, path segment + length, path segment value>. 
+ + The path segment type is a 1-octet length field with the + following values defined: + + Value Segment Type + + 1 AS_SET: unordered set of ASes a route in the + UPDATE message has traversed + + 2 AS_SEQUENCE: ordered set of ASes a route in + the UPDATE message has traversed + + The path segment length is a 1-octet length field, + containing the number of ASes (not the number of octets) in + the path segment value field. + + The path segment value field contains one or more AS + numbers, each encoded as a 2-octet length field. + + + +Rekhter, et al. Standards Track [Page 18] + +RFC 4271 BGP-4 January 2006 + + + Usage of this attribute is defined in 5.1.2. + + c) NEXT_HOP (Type Code 3): + + This is a well-known mandatory attribute that defines the + (unicast) IP address of the router that SHOULD be used as + the next hop to the destinations listed in the Network Layer + Reachability Information field of the UPDATE message. + + Usage of this attribute is defined in 5.1.3. + + d) MULTI_EXIT_DISC (Type Code 4): + + This is an optional non-transitive attribute that is a + four-octet unsigned integer. The value of this attribute + MAY be used by a BGP speaker's Decision Process to + discriminate among multiple entry points to a neighboring + autonomous system. + + Usage of this attribute is defined in 5.1.4. + + e) LOCAL_PREF (Type Code 5): + + LOCAL_PREF is a well-known attribute that is a four-octet + unsigned integer. A BGP speaker uses it to inform its other + internal peers of the advertising speaker's degree of + preference for an advertised route. + + Usage of this attribute is defined in 5.1.5. + + f) ATOMIC_AGGREGATE (Type Code 6) + + ATOMIC_AGGREGATE is a well-known discretionary attribute of + length 0. + + Usage of this attribute is defined in 5.1.6. + + g) AGGREGATOR (Type Code 7) + + AGGREGATOR is an optional transitive attribute of length 6. 
+ The attribute contains the last AS number that formed the + aggregate route (encoded as 2 octets), followed by the IP + address of the BGP speaker that formed the aggregate route + (encoded as 4 octets). This SHOULD be the same address as + the one used for the BGP Identifier of the speaker. + + Usage of this attribute is defined in 5.1.7. + + + + +Rekhter, et al. Standards Track [Page 19] + +RFC 4271 BGP-4 January 2006 + + + Network Layer Reachability Information: + + This variable length field contains a list of IP address + prefixes. The length, in octets, of the Network Layer + Reachability Information is not encoded explicitly, but can be + calculated as: + + UPDATE message Length - 23 - Total Path Attributes Length + - Withdrawn Routes Length + + where UPDATE message Length is the value encoded in the fixed- + size BGP header, Total Path Attribute Length, and Withdrawn + Routes Length are the values encoded in the variable part of + the UPDATE message, and 23 is a combined length of the fixed- + size BGP header, the Total Path Attribute Length field, and the + Withdrawn Routes Length field. + + Reachability information is encoded as one or more 2-tuples of + the form <length, prefix>, whose fields are described below: + + +---------------------------+ + | Length (1 octet) | + +---------------------------+ + | Prefix (variable) | + +---------------------------+ + + The use and the meaning of these fields are as follows: + + a) Length: + + The Length field indicates the length in bits of the IP + address prefix. A length of zero indicates a prefix that + matches all IP addresses (with prefix, itself, of zero + octets). + + b) Prefix: + + The Prefix field contains an IP address prefix, followed by + enough trailing bits to make the end of the field fall on an + octet boundary. Note that the value of the trailing bits is + irrelevant. 
+ + The minimum length of the UPDATE message is 23 octets -- 19 octets + for the fixed header + 2 octets for the Withdrawn Routes Length + 2 + octets for the Total Path Attribute Length (the value of Withdrawn + Routes Length is 0 and the value of Total Path Attribute Length is + 0). + + + + +Rekhter, et al. Standards Track [Page 20] + +RFC 4271 BGP-4 January 2006 + + + An UPDATE message can advertise, at most, one set of path attributes, + but multiple destinations, provided that the destinations share these + attributes. All path attributes contained in a given UPDATE message + apply to all destinations carried in the NLRI field of the UPDATE + message. + + + An UPDATE message can list multiple routes that are to be withdrawn + from service. Each such route is identified by its destination + (expressed as an IP prefix), which unambiguously identifies the route + in the context of the BGP speaker - BGP speaker connection to which + it has been previously advertised. + + + An UPDATE message might advertise only routes that are to be + withdrawn from service, in which case the message will not include + path attributes or Network Layer Reachability Information. + Conversely, it may advertise only a feasible route, in which case the + WITHDRAWN ROUTES field need not be present. + + An UPDATE message SHOULD NOT include the same address prefix in the + WITHDRAWN ROUTES and Network Layer Reachability Information fields. + However, a BGP speaker MUST be able to process UPDATE messages in + this form. A BGP speaker SHOULD treat an UPDATE message of this form + as though the WITHDRAWN ROUTES do not contain the address prefix. + +4.4. KEEPALIVE Message Format + + BGP does not use any TCP-based, keep-alive mechanism to determine if + peers are reachable. Instead, KEEPALIVE messages are exchanged + between peers often enough not to cause the Hold Timer to expire. A + reasonable maximum time between KEEPALIVE messages would be one third + of the Hold Time interval. 
KEEPALIVE messages MUST NOT be sent more + frequently than one per second. An implementation MAY adjust the + rate at which it sends KEEPALIVE messages as a function of the Hold + Time interval. + + If the negotiated Hold Time interval is zero, then periodic KEEPALIVE + messages MUST NOT be sent. + + A KEEPALIVE message consists of only the message header and has a + length of 19 octets. + +4.5. NOTIFICATION Message Format + + A NOTIFICATION message is sent when an error condition is detected. + The BGP connection is closed immediately after it is sent. + + + + +Rekhter, et al. Standards Track [Page 21] + +RFC 4271 BGP-4 January 2006 + + + In addition to the fixed-size BGP header, the NOTIFICATION message + contains the following fields: + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Error code | Error subcode | Data (variable) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Error Code: + + This 1-octet unsigned integer indicates the type of + NOTIFICATION. The following Error Codes have been defined: + + Error Code Symbolic Name Reference + + 1 Message Header Error Section 6.1 + + 2 OPEN Message Error Section 6.2 + + 3 UPDATE Message Error Section 6.3 + + 4 Hold Timer Expired Section 6.5 + + 5 Finite State Machine Error Section 6.6 + + 6 Cease Section 6.7 + + Error subcode: + + This 1-octet unsigned integer provides more specific + information about the nature of the reported error. Each Error + Code may have one or more Error Subcodes associated with it. + If no appropriate Error Subcode is defined, then a zero + (Unspecific) value is used for the Error Subcode field. + + Message Header Error subcodes: + + 1 - Connection Not Synchronized. + 2 - Bad Message Length. + 3 - Bad Message Type. + + + + + + + + + + +Rekhter, et al. 
Standards Track [Page 22] + +RFC 4271 BGP-4 January 2006 + + + OPEN Message Error subcodes: + + 1 - Unsupported Version Number. + 2 - Bad Peer AS. + 3 - Bad BGP Identifier. + 4 - Unsupported Optional Parameter. + 5 - [Deprecated - see Appendix A]. + 6 - Unacceptable Hold Time. + + UPDATE Message Error subcodes: + + 1 - Malformed Attribute List. + 2 - Unrecognized Well-known Attribute. + 3 - Missing Well-known Attribute. + 4 - Attribute Flags Error. + 5 - Attribute Length Error. + 6 - Invalid ORIGIN Attribute. + 7 - [Deprecated - see Appendix A]. + 8 - Invalid NEXT_HOP Attribute. + 9 - Optional Attribute Error. + 10 - Invalid Network Field. + 11 - Malformed AS_PATH. + + Data: + + This variable-length field is used to diagnose the reason for + the NOTIFICATION. The contents of the Data field depend upon + the Error Code and Error Subcode. See Section 6 for more + details. + + Note that the length of the Data field can be determined from + the message Length field by the formula: + + Message Length = 21 + Data Length + + The minimum length of the NOTIFICATION message is 21 octets + (including message header). + +5. Path Attributes + + This section discusses the path attributes of the UPDATE message. + + Path attributes fall into four separate categories: + + 1. Well-known mandatory. + 2. Well-known discretionary. + 3. Optional transitive. + 4. Optional non-transitive. + + + +Rekhter, et al. Standards Track [Page 23] + +RFC 4271 BGP-4 January 2006 + + + BGP implementations MUST recognize all well-known attributes. Some + of these attributes are mandatory and MUST be included in every + UPDATE message that contains NLRI. Others are discretionary and MAY + or MAY NOT be sent in a particular UPDATE message. + + Once a BGP peer has updated any well-known attributes, it MUST pass + these attributes to its peers in any updates it transmits. + + In addition to well-known attributes, each path MAY contain one or + more optional attributes. 
It is not required or expected that all + BGP implementations support all optional attributes. The handling of + an unrecognized optional attribute is determined by the setting of + the Transitive bit in the attribute flags octet. Paths with + unrecognized transitive optional attributes SHOULD be accepted. If a + path with an unrecognized transitive optional attribute is accepted + and passed to other BGP peers, then the unrecognized transitive + optional attribute of that path MUST be passed, along with the path, + to other BGP peers with the Partial bit in the Attribute Flags octet + set to 1. If a path with a recognized, transitive optional attribute + is accepted and passed along to other BGP peers and the Partial bit + in the Attribute Flags octet is set to 1 by some previous AS, it MUST + NOT be set back to 0 by the current AS. Unrecognized non-transitive + optional attributes MUST be quietly ignored and not passed along to + other BGP peers. + + New, transitive optional attributes MAY be attached to the path by + the originator or by any other BGP speaker in the path. If they are + not attached by the originator, the Partial bit in the Attribute + Flags octet is set to 1. The rules for attaching new non-transitive + optional attributes will depend on the nature of the specific + attribute. The documentation of each new non-transitive optional + attribute will be expected to include such rules (the description of + the MULTI_EXIT_DISC attribute gives an example). All optional + attributes (both transitive and non-transitive), MAY be updated (if + appropriate) by BGP speakers in the path. + + The sender of an UPDATE message SHOULD order path attributes within + the UPDATE message in ascending order of attribute type. The + receiver of an UPDATE message MUST be prepared to handle path + attributes within UPDATE messages that are out of order. 
+ + The same attribute (attribute with the same type) cannot appear more + than once within the Path Attributes field of a particular UPDATE + message. + + + + + + + +Rekhter, et al. Standards Track [Page 24] + +RFC 4271 BGP-4 January 2006 + + + The mandatory category refers to an attribute that MUST be present in + both IBGP and EBGP exchanges if NLRI are contained in the UPDATE + message. Attributes classified as optional for the purpose of the + protocol extension mechanism may be purely discretionary, + discretionary, required, or disallowed in certain contexts. + + attribute EBGP IBGP + ORIGIN mandatory mandatory + AS_PATH mandatory mandatory + NEXT_HOP mandatory mandatory + MULTI_EXIT_DISC discretionary discretionary + LOCAL_PREF see Section 5.1.5 required + ATOMIC_AGGREGATE see Section 5.1.6 and 9.1.4 + AGGREGATOR discretionary discretionary + +5.1. Path Attribute Usage + + The usage of each BGP path attribute is described in the following + clauses. + +5.1.1. ORIGIN + + ORIGIN is a well-known mandatory attribute. The ORIGIN attribute is + generated by the speaker that originates the associated routing + information. Its value SHOULD NOT be changed by any other speaker. + +5.1.2. AS_PATH + + AS_PATH is a well-known mandatory attribute. This attribute + identifies the autonomous systems through which routing information + carried in this UPDATE message has passed. The components of this + list can be AS_SETs or AS_SEQUENCEs. + + When a BGP speaker propagates a route it learned from another BGP + speaker's UPDATE message, it modifies the route's AS_PATH attribute + based on the location of the BGP speaker to which the route will be + sent: + + a) When a given BGP speaker advertises the route to an internal + peer, the advertising speaker SHALL NOT modify the AS_PATH + attribute associated with the route. 
+ + b) When a given BGP speaker advertises the route to an external + peer, the advertising speaker updates the AS_PATH attribute as + follows: + + + + + + +Rekhter, et al. Standards Track [Page 25] + +RFC 4271 BGP-4 January 2006 + + + 1) if the first path segment of the AS_PATH is of type + AS_SEQUENCE, the local system prepends its own AS number as + the last element of the sequence (put it in the leftmost + position with respect to the position of octets in the + protocol message). If the act of prepending will cause an + overflow in the AS_PATH segment (i.e., more than 255 ASes), + it SHOULD prepend a new segment of type AS_SEQUENCE and + prepend its own AS number to this new segment. + + 2) if the first path segment of the AS_PATH is of type AS_SET, + the local system prepends a new path segment of type + AS_SEQUENCE to the AS_PATH, including its own AS number in + that segment. + + 3) if the AS_PATH is empty, the local system creates a path + segment of type AS_SEQUENCE, places its own AS into that + segment, and places that segment into the AS_PATH. + + When a BGP speaker originates a route then: + + a) the originating speaker includes its own AS number in a path + segment, of type AS_SEQUENCE, in the AS_PATH attribute of all + UPDATE messages sent to an external peer. In this case, the AS + number of the originating speaker's autonomous system will be + the only entry in the path segment, and this path segment will be + the only segment in the AS_PATH attribute. + + b) the originating speaker includes an empty AS_PATH attribute in + all UPDATE messages sent to internal peers. (An empty AS_PATH + attribute is one whose length field contains the value zero). + + Whenever the modification of the AS_PATH attribute calls for + including or prepending the AS number of the local system, the local + system MAY include/prepend more than one instance of its own AS + number in the AS_PATH attribute. This is controlled via local + configuration. + +5.1.3. 
NEXT_HOP + + The NEXT_HOP is a well-known mandatory attribute that defines the IP + address of the router that SHOULD be used as the next hop to the + destinations listed in the UPDATE message. The NEXT_HOP attribute is + calculated as follows: + + 1) When sending a message to an internal peer, if the route is not + locally originated, the BGP speaker SHOULD NOT modify the + NEXT_HOP attribute unless it has been explicitly configured to + announce its own IP address as the NEXT_HOP. When announcing a + + + +Rekhter, et al. Standards Track [Page 26] + +RFC 4271 BGP-4 January 2006 + + + locally-originated route to an internal peer, the BGP speaker + SHOULD use the interface address of the router through which + the announced network is reachable for the speaker as the + NEXT_HOP. If the route is directly connected to the speaker, + or if the interface address of the router through which the + announced network is reachable for the speaker is the internal + peer's address, then the BGP speaker SHOULD use its own IP + address for the NEXT_HOP attribute (the address of the + interface that is used to reach the peer). + + 2) When sending a message to an external peer, X, and the peer is + one IP hop away from the speaker: + + - If the route being announced was learned from an internal + peer or is locally originated, the BGP speaker can use an + interface address of the internal peer router (or the + internal router) through which the announced network is + reachable for the speaker for the NEXT_HOP attribute, + provided that peer X shares a common subnet with this + address. This is a form of "third party" NEXT_HOP attribute. + + - Otherwise, if the route being announced was learned from an + external peer, the speaker can use an IP address of any + adjacent router (known from the received NEXT_HOP attribute) + that the speaker itself uses for local route calculation in + the NEXT_HOP attribute, provided that peer X shares a common + subnet with this address. 
This is a second form of "third + party" NEXT_HOP attribute. + + - Otherwise, if the external peer to which the route is being + advertised shares a common subnet with one of the interfaces + of the announcing BGP speaker, the speaker MAY use the IP + address associated with such an interface in the NEXT_HOP + attribute. This is known as a "first party" NEXT_HOP + attribute. + + - By default (if none of the above conditions apply), the BGP + speaker SHOULD use the IP address of the interface that the + speaker uses to establish the BGP connection to peer X in the + NEXT_HOP attribute. + + 3) When sending a message to an external peer X, and the peer is + multiple IP hops away from the speaker (aka "multihop EBGP"): + + - The speaker MAY be configured to propagate the NEXT_HOP + attribute. In this case, when advertising a route that the + speaker learned from one of its peers, the NEXT_HOP attribute + of the advertised route is exactly the same as the NEXT_HOP + + + +Rekhter, et al. Standards Track [Page 27] + +RFC 4271 BGP-4 January 2006 + + + attribute of the learned route (the speaker does not modify + the NEXT_HOP attribute). + + - By default, the BGP speaker SHOULD use the IP address of the + interface that the speaker uses in the NEXT_HOP attribute to + establish the BGP connection to peer X. + + Normally, the NEXT_HOP attribute is chosen such that the shortest + available path will be taken. A BGP speaker MUST be able to support + the disabling advertisement of third party NEXT_HOP attributes in + order to handle imperfectly bridged media. + + A route originated by a BGP speaker SHALL NOT be advertised to a peer + using an address of that peer as NEXT_HOP. A BGP speaker SHALL NOT + install a route with itself as the next hop. + + The NEXT_HOP attribute is used by the BGP speaker to determine the + actual outbound interface and immediate next-hop address that SHOULD + be used to forward transit packets to the associated destinations. 
+ + The immediate next-hop address is determined by performing a + recursive route lookup operation for the IP address in the NEXT_HOP + attribute, using the contents of the Routing Table, selecting one + entry if multiple entries of equal cost exist. The Routing Table + entry that resolves the IP address in the NEXT_HOP attribute will + always specify the outbound interface. If the entry specifies an + attached subnet, but does not specify a next-hop address, then the + address in the NEXT_HOP attribute SHOULD be used as the immediate + next-hop address. If the entry also specifies the next-hop address, + this address SHOULD be used as the immediate next-hop address for + packet forwarding. + +5.1.4. MULTI_EXIT_DISC + + The MULTI_EXIT_DISC is an optional non-transitive attribute that is + intended to be used on external (inter-AS) links to discriminate + among multiple exit or entry points to the same neighboring AS. The + value of the MULTI_EXIT_DISC attribute is a four-octet unsigned + number, called a metric. All other factors being equal, the exit + point with the lower metric SHOULD be preferred. If received over + EBGP, the MULTI_EXIT_DISC attribute MAY be propagated over IBGP to + other BGP speakers within the same AS (see also 9.1.2.2). The + MULTI_EXIT_DISC attribute received from a neighboring AS MUST NOT be + propagated to other neighboring ASes. + + A BGP speaker MUST implement a mechanism (based on local + configuration) that allows the MULTI_EXIT_DISC attribute to be + removed from a route. If a BGP speaker is configured to remove the + + + +Rekhter, et al. Standards Track [Page 28] + +RFC 4271 BGP-4 January 2006 + + + MULTI_EXIT_DISC attribute from a route, then this removal MUST be + done prior to determining the degree of preference of the route and + prior to performing route selection (Decision Process phases 1 and + 2). + + An implementation MAY also (based on local configuration) alter the + value of the MULTI_EXIT_DISC attribute received over EBGP. 
If a BGP + speaker is configured to alter the value of the MULTI_EXIT_DISC + attribute received over EBGP, then altering the value MUST be done + prior to determining the degree of preference of the route and prior + to performing route selection (Decision Process phases 1 and 2). See + Section 9.1.2.2 for necessary restrictions on this. + +5.1.5. LOCAL_PREF + + LOCAL_PREF is a well-known attribute that SHALL be included in all + UPDATE messages that a given BGP speaker sends to other internal + peers. A BGP speaker SHALL calculate the degree of preference for + each external route based on the locally-configured policy, and + include the degree of preference when advertising a route to its + internal peers. The higher degree of preference MUST be preferred. + A BGP speaker uses the degree of preference learned via LOCAL_PREF in + its Decision Process (see Section 9.1.1). + + A BGP speaker MUST NOT include this attribute in UPDATE messages it + sends to external peers, except in the case of BGP Confederations + [RFC3065]. If it is contained in an UPDATE message that is received + from an external peer, then this attribute MUST be ignored by the + receiving speaker, except in the case of BGP Confederations + [RFC3065]. + +5.1.6. ATOMIC_AGGREGATE + + ATOMIC_AGGREGATE is a well-known discretionary attribute. + + When a BGP speaker aggregates several routes for the purpose of + advertisement to a particular peer, the AS_PATH of the aggregated + route normally includes an AS_SET formed from the set of ASes from + which the aggregate was formed. In many cases, the network + administrator can determine if the aggregate can safely be advertised + without the AS_SET, and without forming route loops. + + If an aggregate excludes at least some of the AS numbers present in + the AS_PATH of the routes that are aggregated as a result of dropping + the AS_SET, the aggregated route, when advertised to the peer, SHOULD + include the ATOMIC_AGGREGATE attribute. + + + + + +Rekhter, et al. 
Standards Track [Page 29] + +RFC 4271 BGP-4 January 2006 + + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute SHOULD NOT remove the attribute when propagating the route + to other speakers. + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute MUST NOT make any NLRI of that route more specific (as + defined in 9.1.4) when advertising this route to other BGP speakers. + + A BGP speaker that receives a route with the ATOMIC_AGGREGATE + attribute needs to be aware of the fact that the actual path to + destinations, as specified in the NLRI of the route, while having the + loop-free property, may not be the path specified in the AS_PATH + attribute of the route. + +5.1.7. AGGREGATOR + + AGGREGATOR is an optional transitive attribute, which MAY be included + in updates that are formed by aggregation (see Section 9.2.2.2). A + BGP speaker that performs route aggregation MAY add the AGGREGATOR + attribute, which SHALL contain its own AS number and IP address. The + IP address SHOULD be the same as the BGP Identifier of the speaker. + +6. BGP Error Handling. + + This section describes actions to be taken when errors are detected + while processing BGP messages. + + When any of the conditions described here are detected, a + NOTIFICATION message, with the indicated Error Code, Error Subcode, + and Data fields, is sent, and the BGP connection is closed (unless it + is explicitly stated that no NOTIFICATION message is to be sent and + the BGP connection is not to be closed). If no Error Subcode is + specified, then a zero MUST be used. + + The phrase "the BGP connection is closed" means the TCP connection + has been closed, the associated Adj-RIB-In has been cleared, and all + resources for that BGP connection have been deallocated. Entries in + the Loc-RIB associated with the remote peer are marked as invalid. + The local system recalculates its best routes for the destinations of + the routes marked as invalid. 
Before the invalid routes are deleted + from the system, it advertises, to its peers, either withdraws for + the routes marked as invalid, or the new best routes before the + invalid routes are deleted from the system. + + Unless specified explicitly, the Data field of the NOTIFICATION + message that is sent to indicate an error is empty. + + + + + +Rekhter, et al. Standards Track [Page 30] + +RFC 4271 BGP-4 January 2006 + + +6.1. Message Header Error Handling + + All errors detected while processing the Message Header MUST be + indicated by sending the NOTIFICATION message with the Error Code + Message Header Error. The Error Subcode elaborates on the specific + nature of the error. + + The expected value of the Marker field of the message header is all + ones. If the Marker field of the message header is not as expected, + then a synchronization error has occurred and the Error Subcode MUST + be set to Connection Not Synchronized. + + If at least one of the following is true: + + - if the Length field of the message header is less than 19 or + greater than 4096, or + + - if the Length field of an OPEN message is less than the minimum + length of the OPEN message, or + + - if the Length field of an UPDATE message is less than the + minimum length of the UPDATE message, or + + - if the Length field of a KEEPALIVE message is not equal to 19, + or + + - if the Length field of a NOTIFICATION message is less than the + minimum length of the NOTIFICATION message, + + then the Error Subcode MUST be set to Bad Message Length. The Data + field MUST contain the erroneous Length field. + + If the Type field of the message header is not recognized, then the + Error Subcode MUST be set to Bad Message Type. The Data field MUST + contain the erroneous Type field. + +6.2. OPEN Message Error Handling + + All errors detected while processing the OPEN message MUST be + indicated by sending the NOTIFICATION message with the Error Code + OPEN Message Error. 
The Error Subcode elaborates on the specific + nature of the error. + + If the version number in the Version field of the received OPEN + message is not supported, then the Error Subcode MUST be set to + Unsupported Version Number. The Data field is a 2-octet unsigned + integer, which indicates the largest, locally-supported version + number less than the version the remote BGP peer bid (as indicated in + + + +Rekhter, et al. Standards Track [Page 31] + +RFC 4271 BGP-4 January 2006 + + + the received OPEN message), or if the smallest, locally-supported + version number is greater than the version the remote BGP peer bid, + then the smallest, locally-supported version number. + + If the Autonomous System field of the OPEN message is unacceptable, + then the Error Subcode MUST be set to Bad Peer AS. The determination + of acceptable Autonomous System numbers is outside the scope of this + protocol. + + If the Hold Time field of the OPEN message is unacceptable, then the + Error Subcode MUST be set to Unacceptable Hold Time. An + implementation MUST reject Hold Time values of one or two seconds. + An implementation MAY reject any proposed Hold Time. An + implementation that accepts a Hold Time MUST use the negotiated value + for the Hold Time. + + If the BGP Identifier field of the OPEN message is syntactically + incorrect, then the Error Subcode MUST be set to Bad BGP Identifier. + Syntactic correctness means that the BGP Identifier field represents + a valid unicast IP host address. + + If one of the Optional Parameters in the OPEN message is not + recognized, then the Error Subcode MUST be set to Unsupported + Optional Parameters. + + If one of the Optional Parameters in the OPEN message is recognized, + but is malformed, then the Error Subcode MUST be set to 0 + (Unspecific). + +6.3. 
UPDATE Message Error Handling + + All errors detected while processing the UPDATE message MUST be + indicated by sending the NOTIFICATION message with the Error Code + UPDATE Message Error. The error subcode elaborates on the specific + nature of the error. + + Error checking of an UPDATE message begins by examining the path + attributes. If the Withdrawn Routes Length or Total Attribute Length + is too large (i.e., if Withdrawn Routes Length + Total Attribute + Length + 23 exceeds the message Length), then the Error Subcode MUST + be set to Malformed Attribute List. + + If any recognized attribute has Attribute Flags that conflict with + the Attribute Type Code, then the Error Subcode MUST be set to + Attribute Flags Error. The Data field MUST contain the erroneous + attribute (type, length, and value). + + + + + +Rekhter, et al. Standards Track [Page 32] + +RFC 4271 BGP-4 January 2006 + + + If any recognized attribute has an Attribute Length that conflicts + with the expected length (based on the attribute type code), then the + Error Subcode MUST be set to Attribute Length Error. The Data field + MUST contain the erroneous attribute (type, length, and value). + + If any of the well-known mandatory attributes are not present, then + the Error Subcode MUST be set to Missing Well-known Attribute. The + Data field MUST contain the Attribute Type Code of the missing, + well-known attribute. + + If any of the well-known mandatory attributes are not recognized, + then the Error Subcode MUST be set to Unrecognized Well-known + Attribute. The Data field MUST contain the unrecognized attribute + (type, length, and value). + + If the ORIGIN attribute has an undefined value, then the Error Sub- + code MUST be set to Invalid Origin Attribute. The Data field MUST + contain the unrecognized attribute (type, length, and value). + + If the NEXT_HOP attribute field is syntactically incorrect, then the + Error Subcode MUST be set to Invalid NEXT_HOP Attribute. 
The Data + field MUST contain the incorrect attribute (type, length, and value). + Syntactic correctness means that the NEXT_HOP attribute represents a + valid IP host address. + + The IP address in the NEXT_HOP MUST meet the following criteria to be + considered semantically correct: + + a) It MUST NOT be the IP address of the receiving speaker. + + b) In the case of an EBGP, where the sender and receiver are one + IP hop away from each other, either the IP address in the + NEXT_HOP MUST be the sender's IP address that is used to + establish the BGP connection, or the interface associated with + the NEXT_HOP IP address MUST share a common subnet with the + receiving BGP speaker. + + If the NEXT_HOP attribute is semantically incorrect, the error SHOULD + be logged, and the route SHOULD be ignored. In this case, a + NOTIFICATION message SHOULD NOT be sent, and the connection SHOULD + NOT be closed. + + The AS_PATH attribute is checked for syntactic correctness. If the + path is syntactically incorrect, then the Error Subcode MUST be set + to Malformed AS_PATH. + + + + + + +Rekhter, et al. Standards Track [Page 33] + +RFC 4271 BGP-4 January 2006 + + + If the UPDATE message is received from an external peer, the local + system MAY check whether the leftmost (with respect to the position + of octets in the protocol message) AS in the AS_PATH attribute is + equal to the autonomous system number of the peer that sent the + message. If the check determines this is not the case, the Error + Subcode MUST be set to Malformed AS_PATH. + + If an optional attribute is recognized, then the value of this + attribute MUST be checked. If an error is detected, the attribute + MUST be discarded, and the Error Subcode MUST be set to Optional + Attribute Error. The Data field MUST contain the attribute (type, + length, and value). + + If any attribute appears more than once in the UPDATE message, then + the Error Subcode MUST be set to Malformed Attribute List. 
+ + The NLRI field in the UPDATE message is checked for syntactic + validity. If the field is syntactically incorrect, then the Error + Subcode MUST be set to Invalid Network Field. + + If a prefix in the NLRI field is semantically incorrect (e.g., an + unexpected multicast IP address), an error SHOULD be logged locally, + and the prefix SHOULD be ignored. + + An UPDATE message that contains correct path attributes, but no NLRI, + SHALL be treated as a valid UPDATE message. + +6.4. NOTIFICATION Message Error Handling + + If a peer sends a NOTIFICATION message, and the receiver of the + message detects an error in that message, the receiver cannot use a + NOTIFICATION message to report this error back to the peer. Any such + error (e.g., an unrecognized Error Code or Error Subcode) SHOULD be + noticed, logged locally, and brought to the attention of the + administration of the peer. The means to do this, however, lies + outside the scope of this document. + +6.5. Hold Timer Expired Error Handling + + If a system does not receive successive KEEPALIVE, UPDATE, and/or + NOTIFICATION messages within the period specified in the Hold Time + field of the OPEN message, then the NOTIFICATION message with the + Hold Timer Expired Error Code is sent and the BGP connection is + closed. + + + + + + + +Rekhter, et al. Standards Track [Page 34] + +RFC 4271 BGP-4 January 2006 + + +6.6. Finite State Machine Error Handling + + Any error detected by the BGP Finite State Machine (e.g., receipt of + an unexpected event) is indicated by sending the NOTIFICATION message + with the Error Code Finite State Machine Error. + +6.7. Cease + + In the absence of any fatal errors (that are indicated in this + section), a BGP peer MAY choose, at any given time, to close its BGP + connection by sending the NOTIFICATION message with the Error Code + Cease. However, the Cease NOTIFICATION message MUST NOT be used when + a fatal error indicated by this section does exist. 
+ + A BGP speaker MAY support the ability to impose a locally-configured, + upper bound on the number of address prefixes the speaker is willing + to accept from a neighbor. When the upper bound is reached, the + speaker, under control of local configuration, either (a) discards + new address prefixes from the neighbor (while maintaining the BGP + connection with the neighbor), or (b) terminates the BGP connection + with the neighbor. If the BGP speaker decides to terminate its BGP + connection with a neighbor because the number of address prefixes + received from the neighbor exceeds the locally-configured, upper + bound, then the speaker MUST send the neighbor a NOTIFICATION message + with the Error Code Cease. The speaker MAY also log this locally. + +6.8. BGP Connection Collision Detection + + If a pair of BGP speakers try to establish a BGP connection with each + other simultaneously, then two parallel connections will be formed. + If the source IP address used by one of these connections is the same + as the destination IP address used by the other, and the destination + IP address used by the first connection is the same as the source IP + address used by the other, connection collision has occurred. In the + event of connection collision, one of the connections MUST be closed. + + Based on the value of the BGP Identifier, a convention is established + for detecting which BGP connection is to be preserved when a + collision occurs. The convention is to compare the BGP Identifiers + of the peers involved in the collision and to retain only the + connection initiated by the BGP speaker with the higher-valued BGP + Identifier. + + Upon receipt of an OPEN message, the local system MUST examine all of + its connections that are in the OpenConfirm state. A BGP speaker MAY + also examine connections in an OpenSent state if it knows the BGP + Identifier of the peer by means outside of the protocol. 
If, among + these connections, there is a connection to a remote BGP speaker + + + +Rekhter, et al. Standards Track [Page 35] + +RFC 4271 BGP-4 January 2006 + + + whose BGP Identifier equals the one in the OPEN message, and this + connection collides with the connection over which the OPEN message + is received, then the local system performs the following collision + resolution procedure: + + 1) The BGP Identifier of the local system is compared to the BGP + Identifier of the remote system (as specified in the OPEN + message). Comparing BGP Identifiers is done by converting them + to host byte order and treating them as 4-octet unsigned + integers. + + 2) If the value of the local BGP Identifier is less than the + remote one, the local system closes the BGP connection that + already exists (the one that is already in the OpenConfirm + state), and accepts the BGP connection initiated by the remote + system. + + 3) Otherwise, the local system closes the newly created BGP + connection (the one associated with the newly received OPEN + message), and continues to use the existing one (the one that + is already in the OpenConfirm state). + + Unless allowed via configuration, a connection collision with an + existing BGP connection that is in the Established state causes + closing of the newly created connection. + + Note that a connection collision cannot be detected with connections + that are in Idle, Connect, or Active states. + + Closing the BGP connection (that results from the collision + resolution procedure) is accomplished by sending the NOTIFICATION + message with the Error Code Cease. + +7. BGP Version Negotiation + + BGP speakers MAY negotiate the version of the protocol by making + multiple attempts at opening a BGP connection, starting with the + highest version number each BGP speaker supports. 
If an open attempt + fails with an Error Code, OPEN Message Error, and an Error Subcode, + Unsupported Version Number, then the BGP speaker has available the + version number it tried, the version number its peer tried, the + version number passed by its peer in the NOTIFICATION message, and + the version numbers it supports. If the two peers do support one or + more common versions, then this will allow them to rapidly determine + the highest common version. In order to support BGP version + negotiation, future versions of BGP MUST retain the format of the + OPEN and NOTIFICATION messages. + + + + +Rekhter, et al. Standards Track [Page 36] + +RFC 4271 BGP-4 January 2006 + + +8. BGP Finite State Machine (FSM) + + The data structures and FSM described in this document are conceptual + and do not have to be implemented precisely as described here, as + long as the implementations support the described functionality and + they exhibit the same externally visible behavior. + + This section specifies the BGP operation in terms of a Finite State + Machine (FSM). The section falls into two parts: + + 1) Description of Events for the State machine (Section 8.1) + 2) Description of the FSM (Section 8.2) + + Session attributes required (mandatory) for each connection are: + + 1) State + 2) ConnectRetryCounter + 3) ConnectRetryTimer + 4) ConnectRetryTime + 5) HoldTimer + 6) HoldTime + 7) KeepaliveTimer + 8) KeepaliveTime + + The state session attribute indicates the current state of the BGP + FSM. The ConnectRetryCounter indicates the number of times a BGP + peer has tried to establish a peer session. + + The mandatory attributes related to timers are described in Section + 10. Each timer has a "timer" and a "time" (the initial value). + + The optional Session attributes are listed below. 
These optional + attributes may be supported, either per connection or per local + system: + + 1) AcceptConnectionsUnconfiguredPeers + 2) AllowAutomaticStart + 3) AllowAutomaticStop + 4) CollisionDetectEstablishedState + 5) DampPeerOscillations + 6) DelayOpen + 7) DelayOpenTime + 8) DelayOpenTimer + 9) IdleHoldTime + 10) IdleHoldTimer + 11) PassiveTcpEstablishment + 12) SendNOTIFICATIONwithoutOPEN + 13) TrackTcpState + + + +Rekhter, et al. Standards Track [Page 37] + +RFC 4271 BGP-4 January 2006 + + + The optional session attributes support different features of the BGP + functionality that have implications for the BGP FSM state + transitions. Two groups of the attributes which relate to timers + are: + + group 1: DelayOpen, DelayOpenTime, DelayOpenTimer + group 2: DampPeerOscillations, IdleHoldTime, IdleHoldTimer + + The first parameter (DelayOpen, DampPeerOscillations) is an optional + attribute that indicates that the Timer function is active. The + "Time" value specifies the initial value for the "Timer" + (DelayOpenTime, IdleHoldTime). The "Timer" specifies the actual + timer. + + Please refer to Section 8.1.1 for an explanation of the interaction + between these optional attributes and the events signaled to the + state machine. Section 8.2.1.3 also provides a short overview of the + different types of optional attributes (flags or timers). + +8.1. Events for the BGP FSM + +8.1.1. Optional Events Linked to Optional Session Attributes + + The Inputs to the BGP FSM are events. Events can either be mandatory + or optional. Some optional events are linked to optional session + attributes. Optional session attributes enable several groups of FSM + functionality. + + The linkage between FSM functionality, events, and the optional + session attributes are described below. 
+ + Group 1: Automatic Administrative Events (Start/Stop) + + Optional Session Attributes: AllowAutomaticStart, + AllowAutomaticStop, + DampPeerOscillations, + IdleHoldTime, IdleHoldTimer + + Option 1: AllowAutomaticStart + + Description: A BGP peer connection can be started and stopped + by administrative control. This administrative + control can either be manual, based on operator + intervention, or under the control of logic that + is specific to a BGP implementation. The term + "automatic" refers to a start being issued to the + BGP peer connection FSM when such logic determines + that the BGP peer connection should be restarted. + + + +Rekhter, et al. Standards Track [Page 38] + +RFC 4271 BGP-4 January 2006 + + + The AllowAutomaticStart attribute specifies that + this BGP connection supports automatic starting of + the BGP connection. + + If the BGP implementation supports + AllowAutomaticStart, the peer may be repeatedly + restarted. Three other options control the rate + at which the automatic restart occurs: + DampPeerOscillations, IdleHoldTime, and the + IdleHoldTimer. + + The DampPeerOscillations option specifies that the + implementation engages additional logic to damp + the oscillations of BGP peers in the face of + sequences of automatic start and automatic stop. + IdleHoldTime specifies the length of time the BGP + peer is held in the Idle state prior to allowing + the next automatic restart. The IdleHoldTimer is + the timer that holds the peer in Idle state. + + An example of DampPeerOscillations logic is an + increase of the IdleHoldTime value if a BGP peer + oscillates connectivity (connected/disconnected) + repeatedly within a time period. To engage this + logic, a peer could connect and disconnect 10 + times within 5 minutes. The IdleHoldTime value + would be reset from 0 to 120 seconds. 
+ + Values: TRUE or FALSE + + Option 2: AllowAutomaticStop + + Description: This BGP peer session optional attribute indicates + that the BGP connection allows "automatic" + stopping of the BGP connection. An "automatic" + stop is defined as a stop under the control of + implementation-specific logic. The + implementation-specific logic is outside the scope + of this specification. + + Values: TRUE or FALSE + + Option 3: DampPeerOscillations + + Description: The DampPeerOscillations optional session + attribute indicates that the BGP connection is + using logic that damps BGP peer oscillations in + the Idle State. + + + +Rekhter, et al. Standards Track [Page 39] + +RFC 4271 BGP-4 January 2006 + + + Value: TRUE or FALSE + + Option 4: IdleHoldTime + + Description: The IdleHoldTime is the value that is set in the + IdleHoldTimer. + + Values: Time in seconds + + Option 5: IdleHoldTimer + + Description: The IdleHoldTimer aids in controlling BGP peer + oscillation. The IdleHoldTimer is used to keep + the BGP peer in Idle for a particular duration. + The IdleHoldTimer_Expires event is described in + Section 8.1.3. + + Values: Time in seconds + + Group 2: Unconfigured Peers + + Optional Session Attributes: AcceptConnectionsUnconfiguredPeers + + Option 1: AcceptConnectionsUnconfiguredPeers + + Description: The BGP FSM optionally allows the acceptance of + BGP peer connections from neighbors that are not + pre-configured. The + "AcceptConnectionsUnconfiguredPeers" optional + session attribute allows the FSM to support the + state transitions that allow the implementation to + accept or reject these unconfigured peers. + + The AcceptConnectionsUnconfiguredPeers has + security implications. Please refer to the BGP + Vulnerabilities document [RFC4272] for details. + + Value: True or False + + Group 3: TCP processing + + Optional Session Attributes: PassiveTcpEstablishment, + TrackTcpState + + Option 1: PassiveTcpEstablishment + + + + + + +Rekhter, et al. 
Standards Track [Page 40] + +RFC 4271 BGP-4 January 2006 + + + Description: This option indicates that the BGP FSM will + passively wait for the remote BGP peer to + establish the BGP TCP connection. + + value: TRUE or FALSE + + Option 2: TrackTcpState + + Description: The BGP FSM normally tracks the end result of a + TCP connection attempt rather than individual TCP + messages. Optionally, the BGP FSM can support + additional interaction with the TCP connection + negotiation. The interaction with the TCP events + may increase the amount of logging the BGP peer + connection requires and the number of BGP FSM + changes. + + Value: TRUE or FALSE + + Group 4: BGP Message Processing + + Optional Session Attributes: DelayOpen, DelayOpenTime, + DelayOpenTimer, + SendNOTIFICATIONwithoutOPEN, + CollisionDetectEstablishedState + + Option 1: DelayOpen + + Description: The DelayOpen optional session attribute allows + implementations to be configured to delay sending + an OPEN message for a specific time period + (DelayOpenTime). The delay allows the remote BGP + Peer time to send the first OPEN message. + + Value: TRUE or FALSE + + Option 2: DelayOpenTime + + Description: The DelayOpenTime is the initial value set in the + DelayOpenTimer. + + Value: Time in seconds + + Option 3: DelayOpenTimer + + Description: The DelayOpenTimer optional session attribute is + used to delay the sending of an OPEN message on a + + + + +Rekhter, et al. Standards Track [Page 41] + +RFC 4271 BGP-4 January 2006 + + + connection. The DelayOpenTimer_Expires event + (Event 12) is described in Section 8.1.3. + + Value: Time in seconds + + Option 4: SendNOTIFICATIONwithoutOPEN + + Description: The SendNOTIFICATIONwithoutOPEN allows a peer to + send a NOTIFICATION without first sending an OPEN + message. Without this optional session attribute, + the BGP connection assumes that an OPEN message + must be sent by a peer prior to the peer sending a + NOTIFICATION message. 
+ + Value: True or False + + Option 5: CollisionDetectEstablishedState + + Description: Normally, a Detect Collision (see Section 6.8) + will be ignored in the Established state. This + optional session attribute indicates that this BGP + connection processes collisions in the Established + state. + + Value: True or False + + Note: The optional session attributes clarify the BGP FSM + description for existing features of BGP implementations. + The optional session attributes may be pre-defined for an + implementation and not readable via management interfaces + for existing correct implementations. As newer BGP MIBs + (version 2 and beyond) are supported, these fields will be + accessible via a management interface. + +8.1.2. Administrative Events + + An administrative event is an event in which the operator interface + and BGP Policy engine signal the BGP-finite state machine to start or + stop the BGP state machine. The basic start and stop indications are + augmented by optional connection attributes that signal a certain + type of start or stop mechanism to the BGP FSM. An example of this + combination is Event 5, AutomaticStart_with_PassiveTcpEstablishment. + With this event, the BGP implementation signals to the BGP FSM that + the implementation is using an Automatic Start with the option to use + a Passive TCP Establishment. The Passive TCP establishment signals + that this BGP FSM will wait for the remote side to start the TCP + establishment. + + + + +Rekhter, et al. Standards Track [Page 42] + +RFC 4271 BGP-4 January 2006 + + + Note that only Event 1 (ManualStart) and Event 2 (ManualStop) are + mandatory administrative events. All other administrative events are + optional (Events 3-8). Each event below has a name, definition, + status (mandatory or optional), and the optional session attributes + that SHOULD be set at each stage. 
When generating Event 1 through + Event 8 for the BGP FSM, the conditions specified in the "Optional + Attribute Status" section are verified. If any of these conditions + are not satisfied, then the local system should log an FSM error. + + The settings of optional session attributes may be implicit in some + implementations, and therefore may not be set explicitly by an + external operator action. Section 8.2.1.5 describes these implicit + settings of the optional session attributes. The administrative + states described below may also be implicit in some implementations + and not directly configurable by an external operator. + + Event 1: ManualStart + + Definition: Local system administrator manually starts the peer + connection. + + Status: Mandatory + + Optional + Attribute + Status: The PassiveTcpEstablishment attribute SHOULD be set + to FALSE. + + Event 2: ManualStop + + Definition: Local system administrator manually stops the peer + connection. + + Status: Mandatory + + Optional + Attribute + Status: No interaction with any optional attributes. + + Event 3: AutomaticStart + + Definition: Local system automatically starts the BGP + connection. + + Status: Optional, depending on local system + + + + + + +Rekhter, et al. Standards Track [Page 43] + +RFC 4271 BGP-4 January 2006 + + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE if this event occurs. + 2) If the PassiveTcpEstablishment optional session + attribute is supported, it SHOULD be set to + FALSE. + 3) If the DampPeerOscillations is supported, it + SHOULD be set to FALSE when this event occurs. + + Event 4: ManualStart_with_PassiveTcpEstablishment + + Definition: Local system administrator manually starts the peer + connection, but has PassiveTcpEstablishment + enabled. The PassiveTcpEstablishment optional + attribute indicates that the peer will listen prior + to establishing the connection. 
+ + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE if this event occurs. + 2) The DampPeerOscillations attribute SHOULD be set + to FALSE when this event occurs. + + Event 5: AutomaticStart_with_PassiveTcpEstablishment + + Definition: Local system automatically starts the BGP + connection with the PassiveTcpEstablishment + enabled. The PassiveTcpEstablishment optional + attribute indicates that the peer will listen prior + to establishing a connection. + + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. + 2) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE. + 3) If the DampPeerOscillations attribute is + supported, the DampPeerOscillations SHOULD be + set to FALSE. + + + + + +Rekhter, et al. Standards Track [Page 44] + +RFC 4271 BGP-4 January 2006 + + + Event 6: AutomaticStart_with_DampPeerOscillations + + Definition: Local system automatically starts the BGP peer + connection with peer oscillation damping enabled. + The exact method of damping persistent peer + oscillations is determined by the implementation + and is outside the scope of this document. + + Status: Optional, depending on local system. + + Optional + Attribute + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. + 2) The DampPeerOscillations attribute SHOULD be set + to TRUE. + 3) The PassiveTcpEstablishment attribute SHOULD be + set to FALSE. + + Event 7: AutomaticStart_with_DampPeerOscillations_and_ + PassiveTcpEstablishment + + Definition: Local system automatically starts the BGP peer + connection with peer oscillation damping enabled + and PassiveTcpEstablishment enabled. The exact + method of damping persistent peer oscillations is + determined by the implementation and is outside the + scope of this document. 
+ + Status: Optional, depending on local system + + Optional + Attributes + Status: 1) The AllowAutomaticStart attribute SHOULD be set + to TRUE. + 2) The DampPeerOscillations attribute SHOULD be set + to TRUE. + 3) The PassiveTcpEstablishment attribute SHOULD be + set to TRUE. + + Event 8: AutomaticStop + + Definition: Local system automatically stops the BGP + connection. + + An example of an automatic stop event is exceeding + the number of prefixes for a given peer and the + local system automatically disconnecting the peer. + + + +Rekhter, et al. Standards Track [Page 45] + +RFC 4271 BGP-4 January 2006 + + + Status: Optional, depending on local system + + Optional + Attribute + Status: 1) The AllowAutomaticStop attribute SHOULD be TRUE. + +8.1.3. Timer Events + + Event 9: ConnectRetryTimer_Expires + + Definition: An event generated when the ConnectRetryTimer + expires. + + Status: Mandatory + + Event 10: HoldTimer_Expires + + Definition: An event generated when the HoldTimer expires. + + Status: Mandatory + + Event 11: KeepaliveTimer_Expires + + Definition: An event generated when the KeepaliveTimer expires. + + Status: Mandatory + + Event 12: DelayOpenTimer_Expires + + Definition: An event generated when the DelayOpenTimer expires. + + Status: Optional + + Optional + Attribute + Status: If this event occurs, + 1) DelayOpen attribute SHOULD be set to TRUE, + 2) DelayOpenTime attribute SHOULD be supported, + 3) DelayOpenTimer SHOULD be supported. + + Event 13: IdleHoldTimer_Expires + + Definition: An event generated when the IdleHoldTimer expires, + indicating that the BGP connection has completed + waiting for the back-off period to prevent BGP peer + oscillation. + + + + + +Rekhter, et al. Standards Track [Page 46] + +RFC 4271 BGP-4 January 2006 + + + The IdleHoldTimer is only used when the persistent + peer oscillation damping function is enabled by + setting the DampPeerOscillations optional attribute + to TRUE. 
+ + Implementations not implementing the persistent + peer oscillation damping function may not have the + IdleHoldTimer. + + Status: Optional + + Optional + Attribute + Status: If this event occurs: + 1) DampPeerOscillations attribute SHOULD be set to + TRUE. + 2) IdleHoldTimer SHOULD have just expired. + +8.1.4. TCP Connection-Based Events + + Event 14: TcpConnection_Valid + + Definition: Event indicating the local system reception of a + TCP connection request with a valid source IP + address, TCP port, destination IP address, and TCP + Port. The definition of invalid source and invalid + destination IP address is determined by the + implementation. + + BGP's destination port SHOULD be port 179, as + defined by IANA. + + TCP connection request is denoted by the local + system receiving a TCP SYN. + + Status: Optional + + Optional + Attribute + Status: 1) The TrackTcpState attribute SHOULD be set to + TRUE if this event occurs. + + Event 15: Tcp_CR_Invalid + + Definition: Event indicating the local system reception of a + TCP connection request with either an invalid + source address or port number, or an invalid + destination address or port number. + + + +Rekhter, et al. Standards Track [Page 47] + +RFC 4271 BGP-4 January 2006 + + + BGP destination port number SHOULD be 179, as + defined by IANA. + + A TCP connection request occurs when the local + system receives a TCP SYN. + + Status: Optional + + Optional + Attribute + Status: 1) The TrackTcpState attribute SHOULD be set to + TRUE if this event occurs. + + Event 16: Tcp_CR_Acked + + Definition: Event indicating the local system's request to + establish a TCP connection to the remote peer. + + The local system's TCP connection sent a TCP SYN, + received a TCP SYN/ACK message, and sent a TCP ACK. + + Status: Mandatory + + Event 17: TcpConnectionConfirmed + + Definition: Event indicating that the local system has received + a confirmation that the TCP connection has been + established by the remote site. 
+ + The remote peer's TCP engine sent a TCP SYN. The + local peer's TCP engine sent a SYN, ACK message and + now has received a final ACK. + + Status: Mandatory + + Event 18: TcpConnectionFails + + Definition: Event indicating that the local system has received + a TCP connection failure notice. + + The remote BGP peer's TCP machine could have sent a + FIN. The local peer would respond with a FIN-ACK. + Another possibility is that the local peer + indicated a timeout in the TCP connection and + downed the connection. + + Status: Mandatory + + + + +Rekhter, et al. Standards Track [Page 48] + +RFC 4271 BGP-4 January 2006 + + +8.1.5. BGP Message-Based Events + + Event 19: BGPOpen + + Definition: An event is generated when a valid OPEN message has + been received. + + Status: Mandatory + + Optional + Attribute + Status: 1) The DelayOpen optional attribute SHOULD be set + to FALSE. + 2) The DelayOpenTimer SHOULD NOT be running. + + Event 20: BGPOpen with DelayOpenTimer running + + Definition: An event is generated when a valid OPEN message has + been received for a peer that has a successfully + established transport connection and is currently + delaying the sending of a BGP open message. + + Status: Optional + + Optional + Attribute + Status: 1) The DelayOpen attribute SHOULD be set to TRUE. + 2) The DelayOpenTimer SHOULD be running. + + Event 21: BGPHeaderErr + + Definition: An event is generated when a received BGP message + header is not valid. + + Status: Mandatory + + Event 22: BGPOpenMsgErr + + Definition: An event is generated when an OPEN message has been + received with errors. + + Status: Mandatory + + Event 23: OpenCollisionDump + + Definition: An event generated administratively when a + connection collision has been detected while + processing an incoming OPEN message and this + + + +Rekhter, et al. Standards Track [Page 49] + +RFC 4271 BGP-4 January 2006 + + + connection has been selected to be disconnected. 
+ See Section 6.8 for more information on collision + detection. + + Event 23 is an administrative action generated by + implementation logic that determines whether this + connection needs to be dropped per the rules in + Section 6.8. This event may occur if the FSM is + implemented as two linked state machines. + + Status: Optional + + Optional + Attribute + Status: If the state machine is to process this event in + the Established state, + 1) CollisionDetectEstablishedState optional + attribute SHOULD be set to TRUE. + + Please note: The OpenCollisionDump event can occur + in Idle, Connect, Active, OpenSent, and OpenConfirm + without any optional attributes being set. + + Event 24: NotifMsgVerErr + + Definition: An event is generated when a NOTIFICATION message + with "version error" is received. + + Status: Mandatory + + Event 25: NotifMsg + + Definition: An event is generated when a NOTIFICATION message + is received and the error code is anything but + "version error". + + Status: Mandatory + + Event 26: KeepAliveMsg + + Definition: An event is generated when a KEEPALIVE message is + received. + + Status: Mandatory + + + + + + + +Rekhter, et al. Standards Track [Page 50] + +RFC 4271 BGP-4 January 2006 + + + Event 27: UpdateMsg + + Definition: An event is generated when a valid UPDATE message + is received. + + Status: Mandatory + + Event 28: UpdateMsgErr + + Definition: An event is generated when an invalid UPDATE + message is received. + + Status: Mandatory + +8.2. Description of FSM + +8.2.1. FSM Definition + + BGP MUST maintain a separate FSM for each configured peer. Each BGP + peer paired in a potential connection will attempt to connect to the + other, unless configured to remain in the idle state, or configured + to remain passive. For the purpose of this discussion, the active or + connecting side of the TCP connection (the side of a TCP connection + sending the first TCP SYN packet) is called outgoing. 
The passive or + listening side (the sender of the first SYN/ACK) is called an + incoming connection. (See Terms "active" and "passive", later in + this section, for information on the + terms active and passive used below.) + + A BGP implementation MUST connect to and listen on TCP port 179 for + incoming connections in addition to trying to connect to peers. For + each incoming connection, a state machine MUST be instantiated. + There exists a period in which the identity of the peer on the other + end of an incoming connection is known, but the BGP identifier is not + known. During this time, both an incoming and outgoing connection + may exist for the same configured peering. This is referred to as a + connection collision (see Section 6.8). + + A BGP implementation will have, at most, one FSM for each configured + peering, plus one FSM for each incoming TCP connection for which the + peer has not yet been identified. Each FSM corresponds to exactly + one TCP connection. + + There may be more than one connection between a pair of peers if the + connections are configured to use a different pair of IP addresses. + This is referred to as multiple "configured peerings" to the same + peer. + + + + + +Rekhter, et al. Standards Track [Page 51] + +RFC 4271 BGP-4 January 2006 + + + Terms "active" and "passive" + + The terms active and passive have been in the Internet operator's + vocabulary for almost a decade and have proven useful. The words + active and passive have slightly different meanings when applied to a + TCP connection or a peer. There is only one active side and one + passive side to any one TCP connection, per the definition above and + the state machine below. When a BGP speaker is configured as active, + it may end up on either the active or passive side of the connection + that eventually gets established. Once the TCP connection is + completed, it doesn't matter which end was active and which was + passive. The only difference is in which side of the TCP connection + has port number 179. 
+ + FSM and Collision Detection + + There is one FSM per BGP connection. When the connection collision + occurs prior to determining what peer a connection is associated + with, there may be two connections for one peer. After the + connection collision is resolved (see Section 6.8), the FSM for the + connection that is closed SHOULD be disposed. + + FSM and Optional Session Attributes + + Optional Session Attributes specify either attributes that act as + flags (TRUE or FALSE) or optional timers. For optional attributes + that act as flags, if the optional session attribute can be set to + TRUE on the system, the corresponding BGP FSM actions must be + supported. For example, if the following options can be set in a BGP + implementation: AutoStart and PassiveTcpEstablishment, then Events 3, + 4 and 5 must be supported. If an Optional Session attribute cannot + be set to TRUE, the events supporting that set of options do not have + to be supported. + + Each of the optional timers (DelayOpenTimer and IdleHoldTimer) has a + group of attributes that are: + + - flag indicating support, + - Time set in Timer + - Timer. + + The two optional timers show this format: + + DelayOpenTimer: DelayOpen, DelayOpenTime, DelayOpenTimer + IdleHoldTimer: DampPeerOscillations, IdleHoldTime, + IdleHoldTimer + + + + + +Rekhter, et al. Standards Track [Page 52] + +RFC 4271 BGP-4 January 2006 + + + If the flag indicating support for an optional timer (DelayOpen or + DampPeerOscillations) cannot be set to TRUE, the timers and events + supporting that option do not have to be supported. + + FSM Event Numbers + + The Event numbers (1-28) utilized in this state machine description + aid in specifying the behavior of the BGP state machine. + Implementations MAY use these numbers to provide network management + information. The exact form of an FSM or the FSM events are specific + to each implementation. 
+ + FSM Actions that are Implementation Dependent + + At certain points, the BGP FSM specifies that BGP initialization will + occur or that BGP resources will be deleted. The initialization of + the BGP FSM and the associated resources depend on the policy portion + of the BGP implementation. The details of these actions are outside + the scope of the FSM document. + +8.2.2. Finite State Machine + + Idle state: + + Initially, the BGP peer FSM is in the Idle state. Hereafter, the + BGP peer FSM will be shortened to BGP FSM. + + In this state, BGP FSM refuses all incoming BGP connections for + this peer. No resources are allocated to the peer. In response + to a ManualStart event (Event 1) or an AutomaticStart event (Event + 3), the local system: + + - initializes all BGP resources for the peer connection, + + - sets ConnectRetryCounter to zero, + + - starts the ConnectRetryTimer with the initial value, + + - initiates a TCP connection to the other BGP peer, + + - listens for a connection that may be initiated by the remote + BGP peer, and + + - changes its state to Connect. + + The ManualStop event (Event 2) and AutomaticStop (Event 8) event + are ignored in the Idle state. + + + + +Rekhter, et al. Standards Track [Page 53] + +RFC 4271 BGP-4 January 2006 + + + In response to a ManualStart_with_PassiveTcpEstablishment event + (Event 4) or AutomaticStart_with_PassiveTcpEstablishment event + (Event 5), the local system: + + - initializes all BGP resources, + + - sets the ConnectRetryCounter to zero, + + - starts the ConnectRetryTimer with the initial value, + + - listens for a connection that may be initiated by the remote + peer, and + + - changes its state to Active. + + The exact value of the ConnectRetryTimer is a local matter, but it + SHOULD be sufficiently large to allow TCP initialization. 
+ + If the DampPeerOscillations attribute is set to TRUE, the + following three additional events may occur within the Idle state: + + - AutomaticStart_with_DampPeerOscillations (Event 6), + + - AutomaticStart_with_DampPeerOscillations_and_ + PassiveTcpEstablishment (Event 7), + + - IdleHoldTimer_Expires (Event 13). + + Upon receiving these 3 events, the local system will use these + events to prevent peer oscillations. The method of preventing + persistent peer oscillation is outside the scope of this document. + + Any other event (Events 9-12, 15-28) received in the Idle state + does not cause change in the state of the local system. + + Connect State: + + In this state, BGP FSM is waiting for the TCP connection to be + completed. + + The start events (Events 1, 3-7) are ignored in the Connect state. + + In response to a ManualStop event (Event 2), the local system: + + - drops the TCP connection, + + - releases all BGP resources, + + + + +Rekhter, et al. Standards Track [Page 54] + +RFC 4271 BGP-4 January 2006 + + + - sets ConnectRetryCounter to zero, + + - stops the ConnectRetryTimer and sets ConnectRetryTimer to + zero, and + + - changes its state to Idle. + + In response to the ConnectRetryTimer_Expires event (Event 9), the + local system: + + - drops the TCP connection, + + - restarts the ConnectRetryTimer, + + - stops the DelayOpenTimer and resets the timer to zero, + + - initiates a TCP connection to the other BGP peer, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - stays in the Connect state. + + If the DelayOpenTimer_Expires event (Event 12) occurs in the + Connect state, the local system: + + - sends an OPEN message to its peer, + + - sets the HoldTimer to a large value, and + + - changes its state to OpenSent. + + If the BGP FSM receives a TcpConnection_Valid event (Event 14), + the TCP connection is processed, and the connection remains in the + Connect state. 
+ + If the BGP FSM receives a Tcp_CR_Invalid event (Event 15), the + local system rejects the TCP connection, and the connection + remains in the Connect state. + + If the TCP connection succeeds (Event 16 or Event 17), the local + system checks the DelayOpen attribute prior to processing. If the + DelayOpen attribute is set to TRUE, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - sets the DelayOpenTimer to the initial value, and + + + +Rekhter, et al. Standards Track [Page 55] + +RFC 4271 BGP-4 January 2006 + + + - stays in the Connect state. + + If the DelayOpen attribute is set to FALSE, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - completes BGP initialization, + + - sends an OPEN message to its peer, + + - sets the HoldTimer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is suggested. + + If the TCP connection fails (Event 18), the local system checks + the DelayOpenTimer. If the DelayOpenTimer is running, the local + system: + + - restarts the ConnectRetryTimer with the initial value, + + - stops the DelayOpenTimer and resets its value to zero, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - changes its state to Active. + + If the DelayOpenTimer is not running, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to + zero, + + - drops the TCP connection, + + - releases all BGP resources, and + + - changes its state to Idle. + + If an OPEN message is received while the DelayOpenTimer is running + (Event 20), the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - completes the BGP initialization, + + + + +Rekhter, et al. 
Standards Track [Page 56] + +RFC 4271 BGP-4 January 2006 + + + - stops and clears the DelayOpenTimer (sets the value to zero), + + - sends an OPEN message, + + - sends a KEEPALIVE message, + + - if the HoldTimer initial value is non-zero, + + - starts the KeepaliveTimer with the initial value and + + - resets the HoldTimer to the negotiated value, + + else, if the HoldTimer initial value is zero, + + - resets the KeepaliveTimer and + + - resets the HoldTimer value to zero, + + - and changes its state to OpenConfirm. + + If the value of the autonomous system field is the same as the + local Autonomous System number, set the connection status to an + internal connection; otherwise it will be "external". + + If BGP message header checking (Event 21) or OPEN message checking + detects an error (Event 22) (see Section 6.2), the local system: + + - (optionally) If the SendNOTIFICATIONwithoutOPEN attribute is + set to TRUE, then the local system first sends a NOTIFICATION + message with the appropriate error code, and then + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system checks the DelayOpenTimer. If the + DelayOpenTimer is running, the local system: + + + +Rekhter, et al. Standards Track [Page 57] + +RFC 4271 BGP-4 January 2006 + + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and resets the DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. 
+ + If the DelayOpenTimer is not running, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to + zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - performs peer oscillation damping if the DampPeerOscillations + attribute is set to True, and + + - changes its state to Idle. + + In response to any other events (Events 8, 10-11, 13, 19, 23, + 25-28), the local system: + + - if the ConnectRetryTimer is running, stops and resets the + ConnectRetryTimer (sets to zero), + + - if the DelayOpenTimer is running, stops and resets the + DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - performs peer oscillation damping if the DampPeerOscillations + attribute is set to True, and + + - changes its state to Idle. + + + + + +Rekhter, et al. Standards Track [Page 58] + +RFC 4271 BGP-4 January 2006 + + + Active State: + + In this state, BGP FSM is trying to acquire a peer by listening + for, and accepting, a TCP connection. + + The start events (Events 1, 3-7) are ignored in the Active state. + + In response to a ManualStop event (Event 2), the local system: + + - If the DelayOpenTimer is running and the + SendNOTIFICATIONwithoutOPEN session attribute is set, the + local system sends a NOTIFICATION with a Cease, + + - releases all BGP resources including stopping the + DelayOpenTimer + + - drops the TCP connection, + + - sets ConnectRetryCounter to zero, + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer to + zero, and + + - changes its state to Idle. 
+ + In response to a ConnectRetryTimer_Expires event (Event 9), the + local system: + + - restarts the ConnectRetryTimer (with initial value), + + - initiates a TCP connection to the other BGP peer, + + - continues to listen for a TCP connection that may be initiated + by a remote BGP peer, and + + - changes its state to Connect. + + If the local system receives a DelayOpenTimer_Expires event (Event + 12), the local system: + + - sets the ConnectRetryTimer to zero, + + - stops and clears the DelayOpenTimer (set to zero), + + - completes the BGP initialization, + + - sends the OPEN message to its remote peer, + + + + +Rekhter, et al. Standards Track [Page 59] + +RFC 4271 BGP-4 January 2006 + + + - sets its hold timer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is also suggested for this state + transition. + + If the local system receives a TcpConnection_Valid event (Event + 14), the local system processes the TCP connection flags and stays + in the Active state. + + If the local system receives a Tcp_CR_Invalid event (Event 15), + the local system rejects the TCP connection and stays in the + Active State. + + In response to the success of a TCP connection (Event 16 or Event + 17), the local system checks the DelayOpen optional attribute + prior to processing. + + If the DelayOpen attribute is set to TRUE, the local system: + + - stops the ConnectRetryTimer and sets the ConnectRetryTimer + to zero, + + - sets the DelayOpenTimer to the initial value + (DelayOpenTime), and + + - stays in the Active state. + + If the DelayOpen attribute is set to FALSE, the local system: + + - sets the ConnectRetryTimer to zero, + + - completes the BGP initialization, + + - sends the OPEN message to its peer, + + - sets its HoldTimer to a large value, and + + - changes its state to OpenSent. + + A HoldTimer value of 4 minutes is suggested as a "large value" for + the HoldTimer. 
+ + If the local system receives a TcpConnectionFails event (Event + 18), the local system: + + - restarts the ConnectRetryTimer (with the initial value), + + + +Rekhter, et al. Standards Track [Page 60] + +RFC 4271 BGP-4 January 2006 + + + - stops and clears the DelayOpenTimer (sets the value to zero), + + - releases all BGP resources, + + - increments the ConnectRetryCounter by 1, + + - optionally performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If an OPEN message is received and the DelayOpenTimer is running + (Event 20), the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and clears the DelayOpenTimer (sets to zero), + + - completes the BGP initialization, + + - sends an OPEN message, + + - sends a KEEPALIVE message, + + - if the HoldTimer value is non-zero, + + - starts the KeepaliveTimer to initial value, + + - resets the HoldTimer to the negotiated value, + + else if the HoldTimer is zero + + - resets the KeepaliveTimer (set to zero), + + - resets the HoldTimer to zero, and + + - changes its state to OpenConfirm. + + If the value of the autonomous system field is the same as the + local Autonomous System number, set the connection status to an + internal connection; otherwise it will be external. + + If BGP message header checking (Event 21) or OPEN message checking + detects an error (Event 22) (see Section 6.2), the local system: + + + + + + +Rekhter, et al. 
Standards Track [Page 61] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) sends a NOTIFICATION message with the appropriate + error code if the SendNOTIFICATIONwithoutOPEN attribute is set + to TRUE, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system checks the DelayOpenTimer. If the + DelayOpenTimer is running, the local system: + + - stops the ConnectRetryTimer (if running) and sets the + ConnectRetryTimer to zero, + + - stops and resets the DelayOpenTimer (sets to zero), + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. + + If the DelayOpenTimer is not running, the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + + + + +Rekhter, et al. Standards Track [Page 62] + +RFC 4271 BGP-4 January 2006 + + + In response to any other event (Events 8, 10-11, 13, 19, 23, + 25-28), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by one, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + OpenSent: + + In this state, BGP FSM waits for an OPEN message from its peer. + + The start events (Events 1, 3-7) are ignored in the OpenSent + state. 
+ + If a ManualStop event (Event 2) is issued in the OpenSent state, + the local system: + + - sends the NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, and + + - changes its state to Idle. + + If an AutomaticStop event (Event 8) is issued in the OpenSent + state, the local system: + + - sends the NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all the BGP resources, + + - drops the TCP connection, + + + +Rekhter, et al. Standards Track [Page 63] + +RFC 4271 BGP-4 January 2006 + + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the HoldTimer_Expires event (Event 10) occurs, the local system: + + - sends a NOTIFICATION message with the error code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a TcpConnection_Valid (Event 14), Tcp_CR_Acked (Event 16), or a + TcpConnectionConfirmed event (Event 17) is received, a second TCP + connection may be in progress. This second TCP connection is + tracked per Connection Collision processing (Section 6.8) until an + OPEN message is received. + + A TCP Connection Request for an Invalid port (Tcp_CR_Invalid + (Event 15)) is ignored. + + If a TcpConnectionFails event (Event 18) is received, the local + system: + + - closes the BGP connection, + + - restarts the ConnectRetryTimer, + + - continues to listen for a connection that may be initiated by + the remote BGP peer, and + + - changes its state to Active. + + + + + + +Rekhter, et al. 
Standards Track [Page 64] + +RFC 4271 BGP-4 January 2006 + + + When an OPEN message is received, all fields are checked for + correctness. If there are no errors in the OPEN message (Event + 19), the local system: + + - resets the DelayOpenTimer to zero, + + - sets the BGP ConnectRetryTimer to zero, + + - sends a KEEPALIVE message, + + - sets a KeepaliveTimer (via the text below), + + - sets the HoldTimer according to the negotiated value (see + Section 4.2), and + + - changes its state to OpenConfirm. + + If the negotiated hold time value is zero, then the HoldTimer and + KeepaliveTimer are not started. If the value of the Autonomous + System field is the same as the local Autonomous System number, + then the connection is an "internal" connection; otherwise, it is + an "external" connection. (This will impact UPDATE processing as + described below.) + + If the BGP message header checking (Event 21) or OPEN message + checking detects an error (Event 22) (see Section 6.2), the local + system: + + - sends a NOTIFICATION message with the appropriate error code, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is TRUE, and + + - changes its state to Idle. + + Collision detection mechanisms (Section 6.8) need to be applied + when a valid BGP OPEN message is received (Event 19 or Event 20). + Please refer to Section 6.8 for the details of the comparison. An + + + + + +Rekhter, et al. Standards Track [Page 65] + +RFC 4271 BGP-4 January 2006 + + + OpenCollisionDump event occurs when the BGP implementation + determines, by means outside the scope of this document, that a + connection collision has occurred. + + If a connection in the OpenSent state is determined to be the + connection that must be closed, an OpenCollisionDump (Event 23) is + signaled to the state machine. 
If such an event is received in + the OpenSent state, the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If a NOTIFICATION message is received with a version error (Event + 24), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. + + In response to any other event (Events 9, 11-13, 20, 25-28), the + local system: + + - sends the NOTIFICATION with the Error Code Finite State + Machine Error, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + +Rekhter, et al. Standards Track [Page 66] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + OpenConfirm State: + + In this state, BGP waits for a KEEPALIVE or NOTIFICATION message. + + Any start event (Events 1, 3-7) is ignored in the OpenConfirm + state. + + In response to a ManualStop event (Event 2) initiated by the + operator, the local system: + + - sends the NOTIFICATION message with a Cease, + + - releases all BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, + + - sets the ConnectRetryTimer to zero, and + + - changes its state to Idle. 
+ + In response to the AutomaticStop event initiated by the system + (Event 8), the local system: + + - sends the NOTIFICATION message with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the HoldTimer_Expires event (Event 10) occurs before a + KEEPALIVE message is received, the local system: + + + + +Rekhter, et al. Standards Track [Page 67] + +RFC 4271 BGP-4 January 2006 + + + - sends the NOTIFICATION message with the Error Code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the local system receives a KeepaliveTimer_Expires event (Event + 11), the local system: + + - sends a KEEPALIVE message, + + - restarts the KeepaliveTimer, and + + - remains in the OpenConfirmed state. + + In the event of a TcpConnection_Valid event (Event 14), or the + success of a TCP connection (Event 16 or Event 17) while in + OpenConfirm, the local system needs to track the second + connection. + + If a TCP connection is attempted with an invalid port (Event 15), + the local system will ignore the second connection attempt. 
+ + If the local system receives a TcpConnectionFails event (Event 18) + from the underlying TCP or a NOTIFICATION message (Event 25), the + local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + + + +Rekhter, et al. Standards Track [Page 68] + +RFC 4271 BGP-4 January 2006 + + + - changes its state to Idle. + + If the local system receives a NOTIFICATION message with a version + error (NotifMsgVerErr (Event 24)), the local system: + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, and + + - changes its state to Idle. + + If the local system receives a valid OPEN message (BGPOpen (Event + 19)), the collision detect function is processed per Section 6.8. + If this connection is to be dropped due to connection collision, + the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection (send TCP FIN), + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If an OPEN message is received, all fields are checked for + correctness. If the BGP message header checking (BGPHeaderErr + (Event 21)) or OPEN message checking detects an error (see Section + 6.2) (BGPOpenMsgErr (Event 22)), the local system: + + - sends a NOTIFICATION message with the appropriate error code, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + + +Rekhter, et al. 
Standards Track [Page 69] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If, during the processing of another OPEN message, the BGP + implementation determines, by a means outside the scope of this + document, that a connection collision has occurred and this + connection is to be closed, the local system will issue an + OpenCollisionDump event (Event 23). When the local system + receives an OpenCollisionDump event (Event 23), the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the local system receives a KEEPALIVE message (KeepAliveMsg + (Event 26)), the local system: + + - restarts the HoldTimer and + + - changes its state to Established. + + In response to any other event (Events 9, 12-13, 20, 27-28), the + local system: + + - sends a NOTIFICATION with a code of Finite State Machine + Error, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + + + +Rekhter, et al. Standards Track [Page 70] + +RFC 4271 BGP-4 January 2006 + + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + Established State: + + In the Established state, the BGP FSM can exchange UPDATE, + NOTIFICATION, and KEEPALIVE messages with its peer. + + Any Start event (Events 1, 3-7) is ignored in the Established + state. 
+ + In response to a ManualStop event (initiated by an operator) + (Event 2), the local system: + + - sends the NOTIFICATION message with a Cease, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases BGP resources, + + - drops the TCP connection, + + - sets the ConnectRetryCounter to zero, and + + - changes its state to Idle. + + In response to an AutomaticStop event (Event 8), the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + + +Rekhter, et al. Standards Track [Page 71] + +RFC 4271 BGP-4 January 2006 + + + One reason for an AutomaticStop event is: A BGP receives an UPDATE + messages with a number of prefixes for a given peer such that the + total prefixes received exceeds the maximum number of prefixes + configured. The local system automatically disconnects the peer. + + If the HoldTimer_Expires event occurs (Event 10), the local + system: + + - sends a NOTIFICATION message with the Error Code Hold Timer + Expired, + + - sets the ConnectRetryTimer to zero, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + If the KeepaliveTimer_Expires event occurs (Event 11), the local + system: + + - sends a KEEPALIVE message, and + + - restarts its KeepaliveTimer, unless the negotiated HoldTime + value is zero. 
+ + Each time the local system sends a KEEPALIVE or UPDATE message, it + restarts its KeepaliveTimer, unless the negotiated HoldTime value + is zero. + + A TcpConnection_Valid (Event 14), received for a valid port, will + cause the second connection to be tracked. + + An invalid TCP connection (Tcp_CR_Invalid event (Event 15)) will + be ignored. + + In response to an indication that the TCP connection is + successfully established (Event 16 or Event 17), the second + connection SHALL be tracked until it sends an OPEN message. + + + + + + +Rekhter, et al. Standards Track [Page 72] + +RFC 4271 BGP-4 January 2006 + + + If a valid OPEN message (BGPOpen (Event 19)) is received, and if + the CollisionDetectEstablishedState optional attribute is TRUE, + the OPEN message will be checked to see if it collides (Section + 6.8) with any other connection. If the BGP implementation + determines that this connection needs to be terminated, it will + process an OpenCollisionDump event (Event 23). If this connection + needs to be terminated, the local system: + + - sends a NOTIFICATION with a Cease, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations is set to TRUE, and + + - changes its state to Idle. + + If the local system receives a NOTIFICATION message (Event 24 or + Event 25) or a TcpConnectionFails (Event 18) from the underlying + TCP, the local system: + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all the BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - changes its state to Idle. + + + + + + + + + + + +Rekhter, et al. 
Standards Track [Page 73] + +RFC 4271 BGP-4 January 2006 + + + If the local system receives a KEEPALIVE message (Event 26), the + local system: + + - restarts its HoldTimer, if the negotiated HoldTime value is + non-zero, and + + - remains in the Established state. + + If the local system receives an UPDATE message (Event 27), the + local system: + + - processes the message, + + - restarts its HoldTimer, if the negotiated HoldTime value is + non-zero, and + + - remains in the Established state. + + If the local system receives an UPDATE message, and the UPDATE + message error handling procedure (see Section 6.3) detects an + error (Event 28), the local system: + + - sends a NOTIFICATION message with an Update error, + + - sets the ConnectRetryTimer to zero, + + - deletes all routes associated with this connection, + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + + In response to any other event (Events 9, 12-13, 20-22), the local + system: + + - sends a NOTIFICATION message with the Error Code Finite State + Machine Error, + + - deletes all routes associated with this connection, + + - sets the ConnectRetryTimer to zero, + + + +Rekhter, et al. Standards Track [Page 74] + +RFC 4271 BGP-4 January 2006 + + + - releases all BGP resources, + + - drops the TCP connection, + + - increments the ConnectRetryCounter by 1, + + - (optionally) performs peer oscillation damping if the + DampPeerOscillations attribute is set to TRUE, and + + - changes its state to Idle. + +9. UPDATE Message Handling + + An UPDATE message may be received only in the Established state. + Receiving an UPDATE message in any other state is an error. When an + UPDATE message is received, each field is checked for validity, as + specified in Section 6.3. 
+ + If an optional non-transitive attribute is unrecognized, it is + quietly ignored. If an optional transitive attribute is + unrecognized, the Partial bit (the third high-order bit) in the + attribute flags octet is set to 1, and the attribute is retained for + propagation to other BGP speakers. + + If an optional attribute is recognized and has a valid value, then, + depending on the type of the optional attribute, it is processed + locally, retained, and updated, if necessary, for possible + propagation to other BGP speakers. + + If the UPDATE message contains a non-empty WITHDRAWN ROUTES field, + the previously advertised routes, whose destinations (expressed as IP + prefixes) are contained in this field, SHALL be removed from the + Adj-RIB-In. This BGP speaker SHALL run its Decision Process because + the previously advertised route is no longer available for use. + + If the UPDATE message contains a feasible route, the Adj-RIB-In will + be updated with this route as follows: if the NLRI of the new route + is identical to the one the route currently has stored in the Adj- + RIB-In, then the new route SHALL replace the older route in the Adj- + RIB-In, thus implicitly withdrawing the older route from service. + Otherwise, if the Adj-RIB-In has no route with NLRI identical to the + new route, the new route SHALL be placed in the Adj-RIB-In. + + Once the BGP speaker updates the Adj-RIB-In, the speaker SHALL run + its Decision Process. + + + + + + +Rekhter, et al. Standards Track [Page 75] + +RFC 4271 BGP-4 January 2006 + + +9.1. Decision Process + + The Decision Process selects routes for subsequent advertisement by + applying the policies in the local Policy Information Base (PIB) to + the routes stored in its Adj-RIBs-In. The output of the Decision + Process is the set of routes that will be advertised to peers; the + selected routes will be stored in the local speaker's Adj-RIBs-Out, + according to policy. 
+ + The BGP Decision Process described here is conceptual, and does not + have to be implemented precisely as described, as long as the + implementations support the described functionality and they exhibit + the same externally visible behavior. + + The selection process is formalized by defining a function that takes + the attribute of a given route as an argument and returns either (a) + a non-negative integer denoting the degree of preference for the + route, or (b) a value denoting that this route is ineligible to be + installed in Loc-RIB and will be excluded from the next phase of + route selection. + + The function that calculates the degree of preference for a given + route SHALL NOT use any of the following as its inputs: the existence + of other routes, the non-existence of other routes, or the path + attributes of other routes. Route selection then consists of the + individual application of the degree of preference function to each + feasible route, followed by the choice of the one with the highest + degree of preference. + + The Decision Process operates on routes contained in the Adj-RIBs-In, + and is responsible for: + + - selection of routes to be used locally by the speaker + + - selection of routes to be advertised to other BGP peers + + - route aggregation and route information reduction + + The Decision Process takes place in three distinct phases, each + triggered by a different event: + + a) Phase 1 is responsible for calculating the degree of preference + for each route received from a peer. + + b) Phase 2 is invoked on completion of phase 1. It is responsible + for choosing the best route out of all those available for each + distinct destination, and for installing each chosen route into + the Loc-RIB. + + + +Rekhter, et al. Standards Track [Page 76] + +RFC 4271 BGP-4 January 2006 + + + c) Phase 3 is invoked after the Loc-RIB has been modified. 
It is + responsible for disseminating routes in the Loc-RIB to each + peer, according to the policies contained in the PIB. Route + aggregation and information reduction can optionally be + performed within this phase. + +9.1.1. Phase 1: Calculation of Degree of Preference + + The Phase 1 decision function is invoked whenever the local BGP + speaker receives, from a peer, an UPDATE message that advertises a + new route, a replacement route, or withdrawn routes. + + The Phase 1 decision function is a separate process, which completes + when it has no further work to do. + + The Phase 1 decision function locks an Adj-RIB-In prior to operating + on any route contained within it, and unlocks it after operating on + all new or unfeasible routes contained within it. + + For each newly received or replacement feasible route, the local BGP + speaker determines a degree of preference as follows: + + If the route is learned from an internal peer, either the value of + the LOCAL_PREF attribute is taken as the degree of preference, or + the local system computes the degree of preference of the route + based on preconfigured policy information. Note that the latter + may result in formation of persistent routing loops. + + If the route is learned from an external peer, then the local BGP + speaker computes the degree of preference based on preconfigured + policy information. If the return value indicates the route is + ineligible, the route MAY NOT serve as an input to the next phase + of route selection; otherwise, the return value MUST be used as + the LOCAL_PREF value in any IBGP readvertisement. + + The exact nature of this policy information, and the computation + involved, is a local matter. + +9.1.2. Phase 2: Route Selection + + The Phase 2 decision function is invoked on completion of Phase 1. + The Phase 2 function is a separate process, which completes when it + has no further work to do.
The Phase 2 process considers all routes + that are eligible in the Adj-RIBs-In. + + + + + + + +Rekhter, et al. Standards Track [Page 77] + +RFC 4271 BGP-4 January 2006 + + + The Phase 2 decision function is blocked from running while the Phase + 3 decision function is in process. The Phase 2 function locks all + Adj-RIBs-In prior to commencing its function, and unlocks them on + completion. + + If the NEXT_HOP attribute of a BGP route depicts an address that is + not resolvable, or if it would become unresolvable if the route was + installed in the routing table, the BGP route MUST be excluded from + the Phase 2 decision function. + + If the AS_PATH attribute of a BGP route contains an AS loop, the BGP + route should be excluded from the Phase 2 decision function. AS loop + detection is done by scanning the full AS path (as specified in the + AS_PATH attribute), and checking that the autonomous system number of + the local system does not appear in the AS path. Operations of a BGP + speaker that is configured to accept routes with its own autonomous + system number in the AS path are outside the scope of this document. + + It is critical that BGP speakers within an AS do not make conflicting + decisions regarding route selection that would cause forwarding loops + to occur. + + For each set of destinations for which a feasible route exists in the + Adj-RIBs-In, the local BGP speaker identifies the route that has: + + a) the highest degree of preference of any route to the same set + of destinations, or + + b) is the only route to that destination, or + + c) is selected as a result of the Phase 2 tie breaking rules + specified in Section 9.1.2.2. + + The local speaker SHALL then install that route in the Loc-RIB, + replacing any route to the same destination that is currently being + held in the Loc-RIB.
When the new BGP route is installed in the + Routing Table, care must be taken to ensure that existing routes to + the same destination that are now considered invalid are removed from + the Routing Table. Whether the new BGP route replaces an existing + non-BGP route in the Routing Table depends on the policy configured + on the BGP speaker. + + The local speaker MUST determine the immediate next-hop address from + the NEXT_HOP attribute of the selected route (see Section 5.1.3). If + either the immediate next-hop or the IGP cost to the NEXT_HOP (where + the NEXT_HOP is resolved through an IGP route) changes, Phase 2 Route + Selection MUST be performed again. + + + + +Rekhter, et al. Standards Track [Page 78] + +RFC 4271 BGP-4 January 2006 + + + Notice that even though BGP routes do not have to be installed in the + Routing Table with the immediate next-hop(s), implementations MUST + take care that, before any packets are forwarded along a BGP route, + its associated NEXT_HOP address is resolved to the immediate + (directly connected) next-hop address, and that this address (or + multiple addresses) is finally used for actual packet forwarding. + + Unresolvable routes SHALL be removed from the Loc-RIB and the routing + table. However, corresponding unresolvable routes SHOULD be kept in + the Adj-RIBs-In (in case they become resolvable). + +9.1.2.1. Route Resolvability Condition + + As indicated in Section 9.1.2, BGP speakers SHOULD exclude + unresolvable routes from the Phase 2 decision. This ensures that + only valid routes are installed in Loc-RIB and the Routing Table. + + The route resolvability condition is defined as follows: + + 1) A route Rte1, referencing only the intermediate network + address, is considered resolvable if the Routing Table contains + at least one resolvable route Rte2 that matches Rte1's + intermediate network address and is not recursively resolved + (directly or indirectly) through Rte1.
If multiple matching + routes are available, only the longest matching route SHOULD be + considered. + + 2) Routes referencing interfaces (with or without intermediate + addresses) are considered resolvable if the state of the + referenced interface is up and if IP processing is enabled on + this interface. + + BGP routes do not refer to interfaces, but can be resolved through + the routes in the Routing Table that can be of both types (those that + specify interfaces or those that do not). IGP routes and routes to + directly connected networks are expected to specify the outbound + interface. Static routes can specify the outbound interface, the + intermediate address, or both. + + Note that a BGP route is considered unresolvable in a situation where + the BGP speaker's Routing Table contains no route matching the BGP + route's NEXT_HOP. Mutually recursive routes (routes resolving each + other or themselves) also fail the resolvability check. + + It is also important that implementations do not consider feasible + routes that would become unresolvable if they were installed in the + Routing Table, even if their NEXT_HOPs are resolvable using the + current contents of the Routing Table (an example of such routes + + + +Rekhter, et al. Standards Track [Page 79] + +RFC 4271 BGP-4 January 2006 + + + would be mutually recursive routes). This check ensures that a BGP + speaker does not install routes in the Routing Table that will be + removed and not used by the speaker. Therefore, in addition to local + Routing Table stability, this check also improves behavior of the + protocol in the network. + + Whenever a BGP speaker identifies a route that fails the + resolvability check because of mutual recursion, an error message + SHOULD be logged. + +9.1.2.2. Breaking Ties (Phase 2) + + In its Adj-RIBs-In, a BGP speaker may have several routes to the same + destination that have the same degree of preference.
The local + speaker can select only one of these routes for inclusion in the + associated Loc-RIB. The local speaker considers all routes with the + same degrees of preference, both those received from internal peers, + and those received from external peers. + + The following tie-breaking procedure assumes that, for each candidate + route, all the BGP speakers within an autonomous system can ascertain + the cost of a path (interior distance) to the address depicted by the + NEXT_HOP attribute of the route, and follow the same route selection + algorithm. + + The tie-breaking algorithm begins by considering all equally + preferable routes to the same destination, and then selects routes to + be removed from consideration. The algorithm terminates as soon as + only one route remains in consideration. The criteria MUST be + applied in the order specified. + + Several of the criteria are described using pseudo-code. Note that + the pseudo-code shown was chosen for clarity, not efficiency. It is + not intended to specify any particular implementation. BGP + implementations MAY use any algorithm that produces the same results + as those described here. + + a) Remove from consideration all routes that are not tied for + having the smallest number of AS numbers present in their + AS_PATH attributes. Note that when counting this number, an + AS_SET counts as 1, no matter how many ASes are in the set. + + b) Remove from consideration all routes that are not tied for + having the lowest Origin number in their Origin attribute. + + + + + + + +Rekhter, et al. Standards Track [Page 80] + +RFC 4271 BGP-4 January 2006 + + + c) Remove from consideration routes with less-preferred + MULTI_EXIT_DISC attributes. MULTI_EXIT_DISC is only comparable + between routes learned from the same neighboring AS (the + neighboring AS is determined from the AS_PATH attribute). 
+ Routes that do not have the MULTI_EXIT_DISC attribute are + considered to have the lowest possible MULTI_EXIT_DISC value. + + This is also described in the following procedure: + + for m = all routes still under consideration + for n = all routes still under consideration + if (neighborAS(m) == neighborAS(n)) and (MED(n) < MED(m)) + remove route m from consideration + + In the pseudo-code above, MED(n) is a function that returns the + value of route n's MULTI_EXIT_DISC attribute. If route n has + no MULTI_EXIT_DISC attribute, the function returns the lowest + possible MULTI_EXIT_DISC value (i.e., 0). + + Similarly, neighborAS(n) is a function that returns the + neighbor AS from which the route was received. If the route is + learned via IBGP, and the other IBGP speaker didn't originate + the route, it is the neighbor AS from which the other IBGP + speaker learned the route. If the route is learned via IBGP, + and the other IBGP speaker either (a) originated the route, or + (b) created the route by aggregation and the AS_PATH attribute + of the aggregate route is either empty or begins with an + AS_SET, it is the local AS. + + If a MULTI_EXIT_DISC attribute is removed before re-advertising + a route into IBGP, then comparison based on the received EBGP + MULTI_EXIT_DISC attribute MAY still be performed. If an + implementation chooses to remove MULTI_EXIT_DISC, then the + optional comparison on MULTI_EXIT_DISC, if performed, MUST be + performed only among EBGP-learned routes. The best EBGP- + learned route may then be compared with IBGP-learned routes + after the removal of the MULTI_EXIT_DISC attribute. If + MULTI_EXIT_DISC is removed from a subset of EBGP-learned + routes, and the selected "best" EBGP-learned route will not + have MULTI_EXIT_DISC removed, then the MULTI_EXIT_DISC must be + used in the comparison with IBGP-learned routes. 
For IBGP- + learned routes, the MULTI_EXIT_DISC MUST be used in route + comparisons that reach this step in the Decision Process. + Including the MULTI_EXIT_DISC of an EBGP-learned route in the + comparison with an IBGP-learned route, then removing the + MULTI_EXIT_DISC attribute, and advertising the route has been + proven to cause route loops. + + + + +Rekhter, et al. Standards Track [Page 81] + +RFC 4271 BGP-4 January 2006 + + + d) If at least one of the candidate routes was received via EBGP, + remove from consideration all routes that were received via + IBGP. + + e) Remove from consideration any routes with less-preferred + interior cost. The interior cost of a route is determined by + calculating the metric to the NEXT_HOP for the route using the + Routing Table. If the NEXT_HOP hop for a route is reachable, + but no cost can be determined, then this step should be skipped + (equivalently, consider all routes to have equal costs). + + This is also described in the following procedure. + + for m = all routes still under consideration + for n = all routes in still under consideration + if (cost(n) is lower than cost(m)) + remove m from consideration + + In the pseudo-code above, cost(n) is a function that returns + the cost of the path (interior distance) to the address given + in the NEXT_HOP attribute of the route. + + f) Remove from consideration all routes other than the route that + was advertised by the BGP speaker with the lowest BGP + Identifier value. + + g) Prefer the route received from the lowest peer address. + +9.1.3. 
Phase 3: Route Dissemination + + The Phase 3 decision function is invoked on completion of Phase 2, or + when any of the following events occur: + + a) when routes in the Loc-RIB to local destinations have changed + + b) when locally generated routes learned by means outside of BGP + have changed + + c) when a new BGP speaker connection has been established + + The Phase 3 function is a separate process that completes when it has + no further work to do. The Phase 3 Routing Decision function is + blocked from running while the Phase 2 decision function is in + process. + + All routes in the Loc-RIB are processed into Adj-RIBs-Out according + to configured policy. This policy MAY exclude a route in the Loc-RIB + from being installed in a particular Adj-RIB-Out. A route SHALL NOT + + + +Rekhter, et al. Standards Track [Page 82] + +RFC 4271 BGP-4 January 2006 + + + be installed in the Adj-RIB-Out unless the destination, and NEXT_HOP + described by this route, may be forwarded appropriately by the + Routing Table. If a route in Loc-RIB is excluded from a particular + Adj-RIB-Out, the previously advertised route in that Adj-RIB-Out MUST + be withdrawn from service by means of an UPDATE message (see 9.2). + + Route aggregation and information reduction techniques (see Section + 9.2.2.1) may optionally be applied. + + Any local policy that results in routes being added to an Adj-RIB-Out + without also being added to the local BGP speaker's forwarding table + is outside the scope of this document. + + When the updating of the Adj-RIBs-Out and the Routing Table is + complete, the local BGP speaker runs the Update-Send process of 9.2. + +9.1.4. Overlapping Routes + + A BGP speaker may transmit routes with overlapping Network Layer + Reachability Information (NLRI) to another BGP speaker. NLRI overlap + occurs when a set of destinations are identified in non-matching + multiple routes. Because BGP encodes NLRI using IP prefixes, overlap + will always exhibit subset relationships.
A route describing a + smaller set of destinations (a longer prefix) is said to be more + specific than a route describing a larger set of destinations (a + shorter prefix); similarly, a route describing a larger set of + destinations is said to be less specific than a route describing a + smaller set of destinations. + + The precedence relationship effectively decomposes less specific + routes into two parts: + + - a set of destinations described only by the less specific route, + and + + - a set of destinations described by the overlap of the less + specific and the more specific routes + + The set of destinations described by the overlap represents a portion + of the less specific route that is feasible, but is not currently in + use. If a more specific route is later withdrawn, the set of + destinations described by the overlap will still be reachable using + the less specific route. + + If a BGP speaker receives overlapping routes, the Decision Process + MUST consider both routes based on the configured acceptance policy. + If both a less and a more specific route are accepted, then the + Decision Process MUST install, in Loc-RIB, either both the less and + + + +Rekhter, et al. Standards Track [Page 83] + +RFC 4271 BGP-4 January 2006 + + + the more specific routes or aggregate the two routes and install, in + Loc-RIB, the aggregated route, provided that both routes have the + same value of the NEXT_HOP attribute. + + If a BGP speaker chooses to aggregate, then it SHOULD either include + all ASes used to form the aggregate in an AS_SET, or add the + ATOMIC_AGGREGATE attribute to the route. This attribute is now + primarily informational. With the elimination of IP routing + protocols that do not support classless routing, and the elimination + of router and host implementations that do not support classless + routing, there is no longer a need to de-aggregate. Routes SHOULD + NOT be de-aggregated. 
In particular, a route that carries the + ATOMIC_AGGREGATE attribute MUST NOT be de-aggregated. That is, the + NLRI of this route cannot be more specific. Forwarding along such a + route does not guarantee that IP packets will actually traverse only + ASes listed in the AS_PATH attribute of the route. + +9.2. Update-Send Process + + The Update-Send process is responsible for advertising UPDATE + messages to all peers. For example, it distributes the routes chosen + by the Decision Process to other BGP speakers, which may be located + in either the same autonomous system or a neighboring autonomous + system. + + When a BGP speaker receives an UPDATE message from an internal peer, + the receiving BGP speaker SHALL NOT re-distribute the routing + information contained in that UPDATE message to other internal peers + (unless the speaker acts as a BGP Route Reflector [RFC2796]). + + As part of Phase 3 of the route selection process, the BGP speaker + has updated its Adj-RIBs-Out. All newly installed routes and all + newly unfeasible routes for which there is no replacement route SHALL + be advertised to its peers by means of an UPDATE message. + + A BGP speaker SHOULD NOT advertise a given feasible BGP route from + its Adj-RIB-Out if it would produce an UPDATE message containing the + same BGP route as was previously advertised. + + Any routes in the Loc-RIB marked as unfeasible SHALL be removed. + Changes to the reachable destinations within its own autonomous + system SHALL also be advertised in an UPDATE message. + + If, due to the limits on the maximum size of an UPDATE message (see + Section 4), a single route doesn't fit into the message, the BGP + speaker MUST NOT advertise the route to its peers and MAY choose to + log an error locally. + + + + +Rekhter, et al. Standards Track [Page 84] + +RFC 4271 BGP-4 January 2006 + + +9.2.1.
Controlling Routing Traffic Overhead + + The BGP protocol constrains the amount of routing traffic (that is, + UPDATE messages), in order to limit both the link bandwidth needed to + advertise UPDATE messages and the processing power needed by the + Decision Process to digest the information contained in the UPDATE + messages. + + Frequency of Route Advertisement + + The parameter MinRouteAdvertisementIntervalTimer determines the + minimum amount of time that must elapse between an advertisement + and/or withdrawal of routes to a particular destination by a BGP + speaker to a peer. This rate limiting procedure applies on a per- + destination basis, although the value of + MinRouteAdvertisementIntervalTimer is set on a per BGP peer basis. + + Two UPDATE messages sent by a BGP speaker to a peer that advertise + feasible routes and/or withdrawal of unfeasible routes to some common + set of destinations MUST be separated by at least + MinRouteAdvertisementIntervalTimer. This can only be achieved by + keeping a separate timer for each common set of destinations. This + would be unwarranted overhead. Any technique that ensures that the + interval between two UPDATE messages sent from a BGP speaker to a + peer that advertise feasible routes and/or withdrawal of unfeasible + routes to some common set of destinations will be at least + MinRouteAdvertisementIntervalTimer, and that also ensures a + constant upper bound on the interval, is acceptable. + + Since fast convergence is needed within an autonomous system, either + (a) the MinRouteAdvertisementIntervalTimer used for internal peers + SHOULD be shorter than the MinRouteAdvertisementIntervalTimer used + for external peers, or (b) the procedure described in this section + SHOULD NOT apply to routes sent to internal peers. + + This procedure does not limit the rate of route selection, but only + the rate of route advertisement. 
If new routes are selected multiple + times while awaiting the expiration of + MinRouteAdvertisementIntervalTimer, the last route selected SHALL be + advertised at the end of MinRouteAdvertisementIntervalTimer. + + Frequency of Route Origination + + The parameter MinASOriginationIntervalTimer determines the minimum + amount of time that must elapse between successive advertisements of + UPDATE messages that report changes within the advertising BGP + speaker's own autonomous systems. + + + + +Rekhter, et al. Standards Track [Page 85] + +RFC 4271 BGP-4 January 2006 + + +9.2.2. Efficient Organization of Routing Information + + Having selected the routing information it will advertise, a BGP + speaker may avail itself of several methods to organize this + information in an efficient manner. + + Information Reduction + + Information reduction may imply a reduction in granularity of policy + control - after information is collapsed, the same policies will + apply to all destinations and paths in the equivalence class. + + The Decision Process may optionally reduce the amount of information + that it will place in the Adj-RIBs-Out by any of the following + methods: + + a) Network Layer Reachability Information (NLRI): + + Destination IP addresses can be represented as IP address + prefixes. In cases where there is a correspondence between the + address structure and the systems under control of an + autonomous system administrator, it will be possible to reduce + the size of the NLRI carried in the UPDATE messages. + + b) AS_PATHs: + + AS path information can be represented as ordered AS_SEQUENCEs + or unordered AS_SETs. AS_SETs are used in the route + aggregation algorithm described in Section 9.2.2.2 + (Aggregating Routing Information). They + reduce the size of the AS_PATH information by listing each AS + number only once, regardless of how many times it may have + appeared in multiple AS_PATHs that were aggregated. 
+ + An AS_SET implies that the destinations listed in the NLRI can + be reached through paths that traverse at least some of the + constituent autonomous systems. AS_SETs provide sufficient + information to avoid routing information looping; however, + their use may prune potentially feasible paths because such + paths are no longer listed individually in the form of + AS_SEQUENCEs. In practice, this is not likely to be a problem + because once an IP packet arrives at the edge of a group of + autonomous systems, the BGP speaker is likely to have more + detailed path information and can distinguish individual paths + from destinations. + + + + + + + +Rekhter, et al. Standards Track [Page 86] + +RFC 4271 BGP-4 January 2006 + + + Aggregating Routing Information + + Aggregation is the process of combining the characteristics of + several different routes in such a way that a single route can be + advertised. Aggregation can occur as part of the Decision Process to + reduce the amount of routing information that will be placed in the + Adj-RIBs-Out. + + Aggregation reduces the amount of information that a BGP speaker must + store and exchange with other BGP speakers. Routes can be aggregated + by applying the following procedure, separately, to path attributes + of the same type and to the Network Layer Reachability Information. + + Routes that have different MULTI_EXIT_DISC attributes SHALL NOT be + aggregated. + + If the aggregated route has an AS_SET as the first element in its + AS_PATH attribute, then the router that originates the route SHOULD + NOT advertise the MULTI_EXIT_DISC attribute with this route. + + Path attributes that have different type codes cannot be aggregated + together. 
Path attributes of the same type code may be aggregated, + according to the following rules: + + NEXT_HOP: + When aggregating routes that have different NEXT_HOP + attributes, the NEXT_HOP attribute of the aggregated route + SHALL identify an interface on the BGP speaker that performs + the aggregation. + + ORIGIN attribute: + If at least one route among routes that are aggregated has + ORIGIN with the value INCOMPLETE, then the aggregated route + MUST have the ORIGIN attribute with the value INCOMPLETE. + Otherwise, if at least one route among routes that are + aggregated has ORIGIN with the value EGP, then the aggregated + route MUST have the ORIGIN attribute with the value EGP. In + all other cases, the value of the ORIGIN attribute of the + aggregated route is IGP. + + AS_PATH attribute: + If routes to be aggregated have identical AS_PATH attributes, + then the aggregated route has the same AS_PATH attribute as + each individual route. + + For the purpose of aggregating AS_PATH attributes, we model + each AS within the AS_PATH attribute as a tuple <type, value>, + where "type" identifies a type of the path segment the AS + + + +Rekhter, et al. Standards Track [Page 87] + +RFC 4271 BGP-4 January 2006 + + + belongs to (e.g., AS_SEQUENCE, AS_SET), and "value" identifies + the AS number. If the routes to be aggregated have different + AS_PATH attributes, then the aggregated AS_PATH attribute SHALL + satisfy all of the following conditions: + + - all tuples of type AS_SEQUENCE in the aggregated AS_PATH + SHALL appear in all of the AS_PATHs in the initial set of + routes to be aggregated. + + - all tuples of type AS_SET in the aggregated AS_PATH SHALL + appear in at least one of the AS_PATHs in the initial set + (they may appear as either AS_SET or AS_SEQUENCE types). 
+ + - for any tuple X of type AS_SEQUENCE in the aggregated + AS_PATH, which precedes tuple Y in the aggregated AS_PATH, + X precedes Y in each AS_PATH in the initial set, which + contains Y, regardless of the type of Y. + + - No tuple of type AS_SET with the same value SHALL appear + more than once in the aggregated AS_PATH. + + - Multiple tuples of type AS_SEQUENCE with the same value may + appear in the aggregated AS_PATH only when adjacent to + another tuple of the same type and value. + + An implementation may choose any algorithm that conforms to + these rules. At a minimum, a conformant implementation SHALL + be able to perform the following algorithm that meets all of + the above conditions: + + - determine the longest leading sequence of tuples (as + defined above) common to all the AS_PATH attributes of the + routes to be aggregated. Make this sequence the leading + sequence of the aggregated AS_PATH attribute. + + - set the type of the rest of the tuples from the AS_PATH + attributes of the routes to be aggregated to AS_SET, and + append them to the aggregated AS_PATH attribute. + + - if the aggregated AS_PATH has more than one tuple with the + same value (regardless of tuple's type), eliminate all but + one such tuple by deleting tuples of the type AS_SET from + the aggregated AS_PATH attribute. + + - for each pair of adjacent tuples in the aggregated AS_PATH, + if both tuples have the same type, merge them together, as + long as doing so will not cause a segment with a length + greater than 255 to be generated. + + + +Rekhter, et al. Standards Track [Page 88] + +RFC 4271 BGP-4 January 2006 + + + Appendix F, Section F.6 presents another algorithm that + satisfies the conditions and allows for more complex policy + configurations. + + ATOMIC_AGGREGATE: + If at least one of the routes to be aggregated has + ATOMIC_AGGREGATE path attribute, then the aggregated route + SHALL have this attribute as well. 
+ + AGGREGATOR: + Any AGGREGATOR attributes from the routes to be aggregated MUST + NOT be included in the aggregated route. The BGP speaker + performing the route aggregation MAY attach a new AGGREGATOR + attribute (see Section 5.1.7). + +9.3. Route Selection Criteria + + Generally, additional rules for comparing routes among several + alternatives are outside the scope of this document. There are two + exceptions: + + - If the local AS appears in the AS path of the new route being + considered, then that new route cannot be viewed as better than + any other route (provided that the speaker is configured to + accept such routes). If such a route were ever used, a routing + loop could result. + + - In order to achieve a successful distributed operation, only + routes with a likelihood of stability can be chosen. Thus, an + AS SHOULD avoid using unstable routes, and it SHOULD NOT make + rapid, spontaneous changes to its choice of route. Quantifying + the terms "unstable" and "rapid" (from the previous sentence) + will require experience, but the principle is clear. Routes + that are unstable can be "penalized" (e.g., by using the + procedures described in [RFC2439]). + +9.4. Originating BGP routes + + A BGP speaker may originate BGP routes by injecting routing + information acquired by some other means (e.g., via an IGP) into BGP. + A BGP speaker that originates BGP routes assigns the degree of + preference (e.g., according to local configuration) to these routes + by passing them through the Decision Process (see Section 9.1). + These routes MAY also be distributed to other BGP speakers within the + local AS as part of the update process (see Section 9.2). The + decision of whether to distribute non-BGP acquired routes within an + AS via BGP depends on the environment within the AS (e.g., type of + IGP) and SHOULD be controlled via configuration. + + + +Rekhter, et al. Standards Track [Page 89] + +RFC 4271 BGP-4 January 2006 + + +10. 
BGP Timers + + BGP employs five timers: ConnectRetryTimer (see Section 8), HoldTimer + (see Section 4.2), KeepaliveTimer (see Section 8), + MinASOriginationIntervalTimer (see Section 9.2.1.2, Frequency of + Route Origination), and MinRouteAdvertisementIntervalTimer (see + Section 9.2.1.1, Frequency of Route Advertisement). + + Two optional timers MAY be supported by BGP: DelayOpenTimer and + IdleHoldTimer (see Section 8). Section 8 describes their use. The full + operation of these optional timers is outside the scope of this + document. + + ConnectRetryTime is a mandatory FSM attribute that stores the initial + value for the ConnectRetryTimer. The suggested default value for the + ConnectRetryTime is 120 seconds. + + HoldTime is a mandatory FSM attribute that stores the initial value + for the HoldTimer. The suggested default value for the HoldTime is + 90 seconds. + + During some portions of the state machine (see Section 8), the + HoldTimer is set to a large value. The suggested default for this + large value is 4 minutes. + + The KeepaliveTime is a mandatory FSM attribute that stores the + initial value for the KeepaliveTimer. The suggested default value + for the KeepaliveTime is 1/3 of the HoldTime. + + The suggested default value for the MinASOriginationIntervalTimer is + 15 seconds. + + The suggested default value for the + MinRouteAdvertisementIntervalTimer on EBGP connections is 30 seconds. + + The suggested default value for the + MinRouteAdvertisementIntervalTimer on IBGP connections is 5 seconds. + + An implementation of BGP MUST allow the HoldTimer to be configurable + on a per-peer basis, and MAY allow the other timers to be + configurable. + + To minimize the likelihood that the distribution of BGP messages by a + given BGP speaker will contain peaks, jitter SHOULD be applied to the + timers associated with MinASOriginationIntervalTimer, KeepaliveTimer, + MinRouteAdvertisementIntervalTimer, and ConnectRetryTimer. 
A given + BGP speaker MAY apply the same jitter to each of these quantities, + regardless of the destinations to which the updates are being sent; + that is, jitter need not be configured on a per-peer basis. + + + +Rekhter, et al. Standards Track [Page 90] + +RFC 4271 BGP-4 January 2006 + + + The suggested default amount of jitter SHALL be determined by + multiplying the base value of the appropriate timer by a random + factor, which is uniformly distributed in the range from 0.75 to 1.0. + A new random value SHOULD be picked each time the timer is set. The + range of the jitter's random value MAY be configurable. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 91] + +RFC 4271 BGP-4 January 2006 + + +Appendix A. Comparison with RFC 1771 + + There are numerous editorial changes in comparison to [RFC1771] (too + many to list here). + + The following list the technical changes: + + Changes to reflect the usage of features such as TCP MD5 + [RFC2385], BGP Route Reflectors [RFC2796], BGP Confederations + [RFC3065], and BGP Route Refresh [RFC2918]. + + Clarification of the use of the BGP Identifier in the AGGREGATOR + attribute. + + Procedures for imposing an upper bound on the number of prefixes + that a BGP speaker would accept from a peer. + + The ability of a BGP speaker to include more than one instance of + its own AS in the AS_PATH attribute for the purpose of inter-AS + traffic engineering. + + Clarification of the various types of NEXT_HOPs. + + Clarification of the use of the ATOMIC_AGGREGATE attribute. + + The relationship between the immediate next hop, and the next hop + as specified in the NEXT_HOP path attribute. + + Clarification of the tie-breaking procedures. + + Clarification of the frequency of route advertisements. + + Optional Parameter Type 1 (Authentication Information) has been + deprecated. 
+ + UPDATE Message Error subcode 7 (AS Routing Loop) has been + deprecated. + + OPEN Message Error subcode 5 (Authentication Failure) has been + deprecated. + + Use of the Marker field for authentication has been deprecated. + + Implementations MUST support TCP MD5 [RFC2385] for authentication. + + Clarification of BGP FSM. + + + + + +Rekhter, et al. Standards Track [Page 92] + +RFC 4271 BGP-4 January 2006 + + +Appendix B. Comparison with RFC 1267 + + All the changes listed in Appendix A, plus the following. + + BGP-4 is capable of operating in an environment where a set of + reachable destinations may be expressed via a single IP prefix. The + concept of network classes, or subnetting, is foreign to BGP-4. To + accommodate these capabilities, BGP-4 changes the semantics and + encoding associated with the AS_PATH attribute. New text has been + added to define semantics associated with IP prefixes. These + abilities allow BGP-4 to support the proposed supernetting scheme + [RFC1518, RFC1519]. + + To simplify configuration, this version introduces a new attribute, + LOCAL_PREF, that facilitates route selection procedures. + + The INTER_AS_METRIC attribute has been renamed MULTI_EXIT_DISC. + + A new attribute, ATOMIC_AGGREGATE, has been introduced to insure that + certain aggregates are not de-aggregated. Another new attribute, + AGGREGATOR, can be added to aggregate routes to advertise which AS + and which BGP speaker within that AS caused the aggregation. + + To ensure that Hold Timers are symmetric, the Hold Timer is now + negotiated on a per-connection basis. Hold Timers of zero are now + supported. + +Appendix C. Comparison with RFC 1163 + + All of the changes listed in Appendices A and B, plus the following. + + To detect and recover from BGP connection collision, a new field (BGP + Identifier) has been added to the OPEN message. New text (Section + 6.8) has been added to specify the procedure for detecting and + recovering from collision. 
+ + The new document no longer restricts the router that is passed in the + NEXT_HOP path attribute to be part of the same Autonomous System as + the BGP Speaker. + + The new document optimizes and simplifies the exchange of information + about previously reachable routes. + + + + + + + + + +Rekhter, et al. Standards Track [Page 93] + +RFC 4271 BGP-4 January 2006 + + +Appendix D. Comparison with RFC 1105 + + All of the changes listed in Appendices A, B, and C, plus the + following. + + Minor changes to the [RFC1105] Finite State Machine were necessary to + accommodate the TCP user interface provided by BSD version 4.3. + + The notion of Up/Down/Horizontal relations presented in RFC 1105 has + been removed from the protocol. + + The changes in the message format from RFC 1105 are as follows: + + 1. The Hold Time field has been removed from the BGP header and + added to the OPEN message. + + 2. The version field has been removed from the BGP header and + added to the OPEN message. + + 3. The Link Type field has been removed from the OPEN message. + + 4. The OPEN CONFIRM message has been eliminated and replaced with + implicit confirmation, provided by the KEEPALIVE message. + + 5. The format of the UPDATE message has been changed + significantly. New fields were added to the UPDATE message to + support multiple path attributes. + + 6. The Marker field has been expanded and its role broadened to + support authentication. + + Note that quite often BGP, as specified in RFC 1105, is referred to + as BGP-1; BGP, as specified in [RFC1163], is referred to as BGP-2; + BGP, as specified in RFC 1267 is referred to as BGP-3; and BGP, as + specified in this document is referred to as BGP-4. + +Appendix E. TCP Options that May Be Used with BGP + + If a local system TCP user interface supports the TCP PUSH function, + then each BGP message SHOULD be transmitted with PUSH flag set. + Setting PUSH flag forces BGP messages to be transmitted to the + receiver promptly. 
+ + If a local system TCP user interface supports setting the DSCP field + [RFC2474] for TCP connections, then the TCP connection used by BGP + SHOULD be opened with bits 0-2 of the DSCP field set to 110 (binary). + + An implementation MUST support the TCP MD5 option [RFC2385]. + + + +Rekhter, et al. Standards Track [Page 94] + +RFC 4271 BGP-4 January 2006 + + +Appendix F. Implementation Recommendations + + This section presents some implementation recommendations. + +Appendix F.1. Multiple Networks Per Message + + The BGP protocol allows for multiple address prefixes with the same + path attributes to be specified in one message. Using this + capability is highly recommended. With one address prefix per + message there is a substantial increase in overhead in the receiver. + Not only does the system overhead increase due to the reception of + multiple messages, but the overhead of scanning the routing table for + updates to BGP peers and other routing protocols (and sending the + associated messages) is incurred multiple times as well. + + One method of building messages that contain many address prefixes + per path attribute set from a routing table that is not organized on + a per path attribute set basis is to build many messages as the + routing table is scanned. As each address prefix is processed, a + message for the associated set of path attributes is allocated, if it + does not exist, and the new address prefix is added to it. If such a + message exists, the new address prefix is appended to it. If the + message lacks the space to hold the new address prefix, it is + transmitted, a new message is allocated, and the new address prefix + is inserted into the new message. When the entire routing table has + been scanned, all allocated messages are sent and their resources are + released. 
Maximum compression is achieved when all destinations + covered by the address prefixes share a common set of path + attributes, making it possible to send many address prefixes in one + 4096-byte message. + + When peering with a BGP implementation that does not compress + multiple address prefixes into one message, it may be necessary to + take steps to reduce the overhead from the flood of data received + when a peer is acquired or when a significant network topology change + occurs. One method of doing this is to limit the rate of updates. + This will eliminate the redundant scanning of the routing table to + provide flash updates for BGP peers and other routing protocols. A + disadvantage of this approach is that it increases the propagation + latency of routing information. By choosing a minimum flash update + interval that is not much greater than the time it takes to process + the multiple messages, this latency should be minimized. A better + method would be to read all received messages before sending updates. + + + + + + + + +Rekhter, et al. Standards Track [Page 95] + +RFC 4271 BGP-4 January 2006 + + +Appendix F.2. Reducing Route Flapping + + To avoid excessive route flapping, a BGP speaker that needs to + withdraw a destination and send an update about a more specific or + less specific route should combine them into the same UPDATE message. + +Appendix F.3. Path Attribute Ordering + + Implementations that combine update messages (as described above in + Section 6.1) may prefer to see all path attributes presented in a + known order. This permits them to quickly identify sets of + attributes from different update messages that are semantically + identical. To facilitate this, it is a useful optimization to order + the path attributes according to type code. This optimization is + entirely optional. + +Appendix F.4. AS_SET Sorting + + Another useful optimization that can be done to simplify this + situation is to sort the AS numbers found in an AS_SET. 
This + optimization is entirely optional. + +Appendix F.5. Control Over Version Negotiation + + Because BGP-4 is capable of carrying aggregated routes that cannot be + properly represented in BGP-3, an implementation that supports BGP-4 + and another BGP version should provide the capability to only speak + BGP-4 on a per-peer basis. + +Appendix F.6. Complex AS_PATH Aggregation + + An implementation that chooses to provide a path aggregation + algorithm retaining significant amounts of path information may wish + to use the following procedure: + + For the purpose of aggregating AS_PATH attributes of two routes, + we model each AS as a tuple <type, value>, where "type" identifies + a type of the path segment the AS belongs to (e.g., AS_SEQUENCE, + AS_SET), and "value" is the AS number. Two ASes are said to be + the same if their corresponding <type, value> tuples are the same. + + The algorithm to aggregate two AS_PATH attributes works as + follows: + + a) Identify the same ASes (as defined above) within each + AS_PATH attribute that are in the same relative order within + both AS_PATH attributes. Two ASes, X and Y, are said to be + in the same order if either: + + + +Rekhter, et al. Standards Track [Page 96] + +RFC 4271 BGP-4 January 2006 + + + - X precedes Y in both AS_PATH attributes, or + - Y precedes X in both AS_PATH attributes. + + b) The aggregated AS_PATH attribute consists of ASes identified + in (a), in exactly the same order as they appear in the + AS_PATH attributes to be aggregated. If two consecutive + ASes identified in (a) do not immediately follow each other + in both of the AS_PATH attributes to be aggregated, then the + intervening ASes (ASes that are between the two consecutive + ASes that are the same) in both attributes are combined into + an AS_SET path segment that consists of the intervening ASes + from both AS_PATH attributes. This segment is then placed + between the two consecutive ASes identified in (a) of the + aggregated attribute. 
If two consecutive ASes identified in + (a) immediately follow each other in one attribute, but do + not follow in another, then the intervening ASes of the + latter are combined into an AS_SET path segment. This + segment is then placed between the two consecutive ASes + identified in (a) of the aggregated attribute. + + c) For each pair of adjacent tuples in the aggregated AS_PATH, + if both tuples have the same type, merge them together if + doing so will not cause a segment of a length greater than + 255 to be generated. + + If, as a result of the above procedure, a given AS number appears + more than once within the aggregated AS_PATH attribute, all but + the last instance (rightmost occurrence) of that AS number should + be removed from the aggregated AS_PATH attribute. + +Security Considerations + + A BGP implementation MUST support the authentication mechanism + specified in RFC 2385 [RFC2385]. The authentication provided by this + mechanism could be done on a per-peer basis. + + BGP makes use of TCP for reliable transport of its traffic between + peer routers. To provide connection-oriented integrity and data + origin authentication on a point-to-point basis, BGP specifies use of + the mechanism defined in RFC 2385. These services are intended to + detect and reject active wiretapping attacks against the inter-router + TCP connections. Absent the use of mechanisms that effect these + security services, attackers can disrupt these TCP connections and/or + masquerade as a legitimate peer router. Because the mechanism + defined in the RFC does not provide peer-entity authentication, these + connections may be subject to some forms of replay attacks that will + not be detected at the TCP layer. Such attacks might result in + delivery (from TCP) of "broken" or "spoofed" BGP messages. + + + +Rekhter, et al. 
Standards Track [Page 97] + +RFC 4271 BGP-4 January 2006 + + + The mechanism defined in RFC 2385 augments the normal TCP checksum + with a 16-byte message authentication code (MAC) that is computed + over the same data as the TCP checksum. This MAC is based on a one- + way hash function (MD5) and use of a secret key. The key is shared + between peer routers and is used to generate MAC values that are not + readily computed by an attacker who does not have access to the key. + A compliant implementation must support this mechanism, and must + allow a network administrator to activate it on a per-peer basis. + + RFC 2385 does not specify a means of managing (e.g., generating, + distributing, and replacing) the keys used to compute the MAC. RFC + 3562 [RFC3562] (an informational document) provides some guidance in + this area, and provides rationale to support this guidance. It notes + that a distinct key should be used for communication with each + protected peer. If the same key is used for multiple peers, the + offered security services may be degraded, e.g., due to an increased + risk of compromise at one router that adversely affects other + routers. + + The keys used for MAC computation should be changed periodically, to + minimize the impact of a key compromise or successful cryptanalytic + attack. RFC 3562 suggests a crypto period (the interval during which + a key is employed) of, at most, 90 days. More frequent key changes + reduce the likelihood that replay attacks (as described above) will + be feasible. However, absent a standard mechanism for effecting such + changes in a coordinated fashion between peers, one cannot assume + that BGP-4 implementations complying with this RFC will support + frequent key changes. + + Obviously, each key should also be chosen to be difficult for an + attacker to guess. The techniques specified in RFC 1750 for random + number generation provide a guide for generation of values that could + be used as keys. 
RFC 2385 calls for implementations to support keys + "composed of a string of printable ASCII of 80 bytes or less." RFC + 3562 suggests keys used in this context be 12 to 24 bytes of random + (pseudo-random) bits. This is fairly consistent with suggestions for + analogous MAC algorithms, which typically employ keys in the range of + 16 to 20 bytes. To provide enough random bits at the low end of this + range, RFC 3562 also observes that a typical ASCII text string would + have to be close to the upper bound for the key length specified in + RFC 2385. + + BGP vulnerabilities analysis is discussed in [RFC4272]. + + + + + + + + +Rekhter, et al. Standards Track [Page 98] + +RFC 4271 BGP-4 January 2006 + + +IANA Considerations + + All the BGP messages contain an 8-bit message type, for which IANA + has created and is maintaining a registry entitled "BGP Message + Types". This document defines the following message types: + + Name Value Definition + ---- ----- ---------- + OPEN 1 See Section 4.2 + UPDATE 2 See Section 4.3 + NOTIFICATION 3 See Section 4.5 + KEEPALIVE 4 See Section 4.4 + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. + + The BGP UPDATE messages may carry one or more Path Attributes, where + each Attribute contains an 8-bit Attribute Type Code. IANA is + already maintaining such a registry, entitled "BGP Path Attributes". 
+ This document defines the following Path Attributes Type Codes: + + Name Value Definition + ---- ----- ---------- + ORIGIN 1 See Section 5.1.1 + AS_PATH 2 See Section 5.1.2 + NEXT_HOP 3 See Section 5.1.3 + MULTI_EXIT_DISC 4 See Section 5.1.4 + LOCAL_PREF 5 See Section 5.1.5 + ATOMIC_AGGREGATE 6 See Section 5.1.6 + AGGREGATOR 7 See Section 5.1.7 + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. + + The BGP NOTIFICATION message carries an 8-bit Error Code, for which + IANA has created and is maintaining a registry entitled "BGP Error + Codes". This document defines the following Error Codes: + + Name Value Definition + ------------ ----- ---------- + Message Header Error 1 Section 6.1 + OPEN Message Error 2 Section 6.2 + UPDATE Message Error 3 Section 6.3 + Hold Timer Expired 4 Section 6.5 + Finite State Machine Error 5 Section 6.6 + Cease 6 Section 6.7 + + + +Rekhter, et al. Standards Track [Page 99] + +RFC 4271 BGP-4 January 2006 + + + Future assignments are to be made using either the Standards Action + process defined in [RFC2434], or the Early IANA Allocation process + defined in [RFC4020]. Assignments consist of a name and the value. + + The BGP NOTIFICATION message carries an 8-bit Error Subcode, where + each Subcode has to be defined within the context of a particular + Error Code, and thus has to be unique only within that context. + + IANA has created and is maintaining a set of registries, "Error + Subcodes", with a separate registry for each BGP Error Code. Future + assignments are to be made using either the Standards Action process + defined in [RFC2434], or the Early IANA Allocation process defined in + [RFC4020]. Assignments consist of a name and the value. 
+ + This document defines the following Message Header Error subcodes: + + Name Value Definition + -------------------- ----- ---------- + Connection Not Synchronized 1 See Section 6.1 + Bad Message Length 2 See Section 6.1 + Bad Message Type 3 See Section 6.1 + + This document defines the following OPEN Message Error subcodes: + + Name Value Definition + -------------------- ----- ---------- + Unsupported Version Number 1 See Section 6.2 + Bad Peer AS 2 See Section 6.2 + Bad BGP Identifier 3 See Section 6.2 + Unsupported Optional Parameter 4 See Section 6.2 + [Deprecated] 5 See Appendix A + Unacceptable Hold Time 6 See Section 6.2 + + This document defines the following UPDATE Message Error subcodes: + + Name Value Definition + -------------------- --- ---------- + Malformed Attribute List 1 See Section 6.3 + Unrecognized Well-known Attribute 2 See Section 6.3 + Missing Well-known Attribute 3 See Section 6.3 + Attribute Flags Error 4 See Section 6.3 + Attribute Length Error 5 See Section 6.3 + Invalid ORIGIN Attribute 6 See Section 6.3 + [Deprecated] 7 See Appendix A + Invalid NEXT_HOP Attribute 8 See Section 6.3 + Optional Attribute Error 9 See Section 6.3 + Invalid Network Field 10 See Section 6.3 + Malformed AS_PATH 11 See Section 6.3 + + + +Rekhter, et al. Standards Track [Page 100] + +RFC 4271 BGP-4 January 2006 + + +Normative References + + [RFC791] Postel, J., "Internet Protocol", STD 5, RFC 791, September + 1981. + + [RFC793] Postel, J., "Transmission Control Protocol", STD 7, RFC + 793, September 1981. + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [RFC2385] Heffernan, A., "Protection of BGP Sessions via the TCP MD5 + Signature Option", RFC 2385, August 1998. + + [RFC2434] Narten, T. and H. Alvestrand, "Guidelines for Writing an + IANA Considerations Section in RFCs", BCP 26, RFC 2434, + October 1998. 
+ +Informative References + + [RFC904] Mills, D., "Exterior Gateway Protocol formal + specification", RFC 904, April 1984. + + [RFC1092] Rekhter, J., "EGP and policy based routing in the new + NSFNET backbone", RFC 1092, February 1989. + + [RFC1093] Braun, H., "NSFNET routing architecture", RFC 1093, + February 1989. + + [RFC1105] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol + (BGP)", RFC 1105, June 1989. + + [RFC1163] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol + (BGP)", RFC 1163, June 1990. + + [RFC1267] Lougheed, K. and Y. Rekhter, "Border Gateway Protocol 3 + (BGP-3)", RFC 1267, October 1991. + + [RFC1771] Rekhter, Y. and T. Li, "A Border Gateway Protocol 4 (BGP- + 4)", RFC 1771, March 1995. + + [RFC1772] Rekhter, Y. and P. Gross, "Application of the Border + Gateway Protocol in the Internet", RFC 1772, March 1995. + + [RFC1518] Rekhter, Y. and T. Li, "An Architecture for IP Address + Allocation with CIDR", RFC 1518, September 1993. + + + + + +Rekhter, et al. Standards Track [Page 101] + +RFC 4271 BGP-4 January 2006 + + + [RFC1519] Fuller, V., Li, T., Yu, J., and K. Varadhan, "Classless + Inter-Domain Routing (CIDR): an Address Assignment and + Aggregation Strategy", RFC 1519, September 1993. + + [RFC1930] Hawkinson, J. and T. Bates, "Guidelines for creation, + selection, and registration of an Autonomous System (AS)", + BCP 6, RFC 1930, March 1996. + + [RFC1997] Chandra, R., Traina, P., and T. Li, "BGP Communities + Attribute", RFC 1997, August 1996. + + [RFC2439] Villamizar, C., Chandra, R., and R. Govindan, "BGP Route + Flap Damping", RFC 2439, November 1998. + + [RFC2474] Nichols, K., Blake, S., Baker, F., and D. Black, + "Definition of the Differentiated Services Field (DS Field) + in the IPv4 and IPv6 Headers", RFC 2474, December 1998. + + [RFC2796] Bates, T., Chandra, R., and E. Chen, "BGP Route Reflection + - An Alternative to Full Mesh IBGP", RFC 2796, April 2000. + + [RFC2858] Bates, T., Rekhter, Y., Chandra, R., and D. 
Katz, + "Multiprotocol Extensions for BGP-4", RFC 2858, June 2000. + + [RFC3392] Chandra, R. and J. Scudder, "Capabilities Advertisement + with BGP-4", RFC 3392, November 2002. + + [RFC2918] Chen, E., "Route Refresh Capability for BGP-4", RFC 2918, + September 2000. + + [RFC3065] Traina, P., McPherson, D., and J. Scudder, "Autonomous + System Confederations for BGP", RFC 3065, February 2001. + + [RFC3562] Leech, M., "Key Management Considerations for the TCP MD5 + Signature Option", RFC 3562, July 2003. + + [IS10747] "Information Processing Systems - Telecommunications and + Information Exchange between Systems - Protocol for + Exchange of Inter-domain Routeing Information among + Intermediate Systems to Support Forwarding of ISO 8473 + PDUs", ISO/IEC IS10747, 1993. + + [RFC4272] Murphy, S., "BGP Security Vulnerabilities Analysis", RFC + 4272, January 2006 + + [RFC4020] Kompella, K. and A. Zinin, "Early IANA Allocation of + Standards Track Code Points", BCP 100, RFC 4020, February + 2005. + + + +Rekhter, et al. Standards Track [Page 102] + +RFC 4271 BGP-4 January 2006 + + +Editors' Addresses + + Yakov Rekhter + Juniper Networks + + EMail: yakov@juniper.net + + + Tony Li + + EMail: tony.li@tony.li + + + Susan Hares + NextHop Technologies, Inc. + 825 Victors Way + Ann Arbor, MI 48108 + + Phone: (734)222-1610 + EMail: skh@nexthop.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Rekhter, et al. Standards Track [Page 103] + +RFC 4271 BGP-4 January 2006 + + +Full Copyright Statement + + Copyright (C) The Internet Society (2006). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. 
+ + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET + ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is provided by the IETF + Administrative Support Activity (IASA). + + + + + + + +Rekhter, et al. 
Standards Track [Page 104] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt new file mode 100644 index 0000000..d4ac8a7 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc4838.txt @@ -0,0 +1,1963 @@ + + + + + + +Network Working Group V. Cerf +Request for Comments: 4838 Google/Jet Propulsion Laboratory +Category: Informational S. Burleigh + A. Hooke + L. Torgerson + NASA/Jet Propulsion Laboratory + R. Durst + K. Scott + The MITRE Corporation + K. Fall + Intel Corporation + H. Weiss + SPARTA, Inc. + April 2007 + + + Delay-Tolerant Networking Architecture + +Status of This Memo + + This memo provides information for the Internet community. It does + not specify an Internet standard of any kind. Distribution of this + memo is unlimited. + +Copyright Notice + + Copyright (C) The IETF Trust (2007). + +IESG Note + + This RFC is a product of the Internet Research Task Force and is not + a candidate for any level of Internet Standard. The IRTF publishes + the results of Internet-related research and development activities. + These results might not be suitable for deployment on the public + Internet. + +Abstract + + This document describes an architecture for delay-tolerant and + disruption-tolerant networks, and is an evolution of the architecture + originally designed for the Interplanetary Internet, a communication + system envisioned to provide Internet-like services across + interplanetary distances in support of deep space exploration. This + document describes an architecture that addresses a variety of + problems with internetworks having operational and performance + characteristics that make conventional (Internet-like) networking + approaches either unworkable or impractical. We define a message- + oriented overlay that exists above the transport (or other) layers of + + + +Cerf, et al. 
Informational [Page 1] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + the networks it interconnects. The document presents a motivation + for the architecture, an architectural overview, review of state + management required for its operation, and a discussion of + application design issues. This document represents the consensus of + the IRTF DTN research group and has been widely reviewed by that + group. + +Table of Contents + + 1. Introduction ....................................................3 + 2. Why an Architecture for Delay-Tolerant Networking? ..............4 + 3. DTN Architectural Description ...................................5 + 3.1. Virtual Message Switching Using Store-and-Forward + Operation ..................................................5 + 3.2. Nodes and Endpoints ........................................7 + 3.3. Endpoint Identifiers (EIDs) and Registrations ..............8 + 3.4. Anycast and Multicast .....................................10 + 3.5. Priority Classes ..........................................10 + 3.6. Postal-Style Delivery Options and Administrative Records ..11 + 3.7. Primary Bundle Fields .....................................15 + 3.8. Routing and Forwarding ....................................16 + 3.9. Fragmentation and Reassembly ..............................18 + 3.10. Reliability and Custody Transfer .........................19 + 3.11. DTN Support for Proxies and Application Layer Gateways ...21 + 3.12. Timestamps and Time Synchronization ......................22 + 3.13. Congestion and Flow Control at the Bundle Layer ..........22 + 3.14. Security .................................................23 + 4. State Management Considerations ................................25 + 4.1. Application Registration State ............................25 + 4.2. Custody Transfer State ....................................26 + 4.3. Bundle Routing and Forwarding State .......................26 + 4.4. 
Security-Related State ....................................27 + 4.5. Policy and Configuration State ............................27 + 5. Application Structuring Issues .................................28 + 6. Convergence Layer Considerations for Use of Underlying + Protocols ......................................................28 + 7. Summary ........................................................29 + 8. Security Considerations ........................................29 + 9. IANA Considerations ............................................30 + 10. Normative References ..........................................30 + 11. Informative References ........................................30 + 12. Acknowledgments ...............................................32 + + + + + + + + + +Cerf, et al. Informational [Page 2] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +1. Introduction + + This document describes an architecture for delay and disruption- + tolerant interoperable networking (DTN). The architecture embraces + the concepts of occasionally-connected networks that may suffer from + frequent partitions and that may be comprised of more than one + divergent set of protocols or protocol families. The basis for this + architecture lies with that of the Interplanetary Internet, which + focused primarily on the issue of deep space communication in high- + delay environments. We expect the DTN architecture described here to + be utilized in various operational environments, including those + subject to disruption and disconnection and those with high-delay; + the case of deep space is one specialized example of these, and is + being pursued as a specialization of this architecture (See [IPN01] + and [SB03] for more details). 
+ + Other networks to which we believe this architecture applies include + sensor-based networks using scheduled intermittent connectivity, + terrestrial wireless networks that cannot ordinarily maintain end-to- + end connectivity, satellite networks with moderate delays and + periodic connectivity, and underwater acoustic networks with moderate + delays and frequent interruptions due to environmental factors. A + DTN tutorial [FW03], aimed at introducing DTN and the types of + networks for which it is designed, is available to introduce new + readers to the fundamental concepts and motivation. More technical + descriptions may be found in [KF03], [JFP04], [JDPF05], and [WJMF05]. + + We define an end-to-end message-oriented overlay called the "bundle + layer" that exists at a layer above the transport (or other) layers + of the networks on which it is hosted and below applications. + Devices implementing the bundle layer are called DTN nodes. The + bundle layer forms an overlay that employs persistent storage to help + combat network interruption. It includes a hop-by-hop transfer of + reliable delivery responsibility and optional end-to-end + acknowledgement. It also includes a number of diagnostic and + management features. For interoperability, it uses a flexible naming + scheme (based on Uniform Resource Identifiers [RFC3986]) capable of + encapsulating different naming and addressing schemes in the same + overall naming syntax. It also has a basic security model, + optionally enabled, aimed at protecting infrastructure from + unauthorized use. + + The bundle layer provides functionality similar to the internet layer + of gateways described in the original ARPANET/Internet designs + [CK74]. It differs from ARPANET gateways, however, because it is + layer-agnostic and is focused on virtual message forwarding rather + than packet switching. However, both generally provide + interoperability between underlying protocols specific to one + + + +Cerf, et al. 
Informational [Page 3] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + environment and those protocols specific to another, and both provide + a store-and-forward forwarding service (with the bundle layer + employing persistent storage for its store and forward function). + + In a sense, the DTN architecture provides a common method for + interconnecting heterogeneous gateways or proxies that employ store- + and-forward message routing to overcome communication disruptions. + It provides services similar to electronic mail, but with enhanced + naming, routing, and security capabilities. Nodes unable to support + the full capabilities required by this architecture may be supported + by application-layer proxies acting as DTN applications. + +2. Why an Architecture for Delay-Tolerant Networking? + + Our motivation for pursuing an architecture for delay tolerant + networking stems from several factors. These factors are summarized + below; much more detail on their rationale can be explored in [SB03], + [KF03], and [DFS02]. 
+ + The existing Internet protocols do not work well for some + environments, due to some fundamental assumptions built into the + Internet architecture: + + - that an end-to-end path between source and destination exists for + the duration of a communication session + + - (for reliable communication) that retransmissions based on timely + and stable feedback from data receivers are an effective means for + repairing errors + + - that end-to-end loss is relatively small + + - that all routers and end stations support the TCP/IP protocols + + - that applications need not worry about communication performance + + - that endpoint-based security mechanisms are sufficient for meeting + most security concerns + + - that packet switching is the most appropriate abstraction for + interoperability and performance + + - that selecting a single route between sender and receiver is + sufficient for achieving acceptable communication performance + + The DTN architecture is conceived to relax most of these assumptions, + based on a number of design principles that are summarized here (and + further discussed in [KF03]): + + + +Cerf, et al. Informational [Page 4] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + - Use variable-length (possibly long) messages (not streams or + limited-sized packets) as the communication abstraction to help + enhance the ability of the network to make good scheduling/path + selection decisions when possible. + + - Use a naming syntax that supports a wide range of naming and + addressing conventions to enhance interoperability. + + - Use storage within the network to support store-and-forward + operation over multiple paths, and over potentially long timescales + (i.e., to support operation in environments where many and/or no + end-to-end paths may ever exist); do not require end-to-end + reliability.
+ + - Provide security mechanisms that protect the infrastructure from + unauthorized use by discarding traffic as quickly as possible. + + - Provide coarse-grained classes of service, delivery options, and a + way to express the useful lifetime of data to allow the network to + better deliver data in serving the needs of applications. + + The use of the bundle layer is guided not only by its own design + principles, but also by a few application design principles: + + - Applications should minimize the number of round-trip exchanges. + + - Applications should cope with restarts after failure while network + transactions remain pending. + + - Applications should inform the network of the useful life and + relative importance of data to be delivered. + + These issues are discussed in further detail in Section 5. + +3. DTN Architectural Description + + The previous section summarized the design principles that guide the + definition of the DTN architecture. This section presents a + description of the major features of the architecture resulting from + design decisions guided by the aforementioned design principles. + +3.1. Virtual Message Switching Using Store-and-Forward Operation + + A DTN-enabled application sends messages of arbitrary length, also + called Application Data Units or ADUs [CT90], which are subject to + any implementation limitations. The relative order of ADUs might not + be preserved. ADUs are typically sent by and delivered to + + + + +Cerf, et al. Informational [Page 5] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + applications in complete units, although a system interface that + behaves differently is not precluded. + + ADUs are transformed by the bundle layer into one or more protocol + data units called "bundles", which are forwarded by DTN nodes. + Bundles have a defined format containing two or more "blocks" of + data. 
Each block may contain either application data or other + information used to deliver the containing bundle to its + destination(s). Blocks serve the purpose of holding information + typically found in the header or payload portion of protocol data + units in other protocol architectures. The term "block" is used + instead of "header" because blocks may not appear at the beginning of + a bundle due to particular processing requirements (e.g., digital + signatures). + + Bundles may be split up ("fragmented") into multiple constituent + bundles (also called "fragments" or "bundle fragments") during + transmission. Fragments are themselves bundles, and may be further + fragmented. Two or more fragments may be reassembled anywhere in the + network, forming a new bundle. + + Bundle sources and destinations are identified by (variable-length) + Endpoint Identifiers (EIDs, described below), which identify the + original sender and final destination(s) of bundles, respectively. + Bundles also contain a "report-to" EID used when special operations + are requested to direct diagnostic output to an arbitrary entity + (e.g., other than the source). An EID may refer to one or more DTN + nodes (i.e., for multicast destinations or "report-to" destinations). + + While IP networks are based on "store-and-forward" operation, there + is an assumption that the "storing" will not persist for more than a + modest amount of time, on the order of the queuing and transmission + delay. In contrast, the DTN architecture does not expect that + network links are always available or reliable, and instead expects + that nodes may choose to store bundles for some time. We anticipate + that most DTN nodes will use some form of persistent storage for this + -- disk, flash memory, etc. -- and that stored bundles will survive + system restarts. + + Bundles contain an originating timestamp, useful life indicator, a + class of service designator, and a length. 
This information provides + bundle-layer routing with a priori knowledge of the size and + performance requirements of requested data transfers. When there is + a significant amount of queuing that can occur in the network (as is + the case in the DTN version of store-and-forward), the advantage + provided by knowing this information may be significant for making + scheduling and path selection decisions [JFP04]. An alternative + abstraction (i.e., of stream-based delivery based on packets) would + + + +Cerf, et al. Informational [Page 6] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + make such scheduling much more difficult. Although packets provide + some of the same benefits as bundles, larger aggregates provide a way + for the network to apply scheduling and buffer management to units of + data that are more useful to applications. + + An essential element of the bundle-based style of forwarding is that + bundles have a place to wait in a queue until a communication + opportunity ("contact") is available. This highlights the following + assumptions: + + 1. that storage is available and well-distributed throughout the + network, + + 2. that storage is sufficiently persistent and robust to store + bundles until forwarding can occur, and + + 3. (implicitly) that this "store-and-forward" model is a better + choice than attempting to effect continuous connectivity or other + alternatives. + + For a network to effectively support the DTN architecture, these + assumptions must be considered and must be found to hold. Even so, + the inclusion of long-term storage as a fundamental aspect of the DTN + architecture poses new problems, especially with respect to + congestion management and denial-of-service mitigation. Node storage + in essence represents a new resource that must be managed and + protected. Much of the research in DTN revolves around exploring + these issues. 
Congestion is discussed in Section 3.13, and security + mechanisms, including methods for DTN nodes to protect themselves + from handling unauthorized traffic from other nodes, are discussed in + [DTNSEC] and [DTNSOV]. + +3.2. Nodes and Endpoints + + A DTN node (or simply "node" in this document) is an engine for + sending and receiving bundles -- an implementation of the bundle + layer. Applications utilize DTN nodes to send or receive ADUs + carried in bundles (applications also use DTN nodes when acting as + report-to destinations for diagnostic information carried in + bundles). Nodes may be members of groups called "DTN endpoints". A + DTN endpoint is therefore a set of DTN nodes. A bundle is considered + to have been successfully delivered to a DTN endpoint when some + minimum subset of the nodes in the endpoint has received the bundle + without error. This subset is called the "minimum reception group" + (MRG) of the endpoint. The MRG of an endpoint may refer to one node + (unicast), one of a group of nodes (anycast), or all of a group of + nodes (multicast and broadcast). A single node may be in the MRG of + multiple endpoints. + + + +Cerf, et al. Informational [Page 7] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.3. Endpoint Identifiers (EIDs) and Registrations + + An Endpoint Identifier (EID) is a name, expressed using the general + syntax of URIs (see below), that identifies a DTN endpoint. Using an + EID, a node is able to determine the MRG of the DTN endpoint named by + the EID. Each node is also required to have at least one EID that + uniquely identifies it. + + Applications send ADUs destined for an EID, and may arrange for ADUs + sent to a particular EID to be delivered to them. Depending on the + construction of the EID being used (see below), there may be a + provision for wildcarding some portion of an EID, which is often + useful for diagnostic and routing purposes. 
+ + An application's desire to receive ADUs destined for a particular EID + is called a "registration", and in general is maintained persistently + by a DTN node. This allows application registration information to + survive application and operating system restarts. + + An application's attempt to establish a registration is not + guaranteed to succeed. For example, an application could request to + register itself to receive ADUs by specifying an Endpoint ID that is + uninterpretable or unavailable to the DTN node servicing the request. + Such requests are likely to fail. + +3.3.1. URI Schemes + + Each Endpoint ID is expressed syntactically as a Uniform Resource + Identifier (URI) [RFC3986]. The URI syntax has been designed as a + way to express names or addresses for a wide range of purposes, and + is therefore useful for constructing names for DTN endpoints. + + In URI terminology, each URI begins with a scheme name. The scheme + name is an element of the set of globally-managed scheme names + maintained by IANA [ISCHEMES]. Lexically following the scheme name + in a URI is a series of characters constrained by the syntax defined + by the scheme. This portion of the URI is called the scheme-specific + part (SSP), and can be quite general. (See, as one example, the URI + scheme for SNMP [RFC4088]). Note that scheme-specific syntactical + and semantic restrictions may be more constraining than the basic + rules of RFC 3986. Section 3.1 of RFC 3986 provides guidance on the + syntax of scheme names. + + URI schemes are a key concept in the DTN architecture, and evolved + from an earlier concept called regions, which were tied more closely + to assumptions of the network topology. Using URIs, significant + flexibility is attained in the structuring of EIDs. They might, for + example, be constructed based on DNS names, or might look like + + + +Cerf, et al. 
Informational [Page 8] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + "expressions of interest" or forms of database-like queries as in a + directed diffusion-routed network [IGE00] or in intentional naming + [WSBL99]. As names, EIDs are not required to be related to routing + or topological organization. Such a relationship is not prohibited, + however, and in some environments using EIDs this way may be + advantageous. + + A single EID may refer to an endpoint containing more than one DTN + node, as suggested above. It is the responsibility of a scheme + designer to define how to interpret the SSP of an EID so as to + determine whether it refers to a unicast, multicast, or anycast set + of nodes. See Section 3.4 for more details. + + URIs are constructed based on rules specified in RFC 3986, using the + US-ASCII character set. However, note this excerpt from RFC 3986, + Section 1.2.1, on dealing with characters that cannot be represented + by US-ASCII: "Percent-encoded octets (Section 2.1) may be used + within a URI to represent characters outside the range of the US- + ASCII coded character set if this representation is allowed by the + scheme or by the protocol element in which the URI is referenced. + Such a definition should specify the character encoding used to map + those characters to octets prior to being percent-encoded for the + URI". + +3.3.2. Late Binding + + Binding means interpreting the SSP of an EID for the purpose of + carrying an associated message towards a destination. For example, + binding might require mapping an EID to a next-hop EID or to a lower- + layer address for transmission. "Late binding" means that the + binding of a bundle's destination to a particular set of destination + identifiers or addresses does not necessarily happen at the bundle + source. Because the destination EID is potentially re-interpreted at + each hop, the binding may occur at the source, during transit, or + possibly at the destination(s). 
This contrasts with the name-to- + address binding of Internet communications where a DNS lookup at the + source fixes the IP address of the destination node before data is + sent. Such a circumstance would be considered "early binding" + because the name-to-address translation is performed prior to data + being sent into the network. + + In a frequently-disconnected network, late binding may be + advantageous because the transit time of a message may exceed the + validity time of a binding, making binding at the source impossible + or invalid. Furthermore, use of name-based routing with late binding + may reduce the amount of administrative (mapping) information that + + + + + +Cerf, et al. Informational [Page 9] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + must propagate through the network, and may also limit the scope of + mapping synchronization requirements to a local topological + neighborhood where changes are made. + +3.4. Anycast and Multicast + + As mentioned above, an EID may refer to an endpoint containing one or + more DTN nodes. When referring to a group of size greater than one, + the delivery semantics may be of either the anycast or multicast + variety (broadcast is considered to be of the multicast variety). + For anycast group delivery, a bundle is delivered to one node among a + group of potentially many nodes, and for multicast delivery it is + intended to be delivered to all of them, subject to the normal DTN + class of service and maximum useful lifetime semantics. + + Multicast group delivery in a DTN presents an unfamiliar issue with + respect to group membership. In relatively low-delay networks, such + as the Internet, nodes may be considered to be part of the group if + they have expressed interest to join it "recently". In a DTN, + however, nodes may wish to receive data sent to a group during an + interval of time earlier than when they are actually able to receive + it [ZAZ05]. 
More precisely, an application expresses its desire to + receive data sent to EID e at time t. Prior to this, during the + interval [t0, t1], t > t1, data may have been generated for group e. + For the application to receive any of this data, the data must be + available a potentially long time after senders have ceased sending + to the group. Thus, the data may need to be stored within the + network in order to support temporal group semantics of this kind. + How to design and implement this remains a research issue, as it is + likely to be at least as hard as problems related to reliable + multicast. + +3.5. Priority Classes + + The DTN architecture offers *relative* measures of priority (low, + medium, high) for delivering ADUs. These priorities differentiate + traffic based upon an application's desire to affect the delivery + urgency for ADUs, and are carried in bundle blocks generated by the + bundle layer based on information specified by the application. + + The (U.S. or similar) Postal Service provides a strong metaphor for + the priority classes offered by the forwarding abstraction offered by + the DTN architecture. Traffic is generally not interactive and is + often one-way. There are generally no strong guarantees of timely + delivery, yet there are some forms of class of service, reliability, + and security. + + + + + +Cerf, et al. Informational [Page 10] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + We have defined three relative priority classes to date. These + priority classes typically imply some relative scheduling + prioritization among bundles in queue at a sender: + + - Bulk - Bulk bundles are shipped on a "least effort" basis. No + bundles of this class will be shipped until all bundles of other + classes bound for the same destination and originating from the + same source have been shipped. + + - Normal - Normal-class bundles are shipped prior to any bulk-class + bundles and are otherwise the same as bulk bundles. 
+ + - Expedited - Expedited bundles, in general, are shipped prior to + bundles of other classes and are otherwise the same. + + Applications specify their requested priority class and data lifetime + (see below) for each ADU they send. This information, coupled with + policy applied at DTN nodes that select how messages are forwarded + and which routing algorithms are in use, affects the overall + likelihood and timeliness of ADU delivery. + + The priority class of a bundle is only required to relate to other + bundles from the same source. This means that a high priority bundle + from one source may not be delivered faster (or with some other + superior quality of service) than a medium priority bundle from a + different source. It does mean that a high priority bundle from one + source will be handled preferentially to a lower priority bundle sent + from the same source. + + Depending on a particular DTN node's forwarding/scheduling policy, + priority may or may not be enforced across different sources. That + is, in some DTN nodes, expedited bundles might always be sent prior + to any bulk bundles, irrespective of source. Many variations are + possible. + +3.6. Postal-Style Delivery Options and Administrative Records + + Continuing with the postal analogy, the DTN architecture supports + several delivery options that may be selected by an application when + it requests the transmission of an ADU. In addition, the + architecture defines two types of administrative records: "status + reports" and "signals". These records are bundles that provide + information about the delivery of other bundles, and are used in + conjunction with the delivery options. + + + + + + + +Cerf, et al. Informational [Page 11] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.6.1. Delivery Options + + We have defined eight delivery options. 
Applications sending an ADU + (the "subject ADU") may request any combination of the following, + which are carried in each of the bundles produced ("sent bundles") by + the bundle layer resulting from the application's request to send the + subject ADU: + + - Custody Transfer Requested - requests sent bundles be delivered + with enhanced reliability using custody transfer procedures. Sent + bundles will be transmitted by the bundle layer using reliable + transfer protocols (if available), and the responsibility for + reliable delivery of the bundle to its destination(s) may move + among one or more "custodians" in the network. This capability is + described in more detail in Section 3.10. + + - Source Node Custody Acceptance Required - requires the source DTN + node to provide custody transfer for the sent bundles. If custody + transfer is not available at the source when this delivery option + is requested, the requested transmission fails. This provides a + means for applications to insist that the source DTN node take + custody of the sent bundles (e.g., by storing them in persistent + storage). + + - Report When Bundle Delivered - requests a (single) Bundle Delivery + Status Report be generated when the subject ADU is delivered to its + intended recipient(s). This request is also known as "return- + receipt". + + - Report When Bundle Acknowledged by Application - requests an + Acknowledgement Status Report be generated when the subject ADU is + acknowledged by a receiving application. This only happens by + action of the receiving application, and differs from the Bundle + Delivery Status Report. It is intended for cases where the + application may be acting as a form of application layer gateway + and wishes to indicate the status of a protocol operation external + to DTN back to the requesting source. See Section 3.11 for more + details. 
+ + - Report When Bundle Received - requests a Bundle Reception Status + Report be generated when each sent bundle arrives at a DTN node. + This is designed primarily for diagnostic purposes. + + - Report When Bundle Custody Accepted - requests a Custody + Acceptance Status Report be generated when each sent bundle has + been accepted using custody transfer. This is designed primarily + for diagnostic purposes. + + + + +Cerf, et al. Informational [Page 12] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + - Report When Bundle Forwarded - requests a Bundle Forwarding Status + Report be generated when each sent bundle departs a DTN node after + forwarding. This is designed primarily for diagnostic purposes. + + - Report When Bundle Deleted - requests a Bundle Deletion Status + Report be generated when each sent bundle is deleted at a DTN node. + This is designed primarily for diagnostic purposes. + + The first four delivery options are designed for ordinary use by + applications. The last four are designed primarily for diagnostic + purposes and their use may be restricted or limited in environments + subject to congestion or attack. + + If the security procedures defined in [DTNSEC] are also enabled, then + three additional delivery options become available: + + - Confidentiality Required - requires the subject ADU be made secret + from parties other than the source and the members of the + destination EID. + + - Authentication Required - requires all non-mutable fields in the + bundle blocks of the sent bundles (i.e., those which do not change + as the bundle is forwarded) be made strongly verifiable (i.e., + cryptographically strong). This protects several fields, including + the source and destination EIDs and the bundle's data. See Section + 3.7 and [BSPEC] for more details. + + - Error Detection Required - requires modifications to the non- + mutable fields of each sent bundle be made detectable with high + probability at each destination. 
+ +3.6.2. Administrative Records: Bundle Status Reports and Custody + Signals + + Administrative records are used to report status information or error + conditions related to the bundle layer. There are two types of + administrative records defined: bundle status reports (BSRs) and + custody signals. Administrative records correspond (approximately) + to messages in the ICMP protocol in IP [RFC792]. In ICMP, however, + messages are returned to the source. In DTN, they are instead + directed to the report-to EID for BSRs and the EID of the current + custodian for custody signals, which might differ from the source's + EID. Administrative records are sent as bundles with a source EID + set to one of the EIDs associated with the DTN node generating the + administrative record. In some cases, arrival of a single bundle or + bundle fragment may elicit multiple administrative records (e.g., in + the case where a bundle is replicated for multicast forwarding). + + + + +Cerf, et al. Informational [Page 13] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + The following BSRs are currently defined (also see [BSPEC] for more + details): + + - Bundle Reception - sent when a bundle arrives at a DTN node. + Generation of this message may be limited by local policy. + + - Custody Acceptance - sent when a node has accepted custody of a + bundle with the Custody Transfer Requested option set. Generation + of this message may be limited by local policy. + + - Bundle Forwarded - sent when a bundle containing a Report When + Bundle Forwarded option departs from a DTN node after having been + forwarded. Generation of this message may be limited by local + policy. + + - Bundle Deletion - sent from a DTN node when a bundle containing a + Report When Bundle Deleted option is discarded. This can happen + for several reasons, such as expiration. 
Generation of this + message may be limited by local policy but is required in cases + where the deletion is performed by a bundle's current custodian. + + - Bundle Delivery - sent from a final recipient's (destination) node + when a complete ADU comprising sent bundles containing Report When + Bundle Delivered options is consumed by an application. + + - Acknowledged by application - sent from a final recipient's + (destination) node when a complete ADU comprising sent bundles + containing Application Acknowledgment options has been processed by + an application. This generally involves specific action on the + receiving application's part. + + In addition to the status reports, the custody signal is currently + defined to indicate the status of a custody transfer. These are sent + to the current-custodian EID contained in an arriving bundle: + + - Custody Signal - indicates that custody has been successfully + transferred. This signal appears as a Boolean indicator, and may + therefore indicate either a successful or a failed custody transfer + attempt. + + Administrative records must reference a received bundle. This is + accomplished by a method for uniquely identifying bundles based on a + transmission timestamp and sequence number discussed in Section 3.12. + + + + + + + + +Cerf, et al. Informational [Page 14] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.7. Primary Bundle Fields + + The bundles carried between and among DTN nodes obey a standard + bundle protocol specified in [BSPEC]. Here we provide an overview of + most of the fields carried with every bundle. The protocol is + designed with a mandatory primary block, an optional payload block + (which contains the ADU data itself), and a set of optional extension + blocks. Blocks may be cascaded in a way similar to extension headers + in IPv6. 
The following selected fields are all present in the + primary block, and therefore are present for every bundle and + fragment: + + - Creation Timestamp - a concatenation of the bundle's creation time + and a monotonically increasing sequence number such that the + creation timestamp is guaranteed to be unique for each ADU + originating from the same source. The creation timestamp is based + on the time-of-day an application requested an ADU to be sent (not + when the corresponding bundle(s) are sent into the network). DTN + nodes are assumed to have a basic time synchronization capability + (see Section 3.12). + + - Lifespan - the time-of-day at which the message is no longer + useful. If a bundle is stored in the network (including the + source's DTN node) when its lifespan is reached, it may be + discarded. The lifespan of a bundle is expressed as an offset + relative to its creation time. + + - Class of Service Flags - indicates the delivery options and + priority class for the bundle. Priority classes may be one of + bulk, normal, or expedited. See Section 3.6.1. + + - Source EID - EID of the source (the first sender). + + - Destination EID - EID of the destination (the final intended + recipient(s)). + + - Report-To Endpoint ID - an EID identifying where reports (return- + receipt, route-tracing functions) should be sent. This may or may + not identify the same endpoint as the Source EID. + + - Custodian EID - EID of the current custodian of a bundle (if any). + + The payload block indicates information about the contained payload + (e.g., its length) and the payload itself. In addition to the fields + found in the primary and payload blocks, each bundle may have fields + in additional blocks carried with each bundle. See [BSPEC] for + additional details. + + + + +Cerf, et al. Informational [Page 15] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.8. 
Routing and Forwarding + + The DTN architecture provides a framework for routing and forwarding + at the bundle layer for unicast, anycast, and multicast messages. + Because nodes in a DTN network might be interconnected using more + than one type of underlying network technology, a DTN network is best + described abstractly using a *multigraph* (a graph where vertices may + be interconnected with more than one edge). Edges in this graph are, + in general, time-varying with respect to their delay and capacity and + directional because of the possibility of one-way connectivity. When + an edge has zero capacity, it is considered to not be connected. + + Because edges in a DTN graph may have significant delay, it is + important to distinguish where time is measured when expressing an + edge's capacity or delay. We adopt the convention of expressing + capacity and delay as functions of time where time is measured at the + point where data is inserted into a network edge. For example, + consider an edge having capacity C(t) and delay D(t) at time t. If B + bits are placed in this edge at time t, they completely arrive by + time t + D(t) + (1/C(t))*B. We assume C(t) and D(t) do not change + significantly during the interval [t, t+D(t)+(1/C(t))*B]. + + Because edges may vary between positive and zero capacity, it is + possible to describe a period of time (interval) during which the + capacity is strictly positive, and the delay and capacity can be + considered to be constant [AF03]. This period of time is called a + "contact". In addition, the product of the capacity and the interval + is known as a contact's "volume". If contacts and their volumes are + known ahead of time, intelligent routing and forwarding decisions can + be made (optimally for small networks) [JFP04]. Optimally using a + contact's volume, however, requires the ability to divide large ADUs + and bundles into smaller routable units. This is provided by DTN + fragmentation (see Section 3.9). 
+ + When delivery paths through a DTN graph are lossy or contact + intervals and volumes are not known precisely ahead of time, routing + computations become especially challenging. How to handle these + situations is an active area of work in the (emerging) research area + of delay tolerant networking. + +3.8.1. Types of Contacts + + Contacts typically fall into one of several categories, based largely + on the predictability of their performance characteristics and + whether some action is required to bring them into existence. To + date, the following major types of contacts have been defined: + + + + + +Cerf, et al. Informational [Page 16] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + Persistent Contacts + + Persistent contacts are always available (i.e., no connection- + initiation action is required to instantiate a persistent + contact). An 'always-on' Internet connection such as a DSL or + Cable Modem connection would be a representative of this class. + + On-Demand Contacts + + On-Demand contacts require some action in order to instantiate, + but then function as persistent contacts until terminated. A + dial-up connection is an example of an On-Demand contact (at + least, from the viewpoint of the dialer; it may be viewed as an + Opportunistic Contact, below, from the viewpoint of the dial-up + service provider). + + Intermittent - Scheduled Contacts + + A scheduled contact is an agreement to establish a contact at a + particular time, for a particular duration. An example of a + scheduled contact is a link with a low-earth orbiting satellite. + A node's list of contacts with the satellite can be constructed + from the satellite's schedule of view times, capacities, and + latencies. Note that for networks with substantial delays, the + notion of the "particular time" is delay-dependent. 
For example, + a single scheduled contact between Earth and Mars would not be at + the same instant in each location, but would instead be offset by + the (non-negligible) propagation delay. + + Intermittent - Opportunistic Contacts + + Opportunistic contacts are not scheduled, but rather present + themselves unexpectedly. For example, an unscheduled aircraft + flying overhead and beaconing, advertising its availability for + communication, would present an opportunistic contact. Another + type of opportunistic contact might be via an infrared or + Bluetooth communication link between a personal digital assistant + (PDA) and a kiosk in an airport concourse. The opportunistic + contact begins as the PDA is brought near the kiosk, lasting an + undetermined amount of time (i.e., until the link is lost or + terminated). + + Intermittent - Predicted Contacts + + Predicted contacts are based on no fixed schedule, but rather are + predictions of likely contact times and durations based on a + history of previously observed contacts or some other information. + Given a great enough confidence in a predicted contact, routes may + + + +Cerf, et al. Informational [Page 17] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + be chosen based on this information. This is an active research + area, and a few approaches have been proposed [LFC05]. + +3.9. Fragmentation and Reassembly + + DTN fragmentation and reassembly are designed to improve the + efficiency of bundle transfers by ensuring that contact volumes are + fully utilized and by avoiding retransmission of partially-forwarded + bundles. There are two forms of DTN fragmentation/reassembly: + + Proactive Fragmentation + + A DTN node may divide a block of application data into multiple + smaller blocks and transmit each such block as an independent + bundle. 
In this case, the *final destination(s)* are responsible + for extracting the smaller blocks from incoming bundles and + reassembling them into the original larger bundle and, ultimately, + ADU. This approach is called proactive fragmentation because it + is used primarily when contact volumes are known (or predicted) in + advance. + + Reactive Fragmentation + + DTN nodes sharing an edge in the DTN graph may fragment a bundle + cooperatively when a bundle is only partially transferred. In + this case, the receiving bundle layer modifies the incoming bundle + to indicate it is a fragment, and forwards it normally. The + previous-hop sender may learn (via convergence-layer protocols, + see Section 6) that only a portion of the bundle was delivered to + the next hop, and send the remaining portion(s) when subsequent + contacts become available (possibly to different next-hops if + routing changes). This is called reactive fragmentation because + the fragmentation process occurs after an attempted transmission + has taken place. + + As an example, consider a ground station G, and two store-and- + forward satellites S1 and S2, in opposite low-earth orbit. While + G is transmitting a large bundle to S1, a reliable transport layer + protocol below the bundle layer at each indicates the transmission + has terminated, but that half the transfer has completed + successfully. In this case, G can form a smaller bundle fragment + consisting of the second half of the original bundle and forward + it to S2 when available. In addition, S1 (now out of range of G) + can form a new bundle consisting of the first half of the original + bundle and forward it to whatever next hop(s) it deems + appropriate. + + + + + +Cerf, et al. 
Informational [Page 18] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + The reactive fragmentation capability is not required to be available + in every DTN implementation, as it requires a certain level of + support from underlying protocols that may not be present, and + presents significant challenges with respect to handling digital + signatures and authentication codes on messages. When a signed + message is only partially received, most message authentication codes + will fail. When DTN security is present and enabled, it may + therefore be necessary to proactively fragment large bundles into + smaller units that are more convenient for digital signatures. + + Even if reactive fragmentation is not present in an implementation, + the ability to reassemble fragments at a destination is required in + order to support DTN fragmentation. Furthermore, for contacts with + volumes that are small compared to typical bundle sizes, some + incremental delivery approach must be used (e.g., checkpoint/restart) + to prevent data delivery livelock. Reactive fragmentation is one + such approach, but other protocol layers could potentially handle + this issue as well. + +3.10. Reliability and Custody Transfer + + The most basic service provided by the bundle layer is + unacknowledged, prioritized (but not guaranteed) unicast message + delivery. It also provides two options for enhancing delivery + reliability: end-to-end acknowledgments and custody transfer. + Applications wishing to implement their own end-to-end message + reliability mechanisms are free to utilize the acknowledgment. The + custody transfer feature of the DTN architecture only specifies a + coarse-grained retransmission capability, described next. + + Transmission of bundles with the Custody Transfer Requested option + specified generally involves moving the responsibility for reliable + delivery of an ADU's bundles among different DTN nodes in the + network. 
For unicast delivery, this will typically involve moving + bundles "closer" (in terms of some routing metric) to their ultimate + destination(s), and retransmitting when necessary. The nodes + receiving these bundles along the way (and agreeing to accept the + reliable delivery responsibility) are called "custodians". The + movement of a bundle (and its delivery responsibility) from one node + to another is called a "custody transfer". It is analogous to a + database commit transaction [FHM03]. The exact meaning and design of + custody transfer for multicast and anycast delivery remains to be + fully explored. + + Custody transfer allows the source to delegate retransmission + responsibility and recover its retransmission-related resources + relatively soon after sending a bundle (on the order of the minimum + round-trip time to the first bundle hop(s)). Not all nodes in a DTN + + + +Cerf, et al. Informational [Page 19] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + are required by the DTN architecture to accept custody transfers, so + it is not a true 'hop-by-hop' mechanism. For example, some nodes may + have sufficient storage resources to sometimes act as custodians, but + may elect to not offer such services when congested or running low on + power. + + The existence of custodians can alter the way DTN routing is + performed. In some circumstances, it may be beneficial to move a + bundle to a custodian as quickly as possible even if the custodian is + further away (in terms of distance, time or some routing metric) from + the bundle's final destination(s) than some other reachable node. + Designing a system with this capability involves constructing more + than one routing graph, and is an area of continued research. 
+ + Custody transfer in DTN not only provides a method for tracking + bundles that require special handling and identifying DTN nodes that + participate in custody transfer, it also provides a (weak) mechanism + for enhancing the reliability of message delivery. Generally + speaking, custody transfer relies on underlying reliable delivery + protocols of the networks that it operates over to provide the + primary means of reliable transfer from one bundle node to the next + (set). However, when custody transfer is requested, the bundle layer + provides an additional coarse-grained timeout and retransmission + mechanism and an accompanying (bundle-layer) custodian-to-custodian + acknowledgment signaling mechanism. When an application does *not* + request custody transfer, this bundle layer timeout and + retransmission mechanism is typically not employed, and successful + bundle layer delivery depends solely on the reliability mechanisms of + the underlying protocols. + + When a node accepts custody for a bundle that contains the Custody + Transfer Requested option, a Custody Transfer Accepted Signal is sent + by the bundle layer to the Current Custodian EID contained in the + primary bundle block. In addition, the Current Custodian EID is + updated to contain one of the forwarding node's (unicast) EIDs before + the bundle is forwarded. + + When an application requests an ADU to be delivered with custody + transfer, the request is advisory. In some circumstances, a source + of a bundle for which custody transfer has been requested may not be + able to provide this service. In such circumstances, the subject + bundle may traverse multiple DTN nodes before it obtains a custodian. + Bundles in this condition are specially marked with their Current + Custodian EID field set to a null endpoint. In cases where + applications wish to require the source to take custody of the + bundle, they may supply the Source Node Custody Acceptance Required + + + + + +Cerf, et al. 
Informational [Page 20] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + delivery option. This may be useful to applications that desire a + continuous "chain" of custody or that wish to exit after being + ensured their data is safely held in a custodian. + + In a DTN network where one or more custodian-to-custodian hops are + strictly one directional (and cannot be reversed), the DTN custody + transfer mechanism will be affected over such hops due to the lack of + any way to receive a custody signal (or any other information) back + across the path, resulting in the expiration of the bundle at the + ingress to the one-way hop. This situation does not necessarily mean + the bundle has been lost; nodes on the other side of the hop may + continue to transfer custody, and the bundle may be delivered + successfully to its destination(s). However, in this circumstance a + source that has requested to receive expiration BSRs for this bundle + will receive an expiration report for the bundle, and possibly + conclude (incorrectly) that the bundle has been discarded and not + delivered. Although this problem cannot be fully solved in this + situation, a mechanism is provided to help ameliorate the seemingly + incorrect information that may be reported when the bundle expires + after having been transferred over a one-way hop. This is + accomplished by the node at the ingress to the one-way hop reporting + the existence of a known one-way path using a variant of a bundle + status report. These types of reports are provided if the subject + bundle requests the report using the 'Report When Bundle Forwarded' + delivery option. + +3.11. DTN Support for Proxies and Application Layer Gateways + + One of the aims of DTN is to provide a common method for + interconnecting application layer gateways and proxies. 
In cases + where existing Internet applications can be made to tolerate delays, + local proxies can be constructed to benefit from the existing + communication capabilities provided by DTN [S05, T02]. Making such + proxies compatible with DTN reduces the burden on the proxy author + from being concerned with how to implement routing and reliability + management and allows existing TCP/IP-based applications to operate + unmodified over a DTN-based network. + + When DTN is used to provide a form of tunnel encapsulation for other + protocols, it can be used in constructing overlay networks comprised + of application layer gateways. The application acknowledgment + capability is designed for such circumstances. This provides a + common way for remote application layer gateways to signal the + success or failure of non-DTN protocol operations initiated as a + result of receiving DTN ADUs. Without this capability, such + indicators would have to be implemented by applications themselves in + non-standard ways. + + + + +Cerf, et al. Informational [Page 21] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +3.12. Timestamps and Time Synchronization + + The DTN architecture depends on time synchronization among DTN nodes + (supported by external, non-DTN protocols) for four primary purposes: + bundle and fragment identification, routing with scheduled or + predicted contacts, bundle expiration time computations, and + application registration expiration. + + Bundle identification and expiration are supported by placing a + creation timestamp and an explicit expiration field (expressed in + seconds after the source timestamp) in each bundle. The origination + timestamps on arriving bundles are made available to consuming + applications in ADUs they receive by some system interface function. + Each set of bundles corresponding to an ADU is required to contain a + timestamp unique to the sender's EID. 
The EID, timestamp, and data + offset/length information together uniquely identify a bundle. + Unique bundle identification is used for a number of purposes, + including custody transfer and reassembly of bundle fragments. + + Time is also used in conjunction with application registrations. + When an application expresses its desire to receive ADUs destined for + a particular EID, this registration is only maintained for a finite + period of time, and may be specified by the application. For + multicast registrations, an application may also specify a time range + or "interest interval" for its registration. In this case, traffic + sent to the specified EID any time during the specified interval will + eventually be delivered to the application (unless such traffic has + expired due to the expiration time provided by the application at the + source or some other reason prevents such delivery). + +3.13. Congestion and Flow Control at the Bundle Layer + + The subject of congestion control and flow control at the bundle + layer is one on which the authors of this document have not yet + reached complete consensus. We have unresolved concerns about the + efficiency and efficacy of congestion and flow control schemes + implemented across long and/or highly variable delay environments, + especially with the custody transfer mechanism that may require nodes + to retain bundles for long periods of time. + + For the purposes of this document, we define "flow control" as a + means of assuring that the average rate at which a sending node + transmits data to a receiving node does not exceed the average rate + at which the receiving node is prepared to receive data from that + sender. (Note that this is a generalized notion of flow control, + rather than one that applies only to end-to-end communication.) We + define "congestion control" as a means of assuring that the aggregate + rate at which all traffic sources inject data into a network does not + + + +Cerf, et al. 
Informational [Page 22] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + exceed the maximum aggregate rate at which the network can deliver + data to destination nodes over time. If flow control is propagated + backward from congested nodes toward traffic sources, then the flow + control mechanism can be used as at least a partial solution to the + problem of congestion as well. + + DTN flow control decisions must be made within the bundle layer + itself based on information about resources (in this case, primarily + persistent storage) available within the bundle node. When storage + resources become scarce, a DTN node has only a certain degree of + freedom in handling the situation. It can always discard bundles + which have expired -- an activity DTN nodes should perform regularly + in any case. If it ordinarily is willing to accept custody for + bundles, it can cease doing so. If storage resources are available + elsewhere in the network, it may be able to make use of them in some + way for bundle storage. It can also discard bundles which have not + expired but for which it has not accepted custody. A node must avoid + discarding bundles for which it has accepted custody, and do so only + as a last resort. Determining when a node should engage in or cease + to engage in custody transfers is a resource allocation and + scheduling problem of current research interest. + + In addition to the bundle layer mechanisms described above, a DTN + node may be able to avail itself of support from lower-layer + protocols in affecting its own resource utilization. For example, a + DTN node receiving a bundle using TCP/IP might intentionally slow + down its receiving rate by performing read operations less frequently + in order to reduce its offered load. This is possible because TCP + provides its own flow control, so reducing the application data + consumption rate could effectively implement a form of hop-by-hop + flow control. 
Unfortunately, it may also lead to head-of-line + blocking issues, depending on the nature of bundle multiplexing + within a TCP connection. A protocol with more relaxed ordering + constraints (e.g., SCTP [RFC2960]) might be preferable in such + circumstances. + + Congestion control is an ongoing research topic. + +3.14. Security + + The possibility of severe resource scarcity in some delay-tolerant + networks dictates that some form of authentication and access control + to the network itself is required in many circumstances. It is not + acceptable for an unauthorized user to flood the network with traffic + easily, possibly denying service to authorized users. In many cases + it is also not acceptable for unauthorized traffic to be forwarded + over certain network links at all. This is especially true for + exotic, mission-critical links. In light of these considerations, + + + +Cerf, et al. Informational [Page 23] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + several goals are established for the security component of the DTN + architecture: + + - Promptly prevent unauthorized applications from having their data + carried through or stored in the DTN. + + - Prevent unauthorized applications from asserting control over the + DTN infrastructure. + + - Prevent otherwise authorized applications from sending bundles at a + rate or class of service for which they lack permission. + + - Promptly discard bundles that are damaged or improperly modified in + transit. + + - Promptly detect and de-authorize compromised entities. + + Many existing authentication and access control protocols designed + for operation in low-delay, connected environments may not perform + well in DTNs. In particular, updating access control lists and + revoking ("blacklisting") credentials may be especially difficult. + Also, approaches that require frequent access to centralized servers + to complete an authentication or authorization transaction are not + attractive. 
The consequences of these difficulties include delays in + the onset of communication, delays in detecting and recovering from + system compromise, and delays in completing transactions due to + inappropriate access control or authentication settings. + + To help satisfy these security requirements in light of the + challenges, the DTN architecture adopts a standard but optionally + deployed security architecture [DTNSEC] that utilizes hop-by-hop and + end-to-end authentication and integrity mechanisms. The purpose of + using both approaches is to be able to handle access control for data + forwarding and storage separately from application-layer data + integrity. While the end-to-end mechanism provides authentication + for a principal such as a user (of which there may be many), the hop- + by-hop mechanism is intended to authenticate DTN nodes as legitimate + transceivers of bundles to each other. Note that it is conceivable + to construct a DTN in which only a subset of the nodes participate in + the security mechanisms, resulting in a secure DTN overlay existing + atop an insecure DTN overlay. This idea is relatively new and is + still being explored. + + In accordance with the goals listed above, DTN nodes discard traffic + as early as possible if authentication or access control checks fail. + This approach meets the goals of removing unwanted traffic from being + forwarded over specific high-value links, but also has the associated + benefit of making denial-of-service attacks considerably harder to + + + +Cerf, et al. Informational [Page 24] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + mount more generally, as compared with conventional Internet routers. + However, the obvious cost for this capability is potentially larger + computation and credential storage overhead required at DTN nodes. + + For more detailed information on DTN security provisions, refer to + [DTNSEC] and [DTNSOV]. + +4. 
State Management Considerations + + An important aspect of any networking architecture is its management + of state. This section describes the state managed at the bundle + layer and discusses how it is established and removed. + +4.1. Application Registration State + + In long/variable delay environments, an asynchronous application + interface seems most appropriate. Such interfaces typically include + methods for applications to register callback actions when certain + triggering events occur (e.g., when ADUs arrive). These + registrations create state information called application + registration state. + + Application registration state is typically created by explicit + request of the application, and is removed by a separate explicit + request, but may also be removed by an application-specified timer + (it is thus "firm" state). In most cases, there must be a provision + for retaining this state across application and operating system + termination/restart conditions because a client/server bundle round- + trip time may exceed the requesting application's execution time (or + hosting system's uptime). In cases where applications are not + automatically restarted but application registration state remains + persistent, a method must be provided to indicate to the system what + action to perform when the triggering event occurs (e.g., restarting + some application, ignoring the event, etc.). + + To initiate a registration and thereby establish application + registration state, an application specifies an Endpoint ID for which + it wishes to receive ADUs, along with an optional time value + indicating how long the registration should remain active. This + operation is somewhat analogous to the bind() operation in the common + sockets API. + + For registrations to groups (i.e., joins), a time interval may also + be specified. The time interval refers to the range of origination + times of ADUs sent to the specified EID. 
See Section 3.4 above for + more details. + + + + + +Cerf, et al. Informational [Page 25] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +4.2. Custody Transfer State + + Custody transfer state includes information required to keep account + of bundles for which a node has taken custody, as well as the + protocol state related to transferring custody for one or more of + them. The accounting-related state is created when a bundle is + received. Custody transfer retransmission state is created when a + transfer of custody is initiated by forwarding a bundle with the + custody transfer requested delivery option specified. Retransmission + state and accounting state may be released upon receipt of one or + more Custody Transfer Succeeded signals, indicating custody has been + moved. In addition, the bundle's expiration time (possibly mitigated + by local policy) provides an upper bound on the time when this state + is purged from the system in the event that it is not purged + explicitly due to receipt of a signal. + +4.3. Bundle Routing and Forwarding State + + As with the Internet architecture, we distinguish between routing and + forwarding. Routing refers to the execution of a (possibly + distributed) algorithm for computing routing paths according to some + objective function (see [JFP04], for example). Forwarding refers to + the act of moving a bundle from one DTN node to another. Routing + makes use of routing state (the RIB, or routing information base), + while forwarding makes use of state derived from routing, and is + maintained as forwarding state (the FIB, or forwarding information + base). The structure of the FIB and the rules for maintaining it are + implementation choices. In some DTNs, exchange of information used + to update state in the RIB may take place on network paths distinct + from those where exchange of application data takes place. 
+ + The maintenance of state in the RIB is dependent on the type of + routing algorithm being used. A routing algorithm may consider + requested class of service and the location of potential custodians + (for custody transfer, see section 3.10), and this information will + tend to increase the size of the RIB. The separation between FIB and + RIB is not required by this document, as these are implementation + details to be decided by system implementers. The choice of routing + algorithms is still under study. + + Bundles may occupy queues in nodes for a considerable amount of time. + For unicast or anycast delivery, the amount of time is likely to be + the interval between when a bundle arrives at a node and when it can + be forwarded to its next hop. For multicast delivery of bundles, + this could be significantly longer, up to a bundle's expiration time. + This situation occurs when multicast delivery is utilized in such a + way that nodes joining a group can obtain information previously sent + to the group. In such cases, some nodes may act as "archivers" that + + + +Cerf, et al. Informational [Page 26] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + provide copies of bundles to new participants that have already been + delivered to other participants. + +4.4. Security-Related State + + The DTN security approach described in [DTNSEC], when used, requires + maintenance of state in all DTN nodes that use it. All such nodes + are required to store their own private information (including their + own policy and authentication material) and a block of information + used to verify credentials. Furthermore, in most cases, DTN nodes + will cache some public information (and possibly the credentials) of + their next-hop (bundle) neighbors. 
All cached information has + expiration times, and nodes are responsible for acquiring and + distributing updates of public information and credentials prior to + the expiration of the old set (in order to avoid a disruption in + network service). + + In addition to basic end-to-end and hop-by-hop authentication, access + control may be used in a DTN by one or more mechanisms such as + capabilities or access control lists (ACLs). ACLs would represent + another block of state present in any node that wishes to enforce + security policy. ACLs are typically initialized at node + configuration time and may be updated dynamically by DTN bundles or + by some out-of-band technique. Capabilities or credentials may be + revoked, requiring the maintenance of a revocation list ("black + list", another form of state) to check for invalid authentication + material that has already been distributed. + + Some DTNs may implement security boundaries enforced by selected + nodes in the network, where end-to-end credentials may be checked in + addition to checking the hop-by-hop credentials. (Doing so may + require routing to be adjusted to ensure all bundles comprising each + ADU pass through these points.) Public information used to verify + end-to-end authentication will typically be cached at these points. + +4.5. Policy and Configuration State + + DTN nodes will contain some amount of configuration and policy + information. Such information may alter the behavior of bundle + forwarding. Examples of policy state include the types of + cryptographic algorithms and access control procedures to use if DTN + security is employed, whether nodes may become custodians, what types + of convergence layer (see Section 6) and routing protocols are in + use, how bundles of differing priorities should be scheduled, where + and for how long bundles and other data are stored, what status + reports may be generated or at what rate, etc. + + + + + +Cerf, et al. 
Informational [Page 27] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +5. Application Structuring Issues + + DTN bundle delivery is intended to operate in a delay-tolerant + fashion over a broad range of network types. This does not mean + there *must* be large delays in the network; it means there *may* be + very significant delays (including extended periods of disconnection + between sender and intended recipient(s)). The DTN protocols are + delay tolerant, so applications using them must also be delay + tolerant in order to operate effectively in environments subject to + significant delay or disruption. + + The communication primitives provided by the DTN architecture are + based on asynchronous, message-oriented communication which differs + from conversational request/response communication. In general, + applications should attempt to include enough information in an ADU + so that it may be treated as an independent unit of work by the + network and receiver(s). The goal is to minimize synchronous + interchanges between applications that are separated by a network + characterized by long and possibly highly variable delays. A single + file transfer request message, for example, might include + authentication information, file location information, and requested + file operation (thus "bundling" this information together). + Comparing this style of operation to a classic FTP transfer, one sees + that the bundled model can complete in one round trip, whereas an FTP + file "put" operation can take as many as eight round trips to get to + a point where file data can flow [DFS02]. + + Delay-tolerant applications must consider additional factors beyond + the conversational implications of long delay paths. For example, an + application may terminate (voluntarily or not) between the time it + sends a message and the time it expects a response. 
If this + possibility has been anticipated, the application can be "re- + instantiated" with state information saved in persistent storage. + This is an implementation issue, but also an application design + consideration. + + Some consideration of delay-tolerant application design can result in + applications that work reasonably well in low-delay environments, and + that do not suffer extraordinarily in high or highly-variable delay + environments. + +6. Convergence Layer Considerations for Use of Underlying Protocols + + Implementation experience with the DTN architecture has revealed an + important architectural construct and interface for DTN nodes + [DBFJHP04]. Not all underlying protocols in different protocol + families provide the same exact functionality, so some additional + adaptation or augmentation on a per-protocol or per-protocol-family + + + +Cerf, et al. Informational [Page 28] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + basis may be required. This adaptation is accomplished by a set of + convergence layers placed between the bundle layer and underlying + protocols. The convergence layers manage the protocol-specific + details of interfacing with particular underlying protocols and + present a consistent interface to the bundle layer. + + The complexity of one convergence layer may vary substantially from + another, depending on the type of underlying protocol it adapts. For + example, a TCP/IP convergence layer for use in the Internet might + only have to add message boundaries to TCP streams, whereas a + convergence layer for some network where no reliable transport + protocol exists might be considerably more complex (e.g., it might + have to implement reliability, fragmentation, flow-control, etc.) if + reliable delivery is to be offered to the bundle layer. 
+ + As convergence layers implement protocols above and beyond the basic + bundle protocol specified in [BSPEC], they will be defined in their + own documents (in a fashion similar to the way encapsulations for IP + datagrams are specified on a per-underlying-protocol basis, such as + in RFC 894 [RFC894]). + +7. Summary + + The DTN architecture addresses many of the problems of heterogeneous + networks that must operate in environments subject to long delays and + discontinuous end-to-end connectivity. It is based on asynchronous + messaging and uses postal mail as a model of service classes and + delivery semantics. It accommodates many different forms of + connectivity, including scheduled, predicted, and opportunistically + connected delivery paths. It introduces a novel approach to end-to- + end reliability across frequently partitioned and unreliable + networks. It also proposes a model for securing the network + infrastructure against unauthorized access. + + It is our belief that this architecture is applicable to many + different types of challenged environments. + +8. Security Considerations + + Security is an integral concern for the design of the Delay Tolerant + Network Architecture, but its use is optional. Sections 3.6.1, 3.14, + and 4.4 of this document present some factors to consider for + securing the DTN architecture, but separate documents [DTNSOV] and + [DTNSEC] define the security architecture in much more detail. + + + + + + + +Cerf, et al. Informational [Page 29] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +9. IANA Considerations + + This document specifies the architecture for Delay Tolerant + Networking, which uses Internet-standard URIs for its Endpoint + Identifiers. URIs intended for use with DTN should be compliant with + the guidelines given in [RFC3986]. + +10. Normative References + + [RFC3986] Berners-Lee, T., Fielding, R., and L. 
Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", STD 66, RFC + 3986, January 2005. + +11. Informative References + + [IPN01] InterPlaNetary Internet Project, Internet Society IPN + Special Interest Group, http://www.ipnsig.org. + + [SB03] S. Burleigh, et al., "Delay-Tolerant Networking - An + Approach to Interplanetary Internet", IEEE Communications + Magazine, July 2003. + + [FW03] F. Warthman, "Delay-Tolerant Networks (DTNs): A Tutorial + v1.1", Warthman Associates, 2003. Available from + http://www.dtnrg.org. + + [KF03] K. Fall, "A Delay-Tolerant Network Architecture for + Challenged Internets", Proceedings SIGCOMM, Aug 2003. + + [JFP04] S. Jain, K. Fall, R. Patra, "Routing in a Delay Tolerant + Network", Proceedings SIGCOMM, Aug/Sep 2004. + + [DFS02] R. Durst, P. Feighery, K. Scott, "Why not use the + Standard Internet Suite for the Interplanetary + Internet?", MITRE White Paper, 2002. Available from + http://www.ipnsig.org/reports/TCP_IP.pdf. + + [CK74] V. Cerf, R. Kahn, "A Protocol for Packet Network + Intercommunication", IEEE Trans. on Comm., COM-22(5), May + 1974. + + [IGE00] C. Intanagonwiwat, R. Govindan, D. Estrin, "Directed + Diffusion: A Scalable and Robust Communication Paradigm + for Sensor Networks", Proceedings MobiCOM, Aug 2000. + + + + + + + +Cerf, et al. Informational [Page 30] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + [WSBL99] W. Adjie-Winoto, E. Schwartz, H. Balakrishnan, J. Lilley, + "The Design and Implementation of an Intentional Naming + System", Proc. 17th ACM SOSP, Kiawah Island, SC, Dec. + 1999. + + [CT90] D. Clark, D. Tennenhouse, "Architectural Considerations + for a New Generation of Protocols", Proceedings SIGCOMM, + 1990. + + [ISCHEMES] IANA, Uniform Resource Identifier (URI) Schemes, + http://www.iana.org/assignments/uri-schemes.html. + + [JDPF05] S. Jain, M. Demmer, R. Patra, K. Fall, "Using Redundancy + to Cope with Failures in a Delay Tolerant Network", + Proceedings SIGCOMM, 2005. 
+ + [WJMF05] Y. Wang, S. Jain, M. Martonosi, K. Fall, "Erasure Coding + Based Routing in Opportunistic Networks", Proceedings + SIGCOMM Workshop on Delay Tolerant Networks, 2005. + + [ZAZ05] W. Zhao, M. Ammar, E. Zegura, "Multicast in Delay + Tolerant Networks", Proceedings SIGCOMM Workshop on Delay + Tolerant Networks, 2005. + + [LFC05] J. Leguay, T. Friedman, V. Conan, "DTN Routing in a + Mobility Pattern Space", Proceedings SIGCOMM Workshop on + Delay Tolerant Networks, 2005. + + [AF03] J. Alonso, K. Fall, "A Linear Programming Formulation of + Flows over Time with Piecewise Constant Capacity and + Transit Times", Intel Research Technical Report IRB-TR- + 03-007, June 2003. + + [FHM03] K. Fall, W. Hong, S. Madden, "Custody Transfer for + Reliable Delivery in Delay Tolerant Networks", Intel + Research Technical Report IRB-TR-03-030, July 2003. + + [BSPEC] K. Scott, S. Burleigh, "Bundle Protocol Specification", + Work in Progress, December 2006. + + [DTNSEC] S. Symington, S. Farrell, H. Weiss, "Bundle Security + Protocol Specification", Work in Progress, October 2006. + + [DTNSOV] S. Farrell, S. Symington, H. Weiss, "Delay-Tolerant + Networking Security Overview", Work in Progress, October + 2006. + + + + + +Cerf, et al. Informational [Page 31] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + [DBFJHP04] M. Demmer, E. Brewer, K. Fall, S. Jain, M. Ho, R. Patra, + "Implementing Delay Tolerant Networking", Intel Research + Technical Report IRB-TR-04-020, Dec. 2004. + + [RFC792] Postel, J., "Internet Control Message Protocol", STD 5, + RFC 792, September 1981. + + [RFC894] Hornig, C., "A Standard for the Transmission of IP + Datagrams over Ethernet Networks", STD 41, RFC 894, April + 1 1984. + + [RFC2960] Stewart, R., Xie, Q., Morneault, K., Sharp, C., + Schwarzbauer, H., Taylor, T., Rytina, I., Kalla, M., + Zhang, L., and V. Paxson, "Stream Control Transmission + Protocol", RFC 2960, October 2000. + + [RFC4088] Black, D., McCloghrie, K., and J. 
Schoenwaelder, "Uniform + Resource Identifier (URI) Scheme for the Simple Network + Management Protocol (SNMP)", RFC 4088, June 2005. + + [S05] K. Scott, "Disruption Tolerant Networking Proxies for + On-the-Move Tactical Networks", Proc. MILCOM 2005 + (unclassified track), Oct. 2005. + + [T02] W. Thies, et al., "Searching the World Wide Web in Low- + Connectivity Communities", Proc. WWW Conference (Global + Community track), May 2002. + +12. Acknowledgments + + John Wroclawski, David Mills, Greg Miller, James P. G. Sterbenz, Joe + Touch, Steven Low, Lloyd Wood, Robert Braden, Deborah Estrin, Stephen + Farrell, Melissa Ho, Ting Liu, Mike Demmer, Jakob Ericsson, Susan + Symington, Andrei Gurtov, Avri Doria, Tom Henderson, Mark Allman, + Michael Welzl, and Craig Partridge all contributed useful thoughts + and criticisms to versions of this document. We are grateful for + their time and participation. + + This work was performed in part under DOD Contract DAA-B07-00-CC201, + DARPA AO H912; JPL Task Plan No. 80-5045, DARPA AO H870; and NASA + Contract NAS7-1407. + + + + + + + + + + +Cerf, et al. Informational [Page 32] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +Authors' Addresses + + Dr. Vinton G. Cerf + Google Corporation + Suite 384 + 13800 Coppermine Rd. + Herndon, VA 20171 + Phone: +1 (703) 234-1823 + Fax: +1 (703) 848-0727 + EMail: vint@google.com + + Scott C. Burleigh + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 179-206 + Pasadena, CA 91109-8099 + Phone: +1 (818) 393-3353 + Fax: +1 (818) 354-1075 + EMail: Scott.Burleigh@jpl.nasa.gov + + Robert C. Durst + The MITRE Corporation + 7515 Colshire Blvd., M/S H440 + McLean, VA 22102 + Phone: +1 (703) 983-7535 + Fax: +1 (703) 983-7142 + EMail: durst@mitre.org + + Dr. Kevin Fall + Intel Research, Berkeley + 2150 Shattuck Ave., #1300 + Berkeley, CA 94704 + Phone: +1 (510) 495-3014 + Fax: +1 (510) 495-3049 + EMail: kfall@intel.com + + Adrian J. 
Hooke + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 303-400 + Pasadena, CA 91109-8099 + Phone: +1 (818) 354-3063 + Fax: +1 (818) 393-3575 + EMail: Adrian.Hooke@jpl.nasa.gov + + + + + + + +Cerf, et al. Informational [Page 33] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + + Dr. Keith L. Scott + The MITRE Corporation + 7515 Colshire Blvd., M/S H440 + McLean, VA 22102 + Phone: +1 (703) 983-6547 + Fax: +1 (703) 983-7142 + EMail: kscott@mitre.org + + Leigh Torgerson + Jet Propulsion Laboratory + 4800 Oak Grove Drive + M/S: 238-412 + Pasadena, CA 91109-8099 + Phone: +1 (818) 393-0695 + Fax: +1 (818) 354-6825 + EMail: ltorgerson@jpl.nasa.gov + + Howard S. Weiss + SPARTA, Inc. + 7075 Samuel Morse Drive + Columbia, MD 21046 + Phone: +1 (410) 872-1515 x201 + Fax: +1 (410) 872-8079 + EMail: howard.weiss@sparta.com + + Please refer comments to dtn-interest@mailman.dtnrg.org. The Delay + Tolerant Networking Research Group (DTNRG) web site is located at + http://www.dtnrg.org. + + + + + + + + + + + + + + + + + + + + + + + +Cerf, et al. Informational [Page 34] + +RFC 4838 Delay-Tolerant Networking Architecture April 2007 + + +Full Copyright Statement + + Copyright (C) The IETF Trust (2007). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78, and except as set forth therein, the authors + retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND + THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF + THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. + + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + +Cerf, et al. Informational [Page 35] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt new file mode 100644 index 0000000..2a77197 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc5050.txt @@ -0,0 +1,2803 @@ + + + + + + +Network Working Group K. Scott +Request for Comments: 5050 The MITRE Corporation +Category: Experimental S. Burleigh + NASA Jet Propulsion Laboratory + November 2007 + + + Bundle Protocol Specification + +Status of This Memo + + This memo defines an Experimental Protocol for the Internet + community. It does not specify an Internet standard of any kind. 
+ Discussion and suggestions for improvement are requested. + Distribution of this memo is unlimited. + +IESG Note + + This RFC is not a candidate for any level of Internet Standard. The + IETF disclaims any knowledge of the fitness of this RFC for any + purpose and in particular notes that the decision to publish is not + based on IETF review for such things as security, congestion control, + or inappropriate interaction with deployed protocols. The RFC Editor + has chosen to publish this document at its discretion. Readers of + this document should exercise caution in evaluating its value for + implementation and deployment. See RFC 3932 for more information. + +Abstract + + This document describes the end-to-end protocol, block formats, and + abstract service description for the exchange of messages (bundles) + in Delay Tolerant Networking (DTN). + + This document was produced within the IRTF's Delay Tolerant + Networking Research Group (DTNRG) and represents the consensus of all + of the active contributors to this group. See http://www.dtnrg.org + for more information. + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 1] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . . 3 + 2. Requirements Notation . . . . . . . . . . . . . . . . . . . . 4 + 3. Service Description . . . . . . . . . . . . . . . . . . . . . 5 + 3.1. Definitions . . . . . . . . . . . . . . . . . . . . . . . 5 + 3.2. Implementation Architectures . . . . . . . . . . . . . . . 9 + 3.3. Services Offered by Bundle Protocol Agents . . . . . . . . 11 + 4. Bundle Format . . . . . . . . . . . . . . . . . . . . . . . . 11 + 4.1. Self-Delimiting Numeric Values (SDNVs) . . . . . . . . . . 12 + 4.2. Bundle Processing Control Flags . . . . . . . . . . . . . 13 + 4.3. Block Processing Control Flags . . . . . . . . . . . . . . 15 + 4.4. Endpoint IDs . . . . . . . . . . . . . . . . . . . . . . . 
16 + 4.5. Formats of Bundle Blocks . . . . . . . . . . . . . . . . . 17 + 4.5.1. Primary Bundle Block . . . . . . . . . . . . . . . . . 19 + 4.5.2. Canonical Bundle Block Format . . . . . . . . . . . . 22 + 4.5.3. Bundle Payload Block . . . . . . . . . . . . . . . . . 23 + 4.6. Extension Blocks . . . . . . . . . . . . . . . . . . . . . 24 + 4.7. Dictionary Revision . . . . . . . . . . . . . . . . . . . 24 + 5. Bundle Processing . . . . . . . . . . . . . . . . . . . . . . 24 + 5.1. Generation of Administrative Records . . . . . . . . . . . 25 + 5.2. Bundle Transmission . . . . . . . . . . . . . . . . . . . 26 + 5.3. Bundle Dispatching . . . . . . . . . . . . . . . . . . . . 26 + 5.4. Bundle Forwarding . . . . . . . . . . . . . . . . . . . . 27 + 5.4.1. Forwarding Contraindicated . . . . . . . . . . . . . . 28 + 5.4.2. Forwarding Failed . . . . . . . . . . . . . . . . . . 29 + 5.5. Bundle Expiration . . . . . . . . . . . . . . . . . . . . 29 + 5.6. Bundle Reception . . . . . . . . . . . . . . . . . . . . . 30 + 5.7. Local Bundle Delivery . . . . . . . . . . . . . . . . . . 31 + 5.8. Bundle Fragmentation . . . . . . . . . . . . . . . . . . . 32 + 5.9. Application Data Unit Reassembly . . . . . . . . . . . . . 33 + 5.10. Custody Transfer . . . . . . . . . . . . . . . . . . . . . 34 + 5.10.1. Custody Acceptance . . . . . . . . . . . . . . . . . . 34 + 5.10.2. Custody Release . . . . . . . . . . . . . . . . . . . 35 + 5.11. Custody Transfer Success . . . . . . . . . . . . . . . . . 35 + 5.12. Custody Transfer Failure . . . . . . . . . . . . . . . . . 35 + 5.13. Bundle Deletion . . . . . . . . . . . . . . . . . . . . . 36 + 5.14. Discarding a Bundle . . . . . . . . . . . . . . . . . . . 36 + 5.15. Canceling a Transmission . . . . . . . . . . . . . . . . . 36 + 5.16. Polling . . . . . . . . . . . . . . . . . . . . . . . . . 36 + 6. Administrative Record Processing . . . . . . . . . . . . . . . 37 + 6.1. Administrative Records . . . . . . . . . . . . . . . . . . 37 + 6.1.1. 
Bundle Status Reports . . . . . . . . . . . . . . . . 38 + 6.1.2. Custody Signals . . . . . . . . . . . . . . . . . . . 41 + 6.2. Generation of Administrative Records . . . . . . . . . . . 44 + 6.3. Reception of Custody Signals . . . . . . . . . . . . . . . 44 + + + + + +Scott & Burleigh Experimental [Page 2] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 7. Services Required of the Convergence Layer . . . . . . . . . . 44 + 7.1. The Convergence Layer . . . . . . . . . . . . . . . . . . 44 + 7.2. Summary of Convergence Layer Services . . . . . . . . . . 45 + 8. Security Considerations . . . . . . . . . . . . . . . . . . . 45 + 9. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 47 + 10. References . . . . . . . . . . . . . . . . . . . . . . . . . . 47 + 10.1. Normative References . . . . . . . . . . . . . . . . . . . 47 + 10.2. Informative References . . . . . . . . . . . . . . . . . . 47 + Appendix A. Contributors . . . . . . . . . . . . . . . . . . . . 49 + Appendix B. Comments . . . . . . . . . . . . . . . . . . . . . . 49 + +1. Introduction + + This document describes version 6 of the Delay Tolerant Networking + (DTN) "bundle" protocol (BP). Delay Tolerant Networking is an end- + to-end architecture providing communications in and/or through highly + stressed environments. Stressed networking environments include + those with intermittent connectivity, large and/or variable delays, + and high bit error rates. To provide its services, BP sits at the + application layer of some number of constituent internets, forming a + store-and-forward overlay network. 
Key capabilities of BP include: + + o Custody-based retransmission + + o Ability to cope with intermittent connectivity + + o Ability to take advantage of scheduled, predicted, and + opportunistic connectivity (in addition to continuous + connectivity) + + o Late binding of overlay network endpoint identifiers to + constituent internet addresses + + For descriptions of these capabilities and the rationale for the DTN + architecture, see [ARCH] and [SIGC]. [TUT] contains a tutorial-level + overview of DTN concepts. + + This is an experimental protocol, produced within the IRTF's Delay + Tolerant Networking Research Group (DTNRG) and represents the + consensus of all of the active contributors to this group. If this + protocol is used on the Internet, IETF standard protocols for + security and congestion control should be used. + + BP's location within the standard protocol stack is as shown in + Figure 1. BP uses the "native" internet protocols for communications + within a given internet. Note that "internet" in the preceding is + used in a general sense and does not necessarily refer to TCP/IP. + The interface between the common bundle protocol and a specific + + + +Scott & Burleigh Experimental [Page 3] + +RFC 5050 Bundle Protocol Specification November 2007 + + + internetwork protocol suite is termed a "convergence layer adapter". + Figure 1 shows three distinct transport and network protocols + (denoted T1/N1, T2/N2, and T3/N3). 
+ + +-----------+ +-----------+ + | BP app | | BP app | + +---------v-| +->>>>>>>>>>v-+ +->>>>>>>>>>v-+ +-^---------+ + | BP v | | ^ BP v | | ^ BP v | | ^ BP | + +---------v-+ +-^---------v-+ +-^---------v-+ +-^---------+ + | Trans1 v | + ^ T1/T2 v | + ^ T2/T3 v | | ^ Trans3 | + +---------v-+ +-^---------v-+ +-^---------v + +-^---------+ + | Net1 v | | ^ N1/N2 v | | ^ N2/N3 v | | ^ Net3 | + +---------v-+ +-^---------v + +-^---------v-+ +-^---------+ + | >>>>>>>>^ >>>>>>>>>>^ >>>>>>>>^ | + +-----------+ +-------------+ +-------------+ +-----------+ + | | | | + |<--- An internet --->| |<--- An internet --->| + | | | | + + Figure 1: The Bundle Protocol Sits at + the Application Layer of the Internet Model + + This document describes the format of the protocol data units (called + bundles) passed between entities participating in BP communications. + The entities are referred to as "bundle nodes". This document does + not address: + + o Operations in the convergence layer adapters that bundle nodes use + to transport data through specific types of internets. (However, + the document does discuss the services that must be provided by + each adapter at the convergence layer.) + + o The bundle routing algorithm. + + o Mechanisms for populating the routing or forwarding information + bases of bundle nodes. + +2. Requirements Notation + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC2119]. + + + + + + + + + +Scott & Burleigh Experimental [Page 4] + +RFC 5050 Bundle Protocol Specification November 2007 + + +3. Service Description + +3.1. Definitions + + Bundle - A bundle is a protocol data unit of the DTN bundle + protocol. Each bundle comprises a sequence of two or more + "blocks" of protocol data, which serve various purposes. 
Multiple + instances of the same bundle (the same unit of DTN protocol data) + might exist concurrently in different parts of a network -- + possibly in different representations -- in the memory local to + one or more bundle nodes and/or in transit between nodes. In the + context of the operation of a bundle node, a bundle is an instance + of some bundle in the network that is in that node's local memory. + + Bundle payload - A bundle payload (or simply "payload") is the + application data whose conveyance to the bundle's destination is + the purpose for the transmission of a given bundle. The terms + "bundle content", "bundle payload", and "payload" are used + interchangeably in this document. The "nominal" payload for a + bundle forwarded in response to a bundle transmission request is + the application data unit whose location is provided as a + parameter to that request. The nominal payload for a bundle + forwarded in response to reception of that bundle is the payload + of the received bundle. + + Fragment - A fragment is a bundle whose payload block contains a + fragmentary payload. A fragmentary payload is either the first N + bytes or the last N bytes of some other payload -- either a + nominal payload or a fragmentary payload -- of length M, such that + 0 < N < M. + + Bundle node - A bundle node (or, in the context of this document, + simply a "node") is any entity that can send and/or receive + bundles. In the most familiar case, a bundle node is instantiated + as a single process running on a general-purpose computer, but in + general the definition is meant to be broader: a bundle node might + alternatively be a thread, an object in an object-oriented + operating system, a special-purpose hardware device, etc. Each + bundle node has three conceptual components, defined below: a + "bundle protocol agent", a set of zero or more "convergence layer + adapters", and an "application agent". 
+ + Bundle protocol agent - The bundle protocol agent (BPA) of a node is + the node component that offers the BP services and executes the + procedures of the bundle protocol. The manner in which it does so + is wholly an implementation matter. For example, BPA + functionality might be coded into each node individually; it might + be implemented as a shared library that is used in common by any + + + +Scott & Burleigh Experimental [Page 5] + +RFC 5050 Bundle Protocol Specification November 2007 + + + number of bundle nodes on a single computer; it might be + implemented as a daemon whose services are invoked via inter- + process or network communication by any number of bundle nodes on + one or more computers; it might be implemented in hardware. + + Convergence layer adapters - A convergence layer adapter (CLA) sends + and receives bundles on behalf of the BPA, utilizing the services + of some 'native' internet protocol that is supported in one of the + internets within which the node is functionally located. The + manner in which a CLA sends and receives bundles is wholly an + implementation matter, exactly as described for the BPA. + + Application agent - The application agent (AA) of a node is the node + component that utilizes the BP services to effect communication + for some purpose. The application agent in turn has two elements, + an administrative element and an application-specific element. + The application-specific element of an AA constructs, requests + transmission of, accepts delivery of, and processes application- + specific application data units; the only interface between the + BPA and the application-specific element of the AA is the BP + service interface. The administrative element of an AA constructs + and requests transmission of administrative records (status + reports and custody signals), and it accepts delivery of and + processes any custody signals that the node receives. 
In addition + to the BP service interface, there is a (conceptual) private + control interface between the BPA and the administrative element + of the AA that enables each to direct the other to take action + under specific circumstances. In the case of a node that serves + simply as a "router" in the overlay network, the AA may have no + application-specific element at all. The application-specific + elements of other nodes' AAs may perform arbitrarily complex + application functions, perhaps even offering multiplexed DTN + communication services to a number of other applications. As with + the BPA, the manner in which the AA performs its functions is + wholly an implementation matter; in particular, the administrative + element of an AA might be built into the library or daemon or + hardware that implements the BPA, and the application-specific + element of an AA might be implemented either in software or in + hardware. + + Bundle endpoint - A bundle endpoint (or simply "endpoint") is a set + of zero or more bundle nodes that all identify themselves for BP + purposes by some single text string, called a "bundle endpoint ID" + (or, in this document, simply "endpoint ID"; endpoint IDs are + described in detail in Section 4.4 below). The special case of an + endpoint that never contains more than one node is termed a + "singleton" endpoint; every bundle node must be a member of at + least one singleton endpoint. Singletons are the most familiar + + + +Scott & Burleigh Experimental [Page 6] + +RFC 5050 Bundle Protocol Specification November 2007 + + + sort of endpoint, but in general the endpoint notion is meant to + be broader. For example, the nodes in a sensor network might + constitute a set of bundle nodes that identify themselves by a + single common endpoint ID and thus form a single bundle endpoint. + *Note* too that a given bundle node might identify itself by + multiple endpoint IDs and thus be a member of multiple bundle + endpoints. 
+ + Forwarding - When the bundle protocol agent of a node determines + that a bundle must be "forwarded" to an endpoint, it causes the + bundle to be sent to all of the nodes that the bundle protocol + agent currently believes are in the "minimum reception group" of + that endpoint. The minimum reception group of an endpoint may be + any one of the following: (a) ALL of the nodes registered in an + endpoint that is permitted to contain multiple nodes (in which + case forwarding to the endpoint is functionally similar to + "multicast" operations in the Internet, though possibly very + different in implementation); (b) ANY N of the nodes registered in + an endpoint that is permitted to contain multiple nodes, where N + is in the range from zero to the cardinality of the endpoint (in + which case forwarding to the endpoint is functionally similar to + "anycast" operations in the Internet); or (c) THE SOLE NODE + registered in a singleton endpoint (in which case forwarding to + the endpoint is functionally similar to "unicast" operations in + the Internet). The nature of the minimum reception group for a + given endpoint can be determined from the endpoint's ID (again, + see Section 4.4 below): for some endpoint ID "schemes", the nature + of the minimum reception group is fixed - in a manner that is + defined by the scheme - for all endpoints identified under the + scheme; for other schemes, the nature of the minimum reception + group is indicated by some lexical feature of the "scheme-specific + part" of the endpoint ID, in a manner that is defined by the + scheme. + + Registration - A registration is the state machine characterizing a + given node's membership in a given endpoint. Any number of + registrations may be concurrently associated with a given + endpoint, and any number of registrations may be concurrently + associated with a given node. Any single registration must at any + time be in one of two states: Active or Passive. 
A registration + always has an associated "delivery failure action", the action + that is to be taken when a bundle that is "deliverable" (see + below) subject to that registration is received at a time when the + registration is in the Passive state. Delivery failure action + must be one of the following: + + * defer "delivery" (see below) of the bundle subject to this + registration until (a) this bundle is the least recently + + + +Scott & Burleigh Experimental [Page 7] + +RFC 5050 Bundle Protocol Specification November 2007 + + + received of all bundles currently deliverable subject to this + registration and (b) either the registration is polled or else + the registration is in the Active state; or + + * "abandon" (see below) delivery of the bundle subject to this + registration. + + An additional implementation-specific delivery deferral procedure + may optionally be associated with the registration. While the + state of a registration is Active, reception of a bundle that is + deliverable subject to this registration must cause the bundle to + be delivered automatically as soon as it is the least recently + received bundle that is currently deliverable subject to the + registration. While the state of a registration is Passive, + reception of a bundle that is deliverable subject to this + registration must cause delivery of the bundle to be abandoned or + deferred as mandated by the registration's current delivery + failure action; in the latter case, any additional delivery + deferral procedure associated with the registration must also be + performed. + + Delivery - Upon reception, the processing of a bundle that has been + sent to a given node depends on whether or not the receiving node + is registered in the bundle's destination endpoint. 
If it is, and + if the payload of the bundle is non-fragmentary (possibly as a + result of successful payload reassembly from fragmentary payloads, + including the original payload of the received bundle), then the + bundle is normally "delivered" to the node's application agent + subject to the registration characterizing the node's membership + in the destination endpoint. A bundle is considered to have been + delivered at a node subject to a registration as soon as the + application data unit that is the payload of the bundle, together + with the value of the bundle's "Acknowledgement by application is + requested" flag and any other relevant metadata (an implementation + matter), has been presented to the node's application agent in a + manner consistent with the state of that registration and, as + applicable, the registration's delivery failure action. + + Deliverability, Abandonment - A bundle is considered "deliverable" + subject to a registration if and only if (a) the bundle's + destination endpoint is the endpoint with which the registration + is associated, (b) the bundle has not yet been delivered subject + to this registration, and (c) delivery of the bundle subject to + this registration has not been abandoned. To "abandon" delivery + of a bundle subject to a registration is simply to declare it no + longer deliverable subject to that registration; normally only + registrations' registered delivery failure actions cause + deliveries to be abandoned. + + + +Scott & Burleigh Experimental [Page 8] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Deletion, Discarding - A bundle protocol agent "discards" a bundle + by simply ceasing all operations on the bundle and functionally + erasing all references to it; the specific procedures by which + this is accomplished are an implementation matter. Bundles are + discarded silently; i.e., the discarding of a bundle does not + result in generation of an administrative record. 
"Retention + constraints" are elements of the bundle state that prevent a + bundle from being discarded; a bundle cannot be discarded while it + has any retention constraints. A bundle protocol agent "deletes" + a bundle in response to some anomalous condition by notifying the + bundle's report-to endpoint of the deletion (provided such + notification is warranted; see Section 5.13 for details) and then + arbitrarily removing all of the bundle's retention constraints, + enabling the bundle to be discarded. + + Transmission - A transmission is a sustained effort by a node's + bundle protocol agent to cause a bundle to be sent to all nodes in + the minimum reception group of some endpoint (which may be the + bundle's destination or may be some intermediate forwarding + endpoint) in response to a transmission request issued by the + node's application agent. Any number of transmissions may be + concurrently undertaken by the bundle protocol agent of a given + node. + + Custody - To "accept custody" upon forwarding a bundle is to commit + to retaining a copy of the bundle -- possibly re-forwarding the + bundle when necessary -- until custody of that bundle is + "released". Custody of a bundle whose destination is a singleton + endpoint is released when either (a) notification is received that + some other node has accepted custody of the same bundle; (b) + notification is received that the bundle has been delivered at the + (sole) node registered in the bundle's destination endpoint; or + (c) the bundle is explicitly deleted for some reason, such as + lifetime expiration. The condition(s) under which custody of a + bundle whose destination is not a singleton endpoint may be + released are not defined in this specification. To "refuse + custody" of a bundle is to decide not to accept custody of the + bundle. A "custodial node" of a bundle is a node that has + accepted custody of the bundle and has not yet released that + custody. 
A "custodian" of a bundle is a singleton endpoint whose + sole member is one of the bundle's custodial nodes. + +3.2. Implementation Architectures + + The above definitions are intended to enable the bundle protocol's + operations to be specified in a manner that minimizes bias toward any + particular implementation architecture. To illustrate the range of + interoperable implementation models that might conform to this + + + +Scott & Burleigh Experimental [Page 9] + +RFC 5050 Bundle Protocol Specification November 2007 + + + specification, four example architectures are briefly described + below. + + 1. Bundle protocol application server + + A single bundle protocol application server, constituting a + single bundle node, runs as a daemon process on each computer. + The daemon's functionality includes all functions of the bundle + protocol agent, all convergence layer adapters, and both the + administrative and application-specific elements of the + application agent. The application-specific element of the + application agent functions as a server, offering bundle protocol + service over a local area network: it responds to remote + procedure calls from application processes (on the same computer + and/or remote computers) that need to communicate via the bundle + protocol. The server supports its clients by creating a new + (conceptual) node for each one and registering each such node in + a client-specified endpoint. The conceptual nodes managed by the + server function as clients' bundle protocol service access + points. + + 2. Peer application nodes + + Any number of bundle protocol application processes, each one + constituting a single bundle node, run in ad-hoc fashion on each + computer. The functionality of the bundle protocol agent, all + convergence layer adapters, and the administrative element of the + application agent is provided by a library to which each node + process is dynamically linked at run time. 
The application- + specific element of each node's application agent is node- + specific application code. + + 3. Sensor network nodes + + Each node of the sensor network is the self-contained + implementation of a single bundle node. All functions of the + bundle protocol agent, all convergence layer adapters, and the + administrative element of the application agent are implemented + in simplified form in Application-Specific Integrated Circuits + (ASICs), while the application-specific element of each node's + application agent is implemented in a programmable + microcontroller. Forwarding is rudimentary: all bundles are + forwarded on a hard-coded default route. + + + + + + + + +Scott & Burleigh Experimental [Page 10] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 4. Dedicated bundle router + + Each computer constitutes a single bundle node that functions + solely as a high-performance bundle forwarder. Many standard + functions of the bundle protocol agent, the convergence layer + adapters, and the administrative element of the application agent + are implemented in ASICs, but some functions are implemented in a + high-speed processor to enable reprogramming as necessary. The + node's application agent has no application-specific element. + Substantial non-volatile storage resources are provided, and + arbitrarily complex forwarding algorithms are supported. + +3.3. Services Offered by Bundle Protocol Agents + + The bundle protocol agent of each node is expected to provide the + following services to the node's application agent: + + o commencing a registration (registering a node in an endpoint); + + o terminating a registration; + + o switching a registration between Active and Passive states; + + o transmitting a bundle to an identified bundle endpoint; + + o canceling a transmission; + + o polling a registration that is in the Passive state; + + o delivering a received bundle. + +4. 
Bundle Format + + Each bundle shall be a concatenated sequence of at least two block + structures. The first block in the sequence must be a primary bundle + block, and no bundle may have more than one primary bundle block. + Additional bundle protocol blocks of other types may follow the + primary block to support extensions to the bundle protocol, such as + the Bundle Security Protocol [BSP]. At most one of the blocks in the + sequence may be a payload block. The last block in the sequence must + have the "last block" flag (in its block processing control flags) + set to 1; for every other block in the bundle after the primary + block, this flag must be set to zero. + + + + + + + + +Scott & Burleigh Experimental [Page 11] + +RFC 5050 Bundle Protocol Specification November 2007 + + +4.1. Self-Delimiting Numeric Values (SDNVs) + + The design of the bundle protocol attempts to reconcile minimal + consumption of transmission bandwidth with: + + o extensibility to address requirements not yet identified, and + + o scalability across a wide range of network scales and payload + sizes. + + A key strategic element in the design is the use of self-delimiting + numeric values (SDNVs). The SDNV encoding scheme is closely adapted + from the Abstract Syntax Notation One Basic Encoding Rules for + subidentifiers within an object identifier value [ASN1]. An SDNV is + a numeric value encoded in N octets, the last of which has its most + significant bit (MSB) set to zero; the MSB of every other octet in + the SDNV must be set to 1. The value encoded in an SDNV is the + unsigned binary number obtained by concatenating into a single bit + string the 7 least significant bits of each octet of the SDNV. + + The following examples illustrate the encoding scheme for various + hexadecimal values. 
+ + 0xABC : 1010 1011 1100 + is encoded as + {1 00 10101} {0 0111100} + = 10010101 00111100 + + 0x1234 : 0001 0010 0011 0100 + = 1 0010 0011 0100 + is encoded as + {1 0 100100} {0 0110100} + = 10100100 00110100 + + 0x4234 : 0100 0010 0011 0100 + = 100 0010 0011 0100 + is encoded as + {1 000000 1} {1 0000100} {0 0110100} + = 10000001 10000100 00110100 + + 0x7F : 0111 1111 + = 111 1111 + is encoded as + {0 1111111} + = 01111111 + + Figure 2: SDNV Example + + + + +Scott & Burleigh Experimental [Page 12] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Note: Care must be taken to make sure that the value to be encoded is + (in concept) padded with high-order zero bits to make its bitwise + length a multiple of 7 before encoding. Also note that, while there + is no theoretical limit on the size of an SDNV field, the overhead of + the SDNV scheme is 1:7, i.e., one bit of overhead for every 7 bits of + actual data to be encoded. Thus, a 7-octet value (a 56-bit quantity + with no leading zeroes) would be encoded in an 8-octet SDNV; an + 8-octet value (a 64-bit quantity with no leading zeroes) would be + encoded in a 10-octet SDNV (one octet containing the high-order bit + of the value padded with six leading zero bits, followed by nine + octets containing the remaining 63 bits of the value). 152 bits of + overhead would be consumed in encoding a 1024-bit RSA encryption key + directly in an SDNV. In general, an N-bit quantity with no leading + zeroes is encoded in an SDNV occupying ceil(N/7) octets, where ceil + is the integer ceiling function. + + Implementations of the bundle protocol may handle as an invalid + numeric value any SDNV that encodes an integer that is larger than + (2^64 - 1). + + An SDNV can be used to represent both very large and very small + integer values. However, SDNV is clearly not the best way to + represent every numeric value. 
For example, an SDNV is a poor way to + represent an integer whose value typically falls in the range 128 to + 255. In general, though, we believe that SDNV representation of + numeric values in bundle blocks yields the smallest block sizes + without sacrificing scalability. + +4.2. Bundle Processing Control Flags + + The bundle processing control flags field in the primary bundle block + of each bundle is an SDNV; the value encoded in this SDNV is a string + of bits used to invoke selected bundle processing control features. + The significance of the value in each currently defined position of + this bit string is described here. Note that in the figure and + descriptions, the bit label numbers denote position (from least + significant ('0') to most significant) within the decoded bit string, + and not within the representation of the bits on the wire. This is + why the descriptions in this section and the next do not follow + standard RFC conventions with bit 0 on the left; if fields are added + in the future, the SDNV will grow to the left, and using this + representation allows the references here to remain valid. + + + + + + + + + +Scott & Burleigh Experimental [Page 13] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 2 1 0 + 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |Status Report|Class of Svc.| General | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Figure 3: Bundle Processing Control Flags Bit Layout + + The bits in positions 0 through 6 are flags that characterize the + bundle as follows: + + 0 -- Bundle is a fragment. + + 1 -- Application data unit is an administrative record. + + 2 -- Bundle must not be fragmented. + + 3 -- Custody transfer is requested. + + 4 -- Destination endpoint is a singleton. + + 5 -- Acknowledgement by application is requested. + + 6 -- Reserved for future use. + + The bits in positions 7 through 13 are used to indicate the bundle's + class of service. 
The bits in positions 8 and 7 constitute a two-bit + priority field indicating the bundle's priority, with higher values + being of higher priority: 00 = bulk, 01 = normal, 10 = expedited, 11 + is reserved for future use. Within this field, bit 8 is the most + significant bit. The bits in positions 9 through 13 are reserved for + future use. + + The bits in positions 14 through 20 are status report request flags. + These flags are used to request status reports as follows: + + 14 -- Request reporting of bundle reception. + + 15 -- Request reporting of custody acceptance. + + 16 -- Request reporting of bundle forwarding. + + 17 -- Request reporting of bundle delivery. + + 18 -- Request reporting of bundle deletion. + + 19 -- Reserved for future use. + + + + +Scott & Burleigh Experimental [Page 14] + +RFC 5050 Bundle Protocol Specification November 2007 + + + 20 -- Reserved for future use. + + If the bundle processing control flags indicate that the bundle's + application data unit is an administrative record, then the custody + transfer requested flag must be zero and all status report request + flags must be zero. If the custody transfer requested flag is 1, + then the sending node requests that the receiving node accept custody + of the bundle. If the bundle's source endpoint ID is "dtn:none" (see + below), then the bundle is not uniquely identifiable and all bundle + protocol features that rely on bundle identity must therefore be + disabled: the bundle's custody transfer requested flag must be zero, + the "Bundle must not be fragmented" flag must be 1, and all status + report request flags must be zero. + +4.3. Block Processing Control Flags + + The block processing control flags field in every block other than + the primary bundle block is an SDNV; the value encoded in this SDNV + is a string of bits used to invoke selected block processing control + features. 
The significance of the values in all currently defined + positions of this bit string, in order from least significant + position in the decoded bit string (labeled '0') to most significant + (labeled '6'), is described here. + + 0 + 6 5 4 3 2 1 0 + +-+-+-+-+-+-+-+ + | Flags | + +-+-+-+-+-+-+-+ + + Figure 4: Block Processing Control Flags Bit Layout + + 0 - Block must be replicated in every fragment. + + 1 - Transmit status report if block can't be processed. + + 2 - Delete bundle if block can't be processed. + + 3 - Last block. + + 4 - Discard block if it can't be processed. + + 5 - Block was forwarded without being processed. + + 6 - Block contains an EID-reference field. + + + + + + +Scott & Burleigh Experimental [Page 15] + +RFC 5050 Bundle Protocol Specification November 2007 + + + For each bundle whose primary block's bundle processing control flags + (see above) indicate that the bundle's application data unit is an + administrative record, the "Transmit status report if block can't be + processed" flag in the block processing flags field of every other + block in the bundle must be zero. + + The 'Block must be replicated in every fragment' bit in the block + processing flags must be set to zero on all blocks that follow the + payload block. + +4.4. Endpoint IDs + + The destinations of bundles are bundle endpoints, identified by text + strings termed "endpoint IDs" (see Section 3.1). Each endpoint ID + conveyed in any bundle block takes the form of a Uniform Resource + Identifier (URI; [URI]). As such, each endpoint ID can be + characterized as having this general structure: + + < scheme name > : < scheme-specific part, or "SSP" > + + As used for the purposes of the bundle protocol, neither the length + of a scheme name nor the length of an SSP may exceed 1023 bytes. + + Bundle blocks cite a number of endpoint IDs for various purposes of + the bundle protocol. 
Many, though not necessarily all, of the + endpoint IDs referred to in the blocks of a given bundle are conveyed + in the "dictionary" byte array in the bundle's primary block. This + array is simply the concatenation of any number of null-terminated + scheme names and SSPs. + + "Endpoint ID references" are used to cite endpoint IDs that are + contained in the dictionary; all endpoint ID citations in the primary + bundle block are endpoint ID references, and other bundle blocks may + contain endpoint ID references as well. Each endpoint ID reference + is an ordered pair of SDNVs: + + o The first SDNV contains the offset within the dictionary of the + first character of the referenced endpoint ID's scheme name. + + o The second SDNV contains the offset within the dictionary of the + first character of the referenced endpoint ID's SSP. + + This encoding enables a degree of block compression: when the source + and report-to of a bundle are the same endpoint, for example, the + text of that endpoint's ID may be cited twice yet appear only once in + the dictionary. + + + + + +Scott & Burleigh Experimental [Page 16] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The scheme identified by the < scheme name > in an endpoint ID is a + set of syntactic and semantic rules that fully explain how to parse + and interpret the SSP. The set of allowable schemes is effectively + unlimited. Any scheme conforming to [URIREG] may be used in a bundle + protocol endpoint ID. In addition, a single additional scheme is + defined by the present document: + + o The "dtn" scheme, which is used at minimum in the representation + of the null endpoint ID "dtn:none". The forwarding of a bundle to + the null endpoint is never contraindicated, and the minimum + reception group for the null endpoint is the empty set. 
+ + Note that, although the endpoint IDs conveyed in bundle blocks are + expressed as URIs, implementations of the BP service interface may + support expression of endpoint IDs in some internationalized manner + (e.g., Internationalized Resource Identifiers (IRIs); see [RFC3987]). + +4.5. Formats of Bundle Blocks + + This section describes the formats of the primary block and payload + block. Rules for processing these blocks appear in Section 5 of this + document. + + Note that supplementary DTN protocol specifications (including, but + not restricted to, the Bundle Security Protocol [BSP]) may require + that BP implementations conforming to those protocols construct and + process additional blocks. + + The format of the two basic BP blocks is shown in Figure 5 below. + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 17] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Primary Bundle Block + +----------------+----------------+----------------+----------------+ + | Version | Proc. 
Flags (*) | + +----------------+----------------+----------------+----------------+ + | Block length (*) | + +----------------+----------------+---------------------------------+ + | Destination scheme offset (*) | Destination SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Source scheme offset (*) | Source SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Report-to scheme offset (*) | Report-to SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Custodian scheme offset (*) | Custodian SSP offset (*) | + +----------------+----------------+----------------+----------------+ + | Creation Timestamp time (*) | + +---------------------------------+---------------------------------+ + | Creation Timestamp sequence number (*) | + +---------------------------------+---------------------------------+ + | Lifetime (*) | + +----------------+----------------+----------------+----------------+ + | Dictionary length (*) | + +----------------+----------------+----------------+----------------+ + | Dictionary byte array (variable) | + +----------------+----------------+---------------------------------+ + | [Fragment offset (*)] | + +----------------+----------------+---------------------------------+ + | [Total application data unit length (*)] | + +----------------+----------------+---------------------------------+ + + + Bundle Payload Block + +----------------+----------------+----------------+----------------+ + | Block type | Proc. Flags (*)| Block length(*) | + +----------------+----------------+----------------+----------------+ + / Bundle Payload (variable) / + +-------------------------------------------------------------------+ + + Figure 5: Bundle Block Formats + + (*) Notes: + + The bundle processing control ("Proc.") flags field in the Primary + Bundle Block is an SDNV and is therefore variable length. 
A three- + octet SDNV is shown here for convenience in representation. + + The block length field of the Primary Bundle Block is an SDNV and is + therefore variable length. A four-octet SDNV is shown here for + convenience in representation. + + + +Scott & Burleigh Experimental [Page 18] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Each of the eight offset fields in the Primary Bundle Block is an + SDNV and is therefore variable length. Two-octet SDNVs are shown + here for convenience in representation. + + The Creation Timestamp time field in the Primary Bundle Block is an + SDNV and is therefore variable length. A four-octet SDNV is shown + here for convenience in representation. + + The Creation Timestamp sequence number field in the Primary Bundle + Block is an SDNV and is therefore variable length. A four-octet SDNV + is shown here for convenience in representation. + + The Lifetime field in the Primary Bundle Block is an SDNV and is + therefore variable length. A four-octet SDNV is shown here for + convenience in representation. + + The dictionary length field of the Primary Bundle Block is an SDNV + and is therefore variable length. A four-octet SDNV is shown here + for convenience in representation. + + The fragment offset field of the Primary Bundle Block is present only + if the Fragment flag in the block's processing flags byte is set to + 1. It is an SDNV and is therefore variable length; a four-octet SDNV + is shown here for convenience in representation. + + The total application data unit length field of the Primary Bundle + Block is present only if the Fragment flag in the block's processing + flags byte is set to 1. It is an SDNV and is therefore variable + length; a four-octet SDNV is shown here for convenience in + representation. + + The block processing control ("Proc.") flags field of the Payload + Block is an SDNV and is therefore variable length. A one-octet SDNV + is shown here for convenience in representation. 
+ + The block length field of the Payload Block is an SDNV and is + therefore variable length. A two-octet SDNV is shown here for + convenience in representation. + +4.5.1. Primary Bundle Block + + The primary bundle block contains the basic information needed to + route bundles to their destinations. The fields of the primary + bundle block are: + + + + + + + +Scott & Burleigh Experimental [Page 19] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Version: A 1-byte field indicating the version of the bundle + protocol that constructed this block. The present document + describes version 0x06 of the bundle protocol. + + Bundle Processing Control Flags: The Bundle Processing Control + Flags field is an SDNV that contains the bundle processing control + flags discussed in Section 4.2 above. + + Block Length: The Block Length field is an SDNV that contains the + aggregate length of all remaining fields of the block. + + Destination Scheme Offset: The Destination Scheme Offset field + contains the offset within the dictionary byte array of the scheme + name of the endpoint ID of the bundle's destination, i.e., the + endpoint containing the node(s) at which the bundle is to be + delivered. + + Destination SSP Offset: The Destination SSP Offset field contains + the offset within the dictionary byte array of the scheme-specific + part of the endpoint ID of the bundle's destination. + + Source Scheme Offset: The Source Scheme Offset field contains the + offset within the dictionary byte array of the scheme name of the + endpoint ID of the bundle's nominal source, i.e., the endpoint + nominally containing the node from which the bundle was initially + transmitted. + + Source SSP Offset: The Source SSP Offset field contains the offset + within the dictionary byte array of the scheme-specific part of + the endpoint ID of the bundle's nominal source. 
+ + Report-to Scheme Offset: The Report-to Scheme Offset field contains + the offset within the dictionary byte array of the scheme name of + the ID of the endpoint to which status reports pertaining to the + forwarding and delivery of this bundle are to be transmitted. + + Report-to SSP Offset: The Report-to SSP Offset field contains the + offset within the dictionary byte array of the scheme-specific + part of the ID of the endpoint to which status reports pertaining + to the forwarding and delivery of this bundle are to be + transmitted. + + Custodian Scheme Offset: The "current custodian endpoint ID" of a + primary bundle block identifies an endpoint whose membership + includes the node that most recently accepted custody of the + bundle upon forwarding this bundle. The Custodian Scheme Offset + field contains the offset within the dictionary byte array of the + scheme name of the current custodian endpoint ID. + + + +Scott & Burleigh Experimental [Page 20] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Custodian SSP Offset: The Custodian SSP Offset field contains the + offset within the dictionary byte array of the scheme-specific + part of the current custodian endpoint ID. + + Creation Timestamp: The creation timestamp is a pair of SDNVs that, + together with the source endpoint ID and (if the bundle is a + fragment) the fragment offset and payload length, serve to + identify the bundle. The first SDNV of the timestamp is the + bundle's creation time, while the second is the bundle's creation + timestamp sequence number. Bundle creation time is the time -- + expressed in seconds since the start of the year 2000, on the + Coordinated Universal Time (UTC) scale [UTC] -- at which the + transmission request was received that resulted in the creation of + the bundle. 
Sequence count is the latest value (as of the time at + which that transmission request was received) of a monotonically + increasing positive integer counter managed by the source node's + bundle protocol agent that may be reset to zero whenever the + current time advances by one second. A source Bundle Protocol + Agent must never create two distinct bundles with the same source + endpoint ID and bundle creation timestamp. The combination of + source endpoint ID and bundle creation timestamp therefore serves + to identify a single transmission request, enabling it to be + acknowledged by the receiving application (provided the source + endpoint ID is not "dtn:none"). + + Lifetime: The lifetime field is an SDNV that indicates the time at + which the bundle's payload will no longer be useful, encoded as a + number of seconds past the creation time. When the current time + is greater than the creation time plus the lifetime, bundle nodes + need no longer retain or forward the bundle; the bundle may be + deleted from the network. + + Dictionary Length: The Dictionary Length field is an SDNV that + contains the length of the dictionary byte array. + + Dictionary: The Dictionary field is an array of bytes formed by + concatenating the null-terminated scheme names and SSPs of all + endpoint IDs referenced by any fields in this Primary Block + together with, potentially, other endpoint IDs referenced by + fields in other TBD DTN protocol blocks. Its length is given by + the value of the Dictionary Length field. + + Fragment Offset: If the Bundle Processing Control Flags of this + Primary block indicate that the bundle is a fragment, then the + Fragment Offset field is an SDNV indicating the offset from the + start of the original application data unit at which the bytes + comprising the payload of this bundle were located. If not, then + the Fragment Offset field is omitted from the block. 
+ + + +Scott & Burleigh Experimental [Page 21] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Total Application Data Unit Length: If the Bundle Processing + Control Flags of this Primary block indicate that the bundle is a + fragment, then the Total Application Data Unit Length field is an + SDNV indicating the total length of the original application data + unit of which this bundle's payload is a part. If not, then the + Total Application Data Unit Length field is omitted from the + block. + +4.5.2. Canonical Bundle Block Format + + Every bundle block of every type other than the primary bundle block + comprises the following fields, in this order: + + o Block type code, expressed as an 8-bit unsigned binary integer. + Bundle block type code 1 indicates that the block is a bundle + payload block. Block type codes 192 through 255 are not defined + in this specification and are available for private and/or + experimental use. All other values of the block type code are + reserved for future use. + + o Block processing control flags, an unsigned integer expressed as + an SDNV. The individual bits of this integer are used to invoke + selected block processing control features. + + o Block EID reference count and EID references (optional). If and + only if the block references EID elements in the primary block's + dictionary, the 'block contains an EID-reference field' flag in + the block processing control flags is set to 1 and the block + includes an EID reference field consisting of a count of EID + references expressed as an SDNV followed by the EID references + themselves. Each EID reference is a pair of SDNVs. The first + SDNV of each EID reference contains the offset of a scheme name in + the primary block's dictionary, and the second SDNV of each + reference contains the offset of a scheme-specific part in the + dictionary. + + o Block data length, an unsigned integer expressed as an SDNV. 
The + Block data length field contains the aggregate length of all + remaining fields of the block, i.e., the block-type-specific data + fields. + + o Block-type-specific data fields, whose format and order are type- + specific and whose aggregate length in octets is the value of the + block data length field. All multi-byte block-type-specific data + fields are represented in network byte order. + + + + + + +Scott & Burleigh Experimental [Page 22] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +-----------+-----------+-----------+-----------+ + |Block type | Block processing ctrl flags (SDNV)| + +-----------+-----------+-----------+-----------+ + | Block length (SDNV) | + +-----------+-----------+-----------+-----------+ + / Block body data (variable) / + +-----------+-----------+-----------+-----------+ + + Figure 6: Block Layout without EID Reference List + + + +-----------+-----------+-----------+-----------+ + |Block Type | Block processing ctrl flags (SDNV)| + +-----------+-----------+-----------+-----------+ + | EID Reference Count (SDNV) | + +-----------+-----------+-----------+-----------+ + | Ref_scheme_1 (SDNV) | Ref_ssp_1 (SDNV) | + +-----------+-----------+-----------+-----------+ + | Ref_scheme_2 (SDNV) | Ref_ssp_2 (SDNV) | + +-----------+-----------+-----------+-----------+ + | Block length (SDNV) | + +-----------+-----------+-----------+-----------+ + / Block body data (variable) / + +-----------+-----------+-----------+-----------+ + + Figure 7: Block Layout Showing Two EID References + +4.5.3. Bundle Payload Block + + The fields of the bundle payload block are: + + Block Type: The Block Type field is a 1-byte field that indicates + the type of the block. For the bundle payload block, this field + contains the value 1. + + Block Processing Control Flags: The Block Processing Control Flags + field is an SDNV that contains the block processing control flags + discussed in Section 4.3 above. 
+ + Block Length: The Block Length field is an SDNV that contains the + aggregate length of all remaining fields of the block - which is + to say, the length of the bundle's payload. + + Payload: The Payload field contains the application data carried by + this bundle. + + That is, bundle payload blocks follow the canonical format of the + previous section with the restriction that the 'block contains an + + + +Scott & Burleigh Experimental [Page 23] + +RFC 5050 Bundle Protocol Specification November 2007 + + + EID-reference field' bit of the block processing control flags is + never set. The block body data for payload blocks is the application + data carried by the bundle. + +4.6. Extension Blocks + + "Extension blocks" are all blocks other than the primary and payload + blocks. Because extension blocks are not defined in the Bundle + Protocol specification (the present document), not all nodes + conforming to this specification will necessarily instantiate Bundle + Protocol implementations that include procedures for processing (that + is, recognizing, parsing, acting on, and/or producing) all extension + blocks. It is therefore possible for a node to receive a bundle that + includes extension blocks that the node cannot process. + + Whenever a bundle is forwarded that contains one or more extension + blocks that could not be processed, the "Block was forwarded without + being processed" flag must be set to 1 within the block processing + flags of each such block. For each block flagged in this way, the + flag may optionally be cleared (i.e., set to zero) by another node + that subsequently receives the bundle and is able to process that + block; the specifications defining the various extension blocks are + expected to define the circumstances under which this flag may be + cleared, if any. + +4.7. 
Dictionary Revision + + Any strings (scheme names and SSPs) in a bundle's dictionary that are + referenced neither from the bundle's primary block nor from the block + EID reference field of any extension block may be removed from the + dictionary at the time the bundle is forwarded. + + Whenever removal of a string from the dictionary causes the offsets + (within the dictionary byte array) of any other strings to change, + all endpoint ID references that refer to those strings must be + adjusted at the same time. Note that these references may be in the + primary block and/or in the block EID reference fields of extension + blocks. + +5. Bundle Processing + + The bundle processing procedures mandated in this section and in + Section 6 govern the operation of the Bundle Protocol Agent and the + Application Agent administrative element of each bundle node. They + are neither exhaustive nor exclusive. That is, supplementary DTN + protocol specifications (including, but not restricted to, the Bundle + Security Protocol [BSP]) may require that additional measures be + taken at specified junctures in these procedures. Such additional + + + +Scott & Burleigh Experimental [Page 24] + +RFC 5050 Bundle Protocol Specification November 2007 + + + measures shall not override or supersede the mandated bundle protocol + procedures, except that they may in some cases make these procedures + moot by requiring, for example, that implementations conforming to + the supplementary protocol terminate the processing of a given + incoming or outgoing bundle due to a fault condition recognized by + that protocol. + +5.1. Generation of Administrative Records + + All initial transmission of bundles is in response to bundle + transmission requests presented by nodes' application agents. 
When + required to "generate" an administrative record (a bundle status + report or a custody signal), the bundle protocol agent itself is + responsible for causing a new bundle to be transmitted, conveying + that record. In concept, the bundle protocol agent discharges this + responsibility by directing the administrative element of the node's + application agent to construct the record and request its + transmission as detailed in Section 6 below. In practice, the manner + in which administrative record generation is accomplished is an + implementation matter, provided the constraints noted in Section 6 + are observed. + + Under some circumstances, the requesting of status reports could + result in an unacceptable increase in the bundle traffic in the + network. For this reason, the generation of status reports is + mandatory only in one case, the deletion of a bundle for which + custody transfer is requested. In all other cases, the decision on + whether or not to generate a requested status report is left to the + discretion of the bundle protocol agent. Mechanisms that could + assist in making such decisions, such as pre-placed agreements + authorizing the generation of status reports under specified + circumstances, are beyond the scope of this specification. + + Notes on administrative record terminology: + + o A "bundle reception status report" is a bundle status report with + the "reporting node received bundle" flag set to 1. + + o A "custody acceptance status report" is a bundle status report + with the "reporting node accepted custody of bundle" flag set to + 1. + + o A "bundle forwarding status report" is a bundle status report with + the "reporting node forwarded the bundle" flag set to 1. + + o A "bundle delivery status report" is a bundle status report with + the "reporting node delivered the bundle" flag set to 1. 
+ + + + +Scott & Burleigh Experimental [Page 25] + +RFC 5050 Bundle Protocol Specification November 2007 + + + o A "bundle deletion status report" is a bundle status report with + the "reporting node deleted the bundle" flag set to 1. + + o A "Succeeded" custody signal is a custody signal with the "custody + transfer succeeded" flag set to 1. + + o A "Failed" custody signal is a custody signal with the "custody + transfer succeeded" flag set to zero. + + o The "current custodian" of a bundle is the endpoint identified by + the current custodian endpoint ID in the bundle's primary block. + +5.2. Bundle Transmission + + The steps in processing a bundle transmission request are: + + Step 1: If custody transfer is requested for this bundle + transmission and, moreover, custody acceptance by the source node + is required, then either the bundle protocol agent must commit to + accepting custody of the bundle -- in which case processing + proceeds from Step 2 -- or the request cannot be honored and all + remaining steps of this procedure must be skipped. The bundle + protocol agent must not commit to accepting custody of a bundle if + the conditions under which custody of the bundle may be accepted + are not satisfied. The conditions under which a node may accept + custody of a bundle whose destination is not a singleton endpoint + are not defined in this specification. + + Step 2: Transmission of the bundle is initiated. An outbound + bundle must be created per the parameters of the bundle + transmission request, with current custodian endpoint ID set to + the null endpoint ID "dtn:none" and with the retention constraint + "Dispatch pending". The source endpoint ID of the bundle must be + either the ID of an endpoint of which the node is a member or the + null endpoint ID "dtn:none". + + Step 3: Processing proceeds from Step 1 of Section 5.4. + +5.3. 
Bundle Dispatching + + The steps in dispatching a bundle are: + + Step 1: If the bundle's destination endpoint is an endpoint of + which the node is a member, the bundle delivery procedure defined + in Section 5.7 must be followed. + + Step 2: Processing proceeds from Step 1 of Section 5.4. + + + + +Scott & Burleigh Experimental [Page 26] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.4. Bundle Forwarding + + The steps in forwarding a bundle are: + + Step 1: The retention constraint "Forward pending" must be added to + the bundle, and the bundle's "Dispatch pending" retention + constraint must be removed. + + Step 2: The bundle protocol agent must determine whether or not + forwarding is contraindicated for any of the reasons listed in + Figure 12. In particular: + + * The bundle protocol agent must determine which endpoint(s) to + forward the bundle to. The bundle protocol agent may choose + either to forward the bundle directly to its destination + endpoint (if possible) or to forward the bundle to some other + endpoint(s) for further forwarding. The manner in which this + decision is made may depend on the scheme name in the + destination endpoint ID but in any case is beyond the scope of + this document. If the agent finds it impossible to select any + endpoint(s) to forward the bundle to, then forwarding is + contraindicated. + + * Provided the bundle protocol agent succeeded in selecting the + endpoint(s) to forward the bundle to, the bundle protocol agent + must select the convergence layer adapter(s) whose services + will enable the node to send the bundle to the nodes of the + minimum reception group of each selected endpoint. The manner + in which the appropriate convergence layer adapters are + selected may depend on the scheme name in the destination + endpoint ID but in any case is beyond the scope of this + document. 
If the agent finds it impossible to select + convergence layer adapters to use in forwarding this bundle, + then forwarding is contraindicated. + + Step 3: If forwarding of the bundle is determined to be + contraindicated for any of the reasons listed in Figure 12, then + the Forwarding Contraindicated procedure defined in Section 5.4.1 + must be followed; the remaining steps of Section 5.4 are skipped at + this time. + + Step 4: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1, then the custody + transfer procedure defined in Section 5.10 must be followed. + + + + + + + +Scott & Burleigh Experimental [Page 27] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Step 5: For each endpoint selected for forwarding, the bundle + protocol agent must invoke the services of the selected + convergence layer adapter(s) in order to effect the sending of the + bundle to the nodes constituting the minimum reception group of + that endpoint. Determining the time at which the bundle is to be + sent by each convergence layer adapter is an implementation + matter. + + To keep from possibly invalidating bundle security, the sequencing + of the blocks in a forwarded bundle must not be changed as it + transits a node; received blocks must be transmitted in the same + relative order as that in which they were received. While blocks + may be added to bundles as they transit intermediate nodes, + removal of blocks that do not have their 'Discard block if it + can't be processed' flag in the block processing control flags set + to 1 may cause security to fail. 
+ + Step 6: When all selected convergence layer adapters have informed + the bundle protocol agent that they have concluded their data + sending procedures with regard to this bundle: + + * If the "request reporting of bundle forwarding" flag in the + bundle's status report request field is set to 1, then a bundle + forwarding status report should be generated, destined for the + bundle's report-to endpoint ID. If the bundle has the + retention constraint "Custody accepted" and all of the nodes in + the minimum reception group of the endpoint selected for + forwarding are known to be unable to send bundles back to this + node, then the reason code on this bundle forwarding status + report must be "forwarded over unidirectional link"; otherwise, + the reason code must be "no additional information". + + * The bundle's "Forward pending" retention constraint must be + removed. + +5.4.1. Forwarding Contraindicated + + The steps in responding to contraindication of forwarding for some + reason are: + + Step 1: The bundle protocol agent must determine whether or not to + declare failure in forwarding the bundle for this reason. Note: + this decision is likely to be influenced by the reason for which + forwarding is contraindicated. + + + + + + + +Scott & Burleigh Experimental [Page 28] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Step 2: If forwarding failure is declared, then the Forwarding + Failed procedure defined in Section 5.4.2 must be followed. + Otherwise, (a) if the bundle's custody transfer requested flag (in + the bundle processing flags field) is set to 1, then the custody + transfer procedure defined in Section 5.10 must be followed; (b) + when -- at some future time -- the forwarding of this bundle ceases + to be contraindicated, processing proceeds from Step 5 of + Section 5.4. + +5.4.2. 
Forwarding Failed + + The steps in responding to a declaration of forwarding failure for + some reason are: + + Step 1: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1, custody transfer + failure must be handled. Procedures for handling failure of + custody transfer for a bundle whose destination is not a singleton + endpoint are not defined in this specification. For a bundle + whose destination is a singleton endpoint, the bundle protocol + agent must handle the custody transfer failure by generating a + "Failed" custody signal for the bundle, destined for the bundle's + current custodian; the custody signal must contain a reason code + corresponding to the reason for which forwarding was determined to + be contraindicated. (Note that discarding the bundle will not + delete it from the network, since the current custodian still has + a copy.) + + Step 2: If the bundle's destination endpoint is an endpoint of + which the node is a member, then the bundle's "Forward pending" + retention constraint must be removed. Otherwise, the bundle must + be deleted: the bundle deletion procedure defined in Section 5.13 + must be followed, citing the reason for which forwarding was + determined to be contraindicated. + +5.5. Bundle Expiration + + A bundle expires when the current time is greater than the bundle's + creation time plus its lifetime as specified in the primary bundle + block. Bundle expiration may occur at any point in the processing of + a bundle. When a bundle expires, the bundle protocol agent must + delete the bundle for the reason "lifetime expired": the bundle + deletion procedure defined in Section 5.13 must be followed. + + + + + + + + +Scott & Burleigh Experimental [Page 29] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.6. 
Bundle Reception + + The steps in processing a bundle received from another node are: + + Step 1: The retention constraint "Dispatch pending" must be added + to the bundle. + + Step 2: If the "request reporting of bundle reception" flag in the + bundle's status report request field is set to 1, then a bundle + reception status report with reason code "No additional + information" should be generated, destined for the bundle's + report-to endpoint ID. + + Step 3: For each block in the bundle that is an extension block + that the bundle protocol agent cannot process: + + * If the block processing flags in that block indicate that a + status report is requested in this event, then a bundle + reception status report with reason code "Block unintelligible" + should be generated, destined for the bundle's report-to + endpoint ID. + + * If the block processing flags in that block indicate that the + bundle must be deleted in this event, then the bundle protocol + agent must delete the bundle for the reason "Block + unintelligible"; the bundle deletion procedure defined in + Section 5.13 must be followed and all remaining steps of the + bundle reception procedure must be skipped. + + * If the block processing flags in that block do NOT indicate + that the bundle must be deleted in this event but do indicate + that the block must be discarded, then the bundle protocol + agent must remove this block from the bundle. + + * If the block processing flags in that block indicate NEITHER + that the bundle must be deleted NOR that the block must be + discarded, then the bundle protocol agent must set to 1 the + "Block was forwarded without being processed" flag in the block + processing flags of the block. 
+ + Step 4: If the bundle's custody transfer requested flag (in the + bundle processing flags field) is set to 1 and the bundle has the + same source endpoint ID, creation timestamp, and (if the bundle is + a fragment) fragment offset and payload length as another bundle + that (a) has not been discarded and (b) currently has the + retention constraint "Custody accepted", custody transfer + redundancy must be handled. Otherwise, processing proceeds from + Step 5. Procedures for handling redundancy in custody transfer + + + +Scott & Burleigh Experimental [Page 30] + +RFC 5050 Bundle Protocol Specification November 2007 + + + for a bundle whose destination is not a singleton endpoint are not + defined in this specification. For a bundle whose destination is + a singleton endpoint, the bundle protocol agent must handle + custody transfer redundancy by generating a "Failed" custody + signal for this bundle with reason code "Redundant reception", + destined for this bundle's current custodian, and removing this + bundle's "Dispatch pending" retention constraint. + + Step 5: Processing proceeds from Step 1 of Section 5.3. + +5.7. Local Bundle Delivery + + The steps in processing a bundle that is destined for an endpoint of + which this node is a member are: + + Step 1: If the received bundle is a fragment, the application data + unit reassembly procedure described in Section 5.9 must be + followed. If this procedure results in reassembly of the entire + original application data unit, processing of this bundle (whose + fragmentary payload has been replaced by the reassembled + application data unit) proceeds from Step 2; otherwise, the + retention constraint "Reassembly pending" must be added to the + bundle and all remaining steps of this procedure are skipped. 
+ + Step 2: Delivery depends on the state of the registration whose + endpoint ID matches that of the destination of the bundle: + + * If the registration is in the Active state, then the bundle + must be delivered subject to this registration (see Section 3.1 + above) as soon as all previously received bundles that are + deliverable subject to this registration have been delivered. + + * If the registration is in the Passive state, then the + registration's delivery failure action must be taken (see + Section 3.1 above). + + Step 3: As soon as the bundle has been delivered: + + * If the "request reporting of bundle delivery" flag in the + bundle's status report request field is set to 1, then a bundle + delivery status report should be generated, destined for the + bundle's report-to endpoint ID. Note that this status report + only states that the payload has been delivered to the + application agent, not that the application agent has processed + that payload. + + + + + + +Scott & Burleigh Experimental [Page 31] + +RFC 5050 Bundle Protocol Specification November 2007 + + + * If the bundle's custody transfer requested flag (in the bundle + processing flags field) is set to 1, custodial delivery must be + reported. Procedures for reporting custodial delivery for a + bundle whose destination is not a singleton endpoint are not + defined in this specification. For a bundle whose destination + is a singleton endpoint, the bundle protocol agent must report + custodial delivery by generating a "Succeeded" custody signal + for the bundle, destined for the bundle's current custodian. + +5.8. Bundle Fragmentation + + It may at times be necessary for bundle protocol agents to reduce the + sizes of bundles in order to forward them. This might be the case, + for example, if the endpoint to which a bundle is to be forwarded is + accessible only via intermittent contacts and no upcoming contact is + long enough to enable the forwarding of the entire bundle. 
+ + The size of a bundle can be reduced by "fragmenting" the bundle. To + fragment a bundle whose payload is of size M is to replace it with + two "fragments" -- new bundles with the same source endpoint ID and + creation timestamp as the original bundle -- whose payloads are the + first N and the last (M - N) bytes of the original bundle's payload, + where 0 < N < M. Note that fragments may themselves be fragmented, + so fragmentation may in effect replace the original bundle with more + than two fragments. (However, there is only one 'level' of + fragmentation, as in IP fragmentation.) + + Any bundle whose primary block's bundle processing flags do NOT + indicate that it must not be fragmented may be fragmented at any + time, for any purpose, at the discretion of the bundle protocol + agent. + + Fragmentation shall be constrained as follows: + + o The concatenation of the payloads of all fragments produced by + fragmentation must always be identical to the payload of the + bundle that was fragmented. Note that the payloads of fragments + resulting from different fragmentation episodes, in different + parts of the network, may be overlapping subsets of the original + bundle's payload. + + o The bundle processing flags in the primary block of each fragment + must be modified to indicate that the bundle is a fragment, and + both fragment offset and total application data unit length must + be provided at the end of each fragment's primary bundle block. + + o The primary blocks of the fragments will differ from that of the + fragmented bundle as noted above. + + + +Scott & Burleigh Experimental [Page 32] + +RFC 5050 Bundle Protocol Specification November 2007 + + + o The payload blocks of fragments will differ from that of the + fragmented bundle as noted above. + + o All blocks that precede the payload block at the time of + fragmentation must be replicated in the fragment with the lowest + offset. 
+ + o All blocks that follow the payload block at the time of + fragmentation must be replicated in the fragment with the highest + offset. + + o If the 'Block must be replicated in every fragment' bit is set to + 1, then the block must be replicated in every fragment. + + o If the 'Block must be replicated in every fragment' bit is set to + zero, the block should be replicated in only one fragment. + + o The relative order of all blocks that are present in a fragment + must be the same as in the bundle prior to fragmentation. + +5.9. Application Data Unit Reassembly + + If the concatenation -- as informed by fragment offsets and payload + lengths -- of the payloads of all previously received fragments with + the same source endpoint ID and creation timestamp as this fragment, + together with the payload of this fragment, forms a byte array whose + length is equal to the total application data unit length in the + fragment's primary block, then: + + o This byte array -- the reassembled application data unit -- must + replace the payload of this fragment. + + o The "Reassembly pending" retention constraint must be removed from + every other fragment whose payload is a subset of the reassembled + application data unit. + + Note: reassembly of application data units from fragments occurs at + destination endpoints as necessary; an application data unit may also + be reassembled at some other endpoint on the route to the + destination. + + + + + + + + + + + +Scott & Burleigh Experimental [Page 33] + +RFC 5050 Bundle Protocol Specification November 2007 + + +5.10. Custody Transfer + + The conditions under which a node may accept custody of a bundle + whose destination is not a singleton endpoint are not defined in this + specification. 
+ + The decision as to whether or not to accept custody of a bundle whose + destination is a singleton endpoint is an implementation matter that + may involve both resource and policy considerations; however, if the + bundle protocol agent has committed to accepting custody of the + bundle (as described in Step 1 of Section 5.2), then custody must be + accepted. + + If the bundle protocol agent elects to accept custody of the bundle, + then it must follow the custody acceptance procedure defined in + Section 5.10.1. + +5.10.1. Custody Acceptance + + Procedures for acceptance of custody of a bundle whose destination is + not a singleton endpoint are not defined in this specification. + + Procedures for acceptance of custody of a bundle whose destination is + a singleton endpoint are defined as follows. + + The retention constraint "Custody accepted" must be added to the + bundle. + + If the "request reporting of custody acceptance" flag in the bundle's + status report request field is set to 1, a custody acceptance status + report should be generated, destined for the report-to endpoint ID of + the bundle. However, if a bundle reception status report was + generated for this bundle (Step 1 of Section 5.6), then this report + should be generated by simply turning on the "Reporting node accepted + custody of bundle" flag in that earlier report's status flags byte. + + The bundle protocol agent must generate a "Succeeded" custody signal + for the bundle, destined for the bundle's current custodian. + + The bundle protocol agent must assert the new current custodian for + the bundle. It does so by changing the current custodian endpoint ID + in the bundle's primary block to the endpoint ID of one of the + singleton endpoints in which the node is registered. 
This may entail + appending that endpoint ID's null-terminated scheme name and SSP to + the dictionary byte array in the bundle's primary block, and in some + cases it may also enable the (optional) removal of the current + custodian endpoint ID's scheme name and/or SSP from the dictionary. + + + + +Scott & Burleigh Experimental [Page 34] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The bundle protocol agent may set a custody transfer countdown timer + for this bundle; upon expiration of this timer prior to expiration of + the bundle itself and prior to custody transfer success for this + bundle, the custody transfer failure procedure detailed in + Section 5.12 must be followed. The manner in which the countdown + interval for such a timer is determined is an implementation matter. + + The bundle should be retained in persistent storage if possible. + +5.10.2. Custody Release + + Procedures for release of custody of a bundle whose destination is + not a singleton endpoint are not defined in this specification. + + When custody of a bundle is released, where the destination of the + bundle is a singleton endpoint, the "Custody accepted" retention + constraint must be removed from the bundle and any custody transfer + timer that has been established for this bundle must be destroyed. + +5.11. Custody Transfer Success + + Procedures for determining custody transfer success for a bundle + whose destination is not a singleton endpoint are not defined in this + specification. + + Upon receipt of a "Succeeded" custody signal at a node that is a + custodial node of the bundle identified in the custody signal, where + the destination of the bundle is a singleton endpoint, custody of the + bundle must be released as described in Section 5.10.2. + +5.12. Custody Transfer Failure + + Procedures for determining custody transfer failure for a bundle + whose destination is not a singleton endpoint are not defined in this + specification.
Custody transfer for a bundle whose destination is a + singleton endpoint is determined to have failed at a custodial node + for that bundle when either (a) that node's custody transfer timer + for that bundle (if any) expires or (b) a "Failed" custody signal for + that bundle is received at that node. + + Upon determination of custody transfer failure, the action taken by + the bundle protocol agent is implementation-specific and may depend + on the nature of the failure. For example, if custody transfer + failure was inferred from expiration of a custody transfer timer or + was asserted by a "Failed" custody signal with the "Depleted storage" + reason code, the bundle protocol agent might choose to re-forward the + bundle, possibly on a different route (Section 5.4). Receipt of a + "Failed" custody signal with the "Redundant reception" reason code, + + + +Scott & Burleigh Experimental [Page 35] + +RFC 5050 Bundle Protocol Specification November 2007 + + + on the other hand, might cause the bundle protocol agent to release + custody of the bundle and to revise its algorithm for computing + countdown intervals for custody transfer timers. + +5.13. Bundle Deletion + + The steps in deleting a bundle are: + + Step 1: If the retention constraint "Custody accepted" currently + prevents this bundle from being discarded, and the destination of + the bundle is a singleton endpoint, then: + + * Custody of the bundle is released as described in Section 5.10.2. + + * A bundle deletion status report citing the reason for deletion + must be generated, destined for the bundle's report-to endpoint + ID. + + Otherwise, if the "request reporting of bundle deletion" flag in + the bundle's status report request field is set to 1, then a + bundle deletion status report citing the reason for deletion + should be generated, destined for the bundle's report-to endpoint + ID. + + Step 2: All of the bundle's retention constraints must be removed. + +5.14.
Discarding a Bundle + + As soon as a bundle has no remaining retention constraints it may be + discarded. + +5.15. Canceling a Transmission + + When requested to cancel a specified transmission, where the bundle + created upon initiation of the indicated transmission has not yet + been discarded, the bundle protocol agent must delete that bundle for + the reason "transmission cancelled". For this purpose, the procedure + defined in Section 5.13 must be followed. + +5.16. Polling + + When requested to poll a specified registration that is in the + Passive state, the bundle protocol agent must immediately deliver the + least recently received bundle that is deliverable subject to the + indicated registration, if any. + + + + + + +Scott & Burleigh Experimental [Page 36] + +RFC 5050 Bundle Protocol Specification November 2007 + + +6. Administrative Record Processing + +6.1. Administrative Records + + Administrative records are standard application data units that are + used in providing some of the features of the Bundle Protocol. Two + types of administrative records have been defined to date: bundle + status reports and custody signals. + + Every administrative record consists of a four-bit record type code + followed by four bits of administrative record flags, followed by + record content in type-specific format. Record type codes are + defined as follows: + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0001 | Bundle status report. | + +---------+--------------------------------------------+ + | 0010 | Custody signal. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. 
| + +---------+--------------------------------------------+ + + Figure 8: Administrative Record Type Codes + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0001 | Record is for a fragment; fragment | + | | offset and length fields are present. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 9: Administrative Record Flags + + All time values in administrative records are UTC times expressed in + "DTN time" representation. A DTN time consists of an SDNV indicating + the number of seconds since the start of the year 2000, followed by + an SDNV indicating the number of nanoseconds since the start of the + indicated second. + + The contents of the various types of administrative records are + described below. + + + + + +Scott & Burleigh Experimental [Page 37] + +RFC 5050 Bundle Protocol Specification November 2007 + + +6.1.1. Bundle Status Reports + + The transmission of 'bundle status reports' under specified + conditions is an option that can be invoked when transmission of a + bundle is requested. These reports are intended to provide + information about how bundles are progressing through the system, + including notices of receipt, custody transfer, forwarding, final + delivery, and deletion. They are transmitted to the Report-to + endpoints of bundles. 
+ + +----------------+----------------+----------------+----------------+ + | Status Flags | Reason code | Fragment offset (*) (if + +----------------+----------------+----------------+----------------+ + present) | Fragment length (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Time of receipt of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of custody acceptance of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of forwarding of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of delivery of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Time of deletion of bundle X (a DTN time, if present) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp time (*) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp sequence number (*) | + +----------------+----------------+----------------+----------------+ + | Length of X's source endpoint ID (*) | Source + +----------------+---------------------------------+ + + endpoint ID of bundle X (variable) | + +----------------+----------------+----------------+----------------+ + + Figure 10: Bundle Status Report Format + + (*) Notes: + + The Fragment Offset field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The Fragment Length field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. 
+ + + + + + +Scott & Burleigh Experimental [Page 38] + +RFC 5050 Bundle Protocol Specification November 2007 + + + The Creation Timestamp fields replicate the Creation Timestamp fields + in the primary block of the subject bundle. As such they are SDNVs + (see Section 4.5.1 above) and are therefore variable length. Four- + octet SDNVs are shown here for convenience in representation. + + The source endpoint ID length field is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The fields in a bundle status report are: + + Status Flags: A 1-byte field containing the following flags: + + +----------+--------------------------------------------+ + | Value | Meaning | + +==========+============================================+ + | 00000001 | Reporting node received bundle. | + +----------+--------------------------------------------+ + | 00000010 | Reporting node accepted custody of bundle.| + +----------+--------------------------------------------+ + | 00000100 | Reporting node forwarded the bundle. | + +----------+--------------------------------------------+ + | 00001000 | Reporting node delivered the bundle. | + +----------+--------------------------------------------+ + | 00010000 | Reporting node deleted the bundle. | + +----------+--------------------------------------------+ + | 00100000 | Unused. | + +----------+--------------------------------------------+ + | 01000000 | Unused. | + +----------+--------------------------------------------+ + | 10000000 | Unused. | + +----------+--------------------------------------------+ + + Figure 11: Status Flags for Bundle Status Reports + + Reason Code: A 1-byte field explaining the value of the flags in + the status flags byte. 
The list of status report reason codes + provided here is neither exhaustive nor exclusive; supplementary + DTN protocol specifications (including, but not restricted to, the + Bundle Security Protocol [BSP]) may define additional reason + codes. Status report reason codes are defined as follows: + + + + + + + + + + +Scott & Burleigh Experimental [Page 39] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0x00 | No additional information. | + +---------+--------------------------------------------+ + | 0x01 | Lifetime expired. | + +---------+--------------------------------------------+ + | 0x02 | Forwarded over unidirectional link. | + +---------+--------------------------------------------+ + | 0x03 | Transmission canceled. | + +---------+--------------------------------------------+ + | 0x04 | Depleted storage. | + +---------+--------------------------------------------+ + | 0x05 | Destination endpoint ID unintelligible. | + +---------+--------------------------------------------+ + | 0x06 | No known route to destination from here. | + +---------+--------------------------------------------+ + | 0x07 | No timely contact with next node on route.| + +---------+--------------------------------------------+ + | 0x08 | Block unintelligible. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 12: Status Report Reason Codes + + Fragment Offset: If the bundle fragment bit is set in the status + flags, then the offset (within the original application data unit) + of the payload of the bundle that caused the status report to be + generated is included here. 
+ + Fragment length: If the bundle fragment bit is set in the status + flags, then the length of the payload of the subject bundle is + included here. + + Time of Receipt (if present): If the bundle-received bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was received at the reporting node is included here. + + Time of Custody Acceptance (if present): If the custody-accepted + bit is set in the status flags, then a DTN time indicating the + time at which custody was accepted at the reporting node is + included here. + + Time of Forward (if present): If the bundle-forwarded bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was first forwarded at the reporting node is included here. + + + + +Scott & Burleigh Experimental [Page 40] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Time of Delivery (if present): If the bundle-delivered bit is set + in the status flags, then a DTN time indicating the time at which + the bundle was delivered at the reporting node is included here. + + Time of Deletion (if present): If the bundle-deleted bit is set in + the status flags, then a DTN time indicating the time at which the + bundle was deleted at the reporting node is included here. + + Creation Timestamp of Subject Bundle: A copy of the creation + timestamp of the bundle that caused the status report to be + generated. + + Length of Source Endpoint ID: The length in bytes of the source + endpoint ID of the bundle that caused the status report to be + generated. + + Source Endpoint ID text: The text of the source endpoint ID of the + bundle that caused the status report to be generated. + +6.1.2. Custody Signals + + Custody signals are administrative records that effect custody + transfer operations. They are transmitted to the endpoints that are + the current custodians of bundles. + + Custody signals have the following format. 
+ + Custody signal regarding bundle 'X': + + +----------------+----------------+----------------+----------------+ + | Status | Fragment offset (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Fragment length (*) (if present) | + +----------------+----------------+----------------+----------------+ + | Time of signal (a DTN time) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp time (*) | + +----------------+----------------+----------------+----------------+ + | Copy of bundle X's Creation Timestamp sequence number (*) | + +----------------+----------------+----------------+----------------+ + | Length of X's source endpoint ID (*) | Source + +----------------+---------------------------------+ + + endpoint ID of bundle X (variable) | + +----------------+----------------+----------------+----------------+ + + Figure 13: Custody Signal Format + + + + + +Scott & Burleigh Experimental [Page 41] + +RFC 5050 Bundle Protocol Specification November 2007 + + + (*) Notes: + + The Fragment Offset field, if present, is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. + + The Fragment Length field, if present, is an SDNV and is therefore + variable length. A four-octet SDNV is shown here for convenience in + representation. + + The Creation Timestamp fields replicate the Creation Timestamp fields + in the primary block of the subject bundle. As such they are SDNVs + (see Section 4.5.1 above) and are therefore variable length. Four- + octet SDNVs are shown here for convenience in representation. + + The source endpoint ID length field is an SDNV and is therefore + variable length. A three-octet SDNV is shown here for convenience in + representation. 
+ + The fields in a custody signal are: + + Status: A 1-byte field containing a 1-bit "custody transfer + succeeded" flag followed by a 7-bit reason code explaining the + value of that flag. Custody signal reason codes are defined as + follows: + + + + + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 42] + +RFC 5050 Bundle Protocol Specification November 2007 + + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | 0x00 | No additional information. | + +---------+--------------------------------------------+ + | 0x01 | Reserved for future use. | + +---------+--------------------------------------------+ + | 0x02 | Reserved for future use. | + +---------+--------------------------------------------+ + | 0x03 | Redundant reception (reception by a node | + | | that is a custodial node for this bundle).| + +---------+--------------------------------------------+ + | 0x04 | Depleted storage. | + +---------+--------------------------------------------+ + | 0x05 | Destination endpoint ID unintelligible. | + +---------+--------------------------------------------+ + | 0x06 | No known route to destination from here. | + +---------+--------------------------------------------+ + | 0x07 | No timely contact with next node on route.| + +---------+--------------------------------------------+ + | 0x08 | Block unintelligible. | + +---------+--------------------------------------------+ + | (other) | Reserved for future use. | + +---------+--------------------------------------------+ + + Figure 14: Custody Signal Reason Codes + + Fragment offset: If the bundle fragment bit is set in the status + flags, then the offset (within the original application data unit) + of the payload of the bundle that caused the status report to be + generated is included here. 
+ + Fragment length: If the bundle fragment bit is set in the status + flags, then the length of the payload of the subject bundle is + included here. + + Time of Signal: A DTN time indicating the time at which the signal + was generated. + + Creation Timestamp of Subject Bundle: A copy of the creation + timestamp of the bundle to which the signal applies. + + Length of Source Endpoint ID: The length in bytes of the source + endpoint ID of the bundle to which the signal applies. + + + + + + + +Scott & Burleigh Experimental [Page 43] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Source Endpoint ID text: The text of the source endpoint ID of the + bundle to which the signal applies. + +6.2. Generation of Administrative Records + + Whenever the application agent's administrative element is directed + by the bundle protocol agent to generate an administrative record + with reference to some bundle, the following procedure must be + followed: + + Step 1: The administrative record must be constructed. If the + referenced bundle is a fragment, the administrative record must + have the Fragment flag set and must contain the fragment offset + and fragment length fields. The value of the fragment offset + field must be the value of the referenced bundle's fragment + offset, and the value of the fragment length field must be the + length of the referenced bundle's payload. + + Step 2: A request for transmission of a bundle whose payload is + this administrative record must be presented to the bundle + protocol agent. + +6.3. Reception of Custody Signals + + For each received custody signal that has the "custody transfer + succeeded" flag set to 1, the administrative element of the + application agent must direct the bundle protocol agent to follow the + custody transfer success procedure in Section 5.11.
+ + For each received custody signal that has the "custody transfer + succeeded" flag set to 0, the administrative element of the + application agent must direct the bundle protocol agent to follow the + custody transfer failure procedure in Section 5.12. + +7. Services Required of the Convergence Layer + +7.1. The Convergence Layer + + The successful operation of the end-to-end bundle protocol depends on + the operation of underlying protocols at what is termed the + "convergence layer"; these protocols accomplish communication between + nodes. A wide variety of protocols may serve this purpose, so long + as each convergence layer protocol adapter provides a defined minimal + set of services to the bundle protocol agent. This convergence layer + service specification enumerates those services. + + + + + + +Scott & Burleigh Experimental [Page 44] + +RFC 5050 Bundle Protocol Specification November 2007 + + +7.2. Summary of Convergence Layer Services + + Each convergence layer protocol adapter is expected to provide the + following services to the bundle protocol agent: + + o sending a bundle to all bundle nodes in the minimum reception + group of the endpoint identified by a specified endpoint ID that + are reachable via the convergence layer protocol; and + + o delivering to the bundle protocol agent a bundle that was sent by + a remote bundle node via the convergence layer protocol. + + The convergence layer service interface specified here is neither + exhaustive nor exclusive. That is, supplementary DTN protocol + specifications (including, but not restricted to, the Bundle Security + Protocol [BSP]) may expect convergence layer adapters that serve BP + implementations conforming to those protocols to provide additional + services. + +8. Security Considerations + + The bundle protocol has taken security into concern from the outset + of its design. It was always assumed that security services would be + needed in the use of the bundle protocol. 
As a result, the bundle + protocol security architecture and the available security services + are specified in an accompanying document, the Bundle Security + Protocol specification [BSP]; an informative overview of this + architecture is provided in [SECO]. + + The bundle protocol has been designed with the notion that it will be + run over networks with scarce resources. For example, the networks + might have limited bandwidth, limited connectivity, constrained + storage in relay nodes, etc. Therefore, the bundle protocol must + ensure that only those entities authorized to send bundles over such + constrained environments are actually allowed to do so. All + unauthorized entities should be prevented from consuming valuable + resources. + + Likewise, because of the potentially long latencies and delays + involved in the networks that make use of the bundle protocol, data + sources should be concerned with the integrity of the data received + at the intended destination(s) and may also be concerned with + ensuring confidentiality of the data as it traverses the network. + Without integrity, the bundle payload data might be corrupted while + in transit without the destination able to detect it. Similarly, the + data source can be concerned with ensuring that the data can only be + used by those authorized, hence the need for confidentiality. + + + + +Scott & Burleigh Experimental [Page 45] + +RFC 5050 Bundle Protocol Specification November 2007 + + + Internal to the bundle-aware overlay network, the bundle nodes should + be concerned with the authenticity of other bundle nodes as well as + the preservation of bundle payload data integrity as it is forwarded + between bundle nodes. + + As a result, bundle security is concerned with the authenticity, + integrity, and confidentiality of bundles conveyed among bundle + nodes. 
This is accomplished via the use of three independent + security-specific bundle blocks, which may be used together to + provide multiple bundle security services or independently of one + another, depending on perceived security threats, mandated security + requirements, and security policies that must be enforced. + + The Bundle Authentication Block (BAB) ensures the authenticity and + integrity of bundles on a hop-by-hop basis between bundle nodes. The + BAB allows each bundle node to verify a bundle's authenticity before + processing or forwarding the bundle. In this way, entities that are + not authorized to send bundles will have unauthorized transmissions + blocked by security-aware bundle nodes. + + Additionally, to provide "security-source" to "security-destination" + bundle authenticity and integrity, the Payload Security Block (PSB) + is used. A "security-source" may not actually be the origination + point of the bundle but instead may be the first point along the path + that is security-aware and is able to apply security services. For + example, an enclave of networked systems may generate bundles but + only their gateway may be required and/or able to apply security + services. The PSB allows any security-enabled entity along the + delivery path, in addition to the "security-destination" (the + recipient counterpart to the "security-source"), to ensure the + bundle's authenticity. + + Finally, to provide payload confidentiality, the use of the + Confidentiality Block (CB) is available. The bundle payload may be + encrypted to provide "security-source" to "security-destination" + payload confidentiality/privacy. The CB indicates the cryptographic + algorithm and key IDs that were used to encrypt the payload. 
+ + Note that removal of strings from the dictionary at a given point in + a bundle's end-to-end path, and attendant adjustment of endpoint ID + references in the blocks of that bundle, may make it necessary to re- + compute values in one or more of the bundle's security blocks. + + Bundle security must not be invalidated by forwarding nodes even + though they themselves might not use the Bundle Security Protocol. + In particular, the sequencing of the blocks in a forwarded bundle + must not be changed as it transits a node; received blocks must be + transmitted in the same relative order as that in which they were + + + +Scott & Burleigh Experimental [Page 46] + +RFC 5050 Bundle Protocol Specification November 2007 + + + received. While blocks may be added to bundles as they transit + intermediate nodes, removal of blocks that do not have their 'Discard + block if it can't be processed' flag in the block processing control + flags set to 1 may cause security to fail. + + Inclusion of the Bundle Security Protocol in any Bundle Protocol + implementation is RECOMMENDED. Use of the Bundle Security Protocol + in Bundle Protocol operations is OPTIONAL. + +9. IANA Considerations + + The "dtn:" URI scheme has been provisionally registered by IANA. See + http://www.iana.org/assignments/uri-schemes.html for the latest + details. + +10. References + +10.1. Normative References + + [RFC2119] Bradner, S., "Key words for use in RFCs to Indicate + Requirement Levels", BCP 14, RFC 2119, March 1997. + + [URI] Berners-Lee, T., Fielding, R., and L. Masinter, "Uniform + Resource Identifier (URI): Generic Syntax", RFC 3986, + STD 66, January 2005. + + [URIREG] Hansen, T., Hardie, T., and L. Masinter, "Guidelines and + Registration Procedures for New URI Schemes", RFC 4395, + BCP 115, February 2006. + +10.2. Informative References + + [ARCH] V. Cerf et al., "Delay-Tolerant Network Architecture", + RFC 4838, April 2007.
+ + [ASN1] "Abstract Syntax Notation One (ASN.1), "ASN.1 Encoding + Rules: Specification of Basic Encoding Rules (BER), + Canonical Encoding Rules (CER) and Distinguished Encoding + Rules (DER)," ITU-T Rec. X.690 (2002) | ISO/IEC 8825- + 1:2002", 2003. + + [BSP] Symington, S., "Bundle Security Protocol Specification", + Work in Progress, October 2007. + + [RFC3987] Duerst, M. and M. Suignard, "Internationalized Resource + Identifiers (IRIs)", RFC 3987, January 2005. + + + + + +Scott & Burleigh Experimental [Page 47] + +RFC 5050 Bundle Protocol Specification November 2007 + + + [SECO] Farrell, S., Symington, S., Weiss, H., and P. Lovell, + "Delay-Tolerant Networking Security Overview", + Work in Progress, July 2007. + + [SIGC] Fall, K., "A Delay-Tolerant Network Architecture for + Challenged Internets", SIGCOMM 2003. + + [TUT] Warthman, F., "Delay-Tolerant Networks (DTNs): A + Tutorial". + + [UTC] Arias, E. and B. Guinot, ""Coordinated universal time UTC: + historical background and perspectives" in Journees + systemes de reference spatio-temporels", 2004. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 48] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Appendix A. Contributors + + This was an effort of the Delay Tolerant Networking Research Group. + The following DTNRG participants contributed significant technical + material and/or inputs: Dr. Vinton Cerf of Google, Scott Burleigh, + Adrian Hooke, and Leigh Torgerson of the Jet Propulsion Laboratory, + Michael Demmer of the University of California at Berkeley, Robert + Durst, Keith Scott, and Susan Symington of The MITRE Corporation, + Kevin Fall of Intel Research, Stephen Farrell of Trinity College + Dublin, Peter Lovell of SPARTA, Inc., Manikantan Ramadas of Ohio + University (most of Section 4.1), and Howard Weiss of SPARTA, Inc. + (text of Section 8). + +Appendix B. Comments + + Please refer comments to dtn-interest@mailman.dtnrg.org.
The Delay + Tolerant Networking Research Group (DTNRG) Web site is located at + http://www.dtnrg.org. + +Authors' Addresses + + Keith L. Scott + The MITRE Corporation + 7515 Colshire Drive + McLean, VA 21102 + US + + Phone: +1 703 983 6547 + Fax: +1 703 983 7142 + EMail: kscott@mitre.org + + + Scott Burleigh + NASA Jet Propulsion Laboratory + 4800 Oak Grove Dr. + Pasadena, CA 91109-8099 + US + + Phone: +1 818 393 3353 + Fax: +1 818 354 1075 + EMail: Scott.Burleigh@jpl.nasa.gov + + + + + + + + + + +Scott & Burleigh Experimental [Page 49] + +RFC 5050 Bundle Protocol Specification November 2007 + + +Full Copyright Statement + + Copyright (C) The IETF Trust (2007). + + This document is subject to the rights, licenses and restrictions + contained in BCP 78 and at www.rfc-editor.org/copyright.html, and + except as set forth therein, the authors retain all their rights. + + This document and the information contained herein are provided on an + "AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS + OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY, THE IETF TRUST AND + THE INTERNET ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF + THE INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED + WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Intellectual Property + + The IETF takes no position regarding the validity or scope of any + Intellectual Property Rights or other rights that might be claimed to + pertain to the implementation or use of the technology described in + this document or the extent to which any license under such rights + might or might not be available; nor does it represent that it has + made any independent effort to identify any such rights. Information + on the procedures with respect to rights in RFC documents can be + found in BCP 78 and BCP 79. 
+ + Copies of IPR disclosures made to the IETF Secretariat and any + assurances of licenses to be made available, or the result of an + attempt made to obtain a general license or permission for the use of + such proprietary rights by implementers or users of this + specification can be obtained from the IETF on-line IPR repository at + http://www.ietf.org/ipr. + + The IETF invites any interested party to bring to its attention any + copyrights, patents or patent applications, or other proprietary + rights that may cover technology that may be required to implement + this standard. Please address the information to the IETF at + ietf-ipr@ietf.org. + + + + + + + + + + + + +Scott & Burleigh Experimental [Page 50] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt new file mode 100644 index 0000000..9b48f34 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc7098.txt @@ -0,0 +1,731 @@ + + + + + + +Internet Engineering Task Force (IETF) B. Carpenter +Request for Comments: 7098 Univ. of Auckland +Category: Informational S. Jiang +ISSN: 2070-1721 Huawei Technologies Co., Ltd + W. Tarreau + HAProxy Technologies, Inc. + January 2014 + + + Using the IPv6 Flow Label for Load Balancing in Server Farms + +Abstract + + This document describes how the currently specified IPv6 flow label + can be used to enhance layer 3/4 (L3/4) load distribution and + balancing for large server farms. + +Status of This Memo + + This document is not an Internet Standards Track specification; it is + published for informational purposes. + + This document is a product of the Internet Engineering Task Force + (IETF). It represents the consensus of the IETF community. It has + received public review and has been approved for publication by the + Internet Engineering Steering Group (IESG). Not all documents + approved by the IESG are a candidate for any level of Internet + Standard; see Section 2 of RFC 5741. 
+ + Information about the current status of this document, any errata, + and how to provide feedback on it may be obtained at + http://www.rfc-editor.org/info/rfc7098. + +Copyright Notice + + Copyright (c) 2014 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (http://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + +Carpenter, et al. Informational [Page 1] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2 + 2. Summary of Flow Label Specification . . . . . . . . . . . . . 2 + 3. Summary of Server Farm Load-Balancing Techniques . . . . . . 4 + 4. Applying the Flow Label to Layer 3/4 Load Balancing . . . . . 8 + 5. Security Considerations . . . . . . . . . . . . . . . . . . . 10 + 6. Acknowledgements . . . . . . . . . . . . . . . . . . . . . . 11 + 7. References . . . . . . . . . . . . . . . . . . . . . . . . . 12 + 7.1. Normative References . . . . . . . . . . . . . . . . . . 12 + 7.2. Informative References . . . . . . . . . . . . . . . . . 12 + +1. Introduction + + The IPv6 flow label has been redefined [RFC6437] and is now a + recommended IPv6 node requirement [RFC6434]. Its use for load + sharing in multipath routing has been specified [RFC6438]. Another + scenario in which the flow label could be used is in load + distribution for large server farms. 
Load distribution is a slightly + more general term than load balancing, but the latter is more + commonly used. In the context of a server farm, both terms refer to + mechanisms that distribute the workload of a server farm among + different servers in order to optimize performance. Server load + balancing commonly applies to HTTP traffic, but most of the + techniques described would apply to other upper-layer applications as + well. This document starts with brief introductions to the flow + label and to server load-balancing techniques, and then describes how + the flow label can be used to enhance load balancers operating on IP + packets and TCP sessions, commonly known as layer 3/4 load balancers. + + The motivation for this approach is to improve the performance of + most types of layer 3/4 load balancers, especially for traffic + including multiple IPv6 extension headers and in particular for + fragmented packets. Fragmented packets, often the result of + customers reaching the load balancer via a VPN with a limited MTU, + are a common performance problem. + +2. Summary of Flow Label Specification + + The IPv6 flow label [RFC6437] is a 20-bit field included in every + IPv6 header [RFC2460]. It is recommended to be supported in all IPv6 + nodes by [RFC6434]. There is additional background material in + [RFC6436] and [RFC6294]. According to its definition, the flow label + should be set to a constant value for a given traffic flow (such as + an HTTP connection), and that value will belong to a uniform + statistical distribution, making it potentially valuable for load- + balancing purposes. + + + + +Carpenter, et al. Informational [Page 2] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + Any device that has access to the IPv6 header has access to the flow + label, and it is at a fixed position in every IPv6 packet. 
In + contrast, transport-layer information, such as the port numbers, is + not always in a fixed position, since it follows any IPv6 extension + headers that may be present. In fact, the logic of finding the + transport header is always more complex for IPv6 than for IPv4, due + to the absence of an Internet Header Length field in IPv6. + Additionally, if packets are fragmented, the flow label will be + present in all fragments, but the transport header will only be in + one packet. Therefore, within the lifetime of a given transport- + layer connection, the flow label can be a more convenient "handle" + than the port number for identifying that particular connection. + + According to RFC 6437, source hosts should set the flow label; + however, if they do not (i.e., its value is zero), forwarding nodes + (such as the first-hop router) may set it instead. In both cases, + the flow label value must be constant for a given transport session, + normally identified by the IPv6 and Transport header 5-tuple. By + default, the flow label value should be calculated by a stateless + algorithm. The resulting value should form part of a statistically + uniform distribution, regardless of which node sets it. + + It is recognized that at the time of writing, very few traffic flows + include a non-zero flow label value. The mechanism described below + is one that can be added to existing load-balancing mechanisms, so + that it will become effective as more and more flows contain a non- + zero label. Even if the flow label is chosen from an imperfectly + uniform distribution, it will nevertheless increase the information + entropy of the IPv6 header as a whole. This allows for progressive + introduction of load balancing based on the flow label. 
+ + If the recommendations in Section 3 of RFC 6437 are followed for + traffic from a given source accessing a well-known TCP port at a + given destination, the flow label can act as a substitute for the + port numbers as far as a load balancer is concerned, and it can be + found at a fixed position in the layer 3 header even if extension + headers are present. + + The flow label is defined as an end-to-end component of the IPv6 + header, but there are three qualifications to this: + + 1. Until the IPv6 flow label specification in RFC 6437 is widely + implemented as recommended by RFC 6434, the flow label will often + be set to the default value of zero. + + + + + + + +Carpenter, et al. Informational [Page 3] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + 2. Because of the recommendation to use a stateless algorithm to + calculate the label, there is a low (but non-zero) probability + that two simultaneous flows from the same source to the same + destination have the same flow label value despite having + different transport-protocol port numbers. + + 3. The Flow Label field is in an unprotected part of the IPv6 + header, which means that intentional or unintentional changes to + its value cannot be easily detected by a receiver. + + The first two points are addressed below in Section 4 and the third + in Section 5. + +3. Summary of Server Farm Load-Balancing Techniques + + Load balancing for server farms is achieved by a variety of methods, + often used in combination [Tarreau]. This section gives a general + overview of common methods, although the flow label is not relevant + to all of them. The actual load-balancing algorithm (the choice of + which server to use for a new client session) is irrelevant to this + discussion. We give examples for HTTP, but analogous techniques may + be used for other application protocols. 
+ + o The simplest method is using the DNS to return different server + addresses for a single name such as www.example.com to different + users. This is typically done by rotating the order in which + different addresses within the server site are listed by the + relevant authoritative DNS server, on the assumption that the + client will pick the first one. Routing may be configured such + that the different addresses are handled by different ingress + routers. Several variants of this load-balancing mechanism exist, + such as expecting some clients to use all the advertised addresses + when multiple connections are involved, or directing the traffic + to multiple sites, also known as global load balancing. None of + these mechanisms are in the scope of this document, and the + proposal in this document does not affect their usability nor aim + to replace them, so they will not be discussed further. + + o Another method, for HTTP servers, is to operate a layer 7 reverse + proxy in front of the server farm. The reverse proxy will present + a single IP address to the world, communicated to clients by a + single AAAA record. For each new client session (an incoming TCP + connection and HTTP request), it will pick a particular server and + proxy the session to it. The act of proxying should be more + efficient and less resource-intensive than the act of serving the + required content. The proxy must retain TCP state and proxy state + for the duration of the session. This TCP state could, + potentially, include the incoming flow label value. + + + +Carpenter, et al. Informational [Page 4] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + o A component of some load-balancing systems is an SSL reverse proxy + farm. The individual SSL proxies handle all cryptographic aspects + and exchange unencrypted HTTP with the actual servers. 
Thus, from + the load-balancing point of view, this really looks just like a + server farm, except that it's specialized for HTTPS. Each proxy + will retain SSL and TCP and maybe HTTP state for the duration of + the session, and the TCP state could potentially include the flow + label. + + o Finally the "front end" of many load-balancing systems is a layer + 3/4 load balancer. While it can be a dedicated device, it is also + a standard function of some network switches or routers (e.g. + using Equal-Cost Multipath Routing (ECMP) [RFC2991]). In this + case, it is the layer 3/4 load balancer whose IP address is + published as the primary AAAA record for the service. All client + sessions will pass through this device. Depending on the specific + scenario, the balancer will assign new sessions among the actual + application servers, across an SSL proxy farm, or among a set of + layer 7 proxies. In all cases, the layer 3/4 load balancer has to + classify incoming packets very quickly and choose the target + server or proxy so as to ensure persistence. 'Persistence' is + defined as the guarantee that a given client session will run to + completion on a single server. The layer 3/4 load balancer + therefore needs to inspect each incoming packet to classify it. + There are two common types of layer 3/4 load balancers, the + totally stateless ones which only act on single packets, generally + involving a per-packet hashing of easy-to-find information such as + the source address and/or port into a server number, and the + stateful ones that take the routing decision on the very first + packets of a session and maintain the same direction for all + packets belonging to the same session. Clearly, both types of + layer 3/4 balancers could inspect and make use of the flow label + value. + + Our focus is on how the balancer identifies a particular flow. 
+ For clarity, note that two aspects of layer 3/4 load balancers are + not affected by use of the flow label to identify sessions: + + 1. Balancers use various techniques to redirect traffic to a + specific target server. + + + All servers are configured with the same IP address, they + are all on the same LAN, and the load balancer sends + directly to their individual MAC addresses. In this case, + return packets from the server to the client are sent back + without passing through the balancer, a technique known as + direct server return, but we are not concerned here with + the return packets. + + + +Carpenter, et al. Informational [Page 5] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + + All servers are configured with the same IP address, + treated locally as an anycast address by layer 3 ECMP + routing. + + + Each server has its own IP address, and the balancer uses + an IP-in-IP tunnel to reach it. + + + Each server has its own IP address, and the balancer + performs NAPT (Network Address and Port Translation) to + deliver the client's packets to that address. + + + The choice between these methods is not affected by use of + the flow label. + + 2. A layer 3/4 balancer must correctly handle Path MTU Discovery + by forwarding relevant ICMPv6 packets in both directions. + This too is not directly affected by use of the flow label. + It should be noted that there may be difficulty correlating an + ICMPv6 "Packet too big" response with the session it refers + to, but that is out of the scope of the present document. + + The following diagram, inspired by [Tarreau], shows a layout with + various methods in use together. (Below, "ASIC" stands for + "Application-Specific Integrated Circuit".) + + + + + + + + + + + + + + + + + + + + + + + + + + + +Carpenter, et al. 
Informational [Page 6] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + ___________________________________________ + ( ) + ( Clients in the Internet ) + (___________________________________________) + | | + ------------ DNS-based ------------ + | Ingress | load splitting | Ingress | + | router | affects | router | + ------------ routing ------------ + ___|____________________________|___ + | | + | | + | | + ------------ ------------ + | L3/4 ASIC| | L3/4 ASIC| + | balancer | | balancer | + ------------ ------------ + | load | + | spreading | + __________|________________________|___________ + | | | | + ------------ ------------ -------- -------- + |HTTP proxy|...|HTTP proxy| | SSL |...| SSL | + | balancer | | balancer | | proxy| | proxy| + ------------ ------------ -------- -------- + ____|_____________|_____________|_________|_____ + | | | | | + -------- -------- -------- -------- -------- + |HTTP | |HTTP | |HTTP | |HTTP | |HTTP | + |server| |server| |server| |server| |server| + -------- -------- -------- -------- -------- + + From the previous paragraphs, we can identify several points in this + diagram where the flow label might be relevant: + + 1. Layer 3/4 load balancers. + + 2. SSL proxies. + + 3. HTTP proxies. + + However, usage by the proxies seems unlikely to affect performance, + because they must in any case process the application-layer header, + so in this document we focus only on layer 3/4 balancers. + + + + + + + +Carpenter, et al. Informational [Page 7] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +4. Applying the Flow Label to Layer 3/4 Load Balancing + + The suggested model for using the flow label to enhance an layer 3/4 + load-balancing mechanism is as follows: + + o We are only concerned with IPv6 traffic in which the flow label + value has been set according to [RFC6437]. 
If the flow label of + an incoming packet is zero, load balancers will continue to use + the transport header in the traditional way. As the use of the + flow label becomes more prevalent according to RFC 6434, load + balancers, and therefore users, will reap a growing performance + benefit. + + o If the flow label of an incoming packet is non-zero, layer 3/4 + load balancers can use the 2-tuple {source address, flow label} as + the session key for whatever load distribution algorithm they + support. Alternatively, they might use the 3-tuple {dest address, + source address, flow label}, especially if the server farm + supports multiple server IP addresses, but using the 3-tuple will + be significantly quicker than searching for the transport port + numbers later in the packet. Moreover, the transport-layer + information such as the source port is not repeated in fragments, + which generally prevents stateless load balancers from supporting + fragmented traffic since they generally cannot reassemble + fragments. + + A stateless layer 3/4 load balancer would simply apply a hash + algorithm to the 2-tuple or 3-tuple on all packets in order to + select the same target server consistently for a given flow. + Needless to say, the hash algorithm has to be well chosen for its + purpose, but this problem is common to several forms of stateless + load balancing. The discussion in [RFC6438] applies. + + A stateful layer 3/4 load balancer would apply its usual load + distribution algorithm to the first packet of a session, and store + the {tuple, server} association in a table so that subsequent + packets belonging to the same session are forwarded to the same + server. Thus, for all subsequent packets of the session, it can + ignore all IPv6 extension headers, which should lead to a + performance benefit. Whether this benefit is valuable will depend + on engineering details of the specific load balancer. 
+ + Note that such a balancer will not identify new transport sessions + from the same source that use the same flow label; they will be + delivered to the same server. This is like the behavior of + existing hash-based layer 4 balancers that always send similarly + hashed packets to the same destination. However, a global state + table in a flow label balancer cannot be shared between multiple + + + +Carpenter, et al. Informational [Page 8] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + services if these services rely on transport-layer information, + since the goal of using the flow label is to avoid looking up that + information. + + A related issue is that the balancer will not detect FIN/ACK + sequences at the end of sessions. Therefore, it will rely on + inactivity timers to delete session state. However, all existing + balancers must maintain such timers to deal with hung sessions, + and the practical impact on memory utilization is unlikely to be + significant. + + o Layer 3/4 balancers that redirect the incoming packets by NAPT are + not expected to obtain any saving of time by using the flow label, + because they have no choice but to follow the extension header + chain in order to locate and modify the port number and transport + checksum. The same would apply to balancers that perform TCP + state tracking for any reason. + + o Note that correct handling of ICMPv6 for Path MTU Discovery + requires the layer 3/4 balancer to keep state for the client + source address, independently of either the port numbers or the + flow label. + + o SSL and HTTP proxies, if present, should forward the flow label + value towards the server. This usually has no performance + benefit, but it is consistent with the general model for the flow + label described in RFC 6437. + + It should be noted that the performance benefit, if any, depends + entirely on engineering trade-offs in the design of the layer 3/4 + balancer. 
An extra test is needed to check if the label is non-zero, + but if there is a non-zero label, all logic for handling extension + headers can be skipped except for the first packet of a new flow. + Since the identifying state to be stored is only the tuple and the + server identifier, storage requirements will be reduced. + Additionally, the method will work for fragmented traffic and for + flows where the transport information is missing (unknown transport + protocol) or obfuscated (e.g., IPsec). Traffic reaching the load + balancer via a VPN is particularly prone to the fragmentation issue, + due to MTU size issues. For some load-balancer designs, these are + very significant advantages. + + In the unlikely event of two simultaneous flows from the same source + address having the same flow label value, the two flows would end up + assigned to the same server, where they would be distinguished as + normal by their port numbers. There are approximately one million + possible flow label values, and if the rules for flow label + generation [RFC6437] are followed, this would be a statistically rare + + + +Carpenter, et al. Informational [Page 9] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + event, and would not damage the overall load-balancing effect. + Moreover, with a million possible label values, it is very likely + that there will be many more flow label values than servers at most + sites, so it is already expected that multiple flow label values will + end up on the same server for a given client IP address. + + In the case that many thousands of clients are hidden behind the same + large-scale NAPT with a single shared IP address, the assumption of + low probability of conflicts might become incorrect, unless flow + label values are random enough to avoid following similar sequences + for all clients. 
This is not expected to be a factor for IPv6 + anyway, since there is no need to implement large-scale NAPT with + address sharing [RFC4864]. The probability of conflicts is low for + sites that implement network prefix translation [RFC6296], since this + technique provides a different address for each client. + +5. Security Considerations + + Security aspects of the flow label are discussed in [RFC6437]. As + noted there, a malicious source or man-in-the-middle could disturb + load balancing by manipulating flow labels. This risk already exists + today where the source address and port are used as a hashing key in + layer 3/4 load balancers, as well as where a persistence cookie is + used in HTTP to designate a server. It even exists on layer 3 + components that only rely on the source address to select a + destination, making them more DDoS-prone. Nevertheless, all these + methods are currently used because the benefits for load balancing + and persistence hugely outweigh the risks. The flow label does not + significantly alter this situation. + + Specifically, the IPv6 flow label specification [RFC6437] states that + "stateless classifiers should not use the flow label alone to control + load distribution, and stateful classifiers should include explicit + methods to detect and ignore suspect flow label values." The former + point is answered by also using the source address. The latter point + is more complex. If the risk is considered serious, the site ingress + router or the layer 3/4 balancer should use a suitable heuristic to + verify incoming flows with non-zero flow label values. If a flow + from a given source address and port number does not have a constant + flow label value, it is suspect and should be dropped. This would + deal with both intentional and accidental changes to the flow label. 
+ + A malicious source or man-in-the-middle could generate a flow in + which the flow label is constant but the transport port numbers in + some packets are invalid. Such packets, if load-balanced only on the + basis of the flow label, could reach the target server and create a + single-source DoS attack on its TCP engine. + + + + +Carpenter, et al. Informational [Page 10] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + + RFC 6437 notes in its Security Considerations that if the covert + channel risk is considered significant, a firewall might rewrite non- + zero flow labels. As long as this is done as described in RFC 6437, + it will not invalidate the mechanisms described above. + + The flow label may be of use in protecting against DDoS attacks + against servers. As noted in RFC 6437, a source should generate flow + label values that are hard to predict, most likely by including a + secret nonce in the hash used to generate each label. The attacker + does not know the nonce and therefore has no way to invent flow + labels that will all target the same server, even with knowledge of + both the hash algorithm and the load-balancing algorithm. Still, it + is important to understand that it is always trivial to force a load + balancer to stick to the same server during an attack, so the + security of the whole solution must not rely on the unpredictability + of the flow label values alone, but should include defensive measures + like most load balancers already have against abnormal use of source + addresses or session cookies. + + New flows are assigned to a server according to any of the usual + algorithms available on the load balancer (e.g., least connections, + round robin, etc.). The association between the 2-tuple {source + address, flow label} and the server is stored in a table (often + called stick table) so that future traffic from the same source using + the same flow label can be sent to the same server. 
This method is + more robust against a loss of server and also makes it harder for an + attacker to target a specific server, because the association between + a flow label value and a server is not known externally. + + In the case that a stateless hash function is used to assign client + packets to specific servers, it may be advisable to use a + cryptographic hash function of some kind, to ensure that an attacker + cannot predict the behavior of the load balancer. + +6. Acknowledgements + + Valuable comments and contributions were made by Fred Baker, Olivier + Bonaventure, Ben Campbell, Lorenzo Colitti, Linda Dunbar, Donald + Eastlake, Joel Jaeggli, Gurudeep Kamat, Warren Kumari, Julia + Renouard, Julius Volz, and others. + + + + + + + + + + + +Carpenter, et al. Informational [Page 11] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +7. References + +7.1. Normative References + + [RFC2460] Deering, S. and R. Hinden, "Internet Protocol, Version 6 + (IPv6) Specification", RFC 2460, December 1998. + + [RFC6434] Jankiewicz, E., Loughney, J., and T. Narten, "IPv6 Node + Requirements", RFC 6434, December 2011. + + [RFC6437] Amante, S., Carpenter, B., Jiang, S., and J. Rajahalme, + "IPv6 Flow Label Specification", RFC 6437, November 2011. + +7.2. Informative References + + [RFC2991] Thaler, D. and C. Hopps, "Multipath Issues in Unicast and + Multicast Next-Hop Selection", RFC 2991, November 2000. + + [RFC4864] Van de Velde, G., Hain, T., Droms, R., Carpenter, B., and + E. Klein, "Local Network Protection for IPv6", RFC 4864, + May 2007. + + [RFC6294] Hu, Q. and B. Carpenter, "Survey of Proposed Use Cases for + the IPv6 Flow Label", RFC 6294, June 2011. + + [RFC6296] Wasserman, M. and F. Baker, "IPv6-to-IPv6 Network Prefix + Translation", RFC 6296, June 2011. + + [RFC6436] Amante, S., Carpenter, B., and S. Jiang, "Rationale for + Update to the IPv6 Flow Label Specification", RFC 6436, + November 2011. + + [RFC6438] Carpenter, B. and S. 
Amante, "Using the IPv6 Flow Label + for Equal Cost Multipath Routing and Link Aggregation in + Tunnels", RFC 6438, November 2011. + + [Tarreau] Tarreau, W., "Making applications scalable with load + balancing", 2006, . + + + + + + + + + + + + + +Carpenter, et al. Informational [Page 12] + +RFC 7098 Flow Label for Server Load Balancing January 2014 + + +Authors' Addresses + + Brian Carpenter + Department of Computer Science + University of Auckland + PB 92019 + Auckland 1142 + New Zealand + + EMail: brian.e.carpenter@gmail.com + + + Sheng Jiang + Huawei Technologies Co., Ltd + Q14, Huawei Campus + No.156 Beiqing Road + Hai-Dian District, Beijing 100095 + P.R. China + + EMail: jiangsheng@huawei.com + + + Willy Tarreau + HAProxy Technologies, Inc. + R&D Network Products + 3 rue du petit Robinson + 78350 Jouy-en-Josas + France + + EMail: willy@haproxy.com + + + + + + + + + + + + + + + + + + + + + +Carpenter, et al. Informational [Page 13] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt new file mode 100644 index 0000000..6e02a8d --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc761.txt @@ -0,0 +1,5185 @@ + + +RFC: 761 +IEN: 129 + + + + + + + + DOD STANDARD + + TRANSMISSION CONTROL PROTOCOL + + + + January 1980 + + + + + + + + + + + + + + + + prepared for + + Defense Advanced Research Projects Agency + Information Processing Techniques Office + 1400 Wilson Boulevard + Arlington, Virginia 22209 + + + + + + + + by + + Information Sciences Institute + University of Southern California + 4676 Admiralty Way + Marina del Rey, California 90291 + +January 1980 + Transmission Control Protocol + + + + TABLE OF CONTENTS + + PREFACE ........................................................ iii + +1. INTRODUCTION ..................................................... 1 + + 1.1 Motivation .................................................... 1 + 1.2 Scope ......................................................... 
2 + 1.3 About This Document ........................................... 2 + 1.4 Interfaces .................................................... 3 + 1.5 Operation ..................................................... 3 + +2. PHILOSOPHY ....................................................... 7 + + 2.1 Elements of the Internetwork System ........................... 7 + 2.2 Model of Operation ............................................ 7 + 2.3 The Host Environment .......................................... 8 + 2.4 Interfaces .................................................... 9 + 2.5 Relation to Other Protocols ................................... 9 + 2.6 Reliable Communication ....................................... 10 + 2.7 Connection Establishment and Clearing ........................ 10 + 2.8 Data Communication ........................................... 12 + 2.9 Precedence and Security ...................................... 13 + 2.10 Robustness Principle ......................................... 13 + +3. FUNCTIONAL SPECIFICATION ........................................ 15 + + 3.1 Header Format ................................................ 15 + 3.2 Terminology .................................................. 19 + 3.3 Sequence Numbers ............................................. 24 + 3.4 Establishing a connection .................................... 29 + 3.5 Closing a Connection ......................................... 35 + 3.6 Precedence and Security ...................................... 38 + 3.7 Data Communication ........................................... 38 + 3.8 Interfaces ................................................... 42 + 3.9 Event Processing ............................................. 52 + +GLOSSARY ............................................................ 75 + +REFERENCES .......................................................... 
83 + + + + + + + + + + + + [Page i] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page ii] + + +January 1980 + Transmission Control Protocol + + + + PREFACE + + + +This document describes the DoD Standard Transmission Control Protocol +(TCP). There have been eight earlier editions of the ARPA TCP +specification on which this standard is based, and the present text +draws heavily from them. There have been many contributors to this work +both in terms of concepts and in terms of text. This edition +incorporates the addition of security, compartmentation, and precedence +concepts into the TCP specification. + + Jon Postel + + Editor + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page iii] + + +January 1980 +RFC:761 +IEN:129 +Replaces: IENs 124, 112, +81, 55, 44, 40, 27, 21, 5 + + DOD STANDARD + + TRANSMISSION CONTROL PROTOCOL + + + + 1. INTRODUCTION + +The Transmission Control Protocol (TCP) is intended for use as a highly +reliable host-to-host protocol between hosts in packet-switched computer +communication networks, and especially in interconnected systems of such +networks. + +This document describes the functions to be performed by the +Transmission Control Protocol, the program that implements it, and its +interface to programs or users that require its services. + +1.1. Motivation + + Computer communication systems are playing an increasingly important + role in military, government, and civilian environments. This + document primarily focuses its attention on military computer + communication requirements, especially robustness in the presence of + communication unreliability and availability in the presence of + congestion, but many of these problems are found in the civilian and + government sector as well. 
+ + As strategic and tactical computer communication networks are + developed and deployed, it is essential to provide means of + interconnecting them and to provide standard interprocess + communication protocols which can support a broad range of + applications. In anticipation of the need for such standards, the + Deputy Undersecretary of Defense for Research and Engineering has + declared the Transmission Control Protocol (TCP) described herein to + be a basis for DoD-wide inter-process communication protocol + standardization. + + TCP is a connection-oriented, end-to-end reliable protocol designed to + fit into a layered hierarchy of protocols which support multi-network + applications. The TCP provides for reliable inter-process + communication between pairs of processes in host computers attached to + distinct but interconnected computer communication networks. Very few + assumptions are made as to the reliability of the communication + protocols below the TCP layer. TCP assumes it can obtain a simple, + potentially unreliable datagram service from the lower level + protocols. In principle, the TCP should be able to operate above a + wide spectrum of communication systems ranging from hard-wired + connections to packet-switched or circuit-switched networks. + + + [Page 1] + + + January 1980 +Transmission Control Protocol +Introduction + + + + TCP is based on concepts first described by Cerf and Kahn in [1]. The + TCP fits into a layered protocol architecture just above a basic + Internet Protocol [2] which provides a way for the TCP to send and + receive variable-length segments of information enclosed in internet + datagram "envelopes". The internet datagram provides a means for + addressing source and destination TCPs in different networks. The + internet protocol also deals with any fragmentation or reassembly of + the TCP segments required to achieve transport and delivery through + multiple networks and interconnecting gateways. 
The internet protocol + also carries information on the precedence, security classification + and compartmentation of the TCP segments, so this information can be + communicated end-to-end across multiple networks. + + Protocol Layering + + +---------------------+ + | higher-level | + +---------------------+ + | TCP | + +---------------------+ + | internet protocol | + +---------------------+ + |communication network| + +---------------------+ + + Figure 1 + + Much of this document is written in the context of TCP implementations + which are co-resident with higher level protocols in the host + computer. As a practical matter, many computer systems will be + connected to networks via front-end computers which house the TCP and + internet protocol layers, as well as network specific software. The + TCP specification describes an interface to the higher level protocols + which appears to be implementable even for the front-end case, as long + as a suitable host-to-front end protocol is implemented. + +1.2. Scope + + The TCP is intended to provide a reliable process-to-process + communication service in a multinetwork environment. The TCP is + intended to be a host-to-host protocol in common use in multiple + networks. + +1.3. About this Document + + This document represents a specification of the behavior required of + any TCP implementation, both in its interactions with higher level + protocols and in its interactions with other TCPs. The rest of this + + +[Page 2] + + +January 1980 + Transmission Control Protocol + Introduction + + + + section offers a very brief view of the protocol interfaces and + operation. Section 2 summarizes the philosophical basis for the TCP + design. Section 3 offers both a detailed description of the actions + required of TCP when various events occur (arrival of new segments, + user calls, errors, etc.) and the details of the formats of TCP + segments. + +1.4. 
Interfaces + + The TCP interfaces on one side to user or application processes and on + the other side to a lower level protocol such as Internet Protocol. + + The interface between an application process and the TCP is + illustrated in reasonable detail. This interface consists of a set of + calls much like the calls an operating system provides to an + application process for manipulating files. For example, there are + calls to open and close connections and to send and receive letters on + established connections. It is also expected that the TCP can + asynchronously communicate with application programs. Although + considerable freedom is permitted to TCP implementors to design + interfaces which are appropriate to a particular operating system + environment, a minimum functionality is required at the TCP/user + interface for any valid implementation. + + The interface between TCP and lower level protocol is essentially + unspecified except that it is assumed there is a mechanism whereby the + two levels can asynchronously pass information to each other. + Typically, one expects the lower level protocol to specify this + interface. TCP is designed to work in a very general environment of + interconnected networks. The lower level protocol which is assumed + throughout this document is the Internet Protocol [2]. + +1.5. Operation + + As noted above, the primary purpose of the TCP is to provide reliable, + securable logical circuit or connection service between pairs of + processes. To provide this service on top of a less reliable internet + communication system requires facilities in the following areas: + + Basic Data Transfer + Reliability + Flow Control + Multiplexing + Connections + Precedence and Security + + The basic operation of the TCP in each of these areas is described in + the following paragraphs. 
+ + + [Page 3] + + + January 1980 +Transmission Control Protocol +Introduction + + + + Basic Data Transfer: + + The TCP is able to transfer a continuous stream of octets in each + direction between its users by packaging some number of octets into + segments for transmission through the internet system. In this + stream mode, the TCPs decide when to block and forward data at their + own convenience. + + For users who desire a record-oriented service, the TCP also permits + the user to submit records, called letters, for transmission. When + the sending user indicates a record boundary (end-of-letter), this + causes the TCPs to promptly forward and deliver data up to that + point to the receiver. + + Reliability: + + The TCP must recover from data that is damaged, lost, duplicated, or + delivered out of order by the internet communication system. This + is achieved by assigning a sequence number to each octet + transmitted, and requiring a positive acknowledgment (ACK) from the + receiving TCP. If the ACK is not received within a timeout + interval, the data is retransmitted. At the receiver, the sequence + numbers are used to correctly order segments that may be received + out of order and to eliminate duplicates. Damage is handled by + adding a checksum to each segment transmitted, checking it at the + receiver, and discarding damaged segments. + + As long as the TCPs continue to function properly and the internet + system does not become completely partitioned, no transmission + errors will affect the users. TCP recovers from internet + communication system errors. + + Flow Control: + + TCP provides a means for the receiver to govern the amount of data + sent by the sender. This is achieved by returning a "window" with + every ACK indicating a range of acceptable sequence numbers beyond + the last segment successfully received. For stream mode, the window + indicates an allowed number of octets that the sender may transmit + before receiving further permission. 
For record mode, the window + indicates an allowed amount of buffer space the sender may consume, + this may be more than the number of data octets transmitted if there + is a mismatch between letter size and buffer size. + + + + + + + +[Page 4] + + +January 1980 + Transmission Control Protocol + Introduction + + + + Multiplexing: + + To allow for many processes within a single Host to use TCP + communication facilities simultaneously, the TCP provides a set of + addresses or ports within each host. Concatenated with the network + and host addresses from the internet communication layer, this forms + a socket. A pair of sockets uniquely identifies each connection. + That is, a socket may be simultaneously used in multiple + connections. + + The binding of ports to processes is handled independently by each + Host. However, it proves useful to attach frequently used processes + (e.g., a "logger" or timesharing service) to fixed sockets which are + made known to the public. These services can then be accessed + through the known addresses. Establishing and learning the port + addresses of other processes may involve more dynamic mechanisms. + + Connections: + + The reliability and flow control mechanisms described above require + that TCPs initialize and maintain certain status information for + each data stream. The combination of this information, including + sockets, sequence numbers, and window sizes, is called a connection. + Each connection is uniquely specified by a pair of sockets + identifying its two sides. + + When two processes wish to communicate, their TCP's must first + establish a connection (initialize the status information on each + side). When their communication is complete, the connection is + terminated or closed to free the resources for other uses. 
+ + Since connections must be established between unreliable hosts and + over the unreliable internet communication system, a handshake + mechanism with clock-based sequence numbers is used to avoid + erroneous initialization of connections. + + Precedence and Security: + + The users of TCP may indicate the security and precedence of their + communication. Provision is made for default values to be used when + these features are not needed. + + + + + + + + + + [Page 5] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 6] + + +January 1980 + Transmission Control Protocol + + + + 2. PHILOSOPHY + +2.1. Elements of the Internetwork System + + The internetwork environment consists of hosts connected to networks + which are in turn interconnected via gateways. It is assumed here + that the networks may be either local networks (e.g., the ETHERNET) or + large networks (e.g., the ARPANET), but in any case are based on + packet switching technology. The active agents that produce and + consume messages are processes. Various levels of protocols in the + networks, the gateways, and the hosts support an interprocess + communication system that provides two-way data flow on logical + connections between process ports. + + We specifically assume that data is transmitted from host to host + through means of a set of networks. When we say network, we have in + mind a packet switched network (PSN). This assumption is probably + unnecessary, since a circuit switched network or a hybrid combination + of the two could also be used; but for concreteness, we explicitly + assume that the hosts are connected to one or more packet switches of + a PSN. + + The term packet is used generically here to mean the data of one + transaction between a host and a packet switch. 
The format of data + blocks exchanged between the packet switches in a network will + generally not be of concern to us. + + Hosts are computers attached to a network, and from the communication + network's point of view, are the sources and destinations of packets. + Processes are viewed as the active elements in host computers (in + accordance with the fairly common definition of a process as a program + in execution). Even terminals and files or other I/O devices are + viewed as communicating with each other through the use of processes. + Thus, all communication is viewed as inter-process communication. + + Since a process may need to distinguish among several communication + streams between itself and another process (or processes), we imagine + that each process may have a number of ports through which it + communicates with the ports of other processes. + +2.2. Model of Operation + + Processes transmit data by calling on the TCP and passing buffers of + data as arguments. The TCP packages the data from these buffers into + segments and calls on the internet module to transmit each segment to + the destination TCP. The receiving TCP places the data from a segment + into the receiving user's buffer and notifies the receiving user. The + TCPs include control information in the segments which they use to + ensure reliable ordered data transmission. + + + [Page 7] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + The model of internet communication is that there is an internet + protocol module associated with each TCP which provides an interface + to the local network. This internet module packages TCP segments + inside internet datagrams and routes these datagrams to a destination + internet module or intermediate gateway. To transmit the datagram + through the local network, it is embedded in a local network packet. 
+ + The packet switches may perform further packaging, fragmentation, or + other operations to achieve the delivery of the local packet to the + destination internet module. + + At a gateway between networks, the internet datagram is "unwrapped" + from its local packet and examined to determine through which network + the internet datagram should travel next. The internet datagram is + then "wrapped" in a local packet suitable to the next network and + routed to the next gateway, or to the final destination. + + A gateway is permitted to break up an internet datagram into smaller + internet datagram fragments if this is necessary for transmission + through the next network. To do this, the gateway produces a set of + internet datagrams; each carrying a fragment. Fragments may be broken + into smaller ones at intermediate gateways. The internet datagram + fragment format is designed so that the destination internet module + can reassemble fragments into internet datagrams. + + A destination internet module unwraps the segment from the datagram + (after reassembling the datagram, if necessary) and passes it to the + destination TCP. + + This simple model of the operation glosses over many details. One + important feature is the type of service. This provides information + to the gateway (or internet module) to guide it in selecting the + service parameters to be used in traversing the next network. + Included in the type of service information is the precedence of the + datagram. Datagrams may also carry security information to permit + host and gateways that operate in multilevel secure environments to + properly segregate datagrams for security considerations. + +2.3. The Host Environment + + The TCP is assumed to be a module in a time sharing operating system. + The users access the TCP much like they would access the file system. + The TCP may call on other operating system functions, for example, to + manage data structures. 
The actual interface to the network is + assumed to be controlled by a device driver module. The TCP does not + call on the network device driver directly, but rather calls on the + internet datagram protocol module which may in turn call on the device + driver. + + +[Page 8] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + Though it is assumed here that processes are supported by the host + operating system, the mechanisms of TCP do not preclude implementation + of the TCP in a front-end processor. However, in such an + implementation, a host-to-front-end protocol must provide the + functionality to support the type of TCP-user interface described + above. + +2.4. Interfaces + + The TCP/user interface provides for calls made by the user on the TCP + to OPEN or CLOSE a connection, to SEND or RECEIVE data, or to obtain + STATUS about a connection. These calls are like other calls from user + programs on the operating system, for example, the calls to open, read + from, and close a file. + + The TCP/internet interface provides calls to send and receive + datagrams addressed to TCP modules in hosts anywhere in the internet + system. These calls have parameters for passing the address, type of + service, precedence, security, and other control information. + +2.5. Relation to Other Protocols + + The following diagram illustrates the place of the TCP in the protocol + hierarchy: + + + +------+ +-----+ +-----+ +-----+ + |Telnet| | FTP | |Voice| ... | | Application Level + +------+ +-----+ +-----+ +-----+ + | | | | + +-----+ +-----+ +-----+ + | TCP | | RTP | ... | | Host Level + +-----+ +-----+ +-----+ + | | | + +-------------------------------+ + | Internet Protocol | Gateway Level + +-------------------------------+ + | + +---------------------------+ + | Local Network Protocol | Network Level + +---------------------------+ + | + + + + Protocol Relationships + + Figure 2. 
+ + + [Page 9] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + It is expected that the TCP will be able to support higher level + protocols efficiently. It should be easy to interface higher level + protocols like the ARPANET Telnet [3] or AUTODIN II THP to the TCP. + +2.6. Reliable Communication + + A stream of data sent on a TCP connection is delivered reliably and in + order at the destination. + + Transmission is made reliable via the use of sequence numbers and + acknowledgments. Conceptually, each octet of data is assigned a + sequence number. The sequence number of the first octet of data in a + segment is the sequence number transmitted with that segment and is + called the segment sequence number. Segments also carry an + acknowledgment number which is the sequence number of the next + expected data octet of transmissions in the reverse direction. When + the TCP transmits a segment, it puts a copy on a retransmission queue + and starts a timer; when the acknowledgment for that data is received, + the segment is deleted from the queue. If the acknowledgment is not + received before the timer runs out, the segment is retransmitted. + + An acknowledgment by TCP does not guarantee that the data has been + delivered to the end user, but only that the receiving TCP has taken + the responsibility to do so. + + To govern the flow of data into a TCP, a flow control mechanism is + employed. The data receiving TCP reports a window to the sending + TCP. This window specifies the number of octets, starting with the + acknowledgment number that the data receiving TCP is currently + prepared to receive. + +2.7. Connection Establishment and Clearing + + To identify the separate data streams that a TCP may handle, the TCP + provides a port identifier. Since port identifiers are selected + independently by each operating system, TCP, or user, they might not + be unique. 
To provide for unique addresses at each TCP, we + concatenate an internet address identifying the TCP with a port + identifier to create a socket which will be unique throughout all + networks connected together. + + A connection is fully specified by the pair of sockets at the ends. A + local socket may participate in many connections to different foreign + sockets. A connection can be used to carry data in both directions, + that is, it is "full duplex". + + TCPs are free to associate ports with processes however they choose. + However, several basic concepts seem necessary in any implementation. + + +[Page 10] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + There must be well-known sockets which the TCP associates only with + the "appropriate" processes by some means. We envision that processes + may "own" ports, and that processes can only initiate connections on + the ports they own. (Means for implementing ownership is a local + issue, but we envision a Request Port user command, or a method of + uniquely allocating a group of ports to a given process, e.g., by + associating the high order bits of a port name with a given process.) + + A connection is specified in the OPEN call by the local port and + foreign socket arguments. In return, the TCP supplies a (short) local + connection name by which the user refers to the connection in + subsequent calls. There are several things that must be remembered + about a connection. To store this information we imagine that there + is a data structure called a Transmission Control Block (TCB). One + implementation strategy would have the local connection name be a + pointer to the TCB for this connection. The OPEN call also specifies + whether the connection establishment is to be actively pursued, or to + be passively waited for. + + A passive OPEN request means that the process wants to accept incoming + connection requests rather than attempting to initiate a connection. 
+ Often the process requesting a passive OPEN will accept a connection + request from any caller. In this case a foreign socket of all zeros + is used to denote an unspecified socket. Unspecified foreign sockets + are allowed only on passive OPENs. + + A service process that wished to provide services for unknown other + processes could issue a passive OPEN request with an unspecified + foreign socket. Then a connection could be made with any process that + requested a connection to this local socket. It would help if this + local socket were known to be associated with this service. + + Well-known sockets are a convenient mechanism for a priori associating + a socket address with a standard service. For instance, the + "Telnet-Server" process might be permanently assigned to a particular + socket, and other sockets might be reserved for File Transfer, Remote + Job Entry, Text Generator, Echoer, and Sink processes (the last three + being for test purposes). A socket address might be reserved for + access to a "Look-Up" service which would return the specific socket + at which a newly created service would be provided. The concept of a + well-known socket is part of the TCP specification, but the assignment + of sockets to services is outside this specification. + + Processes can issue passive OPENs and wait for matching calls from + other processes and be informed by the TCP when connections have been + established. Two processes which issue calls to each other at the + same time are correctly connected. This flexibility is critical for + + + + [Page 11] + + + January 1980 +Transmission Control Protocol +Philosophy + + + + the support of distributed computing in which components act + asynchronously with respect to each other. + + There are two cases for matching the sockets in the local request and + an incoming segment. In the first case, the local request has fully + specified the foreign socket. In this case, the match must be exact. 
+ In the second case, the local request has left the foreign socket + unspecified. In this case, any foreign socket is acceptable as long + as the local sockets match. + + If there are several pending passive OPENs (recorded in TCBs) with the + same local socket, an incoming segment should be matched to a request + with the specific foreign socket in the segment, if such a request + exists, before selecting a request with an unspecified foreign socket. + + The procedures to establish and clear connections utilize synchronize + (SYN) and finis (FIN) control flags and involve an exchange of three + messages. This exchange has been termed a three-way hand shake [4]. + + A connection is initiated by the rendezvous of an arriving segment + containing a SYN and a waiting TCB entry created by a user OPEN + command. The matching of local and foreign sockets determines when a + connection has been initiated. The connection becomes "established" + when sequence numbers have been synchronized in both directions. + + The clearing of a connection also involves the exchange of segments, + in this case carrying the FIN control flag. + +2.8. Data Communication + + The data that flows on a connection may be thought of as a stream of + octets, or as a sequence of records. In TCP the records are called + letters and are of variable length. The sending user indicates in + each SEND call whether the data in that call completes a letter by the + setting of the end-of-letter parameter. + + The length of a letter may be such that it must be broken into + segments before it can be transmitted to its destination. We assume + that the segments will normally be reassembled into a letter before + being passed to the receiving process. A segment may contain all or a + part of a letter, but a segment never contains parts of more than one + letter. The end of a letter is marked by the appearance of an EOL + control flag in a segment. 
A sending TCP is allowed to collect data + from the sending user and to send that data in segments at its own + convenience, until the end of letter is signaled then it must send all + unsent data. When a receiving TCP has a complete letter, it must not + wait for more data from the sending TCP before passing the letter to + the receiving process. + + +[Page 12] + + +January 1980 + Transmission Control Protocol + Philosophy + + + + There is a coupling between letters as sent and the use of buffers of + data that cross the TCP/user interface. Each time an end-of-letter + (EOL) flag is associated with data placed into the receiving user's + buffer, the buffer is returned to the user for processing even if the + buffer is not filled. If a letter is longer than the user's buffer, + the letter is passed to the user in buffer size units, the last of + which may be only partly full. The receiving TCP's buffer size may be + communicated to the sending TCP when the connection is being + established. + + The TCP is responsible for regulating the flow of segments on the + connections, as a way of preventing itself from becoming saturated or + overloaded with traffic. This is done using a window flow control + mechanism. The data receiving TCP reports to the data sending TCP a + window which is the range of sequence numbers of data octets that data + receiving TCP is currently prepared to accept. + + TCP also provides a means to communicate to the receiver of data that + at some point further along in the data stream than the receiver is + currently reading there is urgent data. TCP does not attempt to + define what the user specifically does upon being notified of pending + urgent data, but the general notion is that the receiving process + should take action to read through the end of the urgent data quickly. + +2.9. 
Precedence and Security + + The TCP makes use of the internet protocol type of service field and + security option to provide precedence and security on a per connection + basis to TCP users. Not all TCP modules will necessarily function in + a multilevel secure environment, some may be limited to unclassified + use only, and others may operate at only one security level and + compartment. Consequently, some TCP implementations and services to + users may be limited to a subset of the multilevel secure case. + + TCP modules which operate in a multilevel secure environment should + properly mark outgoing segments with the security, compartment, and + precedence. Such TCP modules should also provide to their users or + higher level protocols such as Telnet or THP an interface to allow + them to specify the desired security level, compartment, and + precedence of connections. + +2.10. Robustness Principle + + TCP implementations should follow a general principle of robustness: + be conservative in what you do, be liberal in what you accept from + others. + + + + + [Page 13] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 14] + + +January 1980 + Transmission Control Protocol + + + + 3. FUNCTIONAL SPECIFICATION + +3.1. Header Format + + TCP segments are sent as internet datagrams. The Internet Protocol + header carries several information fields, including the source and + destination host addresses [2]. A TCP header follows the internet + header, supplying information specific to the TCP protocol. This + division allows for the existence of host level protocols other than + TCP. 
+ + TCP Header Format + + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Source Port | Destination Port | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Acknowledgment Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Data | |U|A|E|R|S|F| | + | Offset| Reserved |R|C|O|S|Y|I| Window | + | | |G|K|L|T|N|N| | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Checksum | Urgent Pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Options | Padding | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + TCP Header Format + + Note that one tick mark represents one bit position. + + Figure 3. + + Source Port: 16 bits + + The source port number. + + Destination Port: 16 bits + + The destination port number. + + + + + [Page 15] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Sequence Number: 32 bits + + The sequence number of the first data octet in this segment (except + when SYN is present). + + Acknowledgment Number: 32 bits + + If the ACK control bit is set this field contains the value of the + next sequence number the sender of the segment is expecting to + receive. Once a connection is established this is always sent. + + Data Offset: 4 bits + + The number of 32 bit words in the TCP Header. This indicates where + the data begins. The TCP header including options is an integral + number of 32 bits long. + + Reserved: 6 bits + + Reserved for future use. Must be zero. 
+ + Control Bits: 6 bits (from left to right): + + URG: Urgent Pointer field significant + ACK: Acknowledgment field significant + EOL: End of Letter + RST: Reset the connection + SYN: Synchronize sequence numbers + FIN: No more data from sender + + Window: 16 bits + + The number of data octets beginning with the one indicated in the + acknowledgment field which the sender of this segment is willing to + accept. + + Checksum: 16 bits + + The checksum field is the 16 bit one's complement of the one's + complement sum of all 16 bit words in the header and text. If a + segment contains an odd number of header and text octets to be + checksummed, the last octet is padded on the right with zeros to + form a 16 bit word for checksum purposes. The pad is not + transmitted as part of the segment. While computing the checksum, + the checksum field itself is replaced with zeros. + + The checksum also covers a 96 bit pseudo header conceptually + prefixed to the TCP header. This pseudo header contains the Source + + +[Page 16] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Address, the Destination Address, the Protocol, and TCP length. + This gives the TCP protection against misrouted segments. This + information is carried in the Internet Protocol and is transferred + across the TCP/Network interface in the arguments or results of + calls by the TCP on the IP. + + +--------------------------+ + | Source Address | + +--------------------------+ + | Destination Address | + +--------------------------+ + | zero | PTCL | TCP Length | + +--------------------------+ + + The TCP Length is the TCP header plus the data length in octets + (this is not an explicitly transmitted quantity, but is computed + from the total length, and the header length). + + Urgent Pointer: 16 bits + + This field communicates the current value of the urgent pointer as a + positive offset from the sequence number in this segment. 
The + urgent pointer points to the sequence number of the octet following + the urgent data. This field should only be interpreted in segments + with the URG control bit set. + + Options: variable + + Options may occupy space at the end of the TCP header and are a + multiple of 8 bits in length. All options are included in the + checksum. An option may begin on any octet boundary. There are two + cases for the format of an option: + + Case 1: A single octet of option-kind. + + Case 2: An octet of option-kind, an octet of option-length, and + the actual option-data octets. + + The option-length counts the two octets of option-kind and + option-length as well as the option-data octets. + + Note that the list of options may be shorter than the data offset + field might imply. The content of the header beyond the + End-of-Option option should be header padding (i.e., zero). + + A TCP must implement all options. + + + + + [Page 17] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Currently defined options include (kind indicated in octal): + + Kind Length Meaning + ---- ------ ------- + 0 - End of option list. + 1 - No-Operation. + 100 - Reserved. + 105 4 Buffer Size. + + + Specific Option Definitions + + End of Option List + + +--------+ + |00000000| + +--------+ + Kind=0 + + This option code indicates the end of the option list. This + might not coincide with the end of the TCP header according to + the Data Offset field. This is used at the end of all options, + not the end of each option, and need only be used if the end of + the options would not otherwise coincide with the end of the TCP + header. + + No-Operation + + +--------+ + |00000001| + +--------+ + Kind=1 + + This option code may be used between options, for example, to + align the beginning of a subsequent option on a word boundary. 
+ There is no guarantee that senders will use this option, so + receivers must be prepared to process options even if they do + not begin on a word boundary. + + Buffer Size + + +--------+--------+---------+--------+ + |01000101|00000100| buffer size | + +--------+--------+---------+--------+ + Kind=105 Length=4 + + + + + +[Page 18] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Buffer Size Option Data: 16 bits + + If this option is present, then it communicates the receive + buffer size at the TCP which sends this segment. This field + should only be sent in the initial connection request (i.e., + in segments with the SYN control bit set). If this option is + not used, the default buffer size of one octet is assumed. + + Padding: variable + + The TCP header padding is used to ensure that the TCP header ends + and data begins on a 32 bit boundary. The padding is composed of + zeros. + +3.2. Terminology + + Before we can discuss very much about the operation of the TCP we need + to introduce some detailed terminology. The maintenance of a TCP + connection requires the remembering of several variables. We conceive + of these variables being stored in a connection record called a + Transmission Control Block or TCB. Among the variables stored in the + TCB are the local and remote socket numbers, the security and + precedence of the connection, pointers to the user's send and receive + buffers, pointers to the retransmit queue and to the current segment. + In addition several variables relating to the send and receive + sequence numbers are stored in the TCB. 
+ + Send Sequence Variables + + SND.UNA - send unacknowledged + SND.NXT - send sequence + SND.WND - send window + SND.BS - send buffer size + SND.UP - send urgent pointer + SND.WL - send sequence number used for last window update + SND.LBB - send last buffer beginning + ISS - initial send sequence number + + Receive Sequence Variables + + RCV.NXT - receive sequence + RCV.WND - receive window + RCV.BS - receive buffer size + RCV.UP - receive urgent pointer + RCV.LBB - receive last buffer beginning + IRS - initial receive sequence number + + + + + [Page 19] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + The following diagrams may help to relate some of these variables to + the sequence space. + + Send Sequence Space + + 1 2 3 4 + ----------|----------|----------|---------- + SND.UNA SND.NXT SND.UNA + +SND.WND + + 1 - old sequence numbers which have been acknowledged + 2 - sequence numbers of unacknowledged data + 3 - sequence numbers allowed for new data transmission + 4 - future sequence numbers which are not yet allowed + + Send Sequence Space + + Figure 4. + + + + Receive Sequence Space + + 1 2 3 + ----------|----------|---------- + RCV.NXT RCV.NXT + +RCV.WND + + 1 - old sequence numbers which have been acknowledged + 2 - sequence numbers allowed for new reception + 3 - future sequence numbers which are not yet allowed + + Receive Sequence Space + + Figure 5. + + + + There are also some variables used frequently in the discussion that + take their values from the fields of the current segment. + + + + + + + + + + +[Page 20] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Current Segment Variables + + SEG.SEQ - segment sequence number + SEG.ACK - segment acknowledgment number + SEG.LEN - segment length + SEG.WND - segment window + SEG.UP - segment urgent pointer + SEG.PRC - segment precedence value + + A connection progresses through a series of states during its + lifetime. 
The states are: LISTEN, SYN-SENT, SYN-RECEIVED, + ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING, + and the fictional state CLOSED. CLOSED is fictional because it + represents the state when there is no TCB, and therefore, no + connection. Briefly the meanings of the states are: + + LISTEN - represents waiting for a connection request from any remote + TCP and port. + + SYN-SENT - represents waiting for a matching connection request + after having sent a connection request. + + SYN-RECEIVED - represents waiting for a confirming connection + request acknowledgment after having both received and sent a + connection request. + + ESTABLISHED - represents an open connection, ready to transmit and + receive data segments. + + FIN-WAIT-1 - represents waiting for a connection termination request + from the remote TCP, or an acknowledgment of the connection + termination request previously sent. + + FIN-WAIT-2 - represents waiting for a connection termination request + from the remote TCP. + + TIME-WAIT - represents waiting for enough time to pass to be sure + the remote TCP received the acknowledgment of its connection + termination request. + + CLOSE-WAIT - represents waiting for a connection termination request + from the local user. + + CLOSING - represents waiting for a connection termination request + acknowledgment from the remote TCP. + + CLOSED - represents no connection state at all. + + + + [Page 21] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + A TCP connection progresses from one state to another in response to + events. The events are the user calls, OPEN, SEND, RECEIVE, CLOSE, + ABORT, and STATUS; the incoming segments, particularly those + containing the SYN and FIN flags; and timeouts. + + The Glossary contains a more complete list of terms and their + definitions. 
+ + The state diagram in figure 6 only illustrates state changes, together + with the causing events and resulting actions, but addresses neither + error conditions nor actions which are not connected with state + changes. In a later section, more detail is offered with respect to + the reaction of the TCP to events. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 22] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + +---------+ ---------\ active OPEN + | CLOSED | \ ----------- + +---------+<---------\ \ create TCB + | ^ \ \ snd SYN + passive OPEN | | CLOSE \ \ + ------------ | | ---------- \ \ + create TCB | | delete TCB \ \ + V | \ \ + +---------+ CLOSE | \ + | LISTEN | ---------- | | + +---------+ delete TCB | | + rcv SYN | | SEND | | + ----------- | | ------- | V + +---------+ snd SYN,ACK / \ snd SYN +---------+ + | |<----------------- ------------------>| | + | SYN | rcv SYN | SYN | + | RCVD |<-----------------------------------------------| SENT | + | | snd ACK | | + | |------------------ -------------------| | + +---------+ rcv ACK of SYN \ / rcv SYN,ACK +---------+ + | -------------- | | ----------- + | x | | snd ACK + | V V + | CLOSE +---------+ + | ------- | ESTAB | + | snd FIN +---------+ + | CLOSE | | rcv FIN + V ------- | | ------- + +---------+ snd FIN / \ snd ACK +---------+ + | FIN |<----------------- ------------------>| CLOSE | + | WAIT-1 |------------------ -------------------| WAIT | + +---------+ rcv FIN \ / CLOSE +---------+ + | rcv ACK of FIN ------- | | ------- + | -------------- snd ACK | | snd FIN + V x V V + +---------+ +---------+ + |FINWAIT-2| | CLOSING | + +---------+ +---------+ + | rcv FIN | rcv ACK of FIN + | ------- Timeout=2MSL | -------------- + V snd ACK ------------ V delete TCB + +---------+ delete TCB +---------+ + |TIME WAIT|----------------->| CLOSED | + +---------+ +---------+ + + TCP Connection State Diagram + Figure 6. 
+ + + [Page 23] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.3. Sequence Numbers + + A fundamental notion in the design is that every octet of data sent + over a TCP connection has a sequence number. Since every octet is + sequenced, each of them can be acknowledged. The acknowledgment + mechanism employed is cumulative so that an acknowledgment of sequence + number X indicates that all octets up to but not including X have been + received. This mechanism allows for straight-forward duplicate + detection in the presence of retransmission. Numbering of octets + within a segment is that the first data octet immediately following + the header is the lowest numbered, and the following octets are + numbered consecutively. + + It is essential to remember that the actual sequence number space is + finite, though very large. This space ranges from 0 to 2**32 - 1. + Since the space is finite, all arithmetic dealing with sequence + numbers must be performed modulo 2**32. This unsigned arithmetic + preserves the relationship of sequence numbers as they cycle from + 2**32 - 1 to 0 again. There are some subtleties to computer modulo + arithmetic, so great care should be taken in programming the + comparison of such values. The typical kinds of sequence number + comparisons which the TCP must perform include: + + (a) Determining that an acknowledgment refers to some sequence + number sent but not yet acknowledged. + + (b) Determining that all sequence numbers occupied by a segment + have been acknowledged (e.g., to remove the segment from a + retransmission queue). + + (c) Determining that an incoming segment contains sequence numbers + which are expected (i.e., that the segment "overlaps" the + receive window). 
+ + + + + + + + + + + + + + + + + +[Page 24] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + On send connections the following comparisons are needed: + + older sequence numbers newer sequence numbers + + + SND.UNA SEG.ACK SND.NXT + | | | + ----|----XXXXXXX------XXXXXXXXXX---------XXXXXX----|---- + | | | | | | + | | | + Segment 1 Segment 2 Segment 3 + + <----- sequence space -----> + + Sending Sequence Space Information + + Figure 7. + + SND.UNA = oldest unacknowledged sequence number + + SND.NXT = next sequence number to be sent + + SEG.ACK = acknowledgment (next sequence number expected by the + acknowledging TCP) + + SEG.SEQ = first sequence number of a segment + + SEG.SEQ+SEG.LEN-1 = last sequence number of a segment + + A new acknowledgment (called an "acceptable ack"), is one for which + the inequality below holds: + + SND.UNA < SEG.ACK =< SND.NXT + + Note that all arithmetic is modulo 2**32 and that comparisons are + unsigned. "=<" means "less than or equal". + + A segment on the retransmission queue is fully acknowledged if the sum + of its sequence number and length is less than or equal to the + acknowledgment value in the incoming segment. + + SEG.LEN is the number of octets occupied by the data in the segment. + It is important to note that SEG.LEN must be non-zero; segments which + do not occupy any sequence space (e.g., empty acknowledgment segments) + are never placed on the retransmission queue, so would not go through + this particular test. + + + + + [Page 25] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + On receive connections the following comparisons are needed: + + older sequence numbers newer sequence numbers + + + RCV.NXT RCV.NXT+RCV.WND + | | + ---------XXX|XXX------XXXXXXXXXX---------XXX|XX--------- + | | | | | + | | | + Segment 1 Segment 2 Segment 3 + + <----- sequence space -----> + + Receiving Sequence Space Information + + Figure 8. 
+ + RCV.NXT = next sequence number expected on incoming segments + + RCV.NXT+RCV.WND = last sequence number expected on incoming + segments, plus one + + SEG.SEQ = first sequence number occupied by the incoming segment + + SEG.SEQ+SEG.LEN-1 = last sequence number occupied by the incoming + segment + + A segment is judged to occupy a portion of valid receive sequence + space if + + 0 =< (SEG.SEQ+SEG.LEN-1 - RCV.NXT) < (RCV.NXT+RCV.WND - RCV.NXT) + + SEG.SEQ+SEG.LEN-1 is the last sequence number occupied by the segment; + RCV.NXT is the next sequence number expected on an incoming segment; + and RCV.NXT+RCV.WND is the right edge of the receive window. + + Actually, it is a little more complicated than this. Due to zero + windows and zero length segments, we have four cases for the + acceptability of an incoming segment: + + + + + + + + + + +[Page 26] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Segment Receive Test + Length Window + ------- ------- ------------------------------------------- + + 0 0 SEG.SEQ = RCV.NXT + + 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + + >0 0 not acceptable + + >0 >0 RCV.NXT < SEG.SEQ+SEG.LEN =< RCV.NXT+RCV.WND + + Note that the acceptance test for a segment, since it requires the end + of a segment to lie in the window, is somewhat more restrictive than + is absolutely necessary. If at least the first sequence number of the + segment lies in the receive window, or if some part of the segment + lies in the receive window, then the segment might be judged + acceptable. Thus, in figure 8, at least segments 1 and 2 are + acceptable by the strict rule, and segment 3 may or may not be, + depending on the strictness of interpretation of the rule. + + Note that when the receive window is zero no segments should be + acceptable except ACK segments. Thus, it should be possible for a TCP + to maintain a zero receive window while transmitting data and + receiving ACKs. 
+ + We have taken advantage of the numbering scheme to protect certain + control information as well. This is achieved by implicitly including + some control flags in the sequence space so they can be retransmitted + and acknowledged without confusion (i.e., one and only one copy of the + control will be acted upon). Control information is not physically + carried in the segment data space. Consequently, we must adopt rules + for implicitly assigning sequence numbers to control. The SYN and FIN + are the only controls requiring this protection, and these controls + are used only at connection opening and closing. For sequence number + purposes, the SYN is considered to occur before the first actual data + octet of the segment in which it occurs, while the FIN is considered + to occur after the last actual data octet in a segment in which it + occurs. The segment length includes both data and sequence space + occupying controls. When a SYN is present then SEG.SEQ is the + sequence number of the SYN. + + Initial Sequence Number Selection + + The protocol places no restriction on a particular connection being + used over and over again. A connection is defined by a pair of + sockets. New instances of a connection will be referred to as + incarnations of the connection. The problem that arises owing to this + + + [Page 27] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + is -- "how does the TCP identify duplicate segments from previous + incarnations of the connection?" This problem becomes apparent if the + connection is being opened and closed in quick succession, or if the + connection breaks with loss of memory and is then reestablished. + + To avoid confusion we must prevent segments from one incarnation of a + connection from being used while the same sequence numbers may still + be present in the network from an earlier incarnation. 
We want to + assure this, even if a TCP crashes and loses all knowledge of the + sequence numbers it has been using. When new connections are created, + an initial sequence number (ISN) generator is employed which selects a + new 32 bit ISN. The generator is bound to a (possibly fictitious) 32 + bit clock whose low order bit is incremented roughly every 4 + microseconds. Thus, the ISN cycles approximately every 4.77 hours. + Since we assume that segments will stay in the network no more than + tens of seconds or minutes, at worst, we can reasonably assume that + ISN's will be unique. + + For each connection there is a send sequence number and a receive + sequence number. The initial send sequence number (ISS) is chosen by + the data sending TCP, and the initial receive sequence number (IRS) is + learned during the connection establishing procedure. + + For a connection to be established or initialized, the two TCPs must + synchronize on each other's initial sequence numbers. This is done in + an exchange of connection establishing messages carrying a control bit + called "SYN" (for synchronize) and the initial sequence numbers. As a + shorthand, messages carrying the SYN bit are also called "SYNs". + Hence, the solution requires a suitable mechanism for picking an + initial sequence number and a slightly involved handshake to exchange + the ISN's. A "three way handshake" is necessary because sequence + numbers are not tied to a global clock in the network, and TCPs may + have different mechanisms for picking the ISN's. The receiver of the + first SYN has no way of knowing whether the segment was an old delayed + one or not, unless it remembers the last sequence number used on the + connection (which is not always possible), and so it must ask the + sender to verify this SYN. + + The "three way handshake" and the advantages of a "clock-driven" + scheme are discussed in [4]. 
+ + Knowing When to Keep Quiet + + To be sure that a TCP does not create a segment that carries a + sequence number which may be duplicated by an old segment remaining in + the network, the TCP must keep quiet for a maximum segment lifetime + (MSL) before assigning any sequence numbers upon starting up or + recovering from a crash in which memory of sequence numbers in use was + + +[Page 28] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + lost. For this specification the MSL is taken to be 2 minutes. This + is an engineering choice, and may be changed if experience indicates + it is desirable to do so. Note that if a TCP is reinitialized in some + sense, yet retains its memory of sequence numbers in use, then it need + not wait at all; it must only be sure to use sequence numbers larger + than those recently used. + + It should be noted that this strategy does not protect against + spoofing or other replay type duplicate message problems. + +3.4. Establishing a connection + + The "three-way handshake" is the procedure used to establish a + connection. This procedure normally is initiated by one TCP and + responded to by another TCP. The procedure also works if two TCPs + simultaneously initiate the procedure. When a simultaneous attempt + occurs, each TCP receives a "SYN" segment which carries no + acknowledgment after it has sent a "SYN". Of course, the arrival of + an old duplicate "SYN" segment can potentially make it appear, to the + recipient, that a simultaneous connection initiation is in progress. + Proper use of "reset" segments can disambiguate these cases. Several + examples of connection initiation follow. 
Although these examples do + not show connection synchronization using data-carrying segments, this + is perfectly legitimate, so long as the receiving TCP doesn't deliver + the data to the user until it is clear the data is valid (i.e., the + data must be buffered at the receiver until the connection reaches the + ESTABLISHED state). The three-way handshake reduces the possibility + of false connections. It is the implementation of a trade-off between + memory and messages to provide information for this checking. + + The simplest three-way handshake is shown in figure 9 below. The + figures should be interpreted in the following way. Each line is + numbered for reference purposes. Right arrows (-->) indicate + departure of a TCP segment from TCP A to TCP B, or arrival of a + segment at B from A. Left arrows (<--), indicate the reverse. + Ellipsis (...) indicates a segment which is still in the network + (delayed). An "XXX" indicates a segment which is lost or rejected. + Comments appear in parentheses. TCP states represent the state AFTER + the departure or arrival of the segment (whose contents are shown in + the center of each line). Segment contents are shown in abbreviated + form, with sequence number, control flags, and ACK field. Other + fields such as window, addresses, lengths, and text have been left out + in the interest of clarity. + + + + + + + + [Page 29] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + + + TCP A TCP B + + 1. CLOSED LISTEN + + 2. SYN-SENT --> --> SYN-RECEIVED + + 3. ESTABLISHED <-- <-- SYN-RECEIVED + + 4. ESTABLISHED --> --> ESTABLISHED + + 5. ESTABLISHED --> --> ESTABLISHED + + Basic 3-Way Handshake for Connection Synchronization + + Figure 9. + + In line 2 of figure 9, TCP A begins by sending a SYN segment + indicating that it will use sequence numbers starting with sequence + number 100. In line 3, TCP B sends a SYN and acknowledges the SYN it + received from TCP A. 
Note that the acknowledgment field indicates TCP + B is now expecting to hear sequence 101, acknowledging the SYN which + occupied sequence 100. + + At line 4, TCP A responds with an empty segment containing an ACK for + TCP B's SYN; and in line 5, TCP A sends some data. Note that the + sequence number of the segment in line 5 is the same as in line 4 + because the ACK does not occupy sequence number space (if it did, we + would wind up ACKing ACK's!). + + Simultaneous initiation is only slightly more complex, as is shown in + figure 10. Each TCP cycles from CLOSED to SYN-SENT to SYN-RECEIVED to + ESTABLISHED. + + The principal reason for the three-way handshake is to prevent old + duplicate connection initiations from causing confusion. To deal with + this, a special control message, reset, has been devised. If the + receiving TCP is in a non-synchronized state (i.e., SYN-SENT, + SYN-RECEIVED), it returns to LISTEN on receiving an acceptable reset. + If the TCP is in one of the synchronized states (ESTABLISHED, + FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING), it aborts the + connection and informs its user. We discuss this latter case under + "half-open" connections below. + + + + + + +[Page 30] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. CLOSED CLOSED + + 2. SYN-SENT --> ... + + 3. SYN-RECEIVED <-- <-- SYN-SENT + + 4. ... --> SYN-RECEIVED + + 5. SYN-RECEIVED --> ... + + 6. ESTABLISHED <-- <-- SYN-RECEIVED + + 7. ... --> ESTABLISHED + + Simultaneous Connection Synchronization + + Figure 10. + + + + TCP A TCP B + + 1. CLOSED LISTEN + + 2. SYN-SENT --> ... + + 3. (duplicate) ... --> SYN-RECEIVED + + 4. SYN-SENT <-- <-- SYN-RECEIVED + + 5. SYN-SENT --> --> LISTEN + + + 6. ... --> SYN-RECEIVED + + 7. SYN-SENT <-- <-- SYN-RECEIVED + + 8. ESTABLISHED --> --> ESTABLISHED + + Recovery from Old Duplicate SYN + + Figure 11. 
+ + As a simple example of recovery from old duplicates, consider + + + [Page 31] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + figure 11. At line 3, an old duplicate SYN arrives at TCP B. TCP B + cannot tell that this is an old duplicate, so it responds normally + (line 4). TCP A detects that the ACK field is incorrect and returns a + RST (reset) with its SEQ field selected to make the segment + believable. TCP B, on receiving the RST, returns to the LISTEN state. + When the original SYN (pun intended) finally arrives at line 6, the + synchronization proceeds normally. If the SYN at line 6 had arrived + before the RST, a more complex exchange might have occurred with RST's + sent in both directions. + + Half-Open Connections and Other Anomalies + + An established connection is said to be "half-open" if one of the + TCPs has closed or aborted the connection at its end without the + knowledge of the other, or if the two ends of the connection have + become desynchronized owing to a crash that resulted in loss of + memory. Such connections will automatically become reset if an + attempt is made to send data in either direction. However, half-open + connections are expected to be unusual, and the recovery procedure is + mildly involved. + + If at site A the connection no longer exists, then an attempt by the + user at site B to send any data on it will result in the site B TCP + receiving a reset control message. Such a message should indicate to + the site B TCP that something is wrong, and it is expected to abort + the connection. + + Assume that two user processes A and B are communicating with one + another when a crash occurs causing loss of memory to A's TCP. + Depending on the operating system supporting A's TCP, it is likely + that some error recovery mechanism exists. When the TCP is up again, + A is likely to start again from the beginning or from a recovery + point. 
As a result, A will probably try to OPEN the connection again + or try to SEND on the connection it believes open. In the latter + case, it receives the error message "connection not open" from the + local (A's) TCP. In an attempt to establish the connection, A's TCP + will send a segment containing SYN. This scenario leads to the + example shown in figure 12. After TCP A crashes, the user attempts to + re-open the connection. TCP B, in the meantime, thinks the connection + is open. + + + + + + + + + + +[Page 32] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. (CRASH) (send 300,receive 100) + + 2. CLOSED ESTABLISHED + + 3. SYN-SENT --> --> (??) + + 4. (!!) <-- <-- ESTABLISHED + + 5. SYN-SENT --> --> (Abort!!) + + 6. CLOSED + + 7. SYN-SENT --> --> + + Half-Open Connection Discovery + + Figure 12. + + When the SYN arrives at line 3, TCP B, being in a synchronized state, + responds with an acknowledgment indicating what sequence it next + expects to hear (ACK 100). TCP A sees that this segment does not + acknowledge anything it sent and, being unsynchronized, sends a reset + (RST) because it has detected a half-open connection. TCP B aborts at + line 5. TCP A will continue to try to establish the connection; the + problem is now reduced to the basic 3-way handshake of figure 9. + + An interesting alternative case occurs when TCP A crashes and TCP B + tries to send data on what it thinks is a synchronized connection. + This is illustrated in figure 13. In this case, the data arriving at + TCP A from TCP B (line 2) is unacceptable because no such connection + exists, so TCP A sends a RST. The RST is acceptable so TCP B + processes it and aborts the connection. + + + + + + + + + + + + + + + [Page 33] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + + + TCP A TCP B + + 1. (CRASH) (send 300,receive 100) + + 2. (??) <-- <-- ESTABLISHED + + 3. --> --> (ABORT!!) 
+ + Active Side Causes Half-Open Connection Discovery + + Figure 13. + + In figure 14, we find the two TCPs A and B with passive connections + waiting for SYN. An old duplicate arriving at TCP B (line 2) stirs B + into action. A SYN-ACK is returned (line 3) and causes TCP A to + generate a RST (the ACK in line 3 is not acceptable). TCP B accepts + the reset and returns to its passive LISTEN state. + + + + TCP A TCP B + + 1. LISTEN LISTEN + + 2. ... --> SYN-RECEIVED + + 3. (??) <-- <-- SYN-RECEIVED + + 4. --> --> (return to LISTEN!) + + 5. LISTEN LISTEN + + Old Duplicate SYN Initiates a Reset on two Passive Sockets + + Figure 14. + + A variety of other cases are possible, all of which are accounted for + by the following rules for RST generation and processing. + + Reset Generation + + As a general rule, reset (RST) should be sent whenever a segment + arrives which apparently is not intended for the current or a future + incarnation of the connection. A reset should not be sent if it is + not clear that this is the case. Thus, if any segment arrives for a + nonexistent connection, a reset should be sent. If a segment ACKs + + +[Page 34] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + something which has never been sent on the current connection, then + one of the following two cases applies. + + 1. If the connection is in any non-synchronized state (LISTEN, + SYN-SENT, SYN-RECEIVED) or if the connection does not exist, a reset + (RST) should be formed and sent for any segment that acknowledges + something not yet sent. The RST should take its SEQ field from the + ACK field of the offending segment (if the ACK control bit was set), + and its ACK bit should be reset (zero), except to refuse an initial + SYN. A reset is also sent if an incoming segment has a security level + or compartment which does not exactly match the level and compartment + requested for the connection. 
If the precedence of the incoming + segment is less than the precedence level requested a reset is sent. + + 2. If the connection is in a synchronized state (ESTABLISHED, + FIN-WAIT-1, FIN-WAIT-2, TIME-WAIT, CLOSE-WAIT, CLOSING), any + unacceptable segment should elicit only an empty acknowledgment + segment containing the current send-sequence number and an + acknowledgment indicating the next sequence number expected to be + received. + + Reset Processing + + All reset (RST) segments are validated by checking their SEQ-fields. + A reset is valid if its sequence number is in the window. In the case + of a RST received in response to an initial SYN any sequence number is + acceptable if the ACK field acknowledges the SYN. + + The receiver of a RST first validates it, then changes state. If the + receiver was in the LISTEN state, it ignores it. If the receiver was + in SYN-RECEIVED state and had previously been in the LISTEN state, + then the receiver returns to the LISTEN state, otherwise the receiver + aborts the connection and goes to the CLOSED state. If the receiver + was in any other state, it aborts the connection and advises the user + and goes to the CLOSED state. + +3.5. Closing a Connection + + CLOSE is an operation meaning "I have no more data to send." The + notion of closing a full-duplex connection is subject to ambiguous + interpretation, of course, since it may not be obvious how to treat + the receiving side of the connection. We have chosen to treat CLOSE + in a simplex fashion. The user who CLOSEs may continue to RECEIVE + until he is told that the other side has CLOSED also. Thus, a program + could initiate several SENDs followed by a CLOSE, and then continue to + RECEIVE until signaled that a RECEIVE failed because the other side + has CLOSED. 
We assume that the TCP will signal a user, even if no + RECEIVEs are outstanding, that the other side has closed, so the user + + + [Page 35] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + can terminate his side gracefully. A TCP will reliably deliver all + buffers SENT before the connection was CLOSED so a user who expects no + data in return need only wait to hear the connection was CLOSED + successfully to know that all his data was received at the destination + TCP. + + There are essentially three cases: + + 1) The user initiates by telling the TCP to CLOSE the connection + + 2) The remote TCP initiates by sending a FIN control signal + + 3) Both users CLOSE simultaneously + + Case 1: Local user initiates the close + + In this case, a FIN segment can be constructed and placed on the + outgoing segment queue. No further SENDs from the user will be + accepted by the TCP, and it enters the FIN-WAIT-1 state. RECEIVEs + are allowed in this state. All segments preceding and including FIN + will be retransmitted until acknowledged. When the other TCP has + both acknowledged the FIN and sent a FIN of its own, the first TCP + can ACK this FIN. It should be noted that a TCP receiving a FIN + will ACK but not send its own FIN until its user has CLOSED the + connection also. + + Case 2: TCP receives a FIN from the network + + If an unsolicited FIN arrives from the network, the receiving TCP + can ACK it and tell the user that the connection is closing. The + user should respond with a CLOSE, upon which the TCP can send a FIN + to the other TCP. The TCP then waits until its own FIN is + acknowledged whereupon it deletes the connection. If an ACK is not + forthcoming, after a timeout the connection is aborted and the user + is told. + + Case 3: both users close simultaneously + + A simultaneous CLOSE by users at both ends of a connection causes + FIN segments to be exchanged. 
When all segments preceding the FINs + have been processed and acknowledged, each TCP can ACK the FIN it + has received. Both will, upon receiving these ACKs, delete the + connection. + + + + + + + +[Page 36] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + + + TCP A TCP B + + 1. ESTABLISHED ESTABLISHED + + 2. (Close) + FIN-WAIT-1 --> --> CLOSE-WAIT + + 3. FIN-WAIT-2 <-- <-- CLOSE-WAIT + + 4. (Close) + TIME-WAIT <-- <-- CLOSING + + 5. TIME-WAIT --> --> CLOSED + + 6. (2 MSL) + CLOSED + + Normal Close Sequence + + Figure 15. + + + + TCP A TCP B + + 1. ESTABLISHED ESTABLISHED + + 2. (Close) (Close) + FIN-WAIT-1 --> ... FIN-WAIT-1 + <-- <-- + ... --> + + 3. CLOSING --> ... CLOSING + <-- <-- + ... --> + + 4. CLOSED CLOSED + + Simultaneous Close Sequence + + Figure 16. + + + + + + + + [Page 37] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.6. Precedence and Security + + The intent is that connection be allowed only between ports operating + with exactly the same security and compartment values and at the + higher of the precedence level requested by the two parts. + + The precedence levels are: + + flash override - 111 + flash - 110 + immediate - 10X + priority - 01X + routine - 00X + + The security levels are: + + top secret - 11 + secret - 10 + confidential - 01 + unclassified - 00 + + The compartments are assigned by the Defense Communications Agency. + The defaults are precedence: routine, security: unclassified, + compartment: zero. A host which does not implement precedence or + security feature should clear these fields to zero for segments it + sends. + + A connection attempt with mismatched security/compartment values or a + lower precedence value should be rejected by sending a reset. 
+ + Note that TCP modules which operate only at the default value of + precedence will still have to check the precedence of incoming + segments and possibly raise the precedence level they use on the + connection. + +3.7. Data Communication + + Once the connection is established data is communicated by the + exchange of segments. Because segments may be lost due to errors + (checksum test failure), or network congestion, TCP uses + retransmission (after a timeout) to ensure delivery of every segment. + Duplicate segments may arrive due to network or TCP retransmission. + As discussed in the section on sequence numbers the TCP performs + certain tests on the sequence and acknowledgment numbers in the + segments to verify their acceptability. + + The sender of data keeps track of the next sequence number to use in + the variable SND.NXT. The receiver of data keeps track of the next + + +[Page 38] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + sequence number to expect in the variable RCV.NXT. The sender of data + keeps track of the oldest unacknowledged sequence number in the + variable SND.UNA. If the data flow is momentarily idle and all data + sent has been acknowledged then the three variables will be equal. + + When the sender creates a segment and transmits it the sender advances + SND.NXT. When the receiver accepts a segment it advances RCV.NXT and + sends an acknowledgment. When the data sender receives an + acknowledgment it advances SND.UNA. The extent to which the values of + these variables differ is a measure of the delay in the communication. + + Normally the amount by which the variables are advanced is the length + of the data in the segment. However, when letters are used there are + special provisions for coordinating the sequence numbers, the letter + boundaries, and the receive buffer boundaries. 
+ + End of Letter Sequence Number Adjustments + + There is provision in TCP for the receiver of data to optionally + communicate to the sender of data on a connection at the time of the + connection synchronization the receiver's buffer size. If this is + done the receiver must use this fixed size of buffers for the lifetime + of the connection. If a buffer size is communicated then there is a + coordination between receive buffers, letters, and sequence numbers. + + Each time a buffer is completed either due to being filled or due to + an end of letter, the sequence number is incremented through the end + of that buffer. + + That is, whenever an EOL is transmitted, the sender advances its send + sequence number, SND.NXT, by an amount sufficient to consume all the + unused space in the receiver's buffer. The amount of space consumed + in this fashion is subtracted from the send window just as is the + space consumed by actual data. + + And, whenever an EOL is received, the receiver advances its receive + sequence number, RCV.NXT, by an amount sufficient to consume all the + unused space in the receiver's buffer. The amount of space consumed + in this fashion is subtracted from the receive window just as is the + space consumed by actual data. + + + + + + + + + + + [Page 39] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + older sequence numbers newer sequence numbers + + | Buffer 1 | Buffer 2 + | | + ----+-------------------------------+----------------- + XXXXXXXXXXXXXXXXXXXXX+++++++++++ + | | | + |<-----SEG.LEN------>| | + | | | + | | | + SEG.SEQ A B + + XXX - data octets from segment + +++ - phantom data + + <----- sequence space -----> + + End of Letter Adjustment + + Figure 17. + + In the case illustrated above, if the segment does not carry an EOL + flag, the next value of SND.NXT or RCV.NXT will be A. If it does + carry an EOL flag, the next value will be B. 
+ + The exchange of buffer size and sequencing information is done in + units of octets. If no buffer size is stated, then the buffer size is + assumed to be 1 octet. The receiver tells the sender the size of the + buffer in a SYN segment that contains the 16 bit buffer size data in + an option field in the TCP header. + + Each EOL advances the sequence number (SN) to the next buffer boundary + + While LBB < SEG.SEQ+SEG.LEN + Do LBB <- LBB + BS End + SN <- LBB + + where LBB is the Last Buffer Beginning, and BS is the buffer size. + + The CLOSE user call implies an end of letter, as does the FIN control + flag in an incoming segment. + + The Communication of Urgent Information + + The objective of the TCP urgent mechanism is to allow the sending user + to stimulate the receiving user to accept some urgent data and to + permit the receiving TCP to indicate to the receiving user when all + the currently known urgent data has been received by the user. + + +[Page 40] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + This mechanism permits a point in the data stream to be designated as + the end of "urgent" information. Whenever this point is in advance of + the receive sequence number (RCV.NXT) at the receiving TCP, that TCP + should tell the user to go into "urgent mode"; when the receive + sequence number catches up to the urgent pointer, the TCP should tell + the user to go into "normal mode". If the urgent pointer is updated while + the user is in "read fast" mode, the update will be invisible to the + user. + + The method employs an urgent field which is carried in all segments + transmitted. The URG control flag indicates that the urgent field is + meaningful and should be added to the segment sequence number to yield + the urgent pointer. The absence of this flag indicates that the + urgent pointer has not changed. + + To send an urgent indication the user must also send at least one data + octet. 
If the sending user also indicates end of letter, timely + delivery of the urgent information to the destination process is + enhanced. + + Managing the Window + + The window sent in each segment indicates the range of sequence numbers + the sender of the window (the data receiver) is currently prepared to + accept. There is an assumption that this is related to the currently + available data buffer space for this connection. The window + information is a guideline to be aimed at. + + Indicating a large window encourages transmissions. If more data + arrives than can be accepted, it will be discarded. This will result + in excessive retransmissions, adding unnecessarily to the load on the + network and the TCPs. Indicating a small window may restrict the + transmission of data to the point of introducing a round trip delay + between each new segment transmitted. + + The mechanisms provided allow a TCP to advertise a large window and to + subsequently advertise a much smaller window without having accepted + that much data. This, so called "shrinking the window," is strongly + discouraged. The robustness principle dictates that TCPs will not + shrink the window themselves, but will be prepared for such behavior + on the part of other TCPs. + + The sending TCP must be prepared to accept and send at least one octet + of new data even if the send window is zero. The sending TCP should + regularly retransmit to the receiving TCP even when the window is + zero. Two minutes is recommended for the retransmission interval when + the window is zero. This retransmission is essential to guarantee + + + + [Page 41] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + that when either TCP has a zero window the re-opening of the window + will be reliably reported to the other. + + The sending TCP packages the data to be transmitted into segments + which fit the current window, and may repackage segments on the + retransmission queue. 
Such repackaging is not required, but may be + helpful. + + Users must keep reading connections they close for sending until the + TCP says no more data. + + In a connection with a one-way data flow, the window information will + be carried in acknowledgment segments that all have the same sequence + number so there will be no way to reorder them if they arrive out of + order. This is not a serious problem, but it will allow the window + information to be on occasion temporarily based on old reports from + the data receiver. + +3.8. Interfaces + + There are of course two interfaces of concern: the user/TCP interface + and the TCP/IP interface. We have a fairly elaborate model of the + user/TCP interface, but only a sketch of the interface to the lower + level protocol module. + + User/TCP Interface + + The functional description of user commands to the TCP is, at best, + fictional, since every operating system will have different + facilities. Consequently, we must warn readers that different TCP + implementations may have different user interfaces. However, all + TCPs must provide a certain minimum set of services to guarantee + that all TCP implementations can support the same protocol + hierarchy. This section specifies the functional interfaces + required of all TCP implementations. + + TCP User Commands + + The following sections functionally characterize a USER/TCP + interface. The notation used is similar to most procedure or + function calls in high level languages, but this usage is not + meant to rule out trap type service calls (e.g., SVCs, UUOs, + EMTs). + + The user commands described below specify the basic functions the + TCP must perform to support interprocess communication. + Individual implementations should define their own exact format, + and may provide combinations or subsets of the basic functions in + + +[Page 42] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + single calls. 
In particular, some implementations may wish to + automatically OPEN a connection on the first SEND or RECEIVE + issued by the user for a given connection. + + In providing interprocess communication facilities, the TCP must + not only accept commands, but must also return information to the + processes it serves. The latter consists of: + + (a) general information about a connection (e.g., interrupts, + remote close, binding of unspecified foreign socket). + + (b) replies to specific user commands indicating success or + various types of failure. + + Open + + Format: OPEN (local port, foreign socket, active/passive + [, buffer size] [, timeout] [, precedence] + [, security/compartment]) -> local connection name + + We assume that the local TCP is aware of the identity of the + processes it serves and will check the authority of the process + to use the connection specified. Depending upon the + implementation of the TCP, the local network and TCP identifiers + for the source address will either be supplied by the TCP or by + the processes that serve it (e.g., the program which interfaces + the TCP network). These considerations are the result of + concern about security, to the extent that no TCP be able to + masquerade as another one, and so on. Similarly, no process can + masquerade as another without the collusion of the TCP. + + If the active/passive flag is set to passive, then this is a + call to LISTEN for an incoming connection. A passive open may + have either a fully specified foreign socket to wait for a + particular connection or an unspecified foreign socket to wait + for any call. A fully specified passive call can be made active + by the subsequent execution of a SEND. + + A full-duplex transmission control block (TCB) is created and + partially filled in with data from the OPEN command parameters. + + On an active OPEN command, the TCP will begin the procedure to + synchronize (i.e., establish) the connection at once. 
+ + The buffer size, if present, indicates that the caller will + always receive data from the connection in that size of buffers. + This buffer size is a measure of the buffer between the user and + + + + [Page 43] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + the local TCP. The buffer size between the two TCPs may be + different. + + The timeout, if present, permits the caller to set up a timeout + for all buffers transmitted on the connection. If a buffer is + not successfully delivered to the destination within the timeout + period, the TCP will abort the connection. The present global + default is 30 seconds. The buffer retransmission rate may vary; + most likely, it will be related to the measured time for + responses from the remote TCP. + + The TCP or some component of the operating system will verify + the user's authority to open a connection with the specified + precedence or security/compartment. The absence of precedence + or security/compartment specification in the OPEN call indicates + the default values should be used. + + TCP will accept incoming requests as matching only if the + security/compartment information is exactly the same and only if + the precedence is equal to or higher than the precedence + requested in the OPEN call. + + The precedence for the connection is the higher of the values + requested in the OPEN call and received from the incoming + request, and fixed at that value for the life of the connection. + + Depending on the TCP implementation, either a local connection + name will be returned to the user by the TCP, or the user will + specify this local connection name (in which case another + parameter is needed in the call). The local connection name can + then be used as a short hand term for the connection defined by + the <local socket, foreign socket> pair. 
+ + Send + + Format: SEND(local connection name, buffer address, byte count, + EOL flag, URGENT flag [, timeout]) + + This call causes the data contained in the indicated user buffer + to be sent on the indicated connection. If the connection has + not been opened, the SEND is considered an error. Some + implementations may allow users to SEND first; in which case, an + automatic OPEN would be done. If the calling process is not + authorized to use this connection, an error is returned. + + If the EOL flag is set, the data is the End Of a Letter, and the + EOL bit will be set in the last TCP segment created from the + + + +[Page 44] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + buffer. If the EOL flag is not set, subsequent SENDs will + appear to be part of the same letter. + + If the URGENT flag is set, segments resulting from this call + will have the urgent pointer set to indicate that some of the + data associated with this call is urgent. This facility, for + example, can be used to simulate "break" signals from terminals + or error or completion codes from I/O devices. The semantics of + this signal to the receiving process are unspecified. The + receiving TCP will signal the urgent condition to the receiving + process as long as the urgent pointer indicates that data + preceding the urgent pointer has not been consumed by the + receiving process. The purpose of urgent is to stimulate the + receiver to accept some urgent data and to indicate to the + receiver when all the currently known urgent data has been + received. + + The number of times the sending user's TCP signals urgent will + not necessarily be equal to the number of times the receiving + user will be notified of the presence of urgent data. 
+ + If no foreign socket was specified in the OPEN, but the + connection is established (e.g., because a LISTENing connection + has become specific due to a foreign segment arriving for the + local socket), then the designated buffer is sent to the implied + foreign socket. In general, users who make use of OPEN with an + unspecified foreign socket can make use of SEND without ever + explicitly knowing the foreign socket address. + + However, if a SEND is attempted before the foreign socket + becomes specified, an error will be returned. Users can use the + STATUS call to determine the status of the connection. In some + implementations the TCP may notify the user when an unspecified + socket is bound. + + If a timeout is specified, then the current timeout for this + connection is changed to the new one. + + In the simplest implementation, SEND would not return control to + the sending process until either the transmission was complete + or the timeout had been exceeded. However, this simple method + is both subject to deadlocks (for example, both sides of the + connection might try to do SENDs before doing any RECEIVEs) and + offers poor performance, so it is not recommended. A more + sophisticated implementation would return immediately to allow + the process to run concurrently with network I/O, and, + furthermore, to allow multiple SENDs to be in progress. + + + + [Page 45] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + Multiple SENDs are served in first come, first served order, so + the TCP will queue those it cannot service immediately. + + We have implicitly assumed an asynchronous user interface in + which a SEND later elicits some kind of SIGNAL or + pseudo-interrupt from the serving TCP. An alternative is to + return a response immediately. For instance, SENDs might return + immediate local acknowledgment, even if the segment sent had not + been acknowledged by the distant TCP. 
We could optimistically + assume eventual success. If we are wrong, the connection will + close anyway due to the timeout. In implementations of this + kind (synchronous), there will still be some asynchronous + signals, but these will deal with the connection itself, and not + with specific segments or letters. + + NOTA BENE: In order for the process to distinguish among error + or success indications for different SENDs, it might be + appropriate for the buffer address to be returned along with the + coded response to the SEND request. TCP-to-user signals are + discussed below, indicating the information which should be + returned to the calling process. + + Receive + + Format: RECEIVE (local connection name, buffer address, byte + count) + + This command allocates a receiving buffer associated with the + specified connection. If no OPEN precedes this command or the + calling process is not authorized to use this connection, an + error is returned. + + In the simplest implementation, control would not return to the + calling program until either the buffer was filled, or some + error occurred, but this scheme is highly subject to deadlocks. + A more sophisticated implementation would permit several + RECEIVEs to be outstanding at once. These would be filled as + segments arrive. This strategy permits increased throughput at + the cost of a more elaborate scheme (possibly asynchronous) to + notify the calling program that a letter has been received or a + buffer filled. + + If insufficient buffer space is given to reassemble a complete + letter, the EOL flag will not be set in the response to the + RECEIVE. The buffer will be filled with as much data as it can + hold. The last buffer required to hold the letter is returned + with EOL signaled. 
+ + + +[Page 46] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + The remaining parts of a partly delivered letter will be placed + in buffers as they are made available via successive RECEIVEs. + If a number of RECEIVEs are outstanding, they may be filled with + parts of a single long letter or with at most one letter each. + The return codes associated with each RECEIVE will indicate what + is contained in the buffer. + + If a buffer size was given in the OPEN call, then all buffers + presented in RECEIVE calls must be of exactly that size, or an + error indication will be returned. + + The URGENT flag will be set only if the receiving user has + previously been informed via a TCP-to-user signal, that urgent + data is waiting. The receiving user should thus be in + "read-fast" mode. If the URGENT flag is on, additional urgent + data remains. If the URGENT flag is off, this call to RECEIVE + has returned all the urgent data, and the user may now leave + "read-fast" mode. + + To distinguish among several outstanding RECEIVEs and to take + care of the case that a letter is smaller than the buffer + supplied, the return code is accompanied by both a buffer + pointer and a byte count indicating the actual length of the + letter received. + + Alternative implementations of RECEIVE might have the TCP + allocate buffer storage, or the TCP might share a ring buffer + with the user. Variations of this kind will produce obvious + variation in user interface to the TCP. + + Close + + Format: CLOSE(local connection name) + + This command causes the connection specified to be closed. If + the connection is not open or the calling process is not + authorized to use this connection, an error is returned. + Closing connections is intended to be a graceful operation in + the sense that outstanding SENDs will be transmitted (and + retransmitted), as flow control permits, until all have been + serviced. 
Thus, it should be acceptable to make several SEND + calls, followed by a CLOSE, and expect all the data to be sent + to the destination. It should also be clear that users should + continue to RECEIVE on CLOSING connections, since the other side + may be trying to transmit the last of its data. Thus, CLOSE + means "I have no more to send" but does not mean "I will not + receive any more." It may happen (if the user level protocol is + not well thought out) that the closing side is unable to get rid + + + [Page 47] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + of all its data before timing out. In this event, CLOSE turns + into ABORT, and the closing TCP gives up. + + The user may CLOSE the connection at any time on his own + initiative, or in response to various prompts from the TCP + (e.g., remote close executed, transmission timeout exceeded, + destination inaccessible). + + Because closing a connection requires communication with the + foreign TCP, connections may remain in the closing state for a + short time. Attempts to reopen the connection before the TCP + replies to the CLOSE command will result in error responses. + + Close also implies end of letter. + + Status + + Format: STATUS(local connection name) + + This is an implementation dependent user command and could be + excluded without adverse effect. Information returned would + typically come from the TCB associated with the connection. + + This command returns a data block containing the following + information: + + local socket, + foreign socket, + local connection name, + receive window, + send window, + connection state, + number of buffers awaiting acknowledgment, + number of buffers pending receipt (including partial ones), + receive buffer size, + urgent state, + precedence, + security/compartment, + and default transmission timeout. 
+ + Depending on the state of the connection, or on the + implementation itself, some of this information may not be + available or meaningful. If the calling process is not + authorized to use this connection, an error is returned. This + prevents unauthorized processes from gaining information about a + connection. + + + + +[Page 48] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + Abort + + Format: ABORT (local connection name) + + This command causes all pending SENDs and RECEIVEs to be + aborted, the TCB to be removed, and a special RESET message to + be sent to the TCP on the other side of the connection. + Depending on the implementation, users may receive abort + indications for each outstanding SEND or RECEIVE, or may simply + receive an ABORT-acknowledgment. + + TCP-to-User Messages + + It is assumed that the operating system environment provides a + means for the TCP to asynchronously signal the user program. When + the TCP does signal a user program, certain information is passed + to the user. Often in the specification the information will be + an error message. In other cases there will be information + relating to the completion of processing a SEND or RECEIVE or + other user call. + + The following information is provided: + + Local Connection Name Always + Response String Always + Buffer Address Send & Receive + Byte count (counts bytes received) Receive + End-of-Letter flag Receive + End-of-Urgent flag Receive + + TCP/Network Interface + + The TCP calls on a lower level protocol module to actually send and + receive information over a network. One case is that of the ARPA + internetwork system where the lower level module is the Internet + Protocol [2]. In most cases the following simple interface would be + adequate. 
+ + + + + + + + + + + + + + [Page 49] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + + The following two calls satisfy the requirements for the TCP to + internet protocol module communication: + + SEND (dest, TOS, TTL, BufPTR, len, Id, DF, options => result) + + where: + + dest = destination address + TOS = type of service + TTL = time to live + BufPTR = buffer pointer + len = length of buffer + Id = Identifier + DF = Don't Fragment + options = internet option data + result = response + OK = datagram sent ok + Error = error in arguments or local network error + + Note that the precedence is included in the TOS and the + security/compartment is passed as an option. + + RECV (BufPTR => result, source, dest, prot, TOS, len) + + where: + + BufPTR = buffer pointer + result = response + OK = datagram received ok + Error = error in arguments + source = source address + dest = destination address + prot = protocol + TOS = type of service + options = internet option data + len = length of buffer + + Note that the precedence is in the TOS, and the + security/compartment is an option. + + When the TCP sends a segment, it executes the SEND call supplying + all the arguments. The internet protocol module, on receiving + this call, checks the arguments and prepares and sends the + message. If the arguments are good and the segment is accepted by + the local network, the call returns successfully. If either the + arguments are bad, or the segment is not accepted by the local + network, the call returns unsuccessfully. On unsuccessful + returns, a reasonable report should be made as to the cause of the + + +[Page 50] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + problem, but the details of such reports are up to individual + implementations. + + When a segment arrives at the internet protocol module from the + local network, either there is a pending RECV call from TCP or + there is not. 
In the first case, the pending call is satisfied by + passing the information from the segment to the TCP. In the + second case, the TCP is notified of a pending segment. + + The notification of a TCP may be via a pseudo interrupt or similar + mechanism, as appropriate in the particular operating system + environment of the implementation. + + A TCP's RECV call may then either be immediately satisfied by a + pending segment, or the call may be pending until a segment + arrives. + + We note that the Internet Protocol provides arguments for a type + of service and for a time to live. TCP uses the following + settings for these parameters: + + Type of Service = Precedence: none, Package: stream, + Reliability: higher, Preference: speed, Speed: higher; or + 00011111. + + Time to Live = one minute, or 00111100. + + Note that the assumed maximum segment lifetime is two minutes. + Here we explicitly ask that a segment be destroyed if it + cannot be delivered by the internet system within one minute. + + + + + + + + + + + + + + + + + + + + + [Page 51] + + + January 1980 +Transmission Control Protocol +Functional Specification + + + +3.9. Event Processing + + The activity of the TCP can be characterized as responding to events. + The events that occur can be cast into three categories: user calls, + arriving segments, and timeouts. This section describes the + processing the TCP does in response to each of the events. In many + cases the processing required depends on the state of the connection. + + Events that occur: + + User Calls + + OPEN + SEND + RECEIVE + CLOSE + ABORT + STATUS + + Arriving Segments + + SEGMENT ARRIVES + + Timeouts + + USER TIMEOUT + RETRANSMISSION TIMEOUT + + The model of the TCP/user interface is that user commands receive an + immediate return and possibly a delayed response via an event or + pseudo interrupt. In the following descriptions, the term "signal" + means cause a delayed response. + + Error responses are given as character strings. 
For example, user + commands referencing connections that do not exist receive "error: + connection not open". + + Please note in the following that all arithmetic on sequence numbers, + acknowledgment numbers, windows, et cetera, is modulo 2**32 the size + of the sequence number space. Also note that "=<" means less than or + equal to. + + A natural way to think about processing incoming segments is to + imagine that they are first tested for proper sequence number (i.e., + that their contents lie in the range of the expected "receive window" + in the sequence number space) and then that they are generally queued + and processed in sequence number order. + + + +[Page 52] + + +January 1980 + Transmission Control Protocol + Functional Specification + + + + When a segment overlaps other already received segments we reconstruct + the segment to contain just the new data, and adjust the header fields + to be consistent. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 53] + + + January 1980 +Transmission Control Protocol +Functional Specification + OPEN Call + + + + OPEN Call + + CLOSED STATE (i.e., TCB does not exist) + + Create a new transmission control block (TCB) to hold connection + state information. Fill in local socket identifier, foreign + socket, precedence, security/compartment, and user timeout + information. Verify the security and precedence requested are + allowed for this user, if not return "error: precedence not + allowed" or "error: security/compartment not allowed." If active + and the foreign socket is unspecified, return "error: foreign + socket unspecified"; if active and the foreign socket is + specified, issue a SYN segment. An initial send sequence number + (ISS) is selected and the TCP receive buffer size is selected (if + applicable). A SYN segment of the form is sent + (this may include the buffer size option if applicable). 
Set + SND.UNA to ISS, SND.NXT to ISS+1, SND.LBB to ISS+1, enter SYN-SENT + state, and return. + + If the caller does not have access to the local socket specified, + return "error: connection illegal for this process". If there is + no room to create a new connection, return "error: insufficient + resources". + + LISTEN STATE + SYN-SENT STATE + SYN-RECEIVED STATE + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + Return "error: connection already exists". + + + + + + + + + + + + + + +[Page 54] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEND Call + + + + SEND Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, then + return "error: connection illegal for this process". + + Otherwise, return "error: connection does not exist". + + LISTEN STATE + + If the foreign socket is specified, then change the connection + from passive to active, select an ISS, and select the receive + buffer size. Send a SYN segment, set SND.UNA to ISS, SND.NXT to + ISS+1 and SND.LBB to ISS+1. Enter SYN-SENT state. Data + associated with SEND may be sent with SYN segment or queued for + transmission after entering ESTABLISHED state. The urgent bit if + requested in the command should be sent with the first data + segment sent as a result of this command. If there is no room to + queue the request, respond with "error: insufficient resources". + If Foreign socket was not specified, then return "error: foreign + socket unspecified". + + SYN-SENT STATE + + Queue for processing after the connection is ESTABLISHED. + Typically, nothing can be sent yet, anyway, because the send + window has not yet been set by the other side. If no space, + return "error: insufficient resources". + + SYN-RECEIVED STATE + + Queue for later processing after entering ESTABLISHED state. If + no space to queue, respond with "error: insufficient resources".
+ + ESTABLISHED STATE + + Segmentize the buffer, send or queue it for output, with a + piggybacked acknowledgment (acknowledgment value = RCV.NXT) with + the data. If there is insufficient space to remember this buffer, + simply return "error: insufficient resources". + + If remote buffer size is not one octet, and, if this is the end of + a letter, do the following end-of-letter/buffer-size adjustment + processing: + + + + + [Page 55] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEND Call + + + + if EOL = 0 then + + SND.NXT <- SEG.SEQ + SEG.LEN + + if EOL = 1 then + + While SND.LBB < SEG.SEQ + SEG.LEN + Do SND.LBB <- SND.LBB + SND.BS End + SND.NXT <- SND.LBB + + If the urgent flag is set, then SND.UP <- SND.NXT-1 and set the + urgent pointer in the outgoing segment. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + + Return "error: connection closing" and do not service request. + + CLOSE-WAIT STATE + + Segmentize any text to be sent and queue for output. If there is + insufficient space to remember the SEND, return "error: + insufficient resources" + + CLOSING STATE + + Respond with "error: connection closing" + + + + + + + + + + + + + + + + + + + + + +[Page 56] + + +January 1980 + Transmission Control Protocol + Functional Specification +RECEIVE Call + + + + RECEIVE Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + SYN-SENT STATE + SYN-RECEIVED STATE + + Queue for processing after entering ESTABLISHED state. If there + is no room to queue this request, respond with "error: + insufficient resources". + + ESTABLISHED STATE + + If insufficient incoming segments are queued to satisfy the + request, queue the request. If there is no queue space to + remember the RECEIVE, respond with "error: insufficient + resources".
+ + Reassemble queued incoming segments into receive buffer and return + to user. Mark "end of letter" (EOL) if this is the case. + + If RCV.UP is in advance of the data currently being passed to the + user notify the user of the presence of urgent data. + + When the TCP takes responsibility for delivering data to the user + that fact must be communicated to the sender via an + acknowledgment. The formation of such an acknowledgment is + described below in the discussion of processing an incoming + segment. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + Reassemble and return a letter, or as much as will fit, in the + user buffer. Queue the request if it cannot be serviced + immediately. + + + + + + + + [Page 57] + + + January 1980 +Transmission Control Protocol +Functional Specification + RECEIVE Call + + + + TIME-WAIT STATE + CLOSE-WAIT STATE + + Since the remote side has already sent FIN, RECEIVEs must be + satisfied by text already reassembled, but not yet delivered to + the user. If no reassembled segment text is awaiting delivery, + the RECEIVE should get a "error: connection closing" response. + Otherwise, any remaining text can be used to satisfy the RECEIVE. + + CLOSING STATE + + Return "error: connection closing" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 58] + + +January 1980 + Transmission Control Protocol + Functional Specification +CLOSE Call + + + + CLOSE Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, return + "error: connection illegal for this process". + + Otherwise, return "error: connection does not exist". + + LISTEN STATE + + Any outstanding RECEIVEs should be returned with "error: closing" + responses. Delete TCB, return "ok". + + SYN-SENT STATE + + Delete the TCB and return "error: closing" responses to any + queued SENDs, or RECEIVEs.
+ + SYN-RECEIVED STATE + + Queue for processing after entering ESTABLISHED state or + segmentize and send FIN segment. If the latter, enter FIN-WAIT-1 + state. + + ESTABLISHED STATE + + Queue this until all preceding SENDs have been segmentized, then + form a FIN segment and send it. In any case, enter FIN-WAIT-1 + state. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + Strictly speaking, this is an error and should receive a "error: + connection closing" response. An "ok" response would be + acceptable, too, as long as a second FIN is not emitted (the first + FIN may be retransmitted though). + + + + + + + + + + + + [Page 59] + + + January 1980 +Transmission Control Protocol +Functional Specification + CLOSE Call + + + + TIME-WAIT STATE + + Strictly speaking, this is an error and should receive a "error: + connection closing" response. An "ok" response would be + acceptable, too. However, since the FIN has been sent and + acknowledged, nothing should be sent (or retransmitted). + + CLOSE-WAIT STATE + + Queue this request until all preceding SENDs have been + segmentized; then send a FIN segment, enter CLOSING state. + + CLOSING STATE + + Respond with "error: connection closing" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 60] + + +January 1980 + Transmission Control Protocol + Functional Specification +ABORT Call + + + + ABORT Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + + Any outstanding RECEIVEs should be returned with "error: + connection reset" responses. Delete TCB, return "ok". + + SYN-SENT STATE + + Delete the TCB and return "reset" responses to any queued SENDs, + or RECEIVEs. + + SYN-RECEIVED STATE + + Send a RST of the form: + + + + and return any unprocessed SENDs, or RECEIVEs with "reset" code, + delete the TCB.
+ + ESTABLISHED STATE + + Send a reset segment: + + + + All queued SENDs and RECEIVEs should be given "reset" responses; + all segments queued for transmission (except for the RST formed + above) or retransmission should be flushed, delete the TCB. + + + + + + + + + + + + + [Page 61] + + + January 1980 +Transmission Control Protocol +Functional Specification + ABORT Call + + + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + A reset segment (RST) should be formed and sent: + + + + Outstanding SENDs, RECEIVEs, CLOSEs, and/or segments queued for + retransmission, or segmentizing, should be flushed, with + "connection reset" notification to the user, delete the TCB. + + TIME-WAIT STATE + + Respond with "ok" and delete the TCB. + + CLOSE-WAIT STATE + + Flush any pending SENDs and RECEIVEs, returning "connection reset" + responses for them. Form and send a RST segment: + + + + Flush all segment queues and delete the TCB. + + CLOSING STATE + + Respond with "ok" and delete the TCB; flush any remaining segment + queues. If a CLOSE command is still pending, respond "error: + connection reset". + + + + + + + + + + + + + + + + + + + + +[Page 62] + + +January 1980 + Transmission Control Protocol + Functional Specification +STATUS Call + + + + STATUS Call + + CLOSED STATE (i.e., TCB does not exist) + + If the user should not have access to such a connection, return + "error: connection illegal for this process". + + Otherwise return "error: connection does not exist". + + LISTEN STATE + + Return "state = LISTEN", and the TCB pointer. + + SYN-SENT STATE + + Return "state = SYN-SENT", and the TCB pointer. + + SYN-RECEIVED STATE + + Return "state = SYN-RECEIVED", and the TCB pointer. + + ESTABLISHED STATE + + Return "state = ESTABLISHED", and the TCB pointer. + + FIN-WAIT-1 STATE + + Return "state = FIN-WAIT-1", and the TCB pointer. + + FIN-WAIT-2 STATE + + Return "state = FIN-WAIT-2", and the TCB pointer. + + TIME-WAIT STATE + + Return "state = TIME-WAIT", and the TCB pointer.
+ + CLOSE-WAIT STATE + + Return "state = CLOSE-WAIT", and the TCB pointer. + + CLOSING STATE + + Return "state = CLOSING", and the TCB pointer. + + + + + + [Page 63] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + SEGMENT ARRIVES + + If the state is CLOSED (i.e., TCB does not exist) then + + all data in the incoming segment is discarded. An incoming + segment containing a RST is discarded. An incoming segment not + containing a RST causes a RST to be sent in response. The + acknowledgment and sequence field values are selected to make the + reset sequence acceptable to the TCP that sent the offending + segment. + + If the ACK bit is off, sequence number zero is used, + + + + If the ACK bit is on, + + + + Return. + + If the state is LISTEN then + + first check for an ACK + + Any acknowledgment is bad if it arrives on a connection still in + the LISTEN state. An acceptable reset segment should be formed + for any arriving ACK-bearing segment, except another RST. The + RST should be formatted as follows: + + + + Return. + + An incoming RST should be ignored. Return. + + if there was no ACK then check for a SYN + + If the SYN bit is set, check the security. If the + security/compartment on the incoming segment does not exactly + match the security/compartment in the TCB then send a reset and + return. If the SEG.PRC is less than the TCB.PRC then send a + reset and return. If the SEG.PRC is greater than the TCB.PRC + then set TCB.PRC<-SEG.PRC. Now RCV.NXT and RCV.LBB are set to + SEG.SEQ+1, IRS is set to SEG.SEQ and any other control or text + should be queued for processing later. ISS should be selected + and a SYN segment sent of the form: + + +[Page 64] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + + + SND.NXT and SND.LBB are set to ISS+1 and SND.UNA to ISS. The + connection state should be changed to SYN-RECEIVED. 
Note that + any other incoming control or data (combined with SYN) will be + processed in the SYN-RECEIVED state, but processing of SYN and + ACK should not be repeated. If the listen was not fully + specified (i.e., the foreign socket was not fully specified), + then the unspecified fields should be filled in now. + + if there was no SYN but there was other text or control + + Any other control or text-bearing segment (not containing SYN) + must have an ACK and thus would be discarded by the ACK + processing. An incoming RST segment could not be valid, since + it could not have been sent in response to anything sent by this + incarnation of the connection. So you are unlikely to get here, + but if you do, drop the segment, and return. + + If the state is SYN-SENT then + + first check for an ACK + + If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, or the + security/compartment in the segment does not exactly match the + security/compartment in the TCB, or the precedence in the + segment is less than the precedence in the TCB, send a reset + + + + and discard the segment. Return. + + If SND.UNA =< SEG.ACK =< SND.NXT and the security/compartment + and precedence are acceptable then the ACK is acceptable. + SND.UNA should be advanced to equal SEG.ACK, and any segments on + the retransmission queue which are thereby acknowledged should + be removed. + + if the ACK is ok (or there is no ACK), check the RST bit + + If the RST bit is set then signal the user "error: connection + reset", enter CLOSED state, drop the segment, delete TCB, and + return. + + if the ACK is ok (or there is no ACK) and it was not a RST, check + the SYN bit + + + + [Page 65] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + If the SYN bit is on and the security/compartment and precedence + are acceptable then, RCV.NXT and RCV.LBB are set to SEG.SEQ+1, + IRS is set to SEG.SEQ. 
If SND.UNA > ISS (our SYN has been + ACKed), change the connection state to ESTABLISHED, otherwise + enter SYN-RECEIVED. In any case, form an ACK segment: + + + + and send it. Data or controls which were queued for + transmission may be included. + + If SEG.PRC is greater than TCB.PRC set TCB.PRC<-SEG.PRC. + + If there are other controls or text in the segment then continue + processing at the fifth step below where the URG bit is checked, + otherwise return. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 66] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + Otherwise, + + first check sequence number + + SYN-RECEIVED STATE + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + Segments are processed in sequence. Initial tests on arrival + are used to discard old duplicates, but further processing is + done in SEG.SEQ order. If a segment's contents straddle the + boundary between old and new, only the new parts should be + processed. + + There are four cases for the acceptability test for an incoming + segment: + + Segment Receive Test + Length Window + ------- ------- ------------------------------------------- + + 0 0 SEG.SEQ = RCV.NXT + + 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + + >0 0 not acceptable + + >0 >0 RCV.NXT < SEG.SEQ+SEG.LEN =< RCV.NXT+RCV.WND + + Note that the test above guarantees that the last sequence + number used by the segment lies in the receive-window. If the + RCV.WND is zero, no segments will be acceptable, but special + allowance should be made to accept valid ACKs, URGs and RSTs. + + If an incoming segment is not acceptable, an acknowledgment + should be sent in reply: + + + + If the incoming segment is unacceptable, drop it and return. 
+ + + + + + [Page 67] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + second check security and precedence + + If the security/compartment and precedence in the segment do not + exactly match the security/compartment and precedence in the TCB + then form a reset and return. + + Note this check is placed following the sequence check to prevent + a segment from an old connection between these parts with a + different security or precedence from causing an abort of the + current connection. + + third check the ACK field, + + SYN-RECEIVED STATE + + If the RST bit is off and SND.UNA < SEG.ACK =< SND.NXT then set + SND.UNA <- SEG.ACK, remove any acknowledged segments from the + retransmission queue, and enter ESTABLISHED state. + + If the segment acknowledgment is not acceptable, form a reset + segment, + + + + and send it, unless the incoming segment is an RST (or there is + no ACK), in which case, it should be discarded, then return. + + ESTABLISHED STATE + + If SND.UNA < SEG.ACK =< SND.NXT then, set SND.UNA <- SEG.ACK. + Any segments on the retransmission queue which are thereby + entirely acknowledged are removed. Users should receive + positive acknowledgments for buffers which have been SENT and + fully acknowledged (i.e., SEND buffer should be returned with + "ok" response). If the ACK is a duplicate, it can be ignored. + + If the segment passes the sequence number and acknowledgment + number tests, the send window should be updated. If + SND.WL =< SEG.SEQ, set SND.WND <- SEG.WND and set + SND.WL <- SEG.SEQ. + + If the remote buffer size is not one, then the + end-of-letter/buffer-size adjustment to sequence numbers may + have an effect on the next expected sequence number to be + acknowledged. 
It is possible that the remote TCP will + acknowledge with a SEG.ACK equal to a sequence number of an + + + +[Page 68] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + octet that was skipped over at the end of a letter. This is a mild + error on the remote TCP's part, but not cause for alarm. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + In addition to the processing for the ESTABLISHED state, if the + retransmission queue is empty, the user's CLOSE can be + acknowledged ("ok") but do not delete the TCB. + + TIME-WAIT STATE + + The only thing that can arrive in this state is a retransmission + of the remote FIN. Acknowledge it, and restart the 2 MSL + timeout. + + CLOSE-WAIT STATE + + Do the same processing as for the ESTABLISHED state. + + CLOSING STATE + + If the ACK acknowledges our FIN then delete the TCB (enter the + CLOSED state), otherwise ignore the segment. + + fourth check the RST bit, + + SYN-RECEIVED STATE + + If the RST bit is set then, if the segment has passed sequence + and acknowledgment tests, it is valid. If this connection was + initiated with a passive OPEN (i.e., came from the LISTEN + state), then return this connection to LISTEN state. The user + need not be informed. If this connection was initiated with an + active OPEN (i.e., came from SYN-SENT state) then the connection + was refused, signal the user "connection refused". In either + case, all segments on the retransmission queue should be + removed. + + + + + + + + + + + + [Page 69] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + ESTABLISHED + FIN-WAIT-1 + FIN-WAIT-2 + CLOSE-WAIT + CLOSING STATE + + If the RST bit is set then, any outstanding RECEIVEs and SEND + should receive "reset" responses. All segment queues should be + flushed. Users should also receive an unsolicited general + "connection reset" signal. Enter the CLOSED state, delete the + TCB, and return.
+ + TIME-WAIT + + Enter the CLOSED state, delete the TCB, and return. + + fifth, check the SYN bit, + + SYN-RECEIVED + ESTABLISHED STATE + + If the SYN bit is set, check the segment sequence number against + the receive window. The segment sequence number must be in the + receive window; if not, ignore the segment. If the SYN is on + and SEG.SEQ = IRS then everything is ok and no action is needed; + but if they are not equal, there is an error and a reset must be + sent. + + If a reset must be sent it is formed as follows: + + + + The connection must be aborted as if a RST had been received. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING STATE + + This case should not occur, since a duplicate of the SYN which + started the current connection incarnation will have been + filtered in the SEG.SEQ processing. Other SYN's will have been + rejected by this test as well (see SYN processing for + ESTABLISHED state). + + + + +[Page 70] + + +January 1980 + Transmission Control Protocol + Functional Specification +SEGMENT ARRIVES + + + + sixth, check the URG bit, + + ESTABLISHED STATE + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + If the URG bit is set, RCV.UP <- max(RCV.UP,SEG.UP), and signal + the user that the remote side has urgent data if the urgent + pointer (RCV.UP) is in advance of the data consumed. If the + user has already been signaled (or is still in the "urgent + mode") for this continuous sequence of urgent data, do not + signal the user again. + + TIME-WAIT STATE + CLOSE-WAIT STATE + CLOSING + + This should not occur, since a FIN has been received from the + remote side. Ignore the URG. + + seventh, process the segment text, + + ESTABLISHED STATE + + Once in the ESTABLISHED state, it is possible to deliver segment + text to user RECEIVE buffers. Text from segments can be moved + into buffers until either the buffer is full or the segment is + empty.
If the segment empties and carries an EOL flag, then the + user is informed, when the buffer is returned, that an EOL has + been received. + + If buffer size is not one octet, then do the following + end-of-letter/buffer-size adjustment processing: + + if EOL = 0 then + + RCV.NXT <- SEG.SEQ + SEG.LEN + + if EOL = 1 then + + While RCV.LBB < SEG.SEQ+SEG.LEN + Do RCV.LBB <- RCV.LBB + RCV.BS End + RCV.NXT <- RCV.LBB + + When the TCP takes responsibility for delivering the data to the + user it must also acknowledge the receipt of the data. Send an + acknowledgment of the form: + + + [Page 71] + + + January 1980 +Transmission Control Protocol +Functional Specification + SEGMENT ARRIVES + + + + + + This acknowledgment should be piggybacked on a segment being + transmitted if possible without incurring undue delay. + + FIN-WAIT-1 STATE + FIN-WAIT-2 STATE + + If there are outstanding RECEIVEs, they should be satisfied, if + possible, with the text of this segment; remaining text should + be queued for further processing. If a RECEIVE is satisfied, + the user should be notified, with "end-of-letter" (EOL) signal, + if appropriate. + + TIME-WAIT STATE + CLOSE-WAIT STATE + + This should not occur, since a FIN has been received from the + remote side. Ignore the segment text. + + eighth, check the FIN bit, + + Send an acknowledgment for the FIN. Signal the user "connection + closing", and return any pending RECEIVEs with same message. Note + that FIN implies EOL for any segment text not yet delivered to the + user. If the current state is ESTABLISHED, enter the CLOSE-WAIT + state. If the current state is FIN-WAIT-1, enter the CLOSING + state. If the current state is FIN-WAIT-2, enter the TIME-WAIT + state. + + and return. 
+ + + + + + + + + + + + + + + + + + +[Page 72] + + +January 1980 + Transmission Control Protocol + Functional Specification +USER TIMEOUT + + + + USER TIMEOUT + + For any state if the user timeout expires, flush all queues, signal + the user "error: connection aborted due to user timeout" in general + and for any outstanding calls, delete the TCB, and return. + + RETRANSMISSION TIMEOUT + + For any state if the retransmission timeout expires on a segment in + the retransmission queue, send the segment at the front of the + retransmission queue again, reinitialize the retransmission timer, + and return. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 73] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 74] + + +January 1980 + Transmission Control Protocol + + + + GLOSSARY + + + +1822 + BBN Report 1822, "The Specification of the Interconnection of + a Host and an IMP". The specification of interface between a + host and the ARPANET. + +ACK + A control bit (acknowledge) occupying no sequence space, which + indicates that the acknowledgment field of this segment + specifies the next sequence number the sender of this segment + is expecting to receive, hence acknowledging receipt of all + previous sequence numbers. + +ARPANET message + The unit of transmission between a host and an IMP in the + ARPANET. The maximum size is about 1012 octets (8096 bits). + +ARPANET packet + A unit of transmission used internally in the ARPANET between + IMPs. The maximum size is about 126 octets (1008 bits). + +buffer size + An option (buffer size) used to state the receive data buffer + size of the sender of this option. May only be sent in a + segment that also carries a SYN. + +connection + A logical communication path identified by a pair of sockets. + +datagram + A message sent in a packet switched computer communications + network. 
+ +Destination Address + The destination address, usually the network and host + identifiers. + +EOL + A control bit (End of Letter) occupying no sequence space, + indicating that this segment ends a logical letter with the + last data octet in the segment. If this end of letter causes + a less than full buffer to be released to the user and the + connection buffer size is not one octet then the + end-of-letter/buffer-size adjustment to the receive sequence + number must be made. + + + + [Page 75] + + + January 1980 +Transmission Control Protocol +Glossary + + + +FIN + A control bit (finis) occupying one sequence number, which + indicates that the sender will send no more data or control + occupying sequence space. + +fragment + A portion of a logical unit of data, in particular an internet + fragment is a portion of an internet datagram. + +FTP + A file transfer protocol. + +header + Control information at the beginning of a message, segment, + fragment, packet or block of data. + +host + A computer. In particular a source or destination of messages + from the point of view of the communication network. + +Identification + An Internet Protocol field. This identifying value assigned + by the sender aids in assembling the fragments of a datagram. + +IMP + The Interface Message Processor, the packet switch of the + ARPANET. + +internet address + A source or destination address specific to the host level. + +internet datagram + The unit of data exchanged between an internet module and the + higher level protocol together with the internet header. + +internet fragment + A portion of the data of an internet datagram with an internet + header. + +IP + Internet Protocol. + +IRS + The Initial Receive Sequence number. The first sequence + number used by the sender on a connection. + + + + + +[Page 76] + + +January 1980 + Transmission Control Protocol + Glossary + + + +ISN + The Initial Sequence Number. The first sequence number used + on a connection, (either ISS or IRS). 
Selected on a clock + based procedure. + +ISS + The Initial Send Sequence number. The first sequence number + used by the sender on a connection. + +leader + Control information at the beginning of a message or block of + data. In particular, in the ARPANET, the control information + on an ARPANET message at the host-IMP interface. + +left sequence + This is the next sequence number to be acknowledged by the + data receiving TCP (or the lowest currently unacknowledged + sequence number) and is sometimes referred to as the left edge + of the send window. + +letter + A logical unit of data, in particular the logical unit of data + transmitted between processes via TCP. + +local packet + The unit of transmission within a local network. + +module + An implementation, usually in software, of a protocol or other + procedure. + +MSL + Maximum Segment Lifetime, the time a TCP segment can exist in + the internetwork system. Arbitrarily defined to be 2 minutes. + +octet + An eight bit byte. + +Options + An Option field may contain several options, and each option + may be several octets in length. The options are used + primarily in testing situations; for example, to carry + timestamps. Both the Internet Protocol and TCP provide for + options fields. + +packet + A package of data with a header which may or may not be + + + + [Page 77] + + + January 1980 +Transmission Control Protocol +Glossary + + + + logically complete. More often a physical packaging than a + logical packaging of data. + +port + The portion of a socket that specifies which logical input or + output channel of a process is associated with the data. + +process + A program in execution. A source or destination of data from + the point of view of the TCP or other host-to-host protocol. + +PSN + A Packet Switched Network. For example, the ARPANET. 
+ +RCV.BS + receive buffer size, the remote buffer size + +RCV.LBB + receive last buffer beginning + +RCV.NXT + receive next sequence number + +RCV.UP + receive urgent pointer + +RCV.WND + receive window + +receive last buffer beginning + This is the sequence number of the first octet of the most + recent buffer. This value is used in calculating the next + sequence number when a segment contains an end of letter + indication. + +receive next sequence number + This is the next sequence number the local TCP is expecting to + receive. + +receive window + This represents the sequence numbers the local (receiving) TCP + is willing to receive. Thus, the local TCP considers that + segments overlapping the range RCV.NXT to + RCV.NXT + RCV.WND - 1 carry acceptable data or control. + Segments containing sequence numbers entirely outside of this + range are considered duplicates and discarded. + + + + +[Page 78] + + +January 1980 + Transmission Control Protocol + Glossary + + + +RST + A control bit (reset), occupying no sequence space, indicating + that the receiver should delete the connection without further + interaction. The receiver can determine, based on the + sequence number and acknowledgment fields of the incoming + segment, whether it should honor the reset command or ignore + it. In no case does receipt of a segment containing RST give + rise to a RST in response. + +RTP + Real Time Protocol: A host-to-host protocol for communication + of time critical information. + +Rubber EOL + An end of letter (EOL) requiring a sequence number adjustment + to align the beginning of the next letter on a buffer + boundary. + +SEG.ACK + segment acknowledgment + +SEG.LEN + segment length + +SEG.PRC + segment precedence value + +SEG.SEQ + segment sequence + +SEG.UP + segment urgent pointer field + +SEG.WND + segment window field + +segment + A logical unit of data, in particular a TCP segment is the + unit of data transferred between a pair of TCP modules. 
+ +segment acknowledgment + The sequence number in the acknowledgment field of the + arriving segment. + +segment length + The amount of sequence number space occupied by a segment, + including any controls which occupy sequence space. + + + + [Page 79] + + + January 1980 +Transmission Control Protocol +Glossary + + + +segment sequence + The number in the sequence field of the arriving segment. + +send last buffer beginning + This is the sequence number of the first octet of the most + recent buffer. This value is used in calculating the next + sequence number when a segment contains an end of letter + indication. + +send sequence + This is the next sequence number the local (sending) TCP will + use on the connection. It is initially selected from an + initial sequence number curve (ISN) and is incremented for + each octet of data or sequenced control transmitted. + +send window + This represents the sequence numbers which the remote + (receiving) TCP is willing to receive. It is the value of the + window field specified in segments from the remote (data + receiving) TCP. The range of sequence numbers which may be + emitted by a TCP lies between SND.NXT and + SND.UNA + SND.WND - 1. + +SND.BS + send buffer size, the local buffer size + +SND.LBB + send last buffer beginning + +SND.NXT + send sequence + +SND.UNA + left sequence + +SND.UP + send urgent pointer + +SND.WL + send sequence number at last window update + +SND.WND + send window + +socket + An address which specifically includes a port identifier, that + is, the concatenation of an Internet Address with a TCP port. + + + +[Page 80] + + +January 1980 + Transmission Control Protocol + Glossary + + + +Source Address + The source address, usually the network and host identifiers. + +SYN + A control bit in the incoming segment, occupying one sequence + number, used at the initiation of a connection, to indicate + where the sequence numbering will start. 
+ +TCB + Transmission control block, the data structure that records + the state of a connection. + +TCB.PRC + The precedence of the connection. + +TCP + Transmission Control Protocol: A host-to-host protocol for + reliable communication in internetwork environments. + +TOS + Type of Service, an Internet Protocol field. + +Type of Service + An Internet Protocol field which indicates the type of service + for this internet fragment. + +URG + A control bit (urgent), occupying no sequence space, used to + indicate that the receiving user should be notified to do + urgent processing as long as there is data to be consumed with + sequence numbers less than the value indicated in the urgent + pointer. + +urgent pointer + A control field meaningful only when the URG bit is on. This + field communicates the value of the urgent pointer which + indicates the data octet associated with the sending user's + urgent call. + + + + + + + + + + + + + [Page 81] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 82] + + +January 1980 + Transmission Control Protocol + + + + REFERENCES + + + +[1] Cerf, V., and R. Kahn, "A Protocol for Packet Network + Intercommunication," IEEE Transactions on Communications, + Vol. COM-22, No. 5, pp 637-648, May 1974. + +[2] Postel, J. (ed.), "DOD Standard Internet Protocol," Defense + Advanced Research Projects Agency, Information Processing + Techniques Office, RFC 760, IEN 128, January 1980. + +[3] Feinler, E. and J. Postel, ARPANET Protocol Handbook, Network + Information Center, SRI International, Menlo Park, CA, + January 1978. + +[4] Dalal, Y. and C. Sunshine, "Connection Management in Transport + Protocols," Computer Networks, Vol. 2, No. 6, pp. 454-473, + December 1978. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 83] + + + January 1980 +Transmission Control Protocol + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 84] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt new file mode 100644 index 0000000..4f13551 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc768.txt @@ -0,0 +1,174 @@ + + +RFC 768 J. Postel + ISI + 28 August 1980 + + + + User Datagram Protocol + ---------------------- + +Introduction +------------ + +This User Datagram Protocol (UDP) is defined to make available a +datagram mode of packet-switched computer communication in the +environment of an interconnected set of computer networks. This +protocol assumes that the Internet Protocol (IP) [1] is used as the +underlying protocol. + +This protocol provides a procedure for application programs to send +messages to other programs with a minimum of protocol mechanism. The +protocol is transaction oriented, and delivery and duplicate protection +are not guaranteed. Applications requiring ordered reliable delivery of +streams of data should use the Transmission Control Protocol (TCP) [2]. + +Format +------ + + + 0 7 8 15 16 23 24 31 + +--------+--------+--------+--------+ + | Source | Destination | + | Port | Port | + +--------+--------+--------+--------+ + | | | + | Length | Checksum | + +--------+--------+--------+--------+ + | + | data octets ... + +---------------- ... + + User Datagram Header Format + +Fields +------ + +Source Port is an optional field, when meaningful, it indicates the port +of the sending process, and may be assumed to be the port to which a +reply should be addressed in the absence of any other information. If +not used, a value of zero is inserted. 
+ + + + + +Postel [page 1] + + + 28 Aug 1980 +User Datagram Protocol RFC 768 +Fields + + + +Destination Port has a meaning within the context of a particular +internet destination address. + +Length is the length in octets of this user datagram including this +header and the data. (This means the minimum value of the length is +eight.) + +Checksum is the 16-bit one's complement of the one's complement sum of a +pseudo header of information from the IP header, the UDP header, and the +data, padded with zero octets at the end (if necessary) to make a +multiple of two octets. + +The pseudo header conceptually prefixed to the UDP header contains the +source address, the destination address, the protocol, and the UDP +length. This information gives protection against misrouted datagrams. +This checksum procedure is the same as is used in TCP. + + 0 7 8 15 16 23 24 31 + +--------+--------+--------+--------+ + | source address | + +--------+--------+--------+--------+ + | destination address | + +--------+--------+--------+--------+ + | zero |protocol| UDP length | + +--------+--------+--------+--------+ + +If the computed checksum is zero, it is transmitted as all ones (the +equivalent in one's complement arithmetic). An all zero transmitted +checksum value means that the transmitter generated no checksum (for +debugging or for higher level protocols that don't care). + +User Interface +-------------- + +A user interface should allow + + the creation of new receive ports, + + receive operations on the receive ports that return the data octets + and an indication of source port and source address, + + and an operation that allows a datagram to be sent, specifying the + data, source and destination ports and addresses to be sent. 
+ + + + + + +[page 2] Postel + + +28 Aug 1980 +RFC 768 User Datagram Protocol + IP Interface + + + +IP Interface +------------- + +The UDP module must be able to determine the source and destination +internet addresses and the protocol field from the internet header. One +possible UDP/IP interface would return the whole internet datagram +including all of the internet header in response to a receive operation. +Such an interface would also allow the UDP to pass a full internet +datagram complete with header to the IP to send. The IP would verify +certain fields for consistency and compute the internet header checksum. + +Protocol Application +-------------------- + +The major uses of this protocol are the Internet Name Server [3], and the +Trivial File Transfer [4]. + +Protocol Number +--------------- + +This is protocol 17 (21 octal) when used in the Internet Protocol. +Other protocol numbers are listed in [5]. + +References +---------- + +[1] Postel, J., "Internet Protocol," RFC 760, USC/Information + Sciences Institute, January 1980. + +[2] Postel, J., "Transmission Control Protocol," RFC 761, + USC/Information Sciences Institute, January 1980. + +[3] Postel, J., "Internet Name Server," USC/Information Sciences + Institute, IEN 116, August 1979. + +[4] Sollins, K., "The TFTP Protocol," Massachusetts Institute of + Technology, IEN 133, January 1980. + +[5] Postel, J., "Assigned Numbers," USC/Information Sciences + Institute, RFC 762, January 1980. + + + + + + + + + +Postel [page 3] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt new file mode 100644 index 0000000..5c659e8 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc792.txt @@ -0,0 +1,1218 @@ + + +Network Working Group J. 
Postel +Request for Comments: 792 ISI + September 1981 +Updates: RFCs 777, 760 +Updates: IENs 109, 128 + + INTERNET CONTROL MESSAGE PROTOCOL + + DARPA INTERNET PROGRAM + PROTOCOL SPECIFICATION + + + +Introduction + + The Internet Protocol (IP) [1] is used for host-to-host datagram + service in a system of interconnected networks called the + Catenet [2]. The network connecting devices are called Gateways. + These gateways communicate between themselves for control purposes + via a Gateway to Gateway Protocol (GGP) [3,4]. Occasionally a + gateway or destination host will communicate with a source host, for + example, to report an error in datagram processing. For such + purposes this protocol, the Internet Control Message Protocol (ICMP), + is used. ICMP uses the basic support of IP as if it were a higher + level protocol; however, ICMP is actually an integral part of IP, and + must be implemented by every IP module. + + ICMP messages are sent in several situations: for example, when a + datagram cannot reach its destination, when the gateway does not have + the buffering capacity to forward a datagram, and when the gateway + can direct the host to send traffic on a shorter route. + + The Internet Protocol is not designed to be absolutely reliable. The + purpose of these control messages is to provide feedback about + problems in the communication environment, not to make IP reliable. + There are still no guarantees that a datagram will be delivered or a + control message will be returned. Some datagrams may still be + undelivered without any report of their loss. The higher level + protocols that use IP must implement their own reliability procedures + if reliable communication is required. + + The ICMP messages typically report errors in the processing of + datagrams. To avoid the infinite regress of messages about messages + etc., no ICMP messages are sent about ICMP messages. 
Also ICMP + messages are only sent about errors in handling fragment zero of + fragmented datagrams. (Fragment zero has the fragment offset equal to + zero). + + + + + + + + [Page 1] + + + September 1981 +RFC 792 + + + +Message Formats + + ICMP messages are sent using the basic IP header. The first octet of + the data portion of the datagram is an ICMP type field; the value of + this field determines the format of the remaining data. Any field + labeled "unused" is reserved for later extensions and must be zero + when sent, but receivers should not use these fields (except to + include them in the checksum). Unless otherwise noted under the + individual format descriptions, the values of the internet header + fields are as follows: + + Version + + 4 + + IHL + + Internet header length in 32-bit words. + + Type of Service + + 0 + + Total Length + + Length of internet header and data in octets. + + Identification, Flags, Fragment Offset + + Used in fragmentation, see [1]. + + Time to Live + + Time to live in seconds; as this field is decremented at each + machine in which the datagram is processed, the value in this + field should be at least as great as the number of gateways which + this datagram will traverse. + + Protocol + + ICMP = 1 + + Header Checksum + + The 16 bit one's complement of the one's complement sum of all 16 + bit words in the header. For computing the checksum, the checksum + field should be zero. This checksum may be replaced in the + future. + + +[Page 2] + + +September 1981 +RFC 792 + + + + Source Address + + The address of the gateway or host that composes the ICMP message. + Unless otherwise noted, this can be any of a gateway's addresses. + + Destination Address + + The address of the gateway or host to which the message should be + sent. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 3] + + + September 1981 +RFC 792 + + + +Destination Unreachable Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 3 + + Code + + 0 = net unreachable; + + 1 = host unreachable; + + 2 = protocol unreachable; + + 3 = port unreachable; + + 4 = fragmentation needed and DF set; + + 5 = source route failed. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + + +[Page 4] + + +September 1981 +RFC 792 + + + + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + If, according to the information in the gateway's routing tables, + the network specified in the internet destination field of a + datagram is unreachable, e.g., the distance to the network is + infinity, the gateway may send a destination unreachable message + to the internet source host of the datagram. 
In addition, in some + networks, the gateway may be able to determine if the internet + destination host is unreachable. Gateways in these networks may + send destination unreachable messages to the source host when the + destination host is unreachable. + + If, in the destination host, the IP module cannot deliver the + datagram because the indicated protocol module or process port is + not active, the destination host may send a destination + unreachable message to the source host. + + Another case is when a datagram must be fragmented to be forwarded + by a gateway yet the Don't Fragment flag is on. In this case the + gateway must discard the datagram and may return a destination + unreachable message. + + Codes 0, 1, 4, and 5 may be received from a gateway. Codes 2 and + 3 may be received from a host. + + + + + + + + + + + + + + + + + + + + + + [Page 5] + + + September 1981 +RFC 792 + + + +Time Exceeded Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 11 + + Code + + 0 = time to live exceeded in transit; + + 1 = fragment reassembly time exceeded. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. 
This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + If the gateway processing a datagram finds the time to live field + + +[Page 6] + + +September 1981 +RFC 792 + + + + is zero it must discard the datagram. The gateway may also notify + the source host via the time exceeded message. + + If a host reassembling a fragmented datagram cannot complete the + reassembly due to missing fragments within its time limit it + discards the datagram, and it may send a time exceeded message. + + If fragment zero is not available then no time exceeded need be + sent at all. + + Code 0 may be received from a gateway. Code 1 may be received + from a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 7] + + + September 1981 +RFC 792 + + + +Parameter Problem Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address from the original datagram's data. + + ICMP Fields: + + Type + + 12 + + Code + + 0 = pointer indicates the error. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Pointer + + If code = 0, identifies the octet where an error was detected. 
+ + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + + + +[Page 8] + + +September 1981 +RFC 792 + + + + Description + + If the gateway or host processing a datagram finds a problem with + the header parameters such that it cannot complete processing the + datagram it must discard the datagram. One potential source of + such a problem is with incorrect arguments in an option. The + gateway or host may also notify the source host via the parameter + problem message. This message is only sent if the error caused + the datagram to be discarded. + + The pointer identifies the octet of the original datagram's header + where the error was detected (it may be in the middle of an + option). For example, 1 indicates something is wrong with the + Type of Service, and (if there are options present) 20 indicates + something is wrong with the type code of the first option. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 9] + + + September 1981 +RFC 792 + + + +Source Quench Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | unused | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address of the original datagram's data. 
+ + ICMP Fields: + + Type + + 4 + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + A gateway may discard internet datagrams if it does not have the + buffer space needed to queue the datagrams for output to the next + network on the route to the destination network. If a gateway + + +[Page 10] + + +September 1981 +RFC 792 + + + + discards a datagram, it may send a source quench message to the + internet source host of the datagram. A destination host may also + send a source quench message if datagrams arrive too fast to be + processed. The source quench message is a request to the host to + cut back the rate at which it is sending traffic to the internet + destination. The gateway may send a source quench message for + every message that it discards. On receipt of a source quench + message, the source host should cut back the rate at which it is + sending traffic to the specified destination until it no longer + receives source quench messages from the gateway. The source host + can then gradually increase the rate at which it sends traffic to + the destination until it again receives source quench messages. + + The gateway or host may send the source quench message when it + approaches its capacity limit rather than waiting until the + capacity is exceeded. This means that the data datagram which + triggered the source quench message may be delivered. 
+ + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 11] + + + September 1981 +RFC 792 + + + +Redirect Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Gateway Internet Address | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Internet Header + 64 bits of Original Data Datagram | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Destination Address + + The source network and address of the original datagram's data. + + ICMP Fields: + + Type + + 5 + + Code + + 0 = Redirect datagrams for the Network. + + 1 = Redirect datagrams for the Host. + + 2 = Redirect datagrams for the Type of Service and Network. + + 3 = Redirect datagrams for the Type of Service and Host. + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum , the checksum field should be zero. + This checksum may be replaced in the future. + + Gateway Internet Address + + Address of the gateway to which traffic for the network specified + in the internet destination network field of the original + datagram's data should be sent. + + + + +[Page 12] + + +September 1981 +RFC 792 + + + + Internet Header + 64 bits of Data Datagram + + The internet header plus the first 64 bits of the original + datagram's data. This data is used by the host to match the + message to the appropriate process. If a higher level protocol + uses port numbers, they are assumed to be in the first 64 data + bits of the original datagram's data. + + Description + + The gateway sends a redirect message to a host in the following + situation. 
A gateway, G1, receives an internet datagram from a + host on a network to which the gateway is attached. The gateway, + G1, checks its routing table and obtains the address of the next + gateway, G2, on the route to the datagram's internet destination + network, X. If G2 and the host identified by the internet source + address of the datagram are on the same network, a redirect + message is sent to the host. The redirect message advises the + host to send its traffic for network X directly to gateway G2 as + this is a shorter path to the destination. The gateway forwards + the original datagram's data to its internet destination. + + For datagrams with the IP source route options and the gateway + address in the destination address field, a redirect message is + not sent even if there is a better route to the ultimate + destination than the next address in the source route. + + Codes 0, 1, 2, and 3 may be received from a gateway. + + + + + + + + + + + + + + + + + + + + + + + [Page 13] + + + September 1981 +RFC 792 + + + +Echo or Echo Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Data ... + +-+-+-+-+- + + IP Fields: + + Addresses + + The address of the source in an echo message will be the + destination of the echo reply message. To form an echo reply + message, the source and destination addresses are simply reversed, + the type code changed to 0, and the checksum recomputed. + + IP Fields: + + Type + + 8 for echo message; + + 0 for echo reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit ones's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. 
+ For computing the checksum , the checksum field should be zero. + If the total length is odd, the received data is padded with one + octet of zeros for computing the checksum. This checksum may be + replaced in the future. + + Identifier + + If code = 0, an identifier to aid in matching echos and replies, + may be zero. + + Sequence Number + + +[Page 14] + + +September 1981 +RFC 792 + + + + If code = 0, a sequence number to aid in matching echos and + replies, may be zero. + + Description + + The data received in the echo message must be returned in the echo + reply message. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the echo requests. For + example, the identifier might be used like a port in TCP or UDP to + identify a session, and the sequence number might be incremented + on each echo request sent. The echoer returns these same values + in the echo reply. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 15] + + + September 1981 +RFC 792 + + + +Timestamp or Timestamp Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Originate Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Receive Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Transmit Timestamp | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Addresses + + The address of the source in a timestamp message will be the + destination of the timestamp reply message. 
To form a timestamp + reply message, the source and destination addresses are simply + reversed, the type code changed to 14, and the checksum + recomputed. + + ICMP Fields: + + Type + + 13 for timestamp message; + + 14 for timestamp reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit one's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum, the checksum field should be zero. + This checksum may be replaced in the future. + + Identifier + + + + +[Page 16] + + +September 1981 +RFC 792 + + + + If code = 0, an identifier to aid in matching timestamp and + replies, may be zero. + + Sequence Number + + If code = 0, a sequence number to aid in matching timestamp and + replies, may be zero. + + Description + + The data received (a timestamp) in the message is returned in the + reply together with an additional timestamp. The timestamp is 32 + bits of milliseconds since midnight UT. One use of these + timestamps is described by Mills [5]. + + The Originate Timestamp is the time the sender last touched the + message before sending it, the Receive Timestamp is the time the + echoer first touched it on receipt, and the Transmit Timestamp is + the time the echoer last touched the message on sending it. + + If the time is not available in milliseconds or cannot be provided + with respect to midnight UT then any time can be inserted in a + timestamp provided the high order bit of the timestamp is also set + to indicate this non-standard value. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the requests. For example, + the identifier might be used like a port in TCP or UDP to identify + a session, and the sequence number might be incremented on each + request sent. The destination returns these same values in the + reply. + + Code 0 may be received from a gateway or a host. 
+ + + + + + + + + + + + + + + + + + [Page 17] + + + September 1981 +RFC 792 + + + +Information Request or Information Reply Message + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Type | Code | Checksum | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Identifier | Sequence Number | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + IP Fields: + + Addresses + + The address of the source in an information request message will be + the destination of the information reply message. To form an + information reply message, the source and destination addresses + are simply reversed, the type code changed to 16, and the checksum + recomputed. + + ICMP Fields: + + Type + + 15 for information request message; + + 16 for information reply message. + + Code + + 0 + + Checksum + + The checksum is the 16-bit one's complement of the one's + complement sum of the ICMP message starting with the ICMP Type. + For computing the checksum, the checksum field should be zero. + This checksum may be replaced in the future. + + Identifier + + If code = 0, an identifier to aid in matching request and replies, + may be zero. + + Sequence Number + + If code = 0, a sequence number to aid in matching request and + replies, may be zero. + + +[Page 18] + + +September 1981 +RFC 792 + + + + Description + + This message may be sent with the source network in the IP header + source and destination address fields zero (which means "this" + network). The replying IP module should send the reply with the + addresses fully specified. This message is a way for a host to + find out the number of the network it is on. + + The identifier and sequence number may be used by the echo sender + to aid in matching the replies with the requests. 
For example, + the identifier might be used like a port in TCP or UDP to identify + a session, and the sequence number might be incremented on each + request sent. The destination returns these same values in the + reply. + + Code 0 may be received from a gateway or a host. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 19] + + + September 1981 +RFC 792 + + + +Summary of Message Types + + 0 Echo Reply + + 3 Destination Unreachable + + 4 Source Quench + + 5 Redirect + + 8 Echo + + 11 Time Exceeded + + 12 Parameter Problem + + 13 Timestamp + + 14 Timestamp Reply + + 15 Information Request + + 16 Information Reply + + + + + + + + + + + + + + + + + + + + + + + + + + + +[Page 20] + + +September 1981 +RFC 792 + + + +References + + [1] Postel, J. (ed.), "Internet Protocol - DARPA Internet Program + Protocol Specification," RFC 791, USC/Information Sciences + Institute, September 1981. + + [2] Cerf, V., "The Catenet Model for Internetworking," IEN 48, + Information Processing Techniques Office, Defense Advanced + Research Projects Agency, July 1978. + + [3] Strazisar, V., "Gateway Routing: An Implementation + Specification", IEN 30, Bolt Beranek and Newman, April 1979. + + [4] Strazisar, V., "How to Build a Gateway", IEN 109, Bolt Beranek + and Newman, August 1979. + + [5] Mills, D., "DCNET Internet Clock Service," RFC 778, COMSAT + Laboratories, April 1981. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [Page 21] + diff --git a/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt b/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt new file mode 100644 index 0000000..d5cd5eb --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/RFC/rfc894.txt @@ -0,0 +1,171 @@ + + +Network Working Group Charles Hornig +Request for Comments: 894 Symbolics Cambridge Research Center + April 1984 + + A Standard for the Transmission of IP Datagrams over Ethernet Networks + + +Status of this Memo + + This RFC specifies a standard method of encapsulating Internet + Protocol (IP) [1] datagrams on an Ethernet [2]. This RFC specifies a + standard protocol for the ARPA-Internet community. + +Introduction + + This memo applies to the Ethernet (10-megabit/second, 48-bit + addresses). The procedure for transmission of IP datagrams on the + Experimental Ethernet (3-megabit/second, 8-bit addresses) is + described in [3]. + +Frame Format + + IP datagrams are transmitted in standard Ethernet frames. The type + field of the Ethernet frame must contain the value hexadecimal 0800. + The data field contains the IP header followed immediately by the IP + data. + + The minimum length of the data field of a packet sent over an + Ethernet is 46 octets. If necessary, the data field should be padded + (with octets of zero) to meet the Ethernet minimum frame size. This + padding is not part of the IP packet and is not included in the total + length field of the IP header. + + The maximum length of the data field of a packet sent over an + Ethernet is 1500 octets, thus the maximum length of an IP datagram + sent over an Ethernet is 1500 octets. Implementations are encouraged + to support full-length packets. Gateway implementations MUST be + prepared to accept full-length packets and fragment them if + necessary. 
If a system cannot receive full-length packets, it should + take steps to discourage others from sending them, such as using the + TCP Maximum Segment Size option [4]. + + Note: Datagrams on the Ethernet may be longer than the general + Internet default maximum packet size of 576 octets. Hosts connected + to an Ethernet should keep this in mind when sending datagrams to + hosts not on the same Ethernet. It may be appropriate to send + smaller datagrams to avoid unnecessary fragmentation at intermediate + gateways. Please see [4] for further information on this point. + + + + + +Hornig [Page 1] + + + +RFC 894 April 1984 + + +Address Mappings + + The mapping of 32-bit Internet addresses to 48-bit Ethernet addresses + can be done several ways. A static table could be used, or a dynamic + discovery procedure could be used. + + Static Table + + Each host could be provided with a table of all other hosts on the + local network with both their Ethernet and Internet addresses. + + Dynamic Discovery + + Mappings between 32-bit Internet addresses and 48-bit Ethernet + addresses could be accomplished through the Address Resolution + Protocol (ARP) [5]. Internet addresses are assigned arbitrarily + on some Internet network. Each host's implementation must know + its own Internet address and respond to Ethernet Address + Resolution packets appropriately. It should also use ARP to + translate Internet addresses to Ethernet addresses when needed. + + Broadcast Address + + The broadcast Internet address (the address on that network with a + host part of all binary ones) should be mapped to the broadcast + Ethernet address (of all binary ones, FF-FF-FF-FF-FF-FF hex). + + The use of the ARP dynamic discovery procedure is strongly + recommended. + +Trailer Formats + + Some versions of Unix 4.2bsd use a different encapsulation method in + order to get better network performance with the VAX virtual memory + architecture. 
Consenting systems on the same Ethernet may use this + format between themselves. + + No host is required to implement it, and no datagrams in this format + should be sent to any host unless the sender has positive knowledge + that the recipient will be able to interpret them. Details of the + trailer encapsulation may be found in [6]. + + (Note: At the present time Unix 4.2bsd will either always use + trailers or never use them (per interface), depending on a boot-time + option. This is expected to be changed in the future. Unix 4.2bsd + also uses a non-standard Internet broadcast address with a host part + of all zeroes, this may also be changed in the future.) + + + +Hornig [Page 2] + + + +RFC 894 April 1984 + + +Byte Order + + As described in Appendix B of the Internet Protocol + specification [1], the IP datagram is transmitted over the Ethernet + as a series of 8-bit bytes. + +References + + [1] Postel, J., "Internet Protocol", RFC-791, USC/Information + Sciences Institute, September 1981. + + [2] "The Ethernet - A Local Area Network", Version 1.0, Digital + Equipment Corporation, Intel Corporation, Xerox Corporation, + September 1980. + + [3] Postel, J., "A Standard for the Transmission of IP Datagrams + over Experimental Ethernet Networks", RFC-895, USC/Information + Sciences Institute, April 1984. + + [4] Postel, J., "The TCP Maximum Segment Size Option and Related + Topics", RFC-879, USC/Information Sciences Institute, November 1983. + + [5] Plummer, D., "An Ethernet Address Resolution Protocol", RFC-826, + Symbolics Cambridge Research Center, November 1982. + + [6] Leffler, S., and M. Karels, "Trailer Encapsulations", RFC-893, + University of California at Berkeley, April 1984. 
+ + + + + + + + + + + + + + + + + + + + + + + +Hornig [Page 3] + diff --git a/roles/dotfiles/files/.emacs.d/ac-comphist.dat b/roles/dotfiles/files/.emacs.d/ac-comphist.dat new file mode 100644 index 0000000..35eafff --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ac-comphist.dat @@ -0,0 +1 @@ +(nil) diff --git a/roles/dotfiles/files/.emacs.d/ensure.el b/roles/dotfiles/files/.emacs.d/ensure.el new file mode 100644 index 0000000..07f0b88 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ensure.el @@ -0,0 +1,35 @@ +(defun ensure-package (package) "Install PACKAGE with `package-install' unless it is already installed." + (unless (package-installed-p package) + (package-install package))) + +(unless (file-directory-p (expand-file-name "elpa/archives/melpa" user-emacs-directory)) + (package-refresh-contents)) + +(let ((initial-package-list + '(auto-complete + cargo + ;; chess + cider + geiser + ;; gnugo + go ;; play the game + go-autocomplete + go-direx + go-guru + go-mode + jedi + keychain-environment + lua-mode + luarocks + magit + markdown-mode + paredit + pelican-mode + projectile + racket-mode + rust-mode + scpaste + slime + undo-tree))) + (dolist (package initial-package-list) + (ensure-package package))) diff --git a/roles/dotfiles/files/.emacs.d/ido.last b/roles/dotfiles/files/.emacs.d/ido.last new file mode 100644 index 0000000..d19df78 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/ido.last @@ -0,0 +1,29 @@ +;;; -*- coding: utf-8 -*- + +;; ----- ido-last-directory-list ----- +( + ("/home/k.isom/kodiak/" . "ktos/") + ("/home/k.isom/" . 
"kodiak/") +) + +;; ----- ido-work-directory-list ----- +( + "/home/k.isom/kodiak/ktos/" +) + +;; ----- ido-work-file-list ----- +( + "README.org" +) + +;; ----- ido-dir-file-cache ----- +( + ("/home/k.isom/" (25647 820 345033 248000) ".local/" ".python_history" ".Xresources" ".aws.sh" ".aws/" "Downloads/" ".docker/" "./" ".npm/" ".sudo_as_admin_successful" ".bash_history" ".cache/" ".dmrc" ".bazelrc" ".bash_logout" ".profile.bak" "tmp/" ".config/" ".java/" ".mozilla/" "../" ".bashrc" "Pictures/" "src/" "obs.img" ".amplify/" ".gitconfig" "Music/" "Public/" ".pyenv/" "git/" ".lesshst" "token.txt" ".emacs.d/" ".profile" "snap/" ".viminfo" "kodiak/" ".yarn/" ".bazel/" "Documents/" ".GlobalProtect/" "Templates/" ".pki/" ".Xauthority" "Videos/" ".gnupg/" ".xsession-errors" "token.txt~" "java_error_in_clion_.hprof" "Desktop/" ".xsession-errors.old" ".ssh/") + + ("/home/k.isom/kodiak/ktos/" (25646 63617 378004 643000) "scripts/" "kodiak.xml" "./" "devtools.xml" "build-stack-from-source.xml" "device-table" "../" "cuda-tensorrt.xml" "nvidia-drivers.xml" "README.org" "rfs.xml" "initfs/" "update-syspart.sh" "rootfs/" "makefile" "xorg.xml" "reprepro.xml" "ifs.xml" ".git/") + + ("/home/k.isom/kodiak/" (25646 63612 290013 440000) "./" "vehicle/" "ktos/" "../") +) + +;; ----- ido-unc-hosts-cache ----- +t diff --git a/roles/dotfiles/files/.emacs.d/init.el b/roles/dotfiles/files/.emacs.d/init.el new file mode 100644 index 0000000..0464564 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/init.el @@ -0,0 +1,209 @@ +;;; startup without syntax highlighting +;;; (global-font-lock-mode 0) + +;; set up package handling +(require 'package) + +(setq gnutls-algorithm-priority "NORMAL:-VERS-TLS1.3") +(add-to-list 'package-archives + '("melpa" . 
"https://melpa.org/packages/")) + +(package-initialize) +(require 'cl) +(let* ((home-dir (getenv "HOME")) + (ensure-lisp (concatenate 'string home-dir "/.emacs.d/ensure.el"))) + (load ensure-lisp)) + +;; reduce brain damage +(tool-bar-mode 0) +(menu-bar-mode 0) +(setq inhibit-startup-screen t) +(setq display-time-24hr-format t) +(display-time-mode) +(column-number-mode) + +;; useful when writing +(global-set-key (kbd "C-c w") 'count-words) + +;; remove whitespace to make room for more cyberspace +(add-hook 'before-save-hook 'delete-trailing-whitespace) + +;; hippie-expand is the best +(require 'hippie-exp) +(require 'auto-complete) +(global-auto-complete-mode t) +(ac-set-trigger-key "") +(global-set-key (kbd "") 'ac-expand) + +;; eshell is pretty okay +(global-set-key (kbd "C-x m") 'eshell) + +;; ido-mode makes finding files way more awesome +;; note: C-x C-f C-f will kick back to normal find-file for when ido's tab +;; completion is getting in the way. +(require 'ido) +(ido-mode 1) + +;; magit, not yours +(require 'magit) +(global-set-key (kbd "C-x g") 'magit-status) + +;; undo-tree is undo done right +(require 'undo-tree) +(global-undo-tree-mode) + +;; i like refilling paragraphs +(global-set-key (kbd "M-q") 'fill-paragraph) + +;; i install things to /usr/local +(add-to-list 'exec-path "/home/kyle/bin") +(add-to-list 'exec-path "/usr/local/bin") + +;; tell me where i'm at +(column-number-mode) + +;;; i like cua-rectangle +(cua-mode t) +(cua-selection-mode 'emacs) +(global-set-key (kbd "M-RET") 'cua-rectangle-mark-mode) + +(require 'scpaste) +(setq scpaste-http-destination "https://p.kyleisom.net" + scpaste-scp-destination "p.kyleisom.net:/var/www/sites/p/") + +;;; useful for writing +(global-set-key (kbd "C-x w") 'count-words) + +;;; used with pollen +(global-set-key (kbd "C-c C-d") + (lambda () (interactive) (insert "\u25ca"))) +(add-to-list 'auto-mode-alist '("\\.poly.pm\\'" . 
text-mode)) + +(require 'markdown-mode) + +;; python stuff +(add-hook 'python-mode-hook 'jedi:setup) +(setq jedi:complete-on-dot t) ; optional + +;; golang stuff +(setq gofmt-command "goimports") +(require 'go-mode) +(add-hook 'before-save-hook 'gofmt-before-save) + +(when (file-exists-p (expand-file-name "~/quicklisp/slime-helper.el")) + (load (expand-file-name "~/quicklisp/slime-helper.el")) + (ensure-package 'slime) + ;; Replace "sbcl" with the path to your implementation + (setq inferior-lisp-program "sbcl") + (slime-setup '(slime-fancy + slime-autodoc + slime-indentation)) + + (setq slime-net-coding-system 'utf-8-unix + slime-truncate-lines nil) + + (setq lisp-lambda-list-keyword-parameter-alignment t + lisp-lambda-list-keyword-alignment t)) + +(add-to-list 'auto-mode-alist '("\\.ros\\'" . lisp-mode)) + + +(add-hook 'clojure-mode-hook #'enable-paredit-mode) +(add-hook 'lisp-mode-hook #'enable-paredit-mode) +(add-hook 'lisp-interaction-mode-hook #'enable-paredit-mode) +(add-hook 'scheme-mode-hook #'enable-paredit-mode) + +;;; rust stuff +(add-hook 'rust-mode-hook #'racer-mode) +(add-hook 'racer-mode-hook #'eldoc-mode) +(add-hook 'racer-mode-hook #'company-mode) + +(require 'rust-mode) +(define-key rust-mode-map (kbd "TAB") #'company-indent-or-complete-common) +(setq company-tooltip-align-annotations t) + +;;; Project Interaction Library for Emacs +(require 'projectile) +(define-key projectile-mode-map (kbd "s-p") 'projectile-command-map) +(define-key projectile-mode-map (kbd "C-c p") 'projectile-command-map) +(setq projectile-project-search-path '("~/src/" "~/code/")) +(projectile-mode +1) + + +;;; +;;; _:_ +;;; '-.-' +;;; () __.'.__ +;;; .-:--:-. 
|_______| +;;; () \____/ \=====/ +;;; /\ {====} )___( +;;; (\=, //\\ )__( /_____\ +;;; __ |'-'-'| // .\ ( ) /____\ | | +;;; / \ |_____| (( \_ \ )__( | | | | +;;; \__/ |===| )) `\_) /____\ | | | | +;;; /____\ | | (/ \ | | | | | | +;;; | | | | | _.-'| | | | | | | +;;; |__| )___( )___( /____\ /____\ /_____\ +;;; (====) (=====) (=====) (======) (======) (=======) +;;; }===={ }====={ }====={ }======{ }======{ }======={ +;;; (______)(_______)(_______)(________)(________)(_________) +(setq chess-ai-depth 2) + + +(custom-set-variables + ;; custom-set-variables was added by Custom. + ;; If you edit it by hand, you could mess it up, so be careful. + ;; Your init file should contain only one such instance. + ;; If there is more than one, they won't work right. + '(ansi-color-names-vector + ["#2d3743" "#ff4242" "#74af68" "#dbdb95" "#34cae2" "#008b8b" "#00ede1" "#e1e1e0"]) + '(chess-default-display (quote chess-plain)) + '(custom-safe-themes + (quote + ("bf390ecb203806cbe351b966a88fc3036f3ff68cd2547db6ee3676e87327b311" "e1943fd6568d49ec819ee3711c266a8a120e452ba08569045dd8f50cc5ec5dd3" "4561c67b0764aa6343d710bb0a6f3a96319252b2169d371802cc94adfea5cfc9" "5f95ce79b4a8870b3486b04de22ca2e0785b287da8779f512cdd847f42266989" default))) + '(custom-theme-directory "~/.emacs.d/themes") + '(global-font-lock-mode t) + '(package-selected-packages + (quote + (yaml-mode projectile company-racer ac-racer racer erlang go-rename blackboard-bold-mode blacken jedi minimal-theme monochrome-theme monotropic-theme nimbus-theme noctilux-theme nord-theme nordless-theme northcode-theme paganini-theme paper-theme melancholy-theme go-imports guile-scheme slime chess pelican-mode gnugo go go-autocomplete go-direx go-guru go-mode markdown-mode irfc scpaste cargo undo-tree magit auto-complete)))) +(custom-set-faces + ;; custom-set-faces was added by Custom. + ;; If you edit it by hand, you could mess it up, so be careful. + ;; Your init file should contain only one such instance. 
+ ;; If there is more than one, they won't work right. + ) + +(setq +DEFAULT-THEME+ "weyland-yutani") +(defun toggle-fontlock () "Toggle `global-font-lock-mode'; when turning it on, also load the theme named by +DEFAULT-THEME+." + (if global-font-lock-mode + (progn + (message "disabling font-lock-mode") + (global-font-lock-mode 0)) + (progn + (message "enabling font-lock-mode") + (load-theme (intern +DEFAULT-THEME+)) + (global-font-lock-mode t)))) + +(put 'upcase-region 'disabled nil) +(put 'downcase-region 'disabled nil) + +(keychain-refresh-environment) +(require 'ox-publish) +(setq org-publish-project-alist + '(("notes" + :base-directory "~/notes/" + :publishing-directory "/ssh:phobos.wntrmute.net:/var/www/sites/tmp/" + :publishing-function org-html-publish-to-html + :headline-levels 4 ; Just the default for this project. + :auto-preamble t) + ("notes-static" + :base-directory "~/notes/" + :base-extension "css\\|js\\|png\\|jpg\\|gif\\|pdf\\|mp3\\|ogg\\|swf" + :publishing-directory "/ssh:phobos.wntrmute.net:/var/www/sites/tmp/" + :recursive t + :publishing-function org-publish-attachment))) + +;;; Load fira-code support. +(when (window-system) + (set-frame-font "Ubuntu Mono 13")) +;; (load "~/.emacs.d/fira-code.el") diff --git a/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el b/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el new file mode 100644 index 0000000..9fc3326 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/eink-dark-theme.el @@ -0,0 +1,256 @@ +;;; eink-dark-theme.el --- Emacs theme with a dark background. + +;; Copyright (C) 2015, K. Isom + +;; Author: K. Isom +;; https://git.kyleisom.net/style/eink-emacs +;; Version: 0.2 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. 
+ +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;; This file is not part of Emacs. + +;;; Commentary: + +;;; Code: + + (deftheme eink-dark) + (let ((class '((class color) (min-colors 89))) + (fg1 "#b3b3b3") + (fg2 "#a3a3a3") + (fg3 "#949494") + (fg4 "#858585") + (bg1 "#1d1f21") + (bg2 "#2c2e30") + (bg3 "#3b3d3f") + (bg4 "#4c4d4f") + (key2 "#bbbbbb") + (key3 "#9d9d9d") + (builtin "#b3b3b3") + (keyword "#b3b3b3") + (const "#b3b3b3") + (comment "#696969") + (func "#b3b3b3") + (str "#b3b3b3") + (type "#b3b3b3") + (var "#b3b3b3") + (warning "#cd2626")) + (custom-theme-set-faces + 'eink-dark + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold ,class :foreground ,keyword)))) + `(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class 
(:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + `(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + `(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class 
(:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class (:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class (:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + 
`(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground ,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + `(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class 
(:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + `(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class (:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background 
,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class (:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-preview-common ((,class (:foreground ,bg2 :foreground ,fg3)))) + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + `(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-tooltip-annotation ((,class (:foreground ,const)))) + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-tooltip-selection ((,class 
(:background ,bg3 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-jave-font-lock-protected-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-number-face ((t (:foreground ,var)))))) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory load-file-name)))) + +(provide-theme 'eink-dark) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; eink-dark-theme.el ends here + diff --git a/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el b/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el new file mode 100644 index 0000000..a70e395 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/eink-light-theme.el @@ -0,0 +1,256 @@ +;;; eink-light-theme.el --- Emacs theme with a light background. 
+ +;; Copyright (C) 2015, K. Isom + +;; Author: K. Isom +;; https://git.kyleisom.net/style/eink-emacs +;; Version: 0.2 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see https://www.gnu.org/licenses/. + +;; This file is not part of Emacs. + +;;; Commentary: + +;;; Code: + + (deftheme eink-light) + (let ((class '((class color) (min-colors 89))) + (fg1 "#1c1c1c") + (fg2 "#2b2b2b") + (fg3 "#3a3a3a") + (fg4 "#4b4b4b") + (bg1 "#fffafa") + (bg2 "#e8e3e3") + (bg3 "#d1cdcd") + (bg4 "#bbb8b8") + (key2 "#313131") + (key3 "#1a1a1a") + (builtin "#1c1c1c") + (keyword "#1c1c1c") + (const "#1c1c1c") + (comment "#7f7f7f") + (func "#1c1c1c") + (str "#1c1c1c") + (type "#1c1c1c") + (var "#1c1c1c") + (warning "#cd2626")) + (custom-theme-set-faces + 'eink-light + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold t :foreground ,keyword)))) + 
`(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class (:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + `(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + 
`(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class (:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class (:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class 
(:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + `(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground ,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + 
`(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class (:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + `(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class 
(:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class (:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-preview-common ((,class (:foreground ,bg2 :foreground ,fg3)))) + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + 
`(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-tooltip-annotation ((,class (:foreground ,const)))) + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-tooltip-selection ((,class (:background ,bg3 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-java-font-lock-protected-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-number-face ((t (:foreground ,var)))))) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory 
load-file-name)))) + +(provide-theme 'eink-light) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; eink-light-theme.el ends here + diff --git a/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el b/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el new file mode 100644 index 0000000..2b4b3af --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/themes/weyland-yutani-theme.el @@ -0,0 +1,271 @@ + + +;;; weyland-yutani-theme.el --- Emacs theme with a dark background. + +;; Copyright (C) 2014 , Joe Staursky + +;; Author: Joe Staursky +;; +;; Version: 0.1 +;; Package-Requires: ((emacs "24")) +;; Created with emacs-theme-generator, https://github.com/mswift42/theme-creator. + + +;; This program is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see https://www.gnu.org/licenses/. + +;; This file is not part of Emacs. 
+ +;;; Commentary: + +;;; Code: + + (deftheme weyland-yutani) + (let ((class '((class color) (min-colors 89))) + (fg1 "#a0a8b8") + (fg2 "#9299a8") + (fg3 "#848b98") + (fg4 "#777d88") + (bg1 "#141e20") + (bg2 "#232d2f") + (bg3 "#333c3e") + (bg4 "#444d4e") + (key2 "#a0e88b") + (key3 "#82c96e") + (builtin "#a3646f") + (keyword "#93e57c") + (const "#d1d68b") + (comment "#565766") + (func "#beb7f7") + (str "#627e95") + (type "#5992c2") + (var "#9e79b3") + (warning "#fcbec9")) + (custom-theme-set-faces + 'weyland-yutani + `(default ((,class (:background ,bg1 :foreground ,fg1)))) + `(font-lock-builtin-face ((,class (:foreground ,builtin)))) + `(company-tooltip-annotation-selection ((,class (:foreground ,func)))) + + `(company-tooltip-annotation ((,class (:foreground ,const)))) + + `(font-lock-comment-face ((,class (:foreground ,comment)))) + `(font-lock-negation-char-face ((,class (:foreground ,const)))) + `(font-lock-reference-face ((,class (:foreground ,const)))) + `(font-lock-constant-face ((,class (:foreground ,const)))) + `(font-lock-doc-face ((,class (:foreground ,comment)))) + `(font-lock-function-name-face ((,class (:foreground ,func :bold t)))) + `(font-lock-keyword-face ((,class (:bold t :foreground ,keyword)))) + `(font-lock-string-face ((,class (:foreground ,str)))) + `(font-lock-type-face ((,class (:foreground ,type )))) + `(font-lock-variable-name-face ((,class (:foreground ,var)))) + `(font-lock-warning-face ((,class (:foreground ,warning :background ,bg2)))) + `(region ((,class (:background ,fg1 :foreground ,bg1)))) + `(highlight ((,class (:foreground ,fg3 :background ,bg3)))) + `(hl-line ((,class (:background ,bg2)))) + `(fringe ((,class (:background ,bg2 :foreground ,fg4)))) + `(cursor ((,class (:background ,bg3)))) + `(show-paren-match-face ((,class (:background ,warning)))) + `(isearch ((,class (:bold t :foreground ,warning :background ,bg3)))) + `(mode-line ((,class (:box (:line-width 1 :color nil) :bold t :foreground ,fg4 :background ,bg2)))) + 
`(mode-line-inactive ((,class (:box (:line-width 1 :color nil :style pressed-button) :foreground ,key3 :background ,bg1 :weight normal)))) + `(mode-line-buffer-id ((,class (:bold t :foreground ,func :background nil)))) + `(mode-line-highlight ((,class (:foreground ,keyword :box nil :weight bold)))) + `(mode-line-emphasis ((,class (:foreground ,fg1)))) + `(vertical-border ((,class (:foreground ,fg3)))) + `(minibuffer-prompt ((,class (:bold t :foreground ,keyword)))) + `(default-italic ((,class (:italic t)))) + `(link ((,class (:foreground ,const :underline t)))) + `(org-code ((,class (:foreground ,fg2)))) + `(org-hide ((,class (:foreground ,fg4)))) + `(org-level-1 ((,class (:bold t :foreground ,fg2 :height 1.1)))) + `(org-level-2 ((,class (:bold nil :foreground ,fg3)))) + `(org-level-3 ((,class (:bold t :foreground ,fg4)))) + `(org-level-4 ((,class (:bold nil :foreground ,bg4)))) + `(org-date ((,class (:underline t :foreground ,var) ))) + `(org-footnote ((,class (:underline t :foreground ,fg4)))) + `(org-link ((,class (:underline t :foreground ,type )))) + `(org-special-keyword ((,class (:foreground ,func)))) + `(org-block ((,class (:foreground ,fg3)))) + `(org-quote ((,class (:inherit org-block :slant italic)))) + `(org-verse ((,class (:inherit org-block :slant italic)))) + `(org-todo ((,class (:box (:line-width 1 :color ,fg3) :foreground ,keyword :bold t)))) + `(org-done ((,class (:box (:line-width 1 :color ,bg3) :bold t :foreground ,bg4)))) + `(org-warning ((,class (:underline t :foreground ,warning)))) + `(org-agenda-structure ((,class (:weight bold :foreground ,fg3 :box (:color ,fg4) :background ,bg3)))) + `(org-agenda-date ((,class (:foreground ,var :height 1.1 )))) + `(org-agenda-date-weekend ((,class (:weight normal :foreground ,fg4)))) + `(org-agenda-date-today ((,class (:weight bold :foreground ,keyword :height 1.4)))) + `(org-agenda-done ((,class (:foreground ,bg4)))) + `(org-scheduled ((,class (:foreground ,type)))) + `(org-scheduled-today ((,class 
(:foreground ,func :weight bold :height 1.2)))) + `(org-ellipsis ((,class (:foreground ,builtin)))) + `(org-verbatim ((,class (:foreground ,fg4)))) + `(org-document-info-keyword ((,class (:foreground ,func)))) + `(font-latex-bold-face ((,class (:foreground ,type)))) + `(font-latex-italic-face ((,class (:foreground ,key3 :italic t)))) + `(font-latex-string-face ((,class (:foreground ,str)))) + `(font-latex-match-reference-keywords ((,class (:foreground ,const)))) + `(font-latex-match-variable-keywords ((,class (:foreground ,var)))) + `(ido-only-match ((,class (:foreground ,warning)))) + `(org-sexp-date ((,class (:foreground ,fg4)))) + `(ido-first-match ((,class (:foreground ,keyword :bold t)))) + `(gnus-header-content ((,class (:foreground ,keyword)))) + `(gnus-header-from ((,class (:foreground ,var)))) + `(gnus-header-name ((,class (:foreground ,type)))) + `(gnus-header-subject ((,class (:foreground ,func :bold t)))) + `(mu4e-view-url-number-face ((,class (:foreground ,type)))) + `(mu4e-cited-1-face ((,class (:foreground ,fg2)))) + `(mu4e-cited-7-face ((,class (:foreground ,fg3)))) + `(mu4e-header-marks-face ((,class (:foreground ,type)))) + `(ffap ((,class (:foreground ,fg4)))) + `(js2-private-function-call ((,class (:foreground ,const)))) + `(js2-jsdoc-html-tag-delimiter ((,class (:foreground ,str)))) + `(js2-jsdoc-html-tag-name ((,class (:foreground ,key2)))) + `(js2-external-variable ((,class (:foreground ,type )))) + `(js2-function-param ((,class (:foreground ,const)))) + `(js2-jsdoc-value ((,class (:foreground ,str)))) + `(js2-private-member ((,class (:foreground ,fg3)))) + `(js3-warning-face ((,class (:underline ,keyword)))) + `(js3-error-face ((,class (:underline ,warning)))) + `(js3-external-variable-face ((,class (:foreground ,var)))) + `(js3-function-param-face ((,class (:foreground ,key3)))) + `(js3-jsdoc-tag-face ((,class (:foreground ,keyword)))) + `(js3-instance-member-face ((,class (:foreground ,const)))) + `(warning ((,class (:foreground 
,warning)))) + `(ac-completion-face ((,class (:underline t :foreground ,keyword)))) + `(info-quoted-name ((,class (:foreground ,builtin)))) + `(info-string ((,class (:foreground ,str)))) + `(icompletep-determined ((,class :foreground ,builtin))) + `(undo-tree-visualizer-current-face ((,class :foreground ,builtin))) + `(undo-tree-visualizer-default-face ((,class :foreground ,fg2))) + `(undo-tree-visualizer-unmodified-face ((,class :foreground ,var))) + `(undo-tree-visualizer-register-face ((,class :foreground ,type))) + `(slime-repl-inputed-output-face ((,class (:foreground ,type)))) + `(trailing-whitespace ((,class :foreground nil :background ,warning))) + `(rainbow-delimiters-depth-1-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-2-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-3-face ((,class :foreground ,var))) + `(rainbow-delimiters-depth-4-face ((,class :foreground ,const))) + `(rainbow-delimiters-depth-5-face ((,class :foreground ,keyword))) + `(rainbow-delimiters-depth-6-face ((,class :foreground ,fg1))) + `(rainbow-delimiters-depth-7-face ((,class :foreground ,type))) + `(rainbow-delimiters-depth-8-face ((,class :foreground ,var))) + `(magit-item-highlight ((,class :background ,bg3))) + `(magit-section-heading ((,class (:foreground ,keyword :weight bold)))) + `(magit-hunk-heading ((,class (:background ,bg3)))) + `(magit-section-highlight ((,class (:background ,bg2)))) + `(magit-hunk-heading-highlight ((,class (:background ,bg3)))) + `(magit-diff-context-highlight ((,class (:background ,bg3 :foreground ,fg3)))) + `(magit-diffstat-added ((,class (:foreground ,type)))) + `(magit-diffstat-removed ((,class (:foreground ,var)))) + `(magit-process-ok ((,class (:foreground ,func :weight bold)))) + `(magit-process-ng ((,class (:foreground ,warning :weight bold)))) + `(magit-branch ((,class (:foreground ,const :weight bold)))) + `(magit-log-author ((,class (:foreground ,fg3)))) + `(magit-hash ((,class (:foreground ,fg2)))) + 
`(magit-diff-file-header ((,class (:foreground ,fg2 :background ,bg3)))) + `(lazy-highlight ((,class (:foreground ,fg2 :background ,bg3)))) + `(term ((,class (:foreground ,fg1 :background ,bg1)))) + `(term-color-black ((,class (:foreground ,bg3 :background ,bg3)))) + `(term-color-blue ((,class (:foreground ,func :background ,func)))) + `(term-color-red ((,class (:foreground ,keyword :background ,bg3)))) + `(term-color-green ((,class (:foreground ,type :background ,bg3)))) + `(term-color-yellow ((,class (:foreground ,var :background ,var)))) + `(term-color-magenta ((,class (:foreground ,builtin :background ,builtin)))) + `(term-color-cyan ((,class (:foreground ,str :background ,str)))) + `(term-color-white ((,class (:foreground ,fg2 :background ,fg2)))) + `(rainbow-delimiters-unmatched-face ((,class :foreground ,warning))) + `(helm-header ((,class (:foreground ,fg2 :background ,bg1 :underline nil :box nil)))) + `(helm-source-header ((,class (:foreground ,keyword :background ,bg1 :underline nil :weight bold)))) + `(helm-selection ((,class (:background ,bg2 :underline nil)))) + `(helm-selection-line ((,class (:background ,bg2)))) + `(helm-visible-mark ((,class (:foreground ,bg1 :background ,bg3)))) + `(helm-candidate-number ((,class (:foreground ,bg1 :background ,fg1)))) + `(helm-separator ((,class (:foreground ,type :background ,bg1)))) + `(helm-time-zone-current ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-time-zone-home ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-not-saved ((,class (:foreground ,type :background ,bg1)))) + `(helm-buffer-process ((,class (:foreground ,builtin :background ,bg1)))) + `(helm-buffer-saved-out ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-buffer-size ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-ff-directory ((,class (:foreground ,func :background ,bg1 :weight bold)))) + `(helm-ff-file ((,class (:foreground ,fg1 :background ,bg1 :weight normal)))) + `(helm-ff-executable ((,class 
(:foreground ,key2 :background ,bg1 :weight normal)))) + `(helm-ff-invalid-symlink ((,class (:foreground ,key3 :background ,bg1 :weight bold)))) + `(helm-ff-symlink ((,class (:foreground ,keyword :background ,bg1 :weight bold)))) + `(helm-ff-prefix ((,class (:foreground ,bg1 :background ,keyword :weight normal)))) + `(helm-grep-cmd-line ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-file ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-finish ((,class (:foreground ,fg2 :background ,bg1)))) + `(helm-grep-lineno ((,class (:foreground ,fg1 :background ,bg1)))) + `(helm-grep-match ((,class (:foreground nil :background nil :inherit helm-match)))) + `(helm-grep-running ((,class (:foreground ,func :background ,bg1)))) + `(helm-moccur-buffer ((,class (:foreground ,func :background ,bg1)))) + `(helm-source-go-package-godoc-description ((,class (:foreground ,str)))) + `(helm-bookmark-w3m ((,class (:foreground ,type)))) + + + + `(company-echo ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-preview ((,class (:background ,bg1 :foreground ,key2)))) + `(company-tooltip ((,class (:foreground ,fg2 :background ,bg1 :bold t)))) + `(company-echo-common ((,class (:foreground ,bg1 :background ,fg1)))) + `(company-scrollbar-bg ((,class (:background ,bg3)))) + `(company-scrollbar-fg ((,class (:foreground ,keyword)))) + `(company-tooltip-mouse ((,class (:inherit highlight)))) + `(company-preview-common ((,class (:foreground ,bg2 :foreground ,fg3)))) + `(company-template-field ((,class (:inherit region)))) + `(company-tooltip-search ((,class (:inherit region)))) + `(company-tooltip-common ((,class ( :foreground ,fg3)))) + `(company-preview-search ((,class (:foreground ,type :background ,bg1)))) + `(company-tooltip-selection ((,class (:background ,bg3 :foreground ,fg3)))) + `(company-tooltop-annotation ((,class (:foreground ,const)))) + `(company-tooltip-common-selection ((,class (:foreground ,str)))) + `(company-tooltip-search-selection ((,class 
(:foreground ,const)))) + `(company-tooltop-annotation-selection ((,class (:foreground ,const)))) + `(web-mode-builtin-face ((,class (:inherit ,font-lock-builtin-face)))) + `(web-mode-comment-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-constant-face ((,class (:inherit ,font-lock-constant-face)))) + `(web-mode-keyword-face ((,class (:foreground ,keyword)))) + `(web-mode-doctype-face ((,class (:inherit ,font-lock-comment-face)))) + `(web-mode-function-name-face ((,class (:inherit ,font-lock-function-name-face)))) + `(web-mode-string-face ((,class (:foreground ,str)))) + `(web-mode-type-face ((,class (:inherit ,font-lock-type-face)))) + `(web-mode-html-attr-name-face ((,class (:foreground ,func)))) + `(web-mode-html-attr-value-face ((,class (:foreground ,keyword)))) + `(web-mode-warning-face ((,class (:inherit ,font-lock-warning-face)))) + `(web-mode-html-tag-face ((,class (:foreground ,builtin)))) + `(jde-java-font-lock-package-face ((t (:foreground ,var)))) + `(jde-java-font-lock-public-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-private-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-constant-face ((t (:foreground ,const)))) + `(jde-java-font-lock-modifier-face ((t (:foreground ,key3)))) + `(jde-java-font-lock-protected-face ((t (:foreground ,keyword)))) + `(jde-java-font-lock-number-face ((t (:foreground ,var)))) + + + )) + +;;;###autoload +(when load-file-name + (add-to-list 'custom-theme-load-path + (file-name-as-directory (file-name-directory load-file-name)))) + +(provide-theme 'weyland-yutani) + +;; Local Variables: +;; no-byte-compile: t +;; End: + +;;; weyland-yutani-theme.el ends here diff --git a/roles/dotfiles/files/.emacs.d/transient/history.el b/roles/dotfiles/files/.emacs.d/transient/history.el new file mode 100644 index 0000000..90b5a84 --- /dev/null +++ b/roles/dotfiles/files/.emacs.d/transient/history.el @@ -0,0 +1 @@ +nil \ No newline at end of file diff --git a/roles/dotfiles/files/.gitconfig 
b/roles/dotfiles/files/.gitconfig new file mode 100644 index 0000000..2b41646 --- /dev/null +++ b/roles/dotfiles/files/.gitconfig @@ -0,0 +1,19 @@ +[user] + name = Kyle Isom + email = kyle@imap.cc + +[color] + ui = false + +[core] + excludesfile = /home/kyle/.gitignore_global + editor = mg + +[http] + cookiefile = /home/kyle/.gitcookies + +[init] + defaultBranch = master + +[push] + default = simple diff --git a/roles/dotfiles/files/.gitignore_global b/roles/dotfiles/files/.gitignore_global new file mode 100644 index 0000000..93d91d9 --- /dev/null +++ b/roles/dotfiles/files/.gitignore_global @@ -0,0 +1,5 @@ +*~ +*# +.#* +.*.sw? +tags diff --git a/roles/dotfiles/files/.hgrc b/roles/dotfiles/files/.hgrc new file mode 100644 index 0000000..876b14d --- /dev/null +++ b/roles/dotfiles/files/.hgrc @@ -0,0 +1,34 @@ +# example user config (see 'hg help config' for more info) +[ui] +# name and email, e.g. +# username = Jane Doe +username = Kyle Isom +editor = /usr/bin/mg + +# We recommend enabling tweakdefaults to get slight improvements to +# the UI over time. Make sure to set HGPLAIN in the environment when +# writing scripts! 
+tweakdefaults = True + +# uncomment to disable color in command output +# (see 'hg help color' for details) +color = never + +# uncomment to disable command output pagination +# (see 'hg help pager' for details) +paginate = never + +[extensions] +# uncomment the lines below to enable some popular extensions +# (see 'hg help extensions' for more info) +# +histedit = +rebase = +shelve = +uncommit = +hgext.mq= +hgext.patchbomb= +purge= + +[diff] +git = True diff --git a/roles/dotfiles/files/.mg b/roles/dotfiles/files/.mg new file mode 100644 index 0000000..7943a79 --- /dev/null +++ b/roles/dotfiles/files/.mg @@ -0,0 +1,3 @@ +column-number-mode +backup-to-home-directory +bksp-mode diff --git a/roles/dotfiles/files/.profile_custom b/roles/dotfiles/files/.profile_custom new file mode 100644 index 0000000..c69fa53 --- /dev/null +++ b/roles/dotfiles/files/.profile_custom @@ -0,0 +1,33 @@ +# ~/.profile: executed by the command interpreter for login shells. +# This file is not read by bash(1), if ~/.bash_profile or ~/.bash_login +# exists. +# see /usr/share/doc/bash/examples/startup-files for examples. +# the files are located in the bash-doc package. + +# the default umask is set in /etc/profile; for setting the umask +# for ssh logins, install and configure the libpam-umask package. +#umask 022 + +# if running bash +if [ -n "$BASH_VERSION" ]; then + # include .bashrc if it exists + if [ -f "$HOME/.bashrc" ]; then + . 
"$HOME/.bashrc" + fi +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/bin" ] ; then + PATH="$HOME/bin:$PATH" +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/.local/bin" ] ; then + PATH="$HOME/.local/bin:$PATH" +fi + +[ -f ~/.cargo/env ] && source $HOME/.cargo/env + +alias co='git checkout' +alias st='git status' +alias prb='git pull --rebase' diff --git a/roles/dotfiles/files/.vim/autoload/plug.vim b/roles/dotfiles/files/.vim/autoload/plug.vim new file mode 100644 index 0000000..4e05630 --- /dev/null +++ b/roles/dotfiles/files/.vim/autoload/plug.vim @@ -0,0 +1,2526 @@ +" vim-plug: Vim plugin manager +" ============================ +" +" Download plug.vim and put it in ~/.vim/autoload +" +" curl -fLo ~/.vim/autoload/plug.vim --create-dirs \ +" https://raw.githubusercontent.com/junegunn/vim-plug/master/plug.vim +" +" Edit your .vimrc +" +" call plug#begin('~/.vim/plugged') +" +" " Make sure you use single quotes +" +" " Shorthand notation; fetches https://github.com/junegunn/vim-easy-align +" Plug 'junegunn/vim-easy-align' +" +" " Any valid git URL is allowed +" Plug 'https://github.com/junegunn/vim-github-dashboard.git' +" +" " Multiple Plug commands can be written in a single line using | separators +" Plug 'SirVer/ultisnips' | Plug 'honza/vim-snippets' +" +" " On-demand loading +" Plug 'scrooloose/nerdtree', { 'on': 'NERDTreeToggle' } +" Plug 'tpope/vim-fireplace', { 'for': 'clojure' } +" +" " Using a non-master branch +" Plug 'rdnetto/YCM-Generator', { 'branch': 'stable' } +" +" " Using a tagged release; wildcard allowed (requires git 1.9.2 or above) +" Plug 'fatih/vim-go', { 'tag': '*' } +" +" " Plugin options +" Plug 'nsf/gocode', { 'tag': 'v.20150303', 'rtp': 'vim' } +" +" " Plugin outside ~/.vim/plugged with post-update hook +" Plug 'junegunn/fzf', { 'dir': '~/.fzf', 'do': './install --all' } +" +" " Unmanaged plugin (manually installed and updated) +" Plug '~/my-prototype-plugin' +" +" " 
Initialize plugin system +" call plug#end() +" +" Then reload .vimrc and :PlugInstall to install plugins. +" +" Plug options: +" +"| Option | Description | +"| ----------------------- | ------------------------------------------------ | +"| `branch`/`tag`/`commit` | Branch/tag/commit of the repository to use | +"| `rtp` | Subdirectory that contains Vim plugin | +"| `dir` | Custom directory for the plugin | +"| `as` | Use different name for the plugin | +"| `do` | Post-update hook (string or funcref) | +"| `on` | On-demand loading: Commands or ``-mappings | +"| `for` | On-demand loading: File types | +"| `frozen` | Do not update unless explicitly specified | +" +" More information: https://github.com/junegunn/vim-plug +" +" +" Copyright (c) 2017 Junegunn Choi +" +" MIT License +" +" Permission is hereby granted, free of charge, to any person obtaining +" a copy of this software and associated documentation files (the +" "Software"), to deal in the Software without restriction, including +" without limitation the rights to use, copy, modify, merge, publish, +" distribute, sublicense, and/or sell copies of the Software, and to +" permit persons to whom the Software is furnished to do so, subject to +" the following conditions: +" +" The above copyright notice and this permission notice shall be +" included in all copies or substantial portions of the Software. +" +" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +" LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +" OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +" WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +if exists('g:loaded_plug') + finish +endif +let g:loaded_plug = 1 + +let s:cpo_save = &cpo +set cpo&vim + +let s:plug_src = 'https://github.com/junegunn/vim-plug.git' +let s:plug_tab = get(s:, 'plug_tab', -1) +let s:plug_buf = get(s:, 'plug_buf', -1) +let s:mac_gui = has('gui_macvim') && has('gui_running') +let s:is_win = has('win32') +let s:nvim = has('nvim-0.2') || (has('nvim') && exists('*jobwait') && !s:is_win) +let s:vim8 = has('patch-8.0.0039') && exists('*job_start') +let s:me = resolve(expand(':p')) +let s:base_spec = { 'branch': 'master', 'frozen': 0 } +let s:TYPE = { +\ 'string': type(''), +\ 'list': type([]), +\ 'dict': type({}), +\ 'funcref': type(function('call')) +\ } +let s:loaded = get(s:, 'loaded', {}) +let s:triggers = get(s:, 'triggers', {}) + +function! plug#begin(...) + if a:0 > 0 + let s:plug_home_org = a:1 + let home = s:path(fnamemodify(expand(a:1), ':p')) + elseif exists('g:plug_home') + let home = s:path(g:plug_home) + elseif !empty(&rtp) + let home = s:path(split(&rtp, ',')[0]) . '/plugged' + else + return s:err('Unable to determine plug home. Try calling plug#begin() with a path argument.') + endif + if fnamemodify(home, ':t') ==# 'plugin' && fnamemodify(home, ':h') ==# s:first_rtp + return s:err('Invalid plug home. '.home.' is a standard Vim runtime path and is not allowed.') + endif + + let g:plug_home = home + let g:plugs = {} + let g:plugs_order = [] + let s:triggers = {} + + call s:define_commands() + return 1 +endfunction + +function! s:define_commands() + command! -nargs=+ -bar Plug call plug#() + if !executable('git') + return s:err('`git` executable not found. Most commands will not be available. To suppress this message, prepend `silent!` to `call plug#begin(...)`.') + endif + command! -nargs=* -bar -bang -complete=customlist,s:names PlugInstall call s:install(0, []) + command! -nargs=* -bar -bang -complete=customlist,s:names PlugUpdate call s:update(0, []) + command! -nargs=0 -bar -bang PlugClean call s:clean(0) + command! 
-nargs=0 -bar PlugUpgrade if s:upgrade() | execute 'source' s:esc(s:me) | endif + command! -nargs=0 -bar PlugStatus call s:status() + command! -nargs=0 -bar PlugDiff call s:diff() + command! -nargs=? -bar -bang -complete=file PlugSnapshot call s:snapshot(0, ) +endfunction + +function! s:to_a(v) + return type(a:v) == s:TYPE.list ? a:v : [a:v] +endfunction + +function! s:to_s(v) + return type(a:v) == s:TYPE.string ? a:v : join(a:v, "\n") . "\n" +endfunction + +function! s:glob(from, pattern) + return s:lines(globpath(a:from, a:pattern)) +endfunction + +function! s:source(from, ...) + let found = 0 + for pattern in a:000 + for vim in s:glob(a:from, pattern) + execute 'source' s:esc(vim) + let found = 1 + endfor + endfor + return found +endfunction + +function! s:assoc(dict, key, val) + let a:dict[a:key] = add(get(a:dict, a:key, []), a:val) +endfunction + +function! s:ask(message, ...) + call inputsave() + echohl WarningMsg + let answer = input(a:message.(a:0 ? ' (y/N/a) ' : ' (y/N) ')) + echohl None + call inputrestore() + echo "\r" + return (a:0 && answer =~? '^a') ? 2 : (answer =~? '^y') ? 1 : 0 +endfunction + +function! s:ask_no_interrupt(...) + try + return call('s:ask', a:000) + catch + return 0 + endtry +endfunction + +function! s:lazy(plug, opt) + return has_key(a:plug, a:opt) && + \ (empty(s:to_a(a:plug[a:opt])) || + \ !isdirectory(a:plug.dir) || + \ len(s:glob(s:rtp(a:plug), 'plugin')) || + \ len(s:glob(s:rtp(a:plug), 'after/plugin'))) +endfunction + +function! plug#end() + if !exists('g:plugs') + return s:err('Call plug#begin() first') + endif + + if exists('#PlugLOD') + augroup PlugLOD + autocmd! + augroup END + augroup! 
PlugLOD + endif + let lod = { 'ft': {}, 'map': {}, 'cmd': {} } + + if exists('g:did_load_filetypes') + filetype off + endif + for name in g:plugs_order + if !has_key(g:plugs, name) + continue + endif + let plug = g:plugs[name] + if get(s:loaded, name, 0) || !s:lazy(plug, 'on') && !s:lazy(plug, 'for') + let s:loaded[name] = 1 + continue + endif + + if has_key(plug, 'on') + let s:triggers[name] = { 'map': [], 'cmd': [] } + for cmd in s:to_a(plug.on) + if cmd =~? '^.\+' + if empty(mapcheck(cmd)) && empty(mapcheck(cmd, 'i')) + call s:assoc(lod.map, cmd, name) + endif + call add(s:triggers[name].map, cmd) + elseif cmd =~# '^[A-Z]' + let cmd = substitute(cmd, '!*$', '', '') + if exists(':'.cmd) != 2 + call s:assoc(lod.cmd, cmd, name) + endif + call add(s:triggers[name].cmd, cmd) + else + call s:err('Invalid `on` option: '.cmd. + \ '. Should start with an uppercase letter or ``.') + endif + endfor + endif + + if has_key(plug, 'for') + let types = s:to_a(plug.for) + if !empty(types) + augroup filetypedetect + call s:source(s:rtp(plug), 'ftdetect/**/*.vim', 'after/ftdetect/**/*.vim') + augroup END + endif + for type in types + call s:assoc(lod.ft, type, name) + endfor + endif + endfor + + for [cmd, names] in items(lod.cmd) + execute printf( + \ 'command! 
-nargs=* -range -bang -complete=file %s call s:lod_cmd(%s, "", , , , %s)', + \ cmd, string(cmd), string(names)) + endfor + + for [map, names] in items(lod.map) + for [mode, map_prefix, key_prefix] in + \ [['i', '', ''], ['n', '', ''], ['v', '', 'gv'], ['o', '', '']] + execute printf( + \ '%snoremap %s %s:call lod_map(%s, %s, %s, "%s")', + \ mode, map, map_prefix, string(map), string(names), mode != 'i', key_prefix) + endfor + endfor + + for [ft, names] in items(lod.ft) + augroup PlugLOD + execute printf('autocmd FileType %s call lod_ft(%s, %s)', + \ ft, string(ft), string(names)) + augroup END + endfor + + call s:reorg_rtp() + filetype plugin indent on + if has('vim_starting') + if has('syntax') && !exists('g:syntax_on') + syntax enable + end + else + call s:reload_plugins() + endif +endfunction + +function! s:loaded_names() + return filter(copy(g:plugs_order), 'get(s:loaded, v:val, 0)') +endfunction + +function! s:load_plugin(spec) + call s:source(s:rtp(a:spec), 'plugin/**/*.vim', 'after/plugin/**/*.vim') +endfunction + +function! s:reload_plugins() + for name in s:loaded_names() + call s:load_plugin(g:plugs[name]) + endfor +endfunction + +function! s:trim(str) + return substitute(a:str, '[\/]\+$', '', '') +endfunction + +function! s:version_requirement(val, min) + for idx in range(0, len(a:min) - 1) + let v = get(a:val, idx, 0) + if v < a:min[idx] | return 0 + elseif v > a:min[idx] | return 1 + endif + endfor + return 1 +endfunction + +function! s:git_version_requirement(...) + if !exists('s:git_version') + let s:git_version = map(split(split(s:system('git --version'))[2], '\.'), 'str2nr(v:val)') + endif + return s:version_requirement(s:git_version, a:000) +endfunction + +function! s:progress_opt(base) + return a:base && !s:is_win && + \ s:git_version_requirement(1, 7, 1) ? '--progress' : '' +endfunction + +if s:is_win + function! s:rtp(spec) + return s:path(a:spec.dir . get(a:spec, 'rtp', '')) + endfunction + + function! 
s:path(path) + return s:trim(substitute(a:path, '/', '\', 'g')) + endfunction + + function! s:dirpath(path) + return s:path(a:path) . '\' + endfunction + + function! s:is_local_plug(repo) + return a:repo =~? '^[a-z]:\|^[%~]' + endfunction +else + function! s:rtp(spec) + return s:dirpath(a:spec.dir . get(a:spec, 'rtp', '')) + endfunction + + function! s:path(path) + return s:trim(a:path) + endfunction + + function! s:dirpath(path) + return substitute(a:path, '[/\\]*$', '/', '') + endfunction + + function! s:is_local_plug(repo) + return a:repo[0] =~ '[/$~]' + endfunction +endif + +function! s:err(msg) + echohl ErrorMsg + echom '[vim-plug] '.a:msg + echohl None +endfunction + +function! s:warn(cmd, msg) + echohl WarningMsg + execute a:cmd 'a:msg' + echohl None +endfunction + +function! s:esc(path) + return escape(a:path, ' ') +endfunction + +function! s:escrtp(path) + return escape(a:path, ' ,') +endfunction + +function! s:remove_rtp() + for name in s:loaded_names() + let rtp = s:rtp(g:plugs[name]) + execute 'set rtp-='.s:escrtp(rtp) + let after = globpath(rtp, 'after') + if isdirectory(after) + execute 'set rtp-='.s:escrtp(after) + endif + endfor +endfunction + +function! s:reorg_rtp() + if !empty(s:first_rtp) + execute 'set rtp-='.s:first_rtp + execute 'set rtp-='.s:last_rtp + endif + + " &rtp is modified from outside + if exists('s:prtp') && s:prtp !=# &rtp + call s:remove_rtp() + unlet! s:middle + endif + + let s:middle = get(s:, 'middle', &rtp) + let rtps = map(s:loaded_names(), 's:rtp(g:plugs[v:val])') + let afters = filter(map(copy(rtps), 'globpath(v:val, "after")'), '!empty(v:val)') + let rtp = join(map(rtps, 'escape(v:val, ",")'), ',') + \ . ','.s:middle.',' + \ . join(map(afters, 'escape(v:val, ",")'), ',') + let &rtp = substitute(substitute(rtp, ',,*', ',', 'g'), '^,\|,$', '', 'g') + let s:prtp = &rtp + + if !empty(s:first_rtp) + execute 'set rtp^='.s:first_rtp + execute 'set rtp+='.s:last_rtp + endif +endfunction + +function! s:doautocmd(...) 
+ if exists('#'.join(a:000, '#')) + execute 'doautocmd' ((v:version > 703 || has('patch442')) ? '' : '') join(a:000) + endif +endfunction + +function! s:dobufread(names) + for name in a:names + let path = s:rtp(g:plugs[name]).'/**' + for dir in ['ftdetect', 'ftplugin'] + if len(finddir(dir, path)) + if exists('#BufRead') + doautocmd BufRead + endif + return + endif + endfor + endfor +endfunction + +function! plug#load(...) + if a:0 == 0 + return s:err('Argument missing: plugin name(s) required') + endif + if !exists('g:plugs') + return s:err('plug#begin was not called') + endif + let names = a:0 == 1 && type(a:1) == s:TYPE.list ? a:1 : a:000 + let unknowns = filter(copy(names), '!has_key(g:plugs, v:val)') + if !empty(unknowns) + let s = len(unknowns) > 1 ? 's' : '' + return s:err(printf('Unknown plugin%s: %s', s, join(unknowns, ', '))) + end + let unloaded = filter(copy(names), '!get(s:loaded, v:val, 0)') + if !empty(unloaded) + for name in unloaded + call s:lod([name], ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + endfor + call s:dobufread(unloaded) + return 1 + end + return 0 +endfunction + +function! s:remove_triggers(name) + if !has_key(s:triggers, a:name) + return + endif + for cmd in s:triggers[a:name].cmd + execute 'silent! delc' cmd + endfor + for map in s:triggers[a:name].map + execute 'silent! unmap' map + execute 'silent! iunmap' map + endfor + call remove(s:triggers, a:name) +endfunction + +function! s:lod(names, types, ...) + for name in a:names + call s:remove_triggers(name) + let s:loaded[name] = 1 + endfor + call s:reorg_rtp() + + for name in a:names + let rtp = s:rtp(g:plugs[name]) + for dir in a:types + call s:source(rtp, dir.'/**/*.vim') + endfor + if a:0 + if !s:source(rtp, a:1) && !empty(s:glob(rtp, a:2)) + execute 'runtime' a:1 + endif + call s:source(rtp, a:2) + endif + call s:doautocmd('User', name) + endfor +endfunction + +function! 
s:lod_ft(pat, names) + let syn = 'syntax/'.a:pat.'.vim' + call s:lod(a:names, ['plugin', 'after/plugin'], syn, 'after/'.syn) + execute 'autocmd! PlugLOD FileType' a:pat + call s:doautocmd('filetypeplugin', 'FileType') + call s:doautocmd('filetypeindent', 'FileType') +endfunction + +function! s:lod_cmd(cmd, bang, l1, l2, args, names) + call s:lod(a:names, ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + call s:dobufread(a:names) + execute printf('%s%s%s %s', (a:l1 == a:l2 ? '' : (a:l1.','.a:l2)), a:cmd, a:bang, a:args) +endfunction + +function! s:lod_map(map, names, with_prefix, prefix) + call s:lod(a:names, ['ftdetect', 'after/ftdetect', 'plugin', 'after/plugin']) + call s:dobufread(a:names) + let extra = '' + while 1 + let c = getchar(0) + if c == 0 + break + endif + let extra .= nr2char(c) + endwhile + + if a:with_prefix + let prefix = v:count ? v:count : '' + let prefix .= '"'.v:register.a:prefix + if mode(1) == 'no' + if v:operator == 'c' + let prefix = "\" . prefix + endif + let prefix .= v:operator + endif + call feedkeys(prefix, 'n') + endif + call feedkeys(substitute(a:map, '^', "\", '') . extra) +endfunction + +function! plug#(repo, ...) + if a:0 > 1 + return s:err('Invalid number of arguments (1..2)') + endif + + try + let repo = s:trim(a:repo) + let opts = a:0 == 1 ? s:parse_options(a:1) : s:base_spec + let name = get(opts, 'as', fnamemodify(repo, ':t:s?\.git$??')) + let spec = extend(s:infer_properties(name, repo), opts) + if !has_key(g:plugs, name) + call add(g:plugs_order, name) + endif + let g:plugs[name] = spec + let s:loaded[name] = get(s:loaded, name, 0) + catch + return s:err(v:exception) + endtry +endfunction + +function! 
s:parse_options(arg) + let opts = copy(s:base_spec) + let type = type(a:arg) + if type == s:TYPE.string + let opts.tag = a:arg + elseif type == s:TYPE.dict + call extend(opts, a:arg) + if has_key(opts, 'dir') + let opts.dir = s:dirpath(expand(opts.dir)) + endif + else + throw 'Invalid argument type (expected: string or dictionary)' + endif + return opts +endfunction + +function! s:infer_properties(name, repo) + let repo = a:repo + if s:is_local_plug(repo) + return { 'dir': s:dirpath(expand(repo)) } + else + if repo =~ ':' + let uri = repo + else + if repo !~ '/' + throw printf('Invalid argument: %s (implicit `vim-scripts'' expansion is deprecated)', repo) + endif + let fmt = get(g:, 'plug_url_format', 'https://git::@github.com/%s.git') + let uri = printf(fmt, repo) + endif + return { 'dir': s:dirpath(g:plug_home.'/'.a:name), 'uri': uri } + endif +endfunction + +function! s:install(force, names) + call s:update_impl(0, a:force, a:names) +endfunction + +function! s:update(force, names) + call s:update_impl(1, a:force, a:names) +endfunction + +function! plug#helptags() + if !exists('g:plugs') + return s:err('plug#begin was not called') + endif + for spec in values(g:plugs) + let docd = join([s:rtp(spec), 'doc'], '/') + if isdirectory(docd) + silent! execute 'helptags' s:esc(docd) + endif + endfor + return 1 +endfunction + +function! 
s:syntax() + syntax clear + syntax region plug1 start=/\%1l/ end=/\%2l/ contains=plugNumber + syntax region plug2 start=/\%2l/ end=/\%3l/ contains=plugBracket,plugX + syn match plugNumber /[0-9]\+[0-9.]*/ contained + syn match plugBracket /[[\]]/ contained + syn match plugX /x/ contained + syn match plugDash /^-/ + syn match plugPlus /^+/ + syn match plugStar /^*/ + syn match plugMessage /\(^- \)\@<=.*/ + syn match plugName /\(^- \)\@<=[^ ]*:/ + syn match plugSha /\%(: \)\@<=[0-9a-f]\{4,}$/ + syn match plugTag /(tag: [^)]\+)/ + syn match plugInstall /\(^+ \)\@<=[^:]*/ + syn match plugUpdate /\(^* \)\@<=[^:]*/ + syn match plugCommit /^ \X*[0-9a-f]\{7,9} .*/ contains=plugRelDate,plugEdge,plugTag + syn match plugEdge /^ \X\+$/ + syn match plugEdge /^ \X*/ contained nextgroup=plugSha + syn match plugSha /[0-9a-f]\{7,9}/ contained + syn match plugRelDate /([^)]*)$/ contained + syn match plugNotLoaded /(not loaded)$/ + syn match plugError /^x.*/ + syn region plugDeleted start=/^\~ .*/ end=/^\ze\S/ + syn match plugH2 /^.*:\n-\+$/ + syn keyword Function PlugInstall PlugStatus PlugUpdate PlugClean + hi def link plug1 Title + hi def link plug2 Repeat + hi def link plugH2 Type + hi def link plugX Exception + hi def link plugBracket Structure + hi def link plugNumber Number + + hi def link plugDash Special + hi def link plugPlus Constant + hi def link plugStar Boolean + + hi def link plugMessage Function + hi def link plugName Label + hi def link plugInstall Function + hi def link plugUpdate Type + + hi def link plugError Error + hi def link plugDeleted Ignore + hi def link plugRelDate Comment + hi def link plugEdge PreProc + hi def link plugSha Identifier + hi def link plugTag Constant + + hi def link plugNotLoaded Comment +endfunction + +function! s:lpad(str, len) + return a:str . repeat(' ', a:len - len(a:str)) +endfunction + +function! s:lines(msg) + return split(a:msg, "[\r\n]") +endfunction + +function! 
s:lastline(msg) + return get(s:lines(a:msg), -1, '') +endfunction + +function! s:new_window() + execute get(g:, 'plug_window', 'vertical topleft new') +endfunction + +function! s:plug_window_exists() + let buflist = tabpagebuflist(s:plug_tab) + return !empty(buflist) && index(buflist, s:plug_buf) >= 0 +endfunction + +function! s:switch_in() + if !s:plug_window_exists() + return 0 + endif + + if winbufnr(0) != s:plug_buf + let s:pos = [tabpagenr(), winnr(), winsaveview()] + execute 'normal!' s:plug_tab.'gt' + let winnr = bufwinnr(s:plug_buf) + execute winnr.'wincmd w' + call add(s:pos, winsaveview()) + else + let s:pos = [winsaveview()] + endif + + setlocal modifiable + return 1 +endfunction + +function! s:switch_out(...) + call winrestview(s:pos[-1]) + setlocal nomodifiable + if a:0 > 0 + execute a:1 + endif + + if len(s:pos) > 1 + execute 'normal!' s:pos[0].'gt' + execute s:pos[1] 'wincmd w' + call winrestview(s:pos[2]) + endif +endfunction + +function! s:finish_bindings() + nnoremap R :call retry() + nnoremap D :PlugDiff + nnoremap S :PlugStatus + nnoremap U :call status_update() + xnoremap U :call status_update() + nnoremap ]] :silent! call section('') + nnoremap [[ :silent! call section('b') +endfunction + +function! s:prepare(...) + if empty(getcwd()) + throw 'Invalid current working directory. Cannot proceed.' + endif + + for evar in ['$GIT_DIR', '$GIT_WORK_TREE'] + if exists(evar) + throw evar.' detected. Cannot proceed.' + endif + endfor + + call s:job_abort() + if s:switch_in() + if b:plug_preview == 1 + pc + endif + enew + else + call s:new_window() + endif + + nnoremap q :if b:plug_preview==1pcendifbd + if a:0 == 0 + call s:finish_bindings() + endif + let b:plug_preview = -1 + let s:plug_tab = tabpagenr() + let s:plug_buf = winbufnr(0) + call s:assign_name() + + for k in ['', 'L', 'o', 'X', 'd', 'dd'] + execute 'silent! 
unmap ' k + endfor + setlocal buftype=nofile bufhidden=wipe nobuflisted nolist noswapfile nowrap cursorline modifiable nospell + if exists('+colorcolumn') + setlocal colorcolumn= + endif + setf vim-plug + if exists('g:syntax_on') + call s:syntax() + endif +endfunction + +function! s:assign_name() + " Assign buffer name + let prefix = '[Plugins]' + let name = prefix + let idx = 2 + while bufexists(name) + let name = printf('%s (%s)', prefix, idx) + let idx = idx + 1 + endwhile + silent! execute 'f' fnameescape(name) +endfunction + +function! s:chsh(swap) + let prev = [&shell, &shellcmdflag, &shellredir] + if s:is_win + set shell=cmd.exe shellcmdflag=/c shellredir=>%s\ 2>&1 + elseif a:swap + set shell=sh shellredir=>%s\ 2>&1 + endif + return prev +endfunction + +function! s:bang(cmd, ...) + try + let [sh, shellcmdflag, shrd] = s:chsh(a:0) + " FIXME: Escaping is incomplete. We could use shellescape with eval, + " but it won't work on Windows. + let cmd = a:0 ? s:with_cd(a:cmd, a:1) : a:cmd + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + let g:_plug_bang = (s:is_win && has('gui_running') ? 'silent ' : '').'!'.escape(cmd, '#!%') + execute "normal! :execute g:_plug_bang\\" + finally + unlet g:_plug_bang + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry + return v:shell_error ? 'Exit status: ' . v:shell_error : '' +endfunction + +function! s:regress_bar() + let bar = substitute(getline(2)[1:-2], '.*\zs=', 'x', '') + call s:progress_bar(2, bar, len(bar)) +endfunction + +function! s:is_updated(dir) + return !empty(s:system_chomp('git log --pretty=format:"%h" "HEAD...HEAD@{1}"', a:dir)) +endfunction + +function! s:do(pull, force, todo) + for [name, spec] in items(a:todo) + if !isdirectory(spec.dir) + continue + endif + let installed = has_key(s:update.new, name) + let updated = installed ? 
0 : + \ (a:pull && index(s:update.errors, name) < 0 && s:is_updated(spec.dir)) + if a:force || installed || updated + execute 'cd' s:esc(spec.dir) + call append(3, '- Post-update hook for '. name .' ... ') + let error = '' + let type = type(spec.do) + if type == s:TYPE.string + if spec.do[0] == ':' + if !get(s:loaded, name, 0) + let s:loaded[name] = 1 + call s:reorg_rtp() + endif + call s:load_plugin(spec) + try + execute spec.do[1:] + catch + let error = v:exception + endtry + if !s:plug_window_exists() + cd - + throw 'Warning: vim-plug was terminated by the post-update hook of '.name + endif + else + let error = s:bang(spec.do) + endif + elseif type == s:TYPE.funcref + try + let status = installed ? 'installed' : (updated ? 'updated' : 'unchanged') + call spec.do({ 'name': name, 'status': status, 'force': a:force }) + catch + let error = v:exception + endtry + else + let error = 'Invalid hook type' + endif + call s:switch_in() + call setline(4, empty(error) ? (getline(4) . 'OK') + \ : ('x' . getline(4)[1:] . error)) + if !empty(error) + call add(s:update.errors, name) + call s:regress_bar() + endif + cd - + endif + endfor +endfunction + +function! s:hash_match(a, b) + return stridx(a:a, a:b) == 0 || stridx(a:b, a:a) == 0 +endfunction + +function! s:checkout(spec) + let sha = a:spec.commit + let output = s:system('git rev-parse HEAD', a:spec.dir) + if !v:shell_error && !s:hash_match(sha, s:lines(output)[0]) + let output = s:system( + \ 'git fetch --depth 999999 && git checkout '.s:esc(sha).' --', a:spec.dir) + endif + return output +endfunction + +function! s:finish(pull) + let new_frozen = len(filter(keys(s:update.new), 'g:plugs[v:val].frozen')) + if new_frozen + let s = new_frozen > 1 ? 's' : '' + call append(3, printf('- Installed %d frozen plugin%s', new_frozen, s)) + endif + call append(3, '- Finishing ... ') | 4 + redraw + call plug#helptags() + call plug#end() + call setline(4, getline(4) . 
'Done!') + redraw + let msgs = [] + if !empty(s:update.errors) + call add(msgs, "Press 'R' to retry.") + endif + if a:pull && len(s:update.new) < len(filter(getline(5, '$'), + \ "v:val =~ '^- ' && v:val !~# 'Already up.to.date'")) + call add(msgs, "Press 'D' to see the updated changes.") + endif + echo join(msgs, ' ') + call s:finish_bindings() +endfunction + +function! s:retry() + if empty(s:update.errors) + return + endif + echo + call s:update_impl(s:update.pull, s:update.force, + \ extend(copy(s:update.errors), [s:update.threads])) +endfunction + +function! s:is_managed(name) + return has_key(g:plugs[a:name], 'uri') +endfunction + +function! s:names(...) + return sort(filter(keys(g:plugs), 'stridx(v:val, a:1) == 0 && s:is_managed(v:val)')) +endfunction + +function! s:check_ruby() + silent! ruby require 'thread'; VIM::command("let g:plug_ruby = '#{RUBY_VERSION}'") + if !exists('g:plug_ruby') + redraw! + return s:warn('echom', 'Warning: Ruby interface is broken') + endif + let ruby_version = split(g:plug_ruby, '\.') + unlet g:plug_ruby + return s:version_requirement(ruby_version, [1, 8, 7]) +endfunction + +function! s:update_impl(pull, force, args) abort + let sync = index(a:args, '--sync') >= 0 || has('vim_starting') + let args = filter(copy(a:args), 'v:val != "--sync"') + let threads = (len(args) > 0 && args[-1] =~ '^[1-9][0-9]*$') ? + \ remove(args, -1) : get(g:, 'plug_threads', 16) + + let managed = filter(copy(g:plugs), 's:is_managed(v:key)') + let todo = empty(args) ? filter(managed, '!v:val.frozen || !isdirectory(v:val.dir)') : + \ filter(managed, 'index(args, v:key) >= 0') + + if empty(todo) + return s:warn('echo', 'No plugin to '. (a:pull ? 'update' : 'install')) + endif + + if !s:is_win && s:git_version_requirement(2, 3) + let s:git_terminal_prompt = exists('$GIT_TERMINAL_PROMPT') ? 
$GIT_TERMINAL_PROMPT : '' + let $GIT_TERMINAL_PROMPT = 0 + for plug in values(todo) + let plug.uri = substitute(plug.uri, + \ '^https://git::@github\.com', 'https://github.com', '') + endfor + endif + + if !isdirectory(g:plug_home) + try + call mkdir(g:plug_home, 'p') + catch + return s:err(printf('Invalid plug directory: %s. '. + \ 'Try to call plug#begin with a valid directory', g:plug_home)) + endtry + endif + + if has('nvim') && !exists('*jobwait') && threads > 1 + call s:warn('echom', '[vim-plug] Update Neovim for parallel installer') + endif + + let use_job = s:nvim || s:vim8 + let python = (has('python') || has('python3')) && !use_job + let ruby = has('ruby') && !use_job && (v:version >= 703 || v:version == 702 && has('patch374')) && !(s:is_win && has('gui_running')) && threads > 1 && s:check_ruby() + + let s:update = { + \ 'start': reltime(), + \ 'all': todo, + \ 'todo': copy(todo), + \ 'errors': [], + \ 'pull': a:pull, + \ 'force': a:force, + \ 'new': {}, + \ 'threads': (python || ruby || use_job) ? min([len(todo), threads]) : 1, + \ 'bar': '', + \ 'fin': 0 + \ } + + call s:prepare(1) + call append(0, ['', '']) + normal! 2G + silent! redraw + + let s:clone_opt = get(g:, 'plug_shallow', 1) ? + \ '--depth 1' . (s:git_version_requirement(1, 7, 10) ? ' --no-single-branch' : '') : '' + + if has('win32unix') + let s:clone_opt .= ' -c core.eol=lf -c core.autocrlf=input' + endif + + let s:submodule_opt = s:git_version_requirement(2, 8) ? 
' --jobs='.threads : '' + + " Python version requirement (>= 2.7) + if python && !has('python3') && !ruby && !use_job && s:update.threads > 1 + redir => pyv + silent python import platform; print platform.python_version() + redir END + let python = s:version_requirement( + \ map(split(split(pyv)[0], '\.'), 'str2nr(v:val)'), [2, 6]) + endif + + if (python || ruby) && s:update.threads > 1 + try + let imd = &imd + if s:mac_gui + set noimd + endif + if ruby + call s:update_ruby() + else + call s:update_python() + endif + catch + let lines = getline(4, '$') + let printed = {} + silent! 4,$d _ + for line in lines + let name = s:extract_name(line, '.', '') + if empty(name) || !has_key(printed, name) + call append('$', line) + if !empty(name) + let printed[name] = 1 + if line[0] == 'x' && index(s:update.errors, name) < 0 + call add(s:update.errors, name) + end + endif + endif + endfor + finally + let &imd = imd + call s:update_finish() + endtry + else + call s:update_vim() + while use_job && sync + sleep 100m + if s:update.fin + break + endif + endwhile + endif +endfunction + +function! s:log4(name, msg) + call setline(4, printf('- %s (%s)', a:msg, a:name)) + redraw +endfunction + +function! s:update_finish() + if exists('s:git_terminal_prompt') + let $GIT_TERMINAL_PROMPT = s:git_terminal_prompt + endif + if s:switch_in() + call append(3, '- Updating ...') | 4 + for [name, spec] in items(filter(copy(s:update.all), 'index(s:update.errors, v:key) < 0 && (s:update.force || s:update.pull || has_key(s:update.new, v:key))')) + let [pos, _] = s:logpos(name) + if !pos + continue + endif + if has_key(spec, 'commit') + call s:log4(name, 'Checking out '.spec.commit) + let out = s:checkout(spec) + elseif has_key(spec, 'tag') + let tag = spec.tag + if tag =~ '\*' + let tags = s:lines(s:system('git tag --list '.s:shellesc(tag).' 
--sort -version:refname 2>&1', spec.dir)) + if !v:shell_error && !empty(tags) + let tag = tags[0] + call s:log4(name, printf('Latest tag for %s -> %s', spec.tag, tag)) + call append(3, '') + endif + endif + call s:log4(name, 'Checking out '.tag) + let out = s:system('git checkout -q '.s:esc(tag).' -- 2>&1', spec.dir) + else + let branch = s:esc(get(spec, 'branch', 'master')) + call s:log4(name, 'Merging origin/'.branch) + let out = s:system('git checkout -q '.branch.' -- 2>&1' + \. (has_key(s:update.new, name) ? '' : ('&& git merge --ff-only origin/'.branch.' 2>&1')), spec.dir) + endif + if !v:shell_error && filereadable(spec.dir.'/.gitmodules') && + \ (s:update.force || has_key(s:update.new, name) || s:is_updated(spec.dir)) + call s:log4(name, 'Updating submodules. This may take a while.') + let out .= s:bang('git submodule update --init --recursive'.s:submodule_opt.' 2>&1', spec.dir) + endif + let msg = s:format_message(v:shell_error ? 'x': '-', name, out) + if v:shell_error + call add(s:update.errors, name) + call s:regress_bar() + silent execute pos 'd _' + call append(4, msg) | 4 + elseif !empty(out) + call setline(pos, msg[0]) + endif + redraw + endfor + silent 4 d _ + try + call s:do(s:update.pull, s:update.force, filter(copy(s:update.all), 'index(s:update.errors, v:key) < 0 && has_key(v:val, "do")')) + catch + call s:warn('echom', v:exception) + call s:warn('echo', '') + return + endtry + call s:finish(s:update.pull) + call setline(1, 'Updated. Elapsed time: ' . split(reltimestr(reltime(s:update.start)))[0] . ' sec.') + call s:switch_out('normal! gg') + endif +endfunction + +function! s:job_abort() + if (!s:nvim && !s:vim8) || !exists('s:jobs') + return + endif + + for [name, j] in items(s:jobs) + if s:nvim + silent! call jobstop(j.jobid) + elseif s:vim8 + silent! call job_stop(j.jobid) + endif + if j.new + call s:system('rm -rf ' . s:shellesc(g:plugs[name].dir)) + endif + endfor + let s:jobs = {} +endfunction + +function! 
s:last_non_empty_line(lines) + let len = len(a:lines) + for idx in range(len) + let line = a:lines[len-idx-1] + if !empty(line) + return line + endif + endfor + return '' +endfunction + +function! s:job_out_cb(self, data) abort + let self = a:self + let data = remove(self.lines, -1) . a:data + let lines = map(split(data, "\n", 1), 'split(v:val, "\r", 1)[-1]') + call extend(self.lines, lines) + " To reduce the number of buffer updates + let self.tick = get(self, 'tick', -1) + 1 + if !self.running || self.tick % len(s:jobs) == 0 + let bullet = self.running ? (self.new ? '+' : '*') : (self.error ? 'x' : '-') + let result = self.error ? join(self.lines, "\n") : s:last_non_empty_line(self.lines) + call s:log(bullet, self.name, result) + endif +endfunction + +function! s:job_exit_cb(self, data) abort + let a:self.running = 0 + let a:self.error = a:data != 0 + call s:reap(a:self.name) + call s:tick() +endfunction + +function! s:job_cb(fn, job, ch, data) + if !s:plug_window_exists() " plug window closed + return s:job_abort() + endif + call call(a:fn, [a:job, a:data]) +endfunction + +function! s:nvim_cb(job_id, data, event) dict abort + return a:event == 'stdout' ? + \ s:job_cb('s:job_out_cb', self, 0, join(a:data, "\n")) : + \ s:job_cb('s:job_exit_cb', self, 0, a:data) +endfunction + +function! s:spawn(name, cmd, opts) + let job = { 'name': a:name, 'running': 1, 'error': 0, 'lines': [''], + \ 'batchfile': (s:is_win && (s:nvim || s:vim8)) ? tempname().'.bat' : '', + \ 'new': get(a:opts, 'new', 0) } + let s:jobs[a:name] = job + let cmd = has_key(a:opts, 'dir') ? s:with_cd(a:cmd, a:opts.dir) : a:cmd + if !empty(job.batchfile) + call writefile(["@echo off\r", cmd . "\r"], job.batchfile) + let cmd = job.batchfile + endif + let argv = add(s:is_win ? 
['cmd', '/c'] : ['sh', '-c'], cmd) + + if s:nvim + call extend(job, { + \ 'on_stdout': function('s:nvim_cb'), + \ 'on_exit': function('s:nvim_cb'), + \ }) + let jid = jobstart(argv, job) + if jid > 0 + let job.jobid = jid + else + let job.running = 0 + let job.error = 1 + let job.lines = [jid < 0 ? argv[0].' is not executable' : + \ 'Invalid arguments (or job table is full)'] + endif + elseif s:vim8 + let jid = job_start(s:is_win ? join(argv, ' ') : argv, { + \ 'out_cb': function('s:job_cb', ['s:job_out_cb', job]), + \ 'exit_cb': function('s:job_cb', ['s:job_exit_cb', job]), + \ 'out_mode': 'raw' + \}) + if job_status(jid) == 'run' + let job.jobid = jid + else + let job.running = 0 + let job.error = 1 + let job.lines = ['Failed to start job'] + endif + else + let job.lines = s:lines(call('s:system', [cmd])) + let job.error = v:shell_error != 0 + let job.running = 0 + endif +endfunction + +function! s:reap(name) + let job = s:jobs[a:name] + if job.error + call add(s:update.errors, a:name) + elseif get(job, 'new', 0) + let s:update.new[a:name] = 1 + endif + let s:update.bar .= job.error ? 'x' : '=' + + let bullet = job.error ? 'x' : '-' + let result = job.error ? join(job.lines, "\n") : s:last_non_empty_line(job.lines) + call s:log(bullet, a:name, empty(result) ? 'OK' : result) + call s:bar() + + if has_key(job, 'batchfile') && !empty(job.batchfile) + call delete(job.batchfile) + endif + call remove(s:jobs, a:name) +endfunction + +function! s:bar() + if s:switch_in() + let total = len(s:update.all) + call setline(1, (s:update.pull ? 'Updating' : 'Installing'). + \ ' plugins ('.len(s:update.bar).'/'.total.')') + call s:progress_bar(2, s:update.bar, total) + call s:switch_out() + endif +endfunction + +function! s:logpos(name) + for i in range(4, line('$')) + if getline(i) =~# '^[-+x*] '.a:name.':' + for j in range(i + 1, line('$')) + if getline(j) !~ '^ ' + return [i, j - 1] + endif + endfor + return [i, i] + endif + endfor + return [0, 0] +endfunction + +function! 
s:log(bullet, name, lines) + if s:switch_in() + let [b, e] = s:logpos(a:name) + if b > 0 + silent execute printf('%d,%d d _', b, e) + if b > winheight('.') + let b = 4 + endif + else + let b = 4 + endif + " FIXME For some reason, nomodifiable is set after :d in vim8 + setlocal modifiable + call append(b - 1, s:format_message(a:bullet, a:name, a:lines)) + call s:switch_out() + endif +endfunction + +function! s:update_vim() + let s:jobs = {} + + call s:bar() + call s:tick() +endfunction + +function! s:tick() + let pull = s:update.pull + let prog = s:progress_opt(s:nvim || s:vim8) +while 1 " Without TCO, Vim stack is bound to explode + if empty(s:update.todo) + if empty(s:jobs) && !s:update.fin + call s:update_finish() + let s:update.fin = 1 + endif + return + endif + + let name = keys(s:update.todo)[0] + let spec = remove(s:update.todo, name) + let new = empty(globpath(spec.dir, '.git', 1)) + + call s:log(new ? '+' : '*', name, pull ? 'Updating ...' : 'Installing ...') + redraw + + let has_tag = has_key(spec, 'tag') + if !new + let [error, _] = s:git_validate(spec, 0) + if empty(error) + if pull + let fetch_opt = (has_tag && !empty(globpath(spec.dir, '.git/shallow'))) ? '--depth 99999999' : '' + call s:spawn(name, printf('git fetch %s %s 2>&1', fetch_opt, prog), { 'dir': spec.dir }) + else + let s:jobs[name] = { 'running': 0, 'lines': ['Already installed'], 'error': 0 } + endif + else + let s:jobs[name] = { 'running': 0, 'lines': s:lines(error), 'error': 1 } + endif + else + call s:spawn(name, + \ printf('git clone %s %s %s %s 2>&1', + \ has_tag ? '' : s:clone_opt, + \ prog, + \ s:shellesc(spec.uri), + \ s:shellesc(s:trim(spec.dir))), { 'new': 1 }) + endif + + if !s:jobs[name].running + call s:reap(name) + endif + if len(s:jobs) >= s:update.threads + break + endif +endwhile +endfunction + +function! s:update_python() +let py_exe = has('python') ? 
'python' : 'python3' +execute py_exe "<< EOF" +import datetime +import functools +import os +try: + import queue +except ImportError: + import Queue as queue +import random +import re +import shutil +import signal +import subprocess +import tempfile +import threading as thr +import time +import traceback +import vim + +G_NVIM = vim.eval("has('nvim')") == '1' +G_PULL = vim.eval('s:update.pull') == '1' +G_RETRIES = int(vim.eval('get(g:, "plug_retries", 2)')) + 1 +G_TIMEOUT = int(vim.eval('get(g:, "plug_timeout", 60)')) +G_CLONE_OPT = vim.eval('s:clone_opt') +G_PROGRESS = vim.eval('s:progress_opt(1)') +G_LOG_PROB = 1.0 / int(vim.eval('s:update.threads')) +G_STOP = thr.Event() +G_IS_WIN = vim.eval('s:is_win') == '1' + +class PlugError(Exception): + def __init__(self, msg): + self.msg = msg +class CmdTimedOut(PlugError): + pass +class CmdFailed(PlugError): + pass +class InvalidURI(PlugError): + pass +class Action(object): + INSTALL, UPDATE, ERROR, DONE = ['+', '*', 'x', '-'] + +class Buffer(object): + def __init__(self, lock, num_plugs, is_pull): + self.bar = '' + self.event = 'Updating' if is_pull else 'Installing' + self.lock = lock + self.maxy = int(vim.eval('winheight(".")')) + self.num_plugs = num_plugs + + def __where(self, name): + """ Find first line with name in current buffer. Return line num. """ + found, lnum = False, 0 + matcher = re.compile('^[-+x*] {0}:'.format(name)) + for line in vim.current.buffer: + if matcher.search(line) is not None: + found = True + break + lnum += 1 + + if not found: + lnum = -1 + return lnum + + def header(self): + curbuf = vim.current.buffer + curbuf[0] = self.event + ' plugins ({0}/{1})'.format(len(self.bar), self.num_plugs) + + num_spaces = self.num_plugs - len(self.bar) + curbuf[1] = '[{0}{1}]'.format(self.bar, num_spaces * ' ') + + with self.lock: + vim.command('normal! 
2G') + vim.command('redraw') + + def write(self, action, name, lines): + first, rest = lines[0], lines[1:] + msg = ['{0} {1}{2}{3}'.format(action, name, ': ' if first else '', first)] + msg.extend([' ' + line for line in rest]) + + try: + if action == Action.ERROR: + self.bar += 'x' + vim.command("call add(s:update.errors, '{0}')".format(name)) + elif action == Action.DONE: + self.bar += '=' + + curbuf = vim.current.buffer + lnum = self.__where(name) + if lnum != -1: # Found matching line num + del curbuf[lnum] + if lnum > self.maxy and action in set([Action.INSTALL, Action.UPDATE]): + lnum = 3 + else: + lnum = 3 + curbuf.append(msg, lnum) + + self.header() + except vim.error: + pass + +class Command(object): + CD = 'cd /d' if G_IS_WIN else 'cd' + + def __init__(self, cmd, cmd_dir=None, timeout=60, cb=None, clean=None): + self.cmd = cmd + if cmd_dir: + self.cmd = '{0} {1} && {2}'.format(Command.CD, cmd_dir, self.cmd) + self.timeout = timeout + self.callback = cb if cb else (lambda msg: None) + self.clean = clean if clean else (lambda: None) + self.proc = None + + @property + def alive(self): + """ Returns true only if command still running. """ + return self.proc and self.proc.poll() is None + + def execute(self, ntries=3): + """ Execute the command with ntries if CmdTimedOut. + Returns the output of the command if no Exception. + """ + attempt, finished, limit = 0, False, self.timeout + + while not finished: + try: + attempt += 1 + result = self.try_command() + finished = True + return result + except CmdTimedOut: + if attempt != ntries: + self.notify_retry() + self.timeout += limit + else: + raise + + def notify_retry(self): + """ Retry required for command, notify user. """ + for count in range(3, 0, -1): + if G_STOP.is_set(): + raise KeyboardInterrupt + msg = 'Timeout. 
Will retry in {0} second{1} ...'.format( + count, 's' if count != 1 else '') + self.callback([msg]) + time.sleep(1) + self.callback(['Retrying ...']) + + def try_command(self): + """ Execute a cmd & poll for callback. Returns list of output. + Raises CmdFailed -> return code for Popen isn't 0 + Raises CmdTimedOut -> command exceeded timeout without new output + """ + first_line = True + + try: + tfile = tempfile.NamedTemporaryFile(mode='w+b') + preexec_fn = not G_IS_WIN and os.setsid or None + self.proc = subprocess.Popen(self.cmd, stdout=tfile, + stderr=subprocess.STDOUT, + stdin=subprocess.PIPE, shell=True, + preexec_fn=preexec_fn) + thrd = thr.Thread(target=(lambda proc: proc.wait()), args=(self.proc,)) + thrd.start() + + thread_not_started = True + while thread_not_started: + try: + thrd.join(0.1) + thread_not_started = False + except RuntimeError: + pass + + while self.alive: + if G_STOP.is_set(): + raise KeyboardInterrupt + + if first_line or random.random() < G_LOG_PROB: + first_line = False + line = '' if G_IS_WIN else nonblock_read(tfile.name) + if line: + self.callback([line]) + + time_diff = time.time() - os.path.getmtime(tfile.name) + if time_diff > self.timeout: + raise CmdTimedOut(['Timeout!']) + + thrd.join(0.5) + + tfile.seek(0) + result = [line.decode('utf-8', 'replace').rstrip() for line in tfile] + + if self.proc.returncode != 0: + raise CmdFailed([''] + result) + + return result + except: + self.terminate() + raise + + def terminate(self): + """ Terminate process and cleanup. 
""" + if self.alive: + if G_IS_WIN: + os.kill(self.proc.pid, signal.SIGINT) + else: + os.killpg(self.proc.pid, signal.SIGTERM) + self.clean() + +class Plugin(object): + def __init__(self, name, args, buf_q, lock): + self.name = name + self.args = args + self.buf_q = buf_q + self.lock = lock + self.tag = args.get('tag', 0) + + def manage(self): + try: + if os.path.exists(self.args['dir']): + self.update() + else: + self.install() + with self.lock: + thread_vim_command("let s:update.new['{0}'] = 1".format(self.name)) + except PlugError as exc: + self.write(Action.ERROR, self.name, exc.msg) + except KeyboardInterrupt: + G_STOP.set() + self.write(Action.ERROR, self.name, ['Interrupted!']) + except: + # Any exception except those above print stack trace + msg = 'Trace:\n{0}'.format(traceback.format_exc().rstrip()) + self.write(Action.ERROR, self.name, msg.split('\n')) + raise + + def install(self): + target = self.args['dir'] + if target[-1] == '\\': + target = target[0:-1] + + def clean(target): + def _clean(): + try: + shutil.rmtree(target) + except OSError: + pass + return _clean + + self.write(Action.INSTALL, self.name, ['Installing ...']) + callback = functools.partial(self.write, Action.INSTALL, self.name) + cmd = 'git clone {0} {1} {2} {3} 2>&1'.format( + '' if self.tag else G_CLONE_OPT, G_PROGRESS, self.args['uri'], + esc(target)) + com = Command(cmd, None, G_TIMEOUT, callback, clean(target)) + result = com.execute(G_RETRIES) + self.write(Action.DONE, self.name, result[-1:]) + + def repo_uri(self): + cmd = 'git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url' + command = Command(cmd, self.args['dir'], G_TIMEOUT,) + result = command.execute(G_RETRIES) + return result[-1] + + def update(self): + actual_uri = self.repo_uri() + expect_uri = self.args['uri'] + regex = re.compile(r'^(?:\w+://)?(?:[^@/]*@)?([^:/]*(?::[0-9]*)?)[:/](.*?)(?:\.git)?/?$') + ma = regex.match(actual_uri) + mb = regex.match(expect_uri) + if ma is None or mb is 
None or ma.groups() != mb.groups(): + msg = ['', + 'Invalid URI: {0}'.format(actual_uri), + 'Expected {0}'.format(expect_uri), + 'PlugClean required.'] + raise InvalidURI(msg) + + if G_PULL: + self.write(Action.UPDATE, self.name, ['Updating ...']) + callback = functools.partial(self.write, Action.UPDATE, self.name) + fetch_opt = '--depth 99999999' if self.tag and os.path.isfile(os.path.join(self.args['dir'], '.git/shallow')) else '' + cmd = 'git fetch {0} {1} 2>&1'.format(fetch_opt, G_PROGRESS) + com = Command(cmd, self.args['dir'], G_TIMEOUT, callback) + result = com.execute(G_RETRIES) + self.write(Action.DONE, self.name, result[-1:]) + else: + self.write(Action.DONE, self.name, ['Already installed']) + + def write(self, action, name, msg): + self.buf_q.put((action, name, msg)) + +class PlugThread(thr.Thread): + def __init__(self, tname, args): + super(PlugThread, self).__init__() + self.tname = tname + self.args = args + + def run(self): + thr.current_thread().name = self.tname + buf_q, work_q, lock = self.args + + try: + while not G_STOP.is_set(): + name, args = work_q.get_nowait() + plug = Plugin(name, args, buf_q, lock) + plug.manage() + work_q.task_done() + except queue.Empty: + pass + +class RefreshThread(thr.Thread): + def __init__(self, lock): + super(RefreshThread, self).__init__() + self.lock = lock + self.running = True + + def run(self): + while self.running: + with self.lock: + thread_vim_command('noautocmd normal! a') + time.sleep(0.33) + + def stop(self): + self.running = False + +if G_NVIM: + def thread_vim_command(cmd): + vim.session.threadsafe_call(lambda: vim.command(cmd)) +else: + def thread_vim_command(cmd): + vim.command(cmd) + +def esc(name): + return '"' + name.replace('"', '\"') + '"' + +def nonblock_read(fname): + """ Read a file with nonblock flag. Return the last line. 
""" + fread = os.open(fname, os.O_RDONLY | os.O_NONBLOCK) + buf = os.read(fread, 100000).decode('utf-8', 'replace') + os.close(fread) + + line = buf.rstrip('\r\n') + left = max(line.rfind('\r'), line.rfind('\n')) + if left != -1: + left += 1 + line = line[left:] + + return line + +def main(): + thr.current_thread().name = 'main' + nthreads = int(vim.eval('s:update.threads')) + plugs = vim.eval('s:update.todo') + mac_gui = vim.eval('s:mac_gui') == '1' + + lock = thr.Lock() + buf = Buffer(lock, len(plugs), G_PULL) + buf_q, work_q = queue.Queue(), queue.Queue() + for work in plugs.items(): + work_q.put(work) + + start_cnt = thr.active_count() + for num in range(nthreads): + tname = 'PlugT-{0:02}'.format(num) + thread = PlugThread(tname, (buf_q, work_q, lock)) + thread.start() + if mac_gui: + rthread = RefreshThread(lock) + rthread.start() + + while not buf_q.empty() or thr.active_count() != start_cnt: + try: + action, name, msg = buf_q.get(True, 0.25) + buf.write(action, name, ['OK'] if not msg else msg) + buf_q.task_done() + except queue.Empty: + pass + except KeyboardInterrupt: + G_STOP.set() + + if mac_gui: + rthread.stop() + rthread.join() + +main() +EOF +endfunction + +function! s:update_ruby() + ruby << EOF + module PlugStream + SEP = ["\r", "\n", nil] + def get_line + buffer = '' + loop do + char = readchar rescue return + if SEP.include? char.chr + buffer << $/ + break + else + buffer << char + end + end + buffer + end + end unless defined?(PlugStream) + + def esc arg + %["#{arg.gsub('"', '\"')}"] + end + + def killall pid + pids = [pid] + if /mswin|mingw|bccwin/ =~ RUBY_PLATFORM + pids.each { |pid| Process.kill 'INT', pid.to_i rescue nil } + else + unless `which pgrep 2> /dev/null`.empty? + children = pids + until children.empty? 
+ children = children.map { |pid| + `pgrep -P #{pid}`.lines.map { |l| l.chomp } + }.flatten + pids += children + end + end + pids.each { |pid| Process.kill 'TERM', pid.to_i rescue nil } + end + end + + def compare_git_uri a, b + regex = %r{^(?:\w+://)?(?:[^@/]*@)?([^:/]*(?::[0-9]*)?)[:/](.*?)(?:\.git)?/?$} + regex.match(a).to_a.drop(1) == regex.match(b).to_a.drop(1) + end + + require 'thread' + require 'fileutils' + require 'timeout' + running = true + iswin = VIM::evaluate('s:is_win').to_i == 1 + pull = VIM::evaluate('s:update.pull').to_i == 1 + base = VIM::evaluate('g:plug_home') + all = VIM::evaluate('s:update.todo') + limit = VIM::evaluate('get(g:, "plug_timeout", 60)') + tries = VIM::evaluate('get(g:, "plug_retries", 2)') + 1 + nthr = VIM::evaluate('s:update.threads').to_i + maxy = VIM::evaluate('winheight(".")').to_i + vim7 = VIM::evaluate('v:version').to_i <= 703 && RUBY_PLATFORM =~ /darwin/ + cd = iswin ? 'cd /d' : 'cd' + tot = VIM::evaluate('len(s:update.todo)') || 0 + bar = '' + skip = 'Already installed' + mtx = Mutex.new + take1 = proc { mtx.synchronize { running && all.shift } } + logh = proc { + cnt = bar.length + $curbuf[1] = "#{pull ? 'Updating' : 'Installing'} plugins (#{cnt}/#{tot})" + $curbuf[2] = '[' + bar.ljust(tot) + ']' + VIM::command('normal! 2G') + VIM::command('redraw') + } + where = proc { |name| (1..($curbuf.length)).find { |l| $curbuf[l] =~ /^[-+x*] #{name}:/ } } + log = proc { |name, result, type| + mtx.synchronize do + ing = ![true, false].include?(type) + bar += type ? '=' : 'x' unless ing + b = case type + when :install then '+' when :update then '*' + when true, nil then '-' else + VIM::command("call add(s:update.errors, '#{name}')") + 'x' + end + result = + if type || type.nil? 
+ ["#{b} #{name}: #{result.lines.to_a.last || 'OK'}"] + elsif result =~ /^Interrupted|^Timeout/ + ["#{b} #{name}: #{result}"] + else + ["#{b} #{name}"] + result.lines.map { |l| " " << l } + end + if lnum = where.call(name) + $curbuf.delete lnum + lnum = 4 if ing && lnum > maxy + end + result.each_with_index do |line, offset| + $curbuf.append((lnum || 4) - 1 + offset, line.gsub(/\e\[./, '').chomp) + end + logh.call + end + } + bt = proc { |cmd, name, type, cleanup| + tried = timeout = 0 + begin + tried += 1 + timeout += limit + fd = nil + data = '' + if iswin + Timeout::timeout(timeout) do + tmp = VIM::evaluate('tempname()') + system("(#{cmd}) > #{tmp}") + data = File.read(tmp).chomp + File.unlink tmp rescue nil + end + else + fd = IO.popen(cmd).extend(PlugStream) + first_line = true + log_prob = 1.0 / nthr + while line = Timeout::timeout(timeout) { fd.get_line } + data << line + log.call name, line.chomp, type if name && (first_line || rand < log_prob) + first_line = false + end + fd.close + end + [$? == 0, data.chomp] + rescue Timeout::Error, Interrupt => e + if fd && !fd.closed? + killall fd.pid + fd.close + end + cleanup.call if cleanup + if e.is_a?(Timeout::Error) && tried < tries + 3.downto(1) do |countdown| + s = countdown > 1 ? 's' : '' + log.call name, "Timeout. Will retry in #{countdown} second#{s} ...", type + sleep 1 + end + log.call name, 'Retrying ...', type + retry + end + [false, e.is_a?(Interrupt) ? "Interrupted!" : "Timeout!"] + end + } + main = Thread.current + threads = [] + watcher = Thread.new { + if vim7 + while VIM::evaluate('getchar(1)') + sleep 0.1 + end + else + require 'io/console' # >= Ruby 1.9 + nil until IO.console.getch == 3.chr + end + mtx.synchronize do + running = false + threads.each { |t| t.raise Interrupt } unless vim7 + end + threads.each { |t| t.join rescue nil } + main.kill + } + refresh = Thread.new { + while true + mtx.synchronize do + break unless running + VIM::command('noautocmd normal! 
a') + end + sleep 0.2 + end + } if VIM::evaluate('s:mac_gui') == 1 + + clone_opt = VIM::evaluate('s:clone_opt') + progress = VIM::evaluate('s:progress_opt(1)') + nthr.times do + mtx.synchronize do + threads << Thread.new { + while pair = take1.call + name = pair.first + dir, uri, tag = pair.last.values_at *%w[dir uri tag] + exists = File.directory? dir + ok, result = + if exists + chdir = "#{cd} #{iswin ? dir : esc(dir)}" + ret, data = bt.call "#{chdir} && git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url", nil, nil, nil + current_uri = data.lines.to_a.last + if !ret + if data =~ /^Interrupted|^Timeout/ + [false, data] + else + [false, [data.chomp, "PlugClean required."].join($/)] + end + elsif !compare_git_uri(current_uri, uri) + [false, ["Invalid URI: #{current_uri}", + "Expected: #{uri}", + "PlugClean required."].join($/)] + else + if pull + log.call name, 'Updating ...', :update + fetch_opt = (tag && File.exist?(File.join(dir, '.git/shallow'))) ? '--depth 99999999' : '' + bt.call "#{chdir} && git fetch #{fetch_opt} #{progress} 2>&1", name, :update, nil + else + [true, skip] + end + end + else + d = esc dir.sub(%r{[\\/]+$}, '') + log.call name, 'Installing ...', :install + bt.call "git clone #{clone_opt unless tag} #{progress} #{uri} #{d} 2>&1", name, :install, proc { + FileUtils.rm_rf dir + } + end + mtx.synchronize { VIM::command("let s:update.new['#{name}'] = 1") } if !exists && ok + log.call name, result, ok + end + } if running + end + end + threads.each { |t| t.join rescue nil } + logh.call + refresh.kill if refresh + watcher.kill +EOF +endfunction + +function! s:shellesc_cmd(arg) + let escaped = substitute(a:arg, '[&|<>()@^]', '^&', 'g') + let escaped = substitute(escaped, '%', '%%', 'g') + let escaped = substitute(escaped, '"', '\\^&', 'g') + let escaped = substitute(escaped, '\(\\\+\)\(\\^\)', '\1\1\2', 'g') + return '^"'.substitute(escaped, '\(\\\+\)$', '\1\1', '').'^"' +endfunction + +function! 
s:shellesc(arg) + if &shell =~# 'cmd.exe$' + return s:shellesc_cmd(a:arg) + endif + return shellescape(a:arg) +endfunction + +function! s:glob_dir(path) + return map(filter(s:glob(a:path, '**'), 'isdirectory(v:val)'), 's:dirpath(v:val)') +endfunction + +function! s:progress_bar(line, bar, total) + call setline(a:line, '[' . s:lpad(a:bar, a:total) . ']') +endfunction + +function! s:compare_git_uri(a, b) + " See `git help clone' + " https:// [user@] github.com[:port] / junegunn/vim-plug [.git] + " [git@] github.com[:port] : junegunn/vim-plug [.git] + " file:// / junegunn/vim-plug [/] + " / junegunn/vim-plug [/] + let pat = '^\%(\w\+://\)\='.'\%([^@/]*@\)\='.'\([^:/]*\%(:[0-9]*\)\=\)'.'[:/]'.'\(.\{-}\)'.'\%(\.git\)\=/\?$' + let ma = matchlist(a:a, pat) + let mb = matchlist(a:b, pat) + return ma[1:2] ==# mb[1:2] +endfunction + +function! s:format_message(bullet, name, message) + if a:bullet != 'x' + return [printf('%s %s: %s', a:bullet, a:name, s:lastline(a:message))] + else + let lines = map(s:lines(a:message), '" ".v:val') + return extend([printf('x %s:', a:name)], lines) + endif +endfunction + +function! s:with_cd(cmd, dir) + return printf('cd%s %s && %s', s:is_win ? ' /d' : '', s:shellesc(a:dir), a:cmd) +endfunction + +function! s:system(cmd, ...) + try + let [sh, shellcmdflag, shrd] = s:chsh(1) + let cmd = a:0 > 0 ? s:with_cd(a:cmd, a:1) : a:cmd + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + return system(s:is_win ? '('.cmd.')' : cmd) + finally + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry +endfunction + +function! s:system_chomp(...) + let ret = call('s:system', a:000) + return v:shell_error ? '' : substitute(ret, '\n$', '', '') +endfunction + +function! 
s:git_validate(spec, check_branch) + let err = '' + if isdirectory(a:spec.dir) + let result = s:lines(s:system('git rev-parse --abbrev-ref HEAD 2>&1 && git config -f .git/config remote.origin.url', a:spec.dir)) + let remote = result[-1] + if v:shell_error + let err = join([remote, 'PlugClean required.'], "\n") + elseif !s:compare_git_uri(remote, a:spec.uri) + let err = join(['Invalid URI: '.remote, + \ 'Expected: '.a:spec.uri, + \ 'PlugClean required.'], "\n") + elseif a:check_branch && has_key(a:spec, 'commit') + let result = s:lines(s:system('git rev-parse HEAD 2>&1', a:spec.dir)) + let sha = result[-1] + if v:shell_error + let err = join(add(result, 'PlugClean required.'), "\n") + elseif !s:hash_match(sha, a:spec.commit) + let err = join([printf('Invalid HEAD (expected: %s, actual: %s)', + \ a:spec.commit[:6], sha[:6]), + \ 'PlugUpdate required.'], "\n") + endif + elseif a:check_branch + let branch = result[0] + " Check tag + if has_key(a:spec, 'tag') + let tag = s:system_chomp('git describe --exact-match --tags HEAD 2>&1', a:spec.dir) + if a:spec.tag !=# tag && a:spec.tag !~ '\*' + let err = printf('Invalid tag: %s (expected: %s). Try PlugUpdate.', + \ (empty(tag) ? 'N/A' : tag), a:spec.tag) + endif + " Check branch + elseif a:spec.branch !=# branch + let err = printf('Invalid branch: %s (expected: %s). Try PlugUpdate.', + \ branch, a:spec.branch) + endif + if empty(err) + let [ahead, behind] = split(s:lastline(s:system(printf( + \ 'git rev-list --count --left-right HEAD...origin/%s', + \ a:spec.branch), a:spec.dir)), '\t') + if !v:shell_error && ahead + if behind + " Only mention PlugClean if diverged, otherwise it's likely to be + " pushable (and probably not that messed up). 
+ let err = printf( + \ "Diverged from origin/%s (%d commit(s) ahead and %d commit(s) behind!\n" + \ .'Backup local changes and run PlugClean and PlugUpdate to reinstall it.', a:spec.branch, ahead, behind) + else + let err = printf("Ahead of origin/%s by %d commit(s).\n" + \ .'Cannot update until local changes are pushed.', + \ a:spec.branch, ahead) + endif + endif + endif + endif + else + let err = 'Not found' + endif + return [err, err =~# 'PlugClean'] +endfunction + +function! s:rm_rf(dir) + if isdirectory(a:dir) + call s:system((s:is_win ? 'rmdir /S /Q ' : 'rm -rf ') . s:shellesc(a:dir)) + endif +endfunction + +function! s:clean(force) + call s:prepare() + call append(0, 'Searching for invalid plugins in '.g:plug_home) + call append(1, '') + + " List of valid directories + let dirs = [] + let errs = {} + let [cnt, total] = [0, len(g:plugs)] + for [name, spec] in items(g:plugs) + if !s:is_managed(name) + call add(dirs, spec.dir) + else + let [err, clean] = s:git_validate(spec, 1) + if clean + let errs[spec.dir] = s:lines(err)[0] + else + call add(dirs, spec.dir) + endif + endif + let cnt += 1 + call s:progress_bar(2, repeat('=', cnt), total) + normal! 2G + redraw + endfor + + let allowed = {} + for dir in dirs + let allowed[s:dirpath(fnamemodify(dir, ':h:h'))] = 1 + let allowed[dir] = 1 + for child in s:glob_dir(dir) + let allowed[child] = 1 + endfor + endfor + + let todo = [] + let found = sort(s:glob_dir(g:plug_home)) + while !empty(found) + let f = remove(found, 0) + if !has_key(allowed, f) && isdirectory(f) + call add(todo, f) + call append(line('$'), '- ' . f) + if has_key(errs, f) + call append(line('$'), ' ' . errs[f]) + endif + let found = filter(found, 'stridx(v:val, f) != 0') + end + endwhile + + 4 + redraw + if empty(todo) + call append(line('$'), 'Already clean.') + else + let s:clean_count = 0 + call append(3, ['Directories to delete:', '']) + redraw! 
+ if a:force || s:ask_no_interrupt('Delete all directories?') + call s:delete([6, line('$')], 1) + else + call setline(4, 'Cancelled.') + nnoremap d :set opfunc=delete_opg@ + nmap dd d_ + xnoremap d :call delete_op(visualmode(), 1) + echo 'Delete the lines (d{motion}) to delete the corresponding directories' + endif + endif + 4 + setlocal nomodifiable +endfunction + +function! s:delete_op(type, ...) + call s:delete(a:0 ? [line("'<"), line("'>")] : [line("'["), line("']")], 0) +endfunction + +function! s:delete(range, force) + let [l1, l2] = a:range + let force = a:force + while l1 <= l2 + let line = getline(l1) + if line =~ '^- ' && isdirectory(line[2:]) + execute l1 + redraw! + let answer = force ? 1 : s:ask('Delete '.line[2:].'?', 1) + let force = force || answer > 1 + if answer + call s:rm_rf(line[2:]) + setlocal modifiable + call setline(l1, '~'.line[1:]) + let s:clean_count += 1 + call setline(4, printf('Removed %d directories.', s:clean_count)) + setlocal nomodifiable + endif + endif + let l1 += 1 + endwhile +endfunction + +function! s:upgrade() + echo 'Downloading the latest version of vim-plug' + redraw + let tmp = tempname() + let new = tmp . '/plug.vim' + + try + let out = s:system(printf('git clone --depth 1 %s %s', s:plug_src, tmp)) + if v:shell_error + return s:err('Error upgrading vim-plug: '. out) + endif + + if readfile(s:me) ==# readfile(new) + echo 'vim-plug is already up-to-date' + return 0 + else + call rename(s:me, s:me . '.old') + call rename(new, s:me) + unlet g:loaded_plug + echo 'vim-plug has been upgraded' + return 1 + endif + finally + silent! call s:rm_rf(tmp) + endtry +endfunction + +function! s:upgrade_specs() + for spec in values(g:plugs) + let spec.frozen = get(spec, 'frozen', 0) + endfor +endfunction + +function! 
s:status() + call s:prepare() + call append(0, 'Checking plugins') + call append(1, '') + + let ecnt = 0 + let unloaded = 0 + let [cnt, total] = [0, len(g:plugs)] + for [name, spec] in items(g:plugs) + let is_dir = isdirectory(spec.dir) + if has_key(spec, 'uri') + if is_dir + let [err, _] = s:git_validate(spec, 1) + let [valid, msg] = [empty(err), empty(err) ? 'OK' : err] + else + let [valid, msg] = [0, 'Not found. Try PlugInstall.'] + endif + else + if is_dir + let [valid, msg] = [1, 'OK'] + else + let [valid, msg] = [0, 'Not found.'] + endif + endif + let cnt += 1 + let ecnt += !valid + " `s:loaded` entry can be missing if PlugUpgraded + if is_dir && get(s:loaded, name, -1) == 0 + let unloaded = 1 + let msg .= ' (not loaded)' + endif + call s:progress_bar(2, repeat('=', cnt), total) + call append(3, s:format_message(valid ? '-' : 'x', name, msg)) + normal! 2G + redraw + endfor + call setline(1, 'Finished. '.ecnt.' error(s).') + normal! gg + setlocal nomodifiable + if unloaded + echo "Press 'L' on each line to load plugin, or 'U' to update" + nnoremap L :call status_load(line('.')) + xnoremap L :call status_load(line('.')) + end +endfunction + +function! s:extract_name(str, prefix, suffix) + return matchstr(a:str, '^'.a:prefix.' \zs[^:]\+\ze:.*'.a:suffix.'$') +endfunction + +function! s:status_load(lnum) + let line = getline(a:lnum) + let name = s:extract_name(line, '-', '(not loaded)') + if !empty(name) + call plug#load(name) + setlocal modifiable + call setline(a:lnum, substitute(line, ' (not loaded)$', '', '')) + setlocal nomodifiable + endif +endfunction + +function! s:status_update() range + let lines = getline(a:firstline, a:lastline) + let names = filter(map(lines, 's:extract_name(v:val, "[x-]", "")'), '!empty(v:val)') + if !empty(names) + echo + execute 'PlugUpdate' join(names) + endif +endfunction + +function! s:is_preview_window_open() + silent! wincmd P + if &previewwindow + wincmd p + return 1 + endif +endfunction + +function! 
s:find_name(lnum) + for lnum in reverse(range(1, a:lnum)) + let line = getline(lnum) + if empty(line) + return '' + endif + let name = s:extract_name(line, '-', '') + if !empty(name) + return name + endif + endfor + return '' +endfunction + +function! s:preview_commit() + if b:plug_preview < 0 + let b:plug_preview = !s:is_preview_window_open() + endif + + let sha = matchstr(getline('.'), '^ \X*\zs[0-9a-f]\{7,9}') + if empty(sha) + return + endif + + let name = s:find_name(line('.')) + if empty(name) || !has_key(g:plugs, name) || !isdirectory(g:plugs[name].dir) + return + endif + + if exists('g:plug_pwindow') && !s:is_preview_window_open() + execute g:plug_pwindow + execute 'e' sha + else + execute 'pedit' sha + wincmd P + endif + setlocal previewwindow filetype=git buftype=nofile nobuflisted modifiable + try + let [sh, shellcmdflag, shrd] = s:chsh(1) + let cmd = 'cd '.s:shellesc(g:plugs[name].dir).' && git show --no-color --pretty=medium '.sha + if s:is_win + let batchfile = tempname().'.bat' + call writefile(["@echo off\r", cmd . "\r"], batchfile) + let cmd = batchfile + endif + execute 'silent %!' cmd + finally + let [&shell, &shellcmdflag, &shellredir] = [sh, shellcmdflag, shrd] + if s:is_win + call delete(batchfile) + endif + endtry + setlocal nomodifiable + nnoremap q :q + wincmd p +endfunction + +function! s:section(flags) + call search('\(^[x-] \)\@<=[^:]\+:', a:flags) +endfunction + +function! s:format_git_log(line) + let indent = ' ' + let tokens = split(a:line, nr2char(1)) + if len(tokens) != 5 + return indent.substitute(a:line, '\s*$', '', '') + endif + let [graph, sha, refs, subject, date] = tokens + let tag = matchstr(refs, 'tag: [^,)]\+') + let tag = empty(tag) ? ' ' : ' ('.tag.') ' + return printf('%s%s%s%s%s (%s)', indent, graph, sha, tag, subject, date) +endfunction + +function! s:append_ul(lnum, text) + call append(a:lnum, ['', a:text, repeat('-', len(a:text))]) +endfunction + +function! 
s:diff() + call s:prepare() + call append(0, ['Collecting changes ...', '']) + let cnts = [0, 0] + let bar = '' + let total = filter(copy(g:plugs), 's:is_managed(v:key) && isdirectory(v:val.dir)') + call s:progress_bar(2, bar, len(total)) + for origin in [1, 0] + let plugs = reverse(sort(items(filter(copy(total), (origin ? '' : '!').'(has_key(v:val, "commit") || has_key(v:val, "tag"))')))) + if empty(plugs) + continue + endif + call s:append_ul(2, origin ? 'Pending updates:' : 'Last update:') + for [k, v] in plugs + let range = origin ? '..origin/'.v.branch : 'HEAD@{1}..' + let cmd = 'git log --graph --color=never '.join(map(['--pretty=format:%x01%h%x01%d%x01%s%x01%cr', range], 's:shellesc(v:val)')) + if has_key(v, 'rtp') + let cmd .= ' -- '.s:shellesc(v.rtp) + endif + let diff = s:system_chomp(cmd, v.dir) + if !empty(diff) + let ref = has_key(v, 'tag') ? (' (tag: '.v.tag.')') : has_key(v, 'commit') ? (' '.v.commit) : '' + call append(5, extend(['', '- '.k.':'.ref], map(s:lines(diff), 's:format_git_log(v:val)'))) + let cnts[origin] += 1 + endif + let bar .= '=' + call s:progress_bar(2, bar, len(total)) + normal! 2G + redraw + endfor + if !cnts[origin] + call append(5, ['', 'N/A']) + endif + endfor + call setline(1, printf('%d plugin(s) updated.', cnts[0]) + \ . (cnts[1] ? printf(' %d plugin(s) have pending updates.', cnts[1]) : '')) + + if cnts[0] || cnts[1] + nnoremap (plug-preview) :silent! call preview_commit() + if empty(maparg("\", 'n')) + nmap (plug-preview) + endif + if empty(maparg('o', 'n')) + nmap o (plug-preview) + endif + endif + if cnts[0] + nnoremap X :call revert() + echo "Press 'X' on each block to revert the update" + endif + normal! gg + setlocal nomodifiable +endfunction + +function! s:revert() + if search('^Pending updates', 'bnW') + return + endif + + let name = s:find_name(line('.')) + if empty(name) || !has_key(g:plugs, name) || + \ input(printf('Revert the update of %s? (y/N) ', name)) !~? 
'^y' + return + endif + + call s:system('git reset --hard HEAD@{1} && git checkout '.s:esc(g:plugs[name].branch).' --', g:plugs[name].dir) + setlocal modifiable + normal! "_dap + setlocal nomodifiable + echo 'Reverted' +endfunction + +function! s:snapshot(force, ...) abort + call s:prepare() + setf vim + call append(0, ['" Generated by vim-plug', + \ '" '.strftime("%c"), + \ '" :source this file in vim to restore the snapshot', + \ '" or execute: vim -S snapshot.vim', + \ '', '', 'PlugUpdate!']) + 1 + let anchor = line('$') - 3 + let names = sort(keys(filter(copy(g:plugs), + \'has_key(v:val, "uri") && !has_key(v:val, "commit") && isdirectory(v:val.dir)'))) + for name in reverse(names) + let sha = s:system_chomp('git rev-parse --short HEAD', g:plugs[name].dir) + if !empty(sha) + call append(anchor, printf("silent! let g:plugs['%s'].commit = '%s'", name, sha)) + redraw + endif + endfor + + if a:0 > 0 + let fn = expand(a:1) + if filereadable(fn) && !(a:force || s:ask(a:1.' already exists. Overwrite?')) + return + endif + call writefile(getline(1, '$'), fn) + echo 'Saved as '.a:1 + silent execute 'e' s:esc(fn) + setf vim + endif +endfunction + +function! s:split_rtp() + return split(&rtp, '\\\@ f :Import fmt +" +" Drop fmt +" au Filetype go nnoremap F :Drop fmt +" +" Import the word under your cursor +" au Filetype go nnoremap k +" \ :exe 'Import ' . expand('') +" +" The backslash '\' is the default maplocalleader, so it is possible that +" your vim is set to use a different character (:help maplocalleader). +" +" Options: +" +" g:go_import_commands [default=1] +" +" Flag to indicate whether to enable the commands listed above. +" +if exists("b:did_ftplugin_go_import") + finish +endif + +if !exists("g:go_import_commands") + let g:go_import_commands = 1 +endif + +if g:go_import_commands + command! -buffer -nargs=? -complete=customlist,go#complete#Package Drop call s:SwitchImport(0, '', ) + command! 
-buffer -nargs=1 -complete=customlist,go#complete#Package Import call s:SwitchImport(1, '', ) + command! -buffer -nargs=* -complete=customlist,go#complete#Package ImportAs call s:SwitchImport(1, ) +endif + +function! s:SwitchImport(enabled, localname, path) + let view = winsaveview() + let path = a:path + + " Quotes are not necessary, so remove them if provided. + if path[0] == '"' + let path = strpart(path, 1) + endif + if path[len(path)-1] == '"' + let path = strpart(path, 0, len(path) - 1) + endif + if path == '' + call s:Error('Import path not provided') + return + endif + + " Extract any site prefix (e.g. github.com/). + " If other imports with the same prefix are grouped separately, + " we will add this new import with them. + " Only up to and including the first slash is used. + let siteprefix = matchstr(path, "^[^/]*/") + + let qpath = '"' . path . '"' + if a:localname != '' + let qlocalpath = a:localname . ' ' . qpath + else + let qlocalpath = qpath + endif + let indentstr = 0 + let packageline = -1 " Position of package name statement + let appendline = -1 " Position to introduce new import + let deleteline = -1 " Position of line with existing import + let linesdelta = 0 " Lines added/removed + + " Find proper place to add/remove import. 
+ let line = 0 + while line <= line('$') + let linestr = getline(line) + + if linestr =~# '^package\s' + let packageline = line + let appendline = line + + elseif linestr =~# '^import\s\+(' + let appendstr = qlocalpath + let indentstr = 1 + let appendline = line + let firstblank = -1 + let lastprefix = "" + while line <= line("$") + let line = line + 1 + let linestr = getline(line) + let m = matchlist(getline(line), '^\()\|\(\s\+\)\(\S*\s*\)"\(.\+\)"\)') + if empty(m) + if siteprefix == "" && a:enabled + " must be in the first group + break + endif + " record this position, but keep looking + if firstblank < 0 + let firstblank = line + endif + continue + endif + if m[1] == ')' + " if there's no match, add it to the first group + if appendline < 0 && firstblank >= 0 + let appendline = firstblank + endif + break + endif + let lastprefix = matchstr(m[4], "^[^/]*/") + if a:localname != '' && m[3] != '' + let qlocalpath = printf('%-' . (len(m[3])-1) . 's %s', a:localname, qpath) + endif + let appendstr = m[2] . qlocalpath + let indentstr = 0 + if m[4] == path + let appendline = -1 + let deleteline = line + break + elseif m[4] < path + " don't set candidate position if we have a site prefix, + " we've passed a blank line, and this doesn't share the same + " site prefix. + if siteprefix == "" || firstblank < 0 || match(m[4], "^" . siteprefix) >= 0 + let appendline = line + endif + elseif siteprefix != "" && match(m[4], "^" . siteprefix) >= 0 + " first entry of site group + let appendline = line - 1 + break + endif + endwhile + break + + elseif linestr =~# '^import ' + if appendline == packageline + let appendstr = 'import ' . qlocalpath + let appendline = line - 1 + endif + let m = matchlist(linestr, '^import\(\s\+\)\(\S*\s*\)"\(.\+\)"') + if !empty(m) + if m[3] == path + let appendline = -1 + let deleteline = line + break + endif + if m[3] < path + let appendline = line + endif + if a:localname != '' && m[2] != '' + let qlocalpath = printf("%s %" . len(m[2])-1 . 
"s", a:localname, qpath) + endif + let appendstr = 'import' . m[1] . qlocalpath + endif + + elseif linestr =~# '^\(var\|const\|type\|func\)\>' + break + + endif + let line = line + 1 + endwhile + + " Append or remove the package import, as requested. + if a:enabled + if deleteline != -1 + call s:Error(qpath . ' already being imported') + elseif appendline == -1 + call s:Error('No package line found') + else + if appendline == packageline + call append(appendline + 0, '') + call append(appendline + 1, 'import (') + call append(appendline + 2, ')') + let appendline += 2 + let linesdelta += 3 + let appendstr = qlocalpath + let indentstr = 1 + endif + call append(appendline, appendstr) + execute appendline + 1 + if indentstr + execute 'normal >>' + endif + let linesdelta += 1 + endif + else + if deleteline == -1 + call s:Error(qpath . ' not being imported') + else + execute deleteline . 'd' + let linesdelta -= 1 + + if getline(deleteline-1) =~# '^import\s\+(' && getline(deleteline) =~# '^)' + " Delete empty import block + let deleteline -= 1 + execute deleteline . "d" + execute deleteline . "d" + let linesdelta -= 2 + endif + + if getline(deleteline) == '' && getline(deleteline - 1) == '' + " Delete spacing for removed line too. + execute deleteline . "d" + let linesdelta -= 1 + endif + endif + endif + + " Adjust view for any changes. + let view.lnum += linesdelta + let view.topline += linesdelta + if view.topline < 0 + let view.topline = 0 + endif + + " Put buffer back where it was. + call winrestview(view) + +endfunction + +function! s:Error(s) + echohl Error | echo a:s | echohl None +endfunction + +let b:did_ftplugin_go_import = 1 + +" vim:ts=4:sw=4:et diff --git a/roles/dotfiles/files/.vim/ftplugin/go/test.sh b/roles/dotfiles/files/.vim/ftplugin/go/test.sh new file mode 100755 index 0000000..d8a5b89 --- /dev/null +++ b/roles/dotfiles/files/.vim/ftplugin/go/test.sh @@ -0,0 +1,78 @@ +#!/bin/bash -e +# +# Copyright 2012 The Go Authors. All rights reserved. 
+# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. +# +# Tests for import.vim. + +cd $(dirname $0) + +cat > base.go <&1 -n "$1: " + vim -e -s -u /dev/null -U /dev/null --noplugin -c "source import.vim" \ + -c "$1" -c 'wq! test.go' base.go + # ensure blank lines are treated correctly + if ! gofmt test.go | cmp test.go -; then + echo 2>&1 "gofmt conflict" + gofmt test.go | diff -u test.go - | sed "s/^/ /" 2>&1 + fail=1 + return + fi + if ! [[ $(cat test.go) =~ $2 ]]; then + echo 2>&1 "$2 did not match" + cat test.go | sed "s/^/ /" 2>&1 + fail=1 + return + fi + echo 2>&1 "ok" +} + +# Tests for Import + +test_one "Import baz" '"baz".*"bytes"' +test_one "Import io/ioutil" '"io".*"io/ioutil".*"net"' +test_one "Import myc" '"io".*"myc".*"net"' # prefix of a site prefix +test_one "Import nat" '"io".*"nat".*"net"' +test_one "Import net/http" '"net".*"net/http".*"mycorp/foo"' +test_one "Import zoo" '"net".*"zoo".*"mycorp/foo"' +test_one "Import mycorp/bar" '"net".*"mycorp/bar".*"mycorp/foo"' +test_one "Import mycorp/goo" '"net".*"mycorp/foo".*"mycorp/goo"' + +# Tests for Drop + +cat > base.go <&1 "FAIL" + exit 1 +fi +echo 2>&1 "PASS" diff --git a/roles/dotfiles/files/.vimrc b/roles/dotfiles/files/.vimrc new file mode 100644 index 0000000..12cd9c6 --- /dev/null +++ b/roles/dotfiles/files/.vimrc @@ -0,0 +1,133 @@ +" General options +set backspace=indent,eol,start +set cindent autoindent +set confirm +set encoding=utf-8 +set incsearch +set hidden +set mouse=a +set nocompatible +set noexpandtab +set nohlsearch +set number +set ruler +set showcmd +set showmatch +set showmode +set tags=./tags,tags,/usr/src/sys/arch/amd64/tags,/var/db/libc.tags +set t_Co=256 +set ttyfast +source /usr/share/vim/vim82/ftplugin/man.vim + +filetype plugin on + +nnoremap :tag +nnoremap :pop + +nnoremap :bprev + +" fix glitches in certain terminals +" backspace +imap ^? ^H + +" f7 toggles spelling on/off +nn :setlocal spell! spell? 
+ +" view binary files as hex +" Convert to hex and back; does not save changes +nn :%!xxd -g 1 +nn :%!xxd -g 1 -r + +" makefile magic +" compiler stuff +let g:compiler_gcc_ignore_unmatched_lines=1 +let mapleader=',' +" quickfix :make +nmap m :wa:silent! make \| redraw! \| cw +vmap m :wa:silent! make \| redraw! \| cw +nn ,c :silent! make clean \| redraw! \| cw +" handy shortcuts +map h :ccl +map s :cw +map l :cl +" jump between messages +map n :cn +map p :cp + +" format selection +map f :!fmt + + +" @c comment, @u uncomment, @p print function name +let @u='0xx$xx^[' +let @c='I/*^[A*/^[' +let @p='ofprintf(stderr, "%s\n", __func__);^[' + +:ab #d #define +:ab #i #include + +autocmd FileType make setlocal noexpandtab +autocmd FileType c setlocal noexpandtab +autocmd FileType cc setlocal noexpandtab +autocmd FileType python setlocal expandtab shiftwidth=4 softtabstop=4 +autocmd FileType ada setlocal expandtab shiftwidth=3 softtabstop=3 tabstop=3 + +" Plugins + +" Initialization +call plug#begin('~/.vim/bundle') + +Plug 'scrooloose/nerdtree' +Plug 'junegunn/fzf' +Plug 'fatih/vim-go', { 'for': 'go' } +Plug 'ambv/black', { 'for': 'python' } +Plug 'mileszs/ack.vim' +Plug 'racer-rust/vim-racer', { 'for': 'rust' } + +" Themes +Plug 'KKPMW/oldbook-vim' +Plug 'agreco/vim-citylights' +Plug 'xdefrag/vim-beelzebub' +Plug 'logico-dev/typewriter' +Plug 'vim-scripts/wombat256.vim' + +call plug#end() + +" NERDTree +map o :NERDTree + +" FZF +nmap (fzf-maps-n) +xmap (fzf-maps-x) +omap (fzf-maps-o) +imap (fzf-complete-word) +imap (fzf-complete-path) +imap (fzf-complete-file-ag) +imap (fzf-complete-line) + +command! FZFBuffers call fzf#run({'source': map(range(1, bufnr('$')), 'bufname(v:val)'), 'sink': 'e', 'down': '30%'}) +map b :FZFBuffers + +" Ack +if executable('ag') + let g:ackprg = 'ag --vimgrep' +endif + +" The space is signficant. 
+map / :Ack + +" Go stuff +map i :GoImports +map i :GoImports + +let g:go_fmt_autosave = 1 +let g:go_fmt_command = "goimports" + +au FileType rust nmap gd (rust-def) +autocmd Filetype c,cpp inoremap t :wa:silent! make test \| redraw! \| cw +autocmd Filetype go map t :wa:GoTest +autocmd Filetype go map C-] :w:GoDef +autocmd Filetype go map C-\ :w:GoDefPop + + +colorscheme oldbook diff --git a/roles/dotfiles/files/bin/em b/roles/dotfiles/files/bin/em new file mode 100755 index 0000000..02e8fd3 --- /dev/null +++ b/roles/dotfiles/files/bin/em @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +if [ -z "$DISPLAY" ] +then + NW="" +else + NW="-n" +fi + +if [ -z "$@" ] +then + cd $HOME +fi + +emacsclient $NW -c -a '' "$@"