Uploaded image for project: 'Erlang/OTP'
  1. Erlang/OTP
  2. ERL-884

tls_connection crash due to mishandled timeout in 21.3

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: 21.3
    • Fix Version/s: 21.3.1
    • Component/s: ssl
    • Labels:
      None

      Description

      I'm seeing a strange crash on tls connections which seems to have been caused by a behaviour change between 21.2 and 21.3.

      I've got a websocket client which used to work fine, but now appears to crash after 5-6 seconds of uptime due to an mishandled timeout. tls_connection seems to be trying to reply to an undefined caller. The problem appears to be within Erlang library code - perhaps there was an unintended behaviour change during the refactoring of the TLS state handling?

      Here's the full error:

      =ERROR REPORT==== 14-Mar-2019::17:05:11.983847 ===
      ** State machine <0.119.0> terminating
      ** Last event = {{timeout,recv},timeout}
      ** When server state  = [{data,
                                [{"State",
                                  {connection,
                                   {state,
                                    {static_env,client,gen_tcp,tls_connection,tcp,
                                     tcp_closed,tcp_error,
                                     "URL_REDACTED",443,#Port<0.8>,
                                     #Ref<0.2863505047.3655467009.136674>,
                                     #Ref<0.2863505047.3655467009.136681>,
                                     ssl_session_cache,
                                     {ssl_crl_cache,
                                      {{#Ref<0.2863505047.3655467009.136677>,
                                        #Ref<0.2863505047.3655467009.136678>},
                                       []}},
                                     {#Ref<0.2863505047.3655467009.136675>,
                                      #Ref<0.2863505047.3655467009.136676>},
                                     #Ref<0.2863505047.3655335937.143696>,undefined},
                                    "***",
                                    {ssl_options,tls,
                                     [{3,3},{3,2},{3,1}], 
                                     verify_none,
                                     {#Fun<ssl.8.129595827>,[]},
                                     #Fun<ssl.9.129595827>,false,false,undefined,1,
                                     <<>>,"***",<<>>,"***","***","***",<<>>,"***",
                                     undefined,undefined,"***","***",
                                     [<<"À,">>,<<"À0">>,<<"À$">>,<<"À(">>,<<"À.">>,
                                      <<"À2">>,<<"À&">>,<<"À*">>,
                                      <<0,159>>,
                                      <<0,163>>,
                                      <<0,107>>,
                                      <<0,106>>,
                                      <<"À+">>,<<"À/">>,<<"À#">>,<<"À'">>,<<"À-">>,
                                      <<"À1">>,<<"À%">>,<<"À)">>,
                                      <<0,158>>,
                                      <<0,162>>,
                                      <<0,103>>,
                                      <<0,64>>,
                                      <<"À\n">>,
                                      <<192,20>>,
                                      <<0,57>>,
                                      <<0,56>>,
                                      <<192,5>>, 
                                      <<192,15>>,
                                      <<"À\t">>,
                                      <<192,19>>,
                                      <<0,51>>,
                                      <<0,50>>,
                                      <<192,4>>,
                                      <<192,14>>],
                                     undefined,true,268435456,true,undefined,
                                     infinity,false,undefined,undefined,undefined,
                                     undefined,true,"URL_REDACTED",[],
                                     undefined,undefined,true,one_n_minus_one,false,
                                     false,
                                     {ssl_crl_cache,{internal,[]}},
                                     [{sha512,ecdsa},
                                      {sha512,rsa},
                                      {sha384,ecdsa},
                                      {sha384,rsa},
                                      {sha256,ecdsa},
                                      {sha256,rsa},
                                      {sha224,ecdsa},
                                      {sha224,rsa},
                                      {sha,ecdsa},
                                      {sha,rsa},
                                      {sha,dsa}],
                                     {elliptic_curves,
                                      [{1,3,132,0,39},
                                       {1,3,132,0,38},
                                       {1,3,132,0,35},
                                       {1,3,36,3,3,2,8,1,1,13},
                                       {1,3,132,0,36},
                                       {1,3,132,0,37},
                                       {1,3,36,3,3,2,8,1,1,11},
                                       {1,3,132,0,34},
                                       {1,3,132,0,16},
                                       {1,3,132,0,17},
                                       {1,3,36,3,3,2,8,1,1,7},
                                       {1,3,132,0,10},
                                       {1,2,840,10045,3,1,7},
                                       {1,3,132,0,3},
                                       {1,3,132,0,26},
                                       {1,3,132,0,27},
                                       {1,3,132,0,32},
                                       {1,3,132,0,33},
                                       {1,3,132,0,24},
                                       {1,3,132,0,25},
                                       {1,3,132,0,31},
                                       {1,2,840,10045,3,1,1},
                                       {1,3,132,0,1},
                                       {1,3,132,0,2},
                                       {1,3,132,0,15},
                                       {1,3,132,0,9},
                                       {1,3,132,0,8},
                                       {1,3,132,0,30}]},
                                     undefined,262144,full,[]}, 
                                    {socket_options,binary,0,0,0,true},
                                    "***","***",false,
                                    #{active_n => 100,active_n_toggle => false,
                                      sender => <0.118.0>},
                                    "***","***","***","***",undefined,
                                    undefined}}}]}]
      ** Reason for termination = error:{bad_action_from_state_function,
                                            {reply,undefined,{error,timeout}}}
      ** Callback mode = state_functions
      ** Stacktrace =
      **  [{gen_statem,parse_actions_reply,7,[{file,"gen_statem.erl"},{line,1290}]},
           {gen_statem,loop_event_actions_list,10,
                       [{file,"gen_statem.erl"},{line,1206}]},
           {tls_connection,init,1,[{file,"tls_connection.erl"},{line,133}]},
           {proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,249}]}]
       
      =CRASH REPORT==== 14-Mar-2019::17:05:11.992527 ===
        crasher:
          initial call: tls_connection:init/1
          pid: <0.119.0>
          registered_name: []
          exception error: {bad_action_from_state_function,
                               {reply,undefined,{error,timeout}}}
            in function  gen_statem:parse_actions_reply/7 (gen_statem.erl, line 1290)
            in call from gen_statem:loop_event_actions_list/10 (gen_statem.erl, line 1206)
            in call from tls_connection:init/1 (tls_connection.erl, line 133)
          ancestors: [tls_connection_sup,ssl_connection_sup,ssl_sup,<0.85.0>]
          message_queue_len: 0
          messages: []
          links: [<0.118.0>,<0.91.0>]
          dictionary: [{ssl_pem_cache,ssl_pem_cache},{ssl_manager,ssl_manager}]
          trap_exit: true
          status: running
          heap_size: 10958
          stack_size: 27
          reductions: 82081
        neighbours:
          neighbour:
            pid: <0.118.0>
            registered_name: []
            initial call: tls_sender:init/1
            current_function: {gen_statem,loop_receive,3}
            ancestors: [<0.117.0>,<0.105.0>]
            message_queue_len: 0
            links: [<0.119.0>]
            trap_exit: false
            status: waiting
            heap_size: 233
            stack_size: 12
            reductions: 566
            current_stacktrace: [{gen_statem,loop_receive,3,
                                    [{file,"gen_statem.erl"},{line,880}]},
                        {proc_lib,init_p_do_apply,3,
                                  [{file,"proc_lib.erl"},{line,249}]}]
      =SUPERVISOR REPORT==== 14-Mar-2019::17:05:11.998005 ===
          supervisor: {local,tls_connection_sup}
          errorContext: child_terminated
          reason: {{bad_action_from_state_function,
                       {reply,undefined,{error,timeout}}},
                   [{gen_statem,parse_actions_reply,7,
                        [{file,"gen_statem.erl"},{line,1290}]},
                    {gen_statem,loop_event_actions_list,10,
                        [{file,"gen_statem.erl"},{line,1206}]},
                    {tls_connection,init,1,[{file,"tls_connection.erl"},{line,133}]},
                    {proc_lib,init_p_do_apply,3,
                        [{file,"proc_lib.erl"},{line,249}]}]}
          offender: [{pid,<0.119.0>},
                     {id,undefined},
                     {mfargs,{tls_connection,start_link,undefined}},
                     {restart_type,temporary},
                     {shutdown,4000},
                     {child_type,worker}]
      

        Issue Links

          Activity

          Hide
          dtip dtip added a comment -

          I appear to have been beaten to it by a few seconds. This is a dup of ERL-883

          Show
          dtip dtip added a comment - I appear to have been beaten to it by a few seconds. This is a dup of ERL-883
          Hide
          shdblowers Steven Blowers added a comment -

          boom

          Show
          shdblowers Steven Blowers added a comment - boom
          Hide
          shdblowers Steven Blowers added a comment -

          this one has more detail however.

          Show
          shdblowers Steven Blowers added a comment - this one has more detail however.
          Hide
          ingela Ingela Anderton Andin added a comment -

          This is a little symptom treating, I will look into the real issue more tomorrow, but it could be a workaround atleast.

          diff --git a/lib/ssl/src/ssl_connection.erl b/lib/ssl/src/ssl_connection.erl
          index ad81288f64..1b90450edc 100644
          --- a/lib/ssl/src/ssl_connection.erl
          +++ b/lib/ssl/src/ssl_connection.erl
          @@ -1310,6 +1310,8 @@ handle_common_event({timeout, handshake}, close, _StateName, #state{start_or_rec
               {stop_and_reply,
                {shutdown, user_timeout},
                {reply, StartFrom, {error, timeout}}, State#state{start_or_recv_from = undefined}};
          +handle_common_event({timeout, recv}, timeout, StateName, #state{start_or_recv_from = undefined} = State, _) ->
          +    {next_state, StateName, State};
           handle_common_event({timeout, recv}, timeout, StateName, #state{start_or_recv_from = RecvFrom} = State, _) ->
               {next_state, StateName, State#state{start_or_recv_from = undefined,
                                                   bytes_to_read = undefined}, [{reply, RecvFrom, {error, timeout}}]};
          
          

          Show
          ingela Ingela Anderton Andin added a comment - This is a little symptom treating, I will look into the real issue more tomorrow, but it could be a workaround atleast. diff --git a/lib/ssl/src/ssl_connection.erl b/lib/ssl/src/ssl_connection.erl index ad81288f64..1b90450edc 100644 --- a/lib/ssl/src/ssl_connection.erl +++ b/lib/ssl/src/ssl_connection.erl @@ -1310,6 +1310,8 @@ handle_common_event({timeout, handshake}, close, _StateName, #state{start_or_rec {stop_and_reply, {shutdown, user_timeout}, {reply, StartFrom, {error, timeout}}, State#state{start_or_recv_from = undefined}}; +handle_common_event({timeout, recv}, timeout, StateName, #state{start_or_recv_from = undefined} = State, _) -> + {next_state, StateName, State}; handle_common_event({timeout, recv}, timeout, StateName, #state{start_or_recv_from = RecvFrom} = State, _) -> {next_state, StateName, State#state{start_or_recv_from = undefined, bytes_to_read = undefined}, [{reply, RecvFrom, {error, timeout}}]};
          Hide
          ingela Ingela Anderton Andin added a comment -

          Here is the real patch. It is a timing related bug and there was one place that missed to cancel the recv timer.

          diff --git a/lib/ssl/src/tls_connection.erl b/lib/ssl/src/tls_connection.erl
          index 3229004c9d..aa7b501831 100644
          --- a/lib/ssl/src/tls_connection.erl
          +++ b/lib/ssl/src/tls_connection.erl
          @@ -224,6 +224,22 @@ next_event(StateName, Record, State, Actions) ->
           
           %%% TLS record protocol level application data messages 
           
          +handle_protocol_record(#ssl_tls{type = ?APPLICATION_DATA, fragment = Data}, StateName, 
          +                       #state{start_or_recv_from = From,
          +                              socket_options = #socket_options{active = false}} = State0) when From =/= undefined ->
          +    case ssl_connection:read_application_data(Data, State0) of
          +       {stop, _, _} = Stop->
          +            Stop;
          +       {Record, #state{start_or_recv_from = Caller} = State1} ->
          +            TimerAction = case Caller of
          +                              undefined -> %% Passive recv complete cancel timer
          +                                  [{{timeout, recv}, infinity, timeout}];
          +                              _ ->
          +                                  []
          +                          end,
          +            {next_state, StateName, State, Actions} = next_event(StateName, Record, State1, TimerAction), 
          +            ssl_connection:hibernate_after(StateName, State, Actions)
          +    end;
           handle_protocol_record(#ssl_tls{type = ?APPLICATION_DATA, fragment = Data}, StateName, State0) ->
               case ssl_connection:read_application_data(Data, State0) of
                  {stop, _, _} = Stop->
          
          

          Show
          ingela Ingela Anderton Andin added a comment - Here is the real patch. It is a timing related bug and there was one place that missed to cancel the recv timer. diff --git a/lib/ssl/src/tls_connection.erl b/lib/ssl/src/tls_connection.erl index 3229004c9d..aa7b501831 100644 --- a/lib/ssl/src/tls_connection.erl +++ b/lib/ssl/src/tls_connection.erl @@ -224,6 +224,22 @@ next_event(StateName, Record, State, Actions) -> %%% TLS record protocol level application data messages +handle_protocol_record(#ssl_tls{type = ?APPLICATION_DATA, fragment = Data}, StateName, + #state{start_or_recv_from = From, + socket_options = #socket_options{active = false}} = State0) when From =/= undefined -> + case ssl_connection:read_application_data(Data, State0) of + {stop, _, _} = Stop-> + Stop; + {Record, #state{start_or_recv_from = Caller} = State1} -> + TimerAction = case Caller of + undefined -> %% Passive recv complete cancel timer + [{{timeout, recv}, infinity, timeout}]; + _ -> + [] + end, + {next_state, StateName, State, Actions} = next_event(StateName, Record, State1, TimerAction), + ssl_connection:hibernate_after(StateName, State, Actions) + end; handle_protocol_record(#ssl_tls{type = ?APPLICATION_DATA, fragment = Data}, StateName, State0) -> case ssl_connection:read_application_data(Data, State0) of {stop, _, _} = Stop->
          Hide
          ingela Ingela Anderton Andin added a comment -

          Will be released soon

          Show
          ingela Ingela Anderton Andin added a comment - Will be released soon

            People

            • Assignee:
              otp_team_ps Team PS
              Reporter:
              dtip dtip
            • Votes:
              2 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

              • Created:
                Updated:
                Resolved:

                Development