
     j6U                       d dl mZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZ d dlmZmZm Z m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 er d dl8Z8d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZB d dlCmDZD e G d de                      ZE G d de)          ZF G d deFe1          ZGdS )    )annotations)TYPE_CHECKINGClassVarLiteralN)
get_option)libmissing)NDArrayBacked)ensure_string_array)pa_version_under10p1)function)doc)ExtensionDtypeStorageExtensionDtyperegister_extension_dtype)is_array_likeis_bool_dtypeis_integer_dtypeis_object_dtypeis_string_dtypepandas_dtype)ops)masked_reductions)ExtensionArray)FloatingArrayFloatingDtype)IntegerArrayIntegerDtype)NumpyExtensionArray)extract_array)check_array_indexer)isna)	AxisIntDtypeDtypeObjNumpySorterNumpyValueArrayLikeScalarSelfnpttype_t)Seriesc                      e Zd ZU dZdZded<   edd            ZdZdddZ	edd            Z
edd            ZddZddZd	S )StringDtypea,  
    Extension dtype for string data.

    .. warning::

       StringDtype is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    storage : {"python", "pyarrow", "pyarrow_numpy"}, optional
        If not given, the value of ``pd.options.mode.string_storage``.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.StringDtype()
    string[python]

    >>> pd.StringDtype(storage="pyarrow")
    string[pyarrow]
    stringzClassVar[str]namereturnlibmissing.NAType | floatc                H    | j         dk    rt          j        S t          j        S )Npyarrow_numpy)storagenpnan
libmissingNAselfs    nC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\pandas/core/arrays/string_.pyna_valuezStringDtype.na_valuem   s     <?* 	!6M=     r5   NNonec                    |#t          d          }|rd}nt          d          }|dvrt          d| d          |dv rt          rt          d          || _        d S )	Nzfuture.infer_stringr4   zmode.string_storage>   pythonpyarrowr4   z<Storage must be 'python', 'pyarrow' or 'pyarrow_numpy'. Got z	 instead.)rC   r4   z;pyarrow>=10.0.1 is required for PyArrow backed StringArray.)r   
ValueErrorr   ImportErrorr5   )r;   r5   infer_strings      r<   __init__zStringDtype.__init__v   s     	<%&;<<L <)$%:;;@@ 	** * *   22 	7K 	M   r>   	type[str]c                    t           S N)strr:   s    r<   typezStringDtype.type   s    
r>   r)   c                ,   t          |t                    st          dt          |                     |dk    r
 |             S |dk    r | d          S |dk    r | d          S |dk    r | d	          S t          d
| j         d| d          )a|  
        Construct a StringDtype from a string.

        Parameters
        ----------
        string : str
            The type of the name. The storage type will be taking from `string`.
            Valid options and their storage types are

            ========================== ==============================================
            string                     result storage
            ========================== ==============================================
            ``'string'``               pd.options.mode.string_storage, default python
            ``'string[python]'``       python
            ``'string[pyarrow]'``      pyarrow
            ========================== ==============================================

        Returns
        -------
        StringDtype

        Raise
        -----
        TypeError
            If the string is not a valid option.
        z.'construct_from_string' expects a string, got r/   zstring[python]rB   r?   zstring[pyarrow]rC   zstring[pyarrow_numpy]r4   zCannot construct a 'z' from '')
isinstancerK   	TypeErrorrL   __name__)clsr/   s     r<   construct_from_stringz!StringDtype.construct_from_string   s    8 &#&& 	OfOO   X 		T355L'' 	T3x(((((( 	T3y)))).. 	T3////R3<RRRRRSSSr>   type_t[BaseStringArray]c                T    ddl m}m} | j        dk    rt          S | j        dk    r|S |S )zq
        Return the array type associated with this dtype.

        Returns
        -------
        type
        r   )ArrowStringArrayArrowStringArrayNumpySemanticsrB   rC   )pandas.core.arrays.string_arrowrV   rW   r5   StringArray)r;   rV   rW   s      r<   construct_array_typez StringDtype.construct_array_type   s\    	
 	
 	
 	
 	
 	
 	
 	

 <8# 	2\Y& 	2##11r>   array$pyarrow.Array | pyarrow.ChunkedArrayBaseStringArrayc                d   | j         dk    rddlm}  ||          S | j         dk    rddlm}  ||          S ddl}t          ||j                  r|g}n|j        }g }|D ]H}|                    d          }t          |t          j        	          }|                    |           It          |          dk    rt          j        g t           
          }nt          j        |          }t$                              t$                    }t)          j        ||t-          d                     |S )zH
        Construct StringArray from pyarrow Array/ChunkedArray.
        rC   r   )rV   r4   )rW   NF)zero_copy_only)r=   dtyperB   r?   )r5   rX   rV   rW   rC   rO   Arraychunksto_numpyr   r8   r9   appendlenr6   r[   objectconcatenaterY   __new__r
   rG   r.   )	r;   r[   rV   rW   rC   rc   resultsarrnew_string_arrays	            r<   __from_arrow__zStringDtype.__from_arrow__   sh    <9$ 	$HHHHHH##E***\_, 	$VVVVVV11%888NNN%// & G $ $ ll%l88)#
FFFs####v;;! 	*(2V,,,CC.))C '..{;;)))	
 	
 	

  r>   )r1   r2   rJ   r1   r@   )r1   rH   )r1   r)   )r1   rT   )r[   r\   r1   r]   )rQ   
__module____qualname____doc__r0   __annotations__propertyr=   	_metadatarG   rL   classmethodrS   rZ   rm    r>   r<   r.   r.   G   s          @ #D"""" ! ! ! X! I    $    X (T (T (T [(TZ2 2 2 2,+  +  +  +  +  + r>   r.   c                  \    e Zd ZdZ eej                  d             Zed	d            ZdS )
r]   z8
    Mixin class for StringArray, ArrowStringArray.
    c                r    | j         dk    rd | D             S t          |                                           S )N   c                6    g | ]}|                                 S rv   )tolist).0xs     r<   
<listcomp>z*BaseStringArray.tolist.<locals>.<listcomp>  s     ---1AHHJJ---r>   )ndimlistrd   r:   s    r<   r{   zBaseStringArray.tolist  s:    9q= 	.------DMMOO$$$r>   ra   r%   r1   r)   c                n    t          j        |d          dvrt          |                     ||          S )NTskipna)r/   emptyr`   )r   infer_dtyperD   _from_sequence)rR   scalarsra   s      r<   _from_scalarszBaseStringArray._from_scalars	  s>    ?740008KK 	!!'!777r>   N)ra   r%   r1   r)   )	rQ   ro   rp   rq   r   r   r{   ru   r   rv   r>   r<   r]   r]      sg          	S	% %  %
 8 8 8 [8 8 8r>   r]   c                  R    e Zd ZdZdZd6d7 fdZd	 Zed
ddd8d            Zed
ddd8d            Z	ed9d            Z
d:dZd Zd; fdZd<dZd=d> fdZdddd?d Zd@dAd"Zd@dAd#Zd=dBd&Zd6dCd)Z eej                  	 	 dDdE fd2            Zd3 ZeZej        Z	 dFdGd5Z xZS )HrY   aY  
    Extension array for string data.

    .. warning::

       StringArray is considered experimental. The implementation and
       parts of the API may change without warning.

    Parameters
    ----------
    values : array-like
        The array of data.

        .. warning::

           Currently, this expects an object-dtype ndarray
           where the elements are Python strings
           or nan-likes (``None``, ``np.nan``, ``NA``).
           This may change without warning in the future. Use
           :meth:`pandas.array` with ``dtype="string"`` for a stable way of
           creating a `StringArray` from any sequence.

        .. versionchanged:: 1.5.0

           StringArray now accepts array-likes containing
           nan-likes(``None``, ``np.nan``) for the ``values`` parameter
           in addition to strings and :attr:`pandas.NA`

    copy : bool, default False
        Whether to copy the array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    :func:`pandas.array`
        The recommended function for creating a StringArray.
    Series.str
        The string methods are available on Series backed by
        a StringArray.

    Notes
    -----
    StringArray returns a BooleanArray for comparison methods.

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
    <StringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string

    Unlike arrays instantiated with ``dtype="object"``, ``StringArray``
    will convert the values to strings.

    >>> pd.array(['1', 1], dtype="object")
    <NumpyExtensionArray>
    ['1', 1]
    Length: 2, dtype: object
    >>> pd.array(['1', 1], dtype="string")
    <StringArray>
    ['1', '1']
    Length: 2, dtype: string

    However, instantiating StringArrays directly with non-strings will raise an error.

    For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:

    >>> pd.array(["a", None, "c"], dtype="string") == "a"
    <BooleanArray>
    [True, <NA>, False]
    Length: 3, dtype: boolean
    	extensionFcopyboolr1   r@   c                    t          |          }t                                          ||           t          |t	          |                     s|                                  t          j        | | j        t          d                     d S )Nr   rB   r?   )	r    superrG   rO   rL   	_validater
   _ndarrayr.   )r;   valuesr   	__class__s      r<   rG   zStringArray.__init__g  s}    v&&d+++&$t**-- 	NNtT]K4Q4Q4QRRRRRr>   c                   t          | j                  r*t          j        | j        d          st	          d          | j        j        dk    rt	          d| j        j         d          | j        j        dk    r.t          j        | j                            d                     d	S t          j        | j                   d	S )
z*Validate that we only store NA or strings.Tr   z7StringArray requires a sequence of strings or pandas.NArg   z>StringArray requires a sequence of strings or pandas.NA. Got 'z' dtype instead.   KN)	rf   r   r   is_string_arrayrD   ra   r   convert_nans_to_NAravelr:   s    r<   r   zStringArray._validateo  s    t} 	Xc&9$-PT&U&U&U 	XVWWW=(* 	:M': : :  
 =! 	2"4=#6#6s#;#;<<<<<"4=11111r>   Nra   r   ra   Dtype | Nonec               <   |rLt          |t                    r|dk    s1t          |          }t          |t                    r|j        dk    sJ ddlm} t          ||          r5|j        }|j        }t          j
        ||d          }t          j        ||<   nIt          j        |          rt          j        |          }t          j
        |t          j        |          }|                     |           }t#          j        ||t          d                     |S )	Nr/   rB   r   )BaseMaskedArrayF)r   convert_na_value)r=   r   r?   )rO   rK   r   r.   r5   pandas.core.arrays.maskedr   _mask_datar   r   r8   r9   is_pyarrow_arrayr6   r[   ri   r
   rG   )rR   r   ra   r   r   	na_valuesresultrl   s           r<   r   zStringArray._from_sequence  s.    	P*UC00 	PUh5F 	P ''Ee[11Oemx6OOOO======g// 	YI]F,V$QVWWWF *F9 #G,, , (7++,Wz}SWXXXF ;;s++/X9V9V9VWWWr>   c               2    |                      |||          S )Nr   )r   )rR   stringsra   r   s       r<   _from_sequence_of_stringsz%StringArray._from_sequence_of_strings  s     !!'T!BBBr>   c                    t          j        |t                    }t          j        |d d <    | |                              |d          S )Nr`   Fr   )r6   r   rg   r8   r9   astype)rR   shapera   r   s       r<   _emptyzStringArray._empty  sF    %v...Mqqq	s6{{!!%e!444r>   c                    ddl }||                                }| j                                        }d||                                 <   |                    ||d          S )z6
        Convert myself into a pyarrow Array.
        r   NT)rL   from_pandas)rC   r/   r   r   r"   r[   )r;   rL   par   s       r<   __arrow_array__zStringArray.__arrow_array__  sb     	 	99;;D##%%"tyy{{xxTtx<<<r>   c                n    | j                                         }|                                 }d ||<   |d fS rJ   )r   r   r"   )r;   rk   masks      r<   _values_for_factorizez!StringArray._values_for_factorize  s5    m  ""yy{{D	Dyr>   c                <   t          |d          }t          |t          |                     r|j        }t	          | |          }t          j        |          }t          j        |          }|r|st          d          |rEt          |          rt          j
        }nt          |t                    st          d| d          nt          |          st          j        |t                     }t#          |          r%t          j        |d          st          d          t          |          }|                                r0|                                }t          j
        |t          |          <   t+                                          ||           d S )	NT)extract_numpyz)setting an array element with a sequence.zCannot set non-string value 'z' into a StringArray.r`   r   zMust provide strings.)r    rO   rL   r   r!   r   	is_scalarrD   r"   r8   r9   rK   rP   r   r6   asarrayrg   rf   r   anyr   r   __setitem__)r;   keyvalue
scalar_keyscalar_valuer   r   s         r<   r   zStringArray.__setitem__  s   e4888eT$ZZ(( 	#NE!$,,]3''
}U++ 	Jl 	JHIII  	3E{{ "s++ PEPPP  
 !'' 8
57775zz 9#"5eD"I"I"I 9 7888;;Dxxzz 3

%/]d5kk"C'''''r>   r   npt.NDArray[np.bool_]c                2    t          j        | ||           d S rJ   )r   _putmask)r;   r   r   s      r<   r   zStringArray._putmask  s     	dE22222r>   Tc                   t          |          }|| j        k    r|r|                                 S | S t          |t                    r^| j                                        }|                                 }d||<   |                    |j                  }t          ||d          S t          |t                    rY|                                 }|                                 }d||<   |                    |j                  }t          ||d          S t          |t                    rt          j        | ||          S t          j        |t          j                  rX| j                                        }|                                 }d||<   |                    |          }t          j        ||<   |S t%                                          ||          S )Nr   Fr   0)r   ra   r   rO   r   r   r"   r   numpy_dtyper   r   r   r   r   r6   
issubdtypefloatingr7   r   )r;   ra   r   rk   r   r   r   s         r<   r   zStringArray.astype  s   U##DJ 	 #yy{{"K|,, 	-$$&&C99;;DCIZZ 122F59999}-- 	))++C99;;DCIZZ 122F E::::~.. 		!(ud;;;]5"+.. 	-$$&&C99;;DCIZZ&&F6F4LMww~~eT***r>   r   r   axisr0   rK   r   r   AxisInt | Nonec               f    |dv r t          | |          ||          S t          d| d          )N)minmaxr   zCannot perform reduction 'z' with string dtype)getattrrP   )r;   r0   r   r   kwargss        r<   _reducezStringArray._reduce  sK     >! 	A&74&&f4@@@@NTNNNOOOr>   r(   c                    t          j        d|           t          j        |                                 |                                 |          }|                     ||          S Nrv   )r   r   r   )nvvalidate_minr   r   rd   r"   _wrap_reduction_resultr;   r   r   r   r   s        r<   r   zStringArray.min  Z    
F###"&==??V
 
 
 **4888r>   c                    t          j        d|           t          j        |                                 |                                 |          }|                     ||          S r   )r   validate_maxr   r   rd   r"   r   r   s        r<   r   zStringArray.max  r   r>   dropnar,   c                    ddl m}  || j        |                              d          }|j                            | j                  |_        |S )Nr   )value_counts_internal)r   Int64)pandas.core.algorithmsr   r   r   indexra   )r;   r   value_countsr   s       r<   r   zStringArray.value_counts  sV    PPPPPPdmF;;;BB7KK|**4:66r>   deepintc                Z    | j         j        }|r|t          j        | j                   z   S |S rJ   )r   nbytesr   memory_usage_of_objects)r;   r   r   s      r<   memory_usagezStringArray.memory_usage"  s2    % 	GC7FFFFr>   leftr   $NumpyValueArrayLike | ExtensionArraysideLiteral['left', 'right']sorterNumpySorter | Nonenpt.NDArray[np.intp] | np.intpc                x    | j         rt          d          t                                          |||          S )NzOsearchsorted requires array to be sorted, which is impossible with NAs present.)r   r   r   )_hasnarD   r   searchsorted)r;   r   r   r   r   s       r<   r   zStringArray.searchsorted(  sG     ; 	$   ww##%d6#JJJr>   c                   ddl m} t          |t                    r|j        }t          |           t          |          z  }| }t          j        |          skt          |          t          |           k    r/t          dt          |            dt          |                     t          j        |          }||         }|j        t          j        v rSt          j        | j        d          }t           j        ||<    || j        |         |          ||<   t          |          S t          j        t          | j                  d          } || j        |         |          ||<    |||          S )Nr   BooleanArrayz"Lengths of operands do not match: z != rg   r`   r   )pandas.arraysr   rO   rY   r   r"   r   r   rf   rD   r6   r   rQ   r   ARITHMETIC_BINOPS
empty_liker8   r9   zeros)r;   otheropr   r   validr   s          r<   _cmp_methodzStringArray._cmp_method6  sk   ......e[)) 	#NEDzzDKK'}U## 	!5zzSYY&  TTTTE

TT   Ju%%E%LE;#// 		.]4=AAAF%=F4LBt}U3U;;F5Mv&&& Xc$-00???FBt}U3U;;F5M<---r>   convertc           
     $   ddl m} |t          d          }|| j        j        }t          |           }t          j        |           }t          |          st          |          rt          |          rt          }n|}t          |          }	|	rd}n'|t          j        d          k    rt          |          }t          j        |||                    d          d|t          j        |          	          }
|	sd|d d <    ||
|          S t          |          rJt!          |          s;t          j        |||                    d          d|
          }
t#          |
          S t          j        |||                    d                    S )Nr   r   rB   r?   ry   r   uint8F)r   r=   ra   )r   r=   )r   r   r.   ra   r=   r"   r6   r   r   r   r   r   r   map_infer_maskviewr   r   rY   )r;   fr=   ra   r   r   r   rk   constructorna_value_is_nar   s              r<   _str_mapzStringArray._str_map\  s    	/..... 	2111E 	+z*HDzzjE"" (	BmE&:&: (	B&& +**!(^^N *"(6*** *>>'		'""! huoo
 
 
F "  QQQ;vt,,,U## 	BOE,B,B 	B'Q		'**EH  F v&&& %c1dii.@.@AAAr>   )F)r   r   r1   r@   )ra   r   r   r   )r1   rY   rJ   rn   )r   r   r1   r@   )T)r   r   )r0   rK   r   r   r   r   )NT)r   r   r1   r(   )r   r   r1   r,   )r   r   r1   r   )r   N)r   r   r   r   r   r   r1   r   )NNT)ra   r   r   r   )rQ   ro   rp   rq   _typrG   r   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _arith_methodr8   r9   _str_na_valuer  __classcell__)r   s   @r<   rY   rY     s       N Nb DS S S S S S S2 2 2  >BQV           [ < /3%C C C C C [C
 5 5 5 [5
= = = =  ( ( ( ( ( (B3 3 3 3+ + + + + + +D ,0P P P P P P9 9 9 9 99 9 9 9 9         	S	$%% *0%)	K K K K K K &%K. . .<  M MM MQ5B 5B 5B 5B 5B 5B 5B 5B 5Br>   rY   )H
__future__r   typingr   r   r   numpyr6   pandas._configr   pandas._libsr   r	   r8   pandas._libs.arraysr
   pandas._libs.libr   pandas.compatr   pandas.compat.numpyr   r   pandas.util._decoratorsr   pandas.core.dtypes.baser   r   r   pandas.core.dtypes.commonr   r   r   r   r   r   pandas.corer   pandas.core.array_algosr   pandas.core.arrays.baser   pandas.core.arrays.floatingr   r   pandas.core.arrays.integerr   r   pandas.core.arrays.numpy_r   pandas.core.constructionr    pandas.core.indexersr!   pandas.core.missingr"   rC   pandas._typingr#   r$   r%   r&   r'   r(   r)   r*   r+   pandasr,   r.   r]   rY   rv   r>   r<   <module>r     s   " " " " " "              % % % % % %        . - - - - - 0 0 0 0 0 0 . . . . . . . . . . . . ' ' ' ' ' '         
                      5 5 5 5 5 5 2 2 2 2 2 2               : 9 9 9 9 9 2 2 2 2 2 2 4 4 4 4 4 4 $ $ $ $ $ $ NNN
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  s  s  s  s  s ' s  s  s l8 8 8 8 8n 8 8 8*~B ~B ~B ~B ~B/#6 ~B ~B ~B ~B ~Br>   