From 8a51a5633f7d2c29617347001c5a1347b817ce0a Mon Sep 17 00:00:00 2001
From: Jonathan Besomi <43236409+JonathanBesomi@users.noreply.github.com>
Date: Mon, 27 Apr 2020 17:23:12 +0200
Subject: [PATCH] Updating API to v1.0.4

---
 website/docs/api-preprocessing.md  | 130 +++++++++++++++++++++++------
 website/docs/api-representation.md |  34 +++++++-
 website/docs/api-visualization.md  |  30 +++++--
 3 files changed, 160 insertions(+), 34 deletions(-)

diff --git a/website/docs/api-preprocessing.md b/website/docs/api-preprocessing.md
index 0e2ed23a..ebb26df6 100644
--- a/website/docs/api-preprocessing.md
+++ b/website/docs/api-preprocessing.md
@@ -5,7 +5,63 @@ title: Preprocessing
 
 # Preprocessing
 
-Utility functions to clean text-columns of a dataframe.
+Preprocess a text-based Pandas DataFrame.
+
+
+### texthero.preprocessing.clean(s, pipeline=None)
+Clean a pandas Series by applying a preprocessing pipeline.
+
+For information regarding a specific function, type `help(texthero.preprocessing.func_name)`.
+The default preprocessing pipeline is the following:
+
+> 
+> * fillna
+
+
+> * lowercase
+
+
+> * remove_digits
+
+
+> * remove_punctuation
+
+
+> * remove_diacritics
+
+
+> * remove_stop_words
+
+
+> * remove_whitespace
+
+
+* **Return type**
+
+    `Series`
+
+
+
+### texthero.preprocessing.do_stemm(input, stem='snowball')
+Stem series using either NLTK ‘porter’ or ‘snowball’ stemmers.
+
+Not in the default pipeline.
+
+
+* **Parameters**
+
+    
+    * **input** (`Series`) – 
+
+
+    * **stem** – Can be either ‘snowball’ or ‘porter’
+
+
+
+* **Return type**
+
+    `Series`
+
 
 
 ### texthero.preprocessing.fillna(input)
@@ -19,13 +75,18 @@ Replace not assigned values with empty spaces.
 
 
 ### texthero.preprocessing.get_default_pipeline()
-Default pipeline:
+Return a list containing all the methods used in the default cleaning pipeline.
+
+Return a list with the following functions:
 
     
-    * remove_lowercase
+    * fillna
+
 
+    * lowercase
 
-    * remove_numbers
+
+    * remove_digits
 
 
     * remove_punctuation
@@ -34,17 +95,20 @@ Default pipeline:
     * remove_diacritics
 
 
-    * remove_white_space
+    * remove_stop_words
 
 
-    * remove_stop_words
+    * remove_whitespace
+
+
+* **Return type**
 
+    []
 
-    * stemming
 
 
 ### texthero.preprocessing.lowercase(input)
-Lowercase all cells.
+Lowercase all text.
 
 
 * **Return type**
@@ -54,7 +118,7 @@ Lowercase all cells.
 
 
 ### texthero.preprocessing.remove_diacritics(input)
-Remove diacritics (as accent marks) from input
+Remove all diacritics.
 
 
 * **Return type**
@@ -64,7 +128,7 @@ Remove diacritics (as accent marks) from input
 
 
 ### texthero.preprocessing.remove_digits(input, only_blocks=True)
-Remove all digits.
+Remove all digits from a series and replace them with a single space.
 
 
 * **Parameters**
@@ -76,31 +140,43 @@ Remove all digits.
     * **only_blocks** (*bool*) – Remove only blocks of digits. For instance, hel1234lo 1234 becomes hel1234lo.
 
 
+### Examples
 
-* **Returns**
-
-    
+```python
+>>> import texthero
+>>> import pandas as pd
+>>> s = pd.Series(["texthero 1234 He11o"])
+>>> texthero.preprocessing.remove_digits(s)
+0    texthero He11o
+dtype: object
+>>> texthero.preprocessing.remove_digits(s, only_blocks=False)
+0    texthero   He o
+dtype: object
+```
 
 
 * **Return type**
 
-    pd.Series
+    `Series`
 
 
-### Examples
 
-```python
->>> import texthero
->>> s = pd.Series(["remove_digits_s remove all the 1234 digits of a pandas series. H1N1"])
->>> texthero.preprocessing.remove_digits_s(s)
-u'remove_digits_s remove all the digits of a pandas series. H1N1'
->>> texthero.preprocessing.remove_digits_s(s, only_blocks=False)
-u'remove_digits_s remove all the digits of a pandas series. HN'
-```
+### texthero.preprocessing.remove_punctuation(input)
+Remove `string.punctuation` (``!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~``).
 
+Replace it with a single space.
 
-### texthero.preprocessing.remove_punctuation(input)
-Remove punctuations from input
+
+* **Return type**
+
+    `Series`
+
+
+
+### texthero.preprocessing.remove_stop_words(input)
+Remove all stop words using NLTK stopwords list.
+
+List of stopwords: NLTK ‘english’ stopwords, 179 items.
 
 
 * **Return type**
@@ -109,8 +185,8 @@ Remove punctuations from input
 
 
 
-### texthero.preprocessing.remove_whitespaces(input)
-Remove any type of space between words.
+### texthero.preprocessing.remove_whitespace(input)
+Remove all white spaces between words.
 
 
 * **Return type**
diff --git a/website/docs/api-representation.md b/website/docs/api-representation.md
index 6132437a..fd3f28f5 100644
--- a/website/docs/api-representation.md
+++ b/website/docs/api-representation.md
@@ -3,4 +3,36 @@ id: api-representation
 title: Representation
 ---
 
-Text representation
+Map words into vectors using different algorithms such as TF-IDF, word2vec or GloVe.
+
+
+### texthero.representation.do_count(s, max_features=100)
+Represent input on a Count vector space.
+
+
+### texthero.representation.do_dbscan(s, eps=0.5, min_samples=5, metric='euclidean', metric_params=None, algorithm='auto', leaf_size=30, p=None, n_jobs=None)
+Perform DBSCAN clustering.
+
+
+### texthero.representation.do_kmeans(s, n_clusters=5, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=-1, algorithm='auto')
+Perform K-means clustering algorithm.
+
+
+### texthero.representation.do_meanshift(s, bandwidth=None, seeds=None, bin_seeding=False, min_bin_freq=1, cluster_all=True, n_jobs=None, max_iter=300)
+Perform mean shift clustering.
+
+
+### texthero.representation.do_nmf(s, n_components=2)
+Perform non-negative matrix factorization.
+
+
+### texthero.representation.do_pca(s, n_components=2)
+Perform PCA.
+
+
+### texthero.representation.do_tfidf(s, max_features=100)
+Represent input on a TF-IDF vector space.
+
+
+### texthero.representation.do_tsne(s, vector_columns, n_components, perplexity, early_exaggeration, learning_rate, n_iter)
+Perform TSNE.
diff --git a/website/docs/api-visualization.md b/website/docs/api-visualization.md
index 4c5434d2..b192714a 100644
--- a/website/docs/api-visualization.md
+++ b/website/docs/api-visualization.md
@@ -5,15 +5,23 @@ title: Visualization
 
 # Visualization
 
-Text visualization
+Visualize insights and statistics of a text-based Pandas DataFrame.
 
 
 ### texthero.visualization.scatterplot(df, col, color=None, hover_data=None, title='')
-Scatterplot of df[column].
+Show a scatterplot using Plotly's Python `scatter` plot.
 
-The df[column] must be a tuple of 2d-coordinates.
 
-Usage example:
+* **Parameters**
+
+    
+    * **df** – 
+
+
+    * **col** – The name of the column of the DataFrame used for x and y axis.
+
+
+### Examples
 
 ```python
 >>> import texthero
@@ -22,8 +30,18 @@ Usage example:
 ```
 
 
-### texthero.visualization.top_words(s, normalize=True)
-Return most common words of a given series sorted from most used.
+### texthero.visualization.top_words(s, normalize=False)
+Return most common words.
+
+
+* **Parameters**
+
+    
+    * **s** (`Series`) – 
+
+
+    * **normalize** – Default is False. If set to True, returns normalized values.
+
 
 
 * **Return type**