# These blocks with the play button on the left are code cells. If you are working in CodaLab, you can run these cells, or copy/paste the code into your IDE to see the outcome.

# Assign the integer 10 to the variable x, then show its value and its type.
x = 10
print(x)
print(type(x))

# Assign the string 'python' to the variable y.
y = 'python'
print(y)

# A variable can be reassigned at any time; x now holds 20.
x = 20
print(x)


# The two Boolean values are True and False.
x = True
y = False

# Print the type of x.
print(type(x))

# 'is' is the identity operator; here it checks whether x is the object True
# and returns a Boolean value.
print(x is True)

# Logical AND is a binary operation (two operands): it returns True only if
# both operands are True, otherwise False.
print(x and y)

# Logical OR is a binary operation: it returns False only if both operands
# are False, otherwise True.
print(x or y)

# Logical NOT is a unary operation (one operand): it returns the opposite of
# its operand.
print(not x)


# Assign the integer 10 to x and print its type.
x = 10
print(type(x))

# Two equivalent ways to increment x by one.
x = x + 1
x += 1

# Reassign x with the value of y.
y = 20
x = y
print(x)

# The modulo operator returns the remainder of a division.
print(y % x)


# Assign the float 2.4 to x and print its type.
x = 2.4
print(type(x))

# Increment x by one, then by 1.3 using the shorthand form of the same operation.
x = x + 1
x += 1.3

# y is an integer.
y = 2
print(type(y))

# x *= y is shorthand for x = x * y.
x *= y

# Multiplying a float by an int gives a float.
print(type(x))


x = 'sample string 1'

# Double quotes can be embedded in a single-quoted string.
y = 'sample string "2"'

# Print the type of x.
print(type(x))

# Adding two strings joins them (concatenation).
print(x + ' and ' + y)

# capitalize() returns a copy of the string with its first character in upper case.
print(x.capitalize())

# Number of characters in the string.
print(len(x))

# replace() returns a copy with every 'sample' replaced by 'new'.
print(x.replace('sample', 'new'))

# Two ways to format strings: f-strings and str.format().
print(f'Python strings can be formatted in different ways like this <{x}> and this <{y}>.')
print('This is another way to format {0} and {1}.'.format(x, y))


# Instantiate a list.
list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']
print(type(list_of_bio_types))

# append() adds one item to the end of the list (items of a list may have different datatypes).
list_of_bio_types.append('symptom')
print(list_of_bio_types)

# pop() removes the last item.
list_of_bio_types.pop()
print(list_of_bio_types)

# Length of the list.
print(len(list_of_bio_types))

# Python uses 0-based indexing: index 0 = first item, index 1 = second item, ...
print(list_of_bio_types[0])

# A negative index counts from the end; -1 is the last item.
print(list_of_bio_types[-1])
print('-' * 20)

# List slicing
# Items from index 0 up to index 2 (exclusive).
print(list_of_bio_types[:2])

# Items from index 1 up to index 3 (exclusive).
print(list_of_bio_types[1:3])

# Items from index 1 up to the second-to-last item (exclusive).
print(list_of_bio_types[1:-2])

# A slice can be assigned to, replacing several values at once.
list_of_bio_types[:2] = ['cell', 'vitamin']
print(list_of_bio_types)
print('-' * 20)


# range is a built-in that produces a sequence of numbers; here from 0 up to
# 10 (exclusive) with a step of 2. The start and step parameters are optional.
print(range(0, 10, 2))

# The result is an object of type range; cast it to a list to see the numbers.
print(list(range(0, 10, 2)))

# List comprehension: an easy way to generate lists.
print([number + 100 for number in range(10)])
print('-' * 20)

# General indexing format: [begin index: end index: step]; all three are optional.
# From index 2 to the end, every other item.
print(list_of_bio_types[2::2])

# The whole list, backward.
print(list_of_bio_types[::-1])


# Instantiate an empty dictionary.
sample_dictionary = dict()

# Instantiate a dictionary with key: value pairs.
sample_dictionary = {'1': 'one', '2': 'two', '3': 'three', '4': 'four', '5': 'five'}
print(type(sample_dictionary))

# Assigning to a key adds a new item, or changes the item if the key already exists.
sample_dictionary['6'] = 'six'
print(sample_dictionary)

# Length of the dictionary.
print(len(sample_dictionary))

# Access the value of a key; a missing key raises a KeyError.
print(sample_dictionary['1'])

# get() returns the specified default value (None here) when the key is missing.
print(sample_dictionary.get('10', None))

# Dict comprehension: an easy way to generate dictionaries.
print({str(number): number for number in range(10)})


# Instantiate a tuple.
x = (4, 5, 4)
print(type(x))

# index() returns the position of the first matching item.
print(x.index(5))
print(len(x))

# Unpacking: assign every item of the tuple to its own variable in one line.
x1, x2, x3 = x
print(x1, x2, x3)
print(x[1])

# Tuples are immutable: assigning to an item raises a TypeError. The error is
# caught and printed so that the rest of the script keeps running.
try:
    x[1] = 10
except TypeError as error:
    print('TypeError:', error)


# Instantiate a set.
x = {4, 5, 6}
print(type(x))

# Adding an item that already exists does nothing.
x.add(6)
print(x)

# remove() deletes an item from the set.
x.remove(6)
print(x)

# 'in' is the membership operator; it checks whether x contains 8 and returns
# a Boolean value accordingly.
print(8 in x)

# union() returns the items found in either set, intersection() the items
# found in both.
y = {5, 9, 10}
print(x.union(y))
print(x.intersection(y))


# List of tuples.
x = [(4, 5), (6, 7), (8, 9)]

# List of dictionaries.
x = [
    {4: 'four', 5: 'five'},
    {6: 'six', 7: 'seven'},
    {8: 'eight', 9: 'nine'},
]

# Dictionary with tuples as keys.
x = {(6, 7): 'six_seven', (7, 8): 'seven_eight'}

# Dictionary with tuples as keys and lists as values. Dictionary keys should be
# immutable (not changeable), so tuples can be keys but lists can not.
x = {
    (6, 7): ['six', 'seven'],
    (7, 8): ['seven', 'eight'],
}


# Single condition
# The condition can be any expression that returns a Boolean value, either
# True or False.
condition = True
if condition:
    print('condition is True')

x = 7
y = 7
if x > y:
    print('{0} is bigger than {1}'.format(x, y))

# Two branches: else runs when the condition is False.
if x > y:
    print('{0} is bigger than {1}'.format(x, y))
else:
    print('{0} is smaller than or equal to {1}'.format(x, y))


# More than two branches: elif adds further conditions before the final else.
if x > y:
    print('{0} is bigger than {1}'.format(x, y))
elif x < y:
    print('{0} is smaller than {1}'.format(x, y))
else:
    print('{0} is equal to {1}'.format(x, y))


list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']

# Iterating over the items of a list.
for typ in list_of_bio_types:
    print(typ)

print('-' * 30)
# The built-in function enumerate yields each item together with its index.
# The loop variable is called 'index' rather than 'id' so that the built-in
# function id() is not shadowed for the rest of the script.
for index, typ in enumerate(list_of_bio_types):
    print('Index: {}, type: {}'.format(index, typ))

# Dictionary comprehension is an efficient way of generating dictionaries.
dictionary_of_bio_types = {index: typ for index, typ in enumerate(list_of_bio_types)}

print('-' * 30)
# Iterating over dictionary keys (keys() returns a view of the dictionary's keys).
for key in dictionary_of_bio_types.keys():
    print('Key: {}, value: {}'.format(key, dictionary_of_bio_types[key]))

print('-' * 30)
# Iterating over dictionary values (values() returns a view of the dictionary's values).
for value in dictionary_of_bio_types.values():
    print('Value: {}'.format(value))

print('-' * 30)
# Iterating over keys and values together (items() returns a view of (key, value) tuples).
for key, value in dictionary_of_bio_types.items():
    print('Key: {}, value: {}'.format(key, value))


list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']

# A list with at least one item evaluates as True. Once the last item has been
# removed, the empty list evaluates as False and the while loop stops.
while list_of_bio_types:
    # pop() removes and returns the last item of the list.
    print(list_of_bio_types.pop())


list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']

# break leaves the loop entirely: only the items before 'chemical' are printed.
for typ in list_of_bio_types:
    if typ == 'chemical':
        break
    print(typ)


list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']

# continue skips the rest of the current iteration: every item except
# 'chemical' is printed.
for typ in list_of_bio_types:
    if typ == 'chemical':
        continue
    print(typ)


list_of_bio_types = ['gene', 'drug', 'chemical', 'virus', 'illness']


# pass does nothing; it is a placeholder where a statement is required.
for typ in list_of_bio_types:
    pass



# Open a file and write some lines in it. 'w' means 'write a text file'; other
# common modes are 'wb' for write binary, 'r' for read text, 'rb' for read binary, ...
file_obj = open("example.txt", "w")
print("Name of the file: ", file_obj.name)
file_obj.write("This is the first line in my first file.\n")
file_obj.write("This is the second line in my first file.")

# It is best practice to close the file once you have finished working on it.
file_obj.close()

# With this structure there is no need to close file_obj manually.
with open("example.txt", "r") as file_obj:
    # File objects have a readlines() function, which does exactly what the name suggests.
    for line in file_obj.readlines():
        # rstrip() removes the newline character at the end of each line.
        print(line.rstrip())


def my_function(input_arguments):
    """
    Template of a function.

    A docstring describes what the function does, what the possible inputs
    are (maybe none) and what the possible outputs are (maybe none).
    This template ignores input_arguments and always returns the same string.
    """
    # The body of the function goes here.
    return 'if anything to return'

# Function names should be as descriptive as possible.
def compute_second_power(x):
    '''
    Docstring: compute the second power of a number.

    input: x an integer
    return: the second power of x
    '''
    # What the function does, returned to the caller in one step.
    return x**2


# Call the function.
print(compute_second_power(x=2))

# Functions may have optional arguments. An optional argument is given a default
# value when the function is defined; if the caller does not provide a value,
# the default value is used.
def compute_power(x, power=2):
    '''
    Raise x to the given power.

    inputs:
        x: an integer
        power: an integer (optional, 2 by default)
    return: x raised to power
    '''
    return x**power


print(compute_power(x=2))
print(compute_power(x=2, power=3))

# Functions may carry type annotations, which help static type checkers and
# improve readability.
def compute_power(x:int, power:int=2)->int:
    '''
    Raise x to the given power.

    inputs:
        x: an integer
        power: an integer (optional, 2 by default)
    return: x raised to power
    '''
    return x**power


print(compute_power(x=2))
print(compute_power(x=2, power=3))


from numpy.linalg import svd # this line load svd(Singular Value Dicomposition) as a function from linalg (linear algebra) module within the numpy library


import numpy as np                                                              # making sure that numpy is installed and imported properly; we usually use 'as' to use a library with a short nickname, since we need to repeat it so many times, hence using a nickname is easier

# Scalar value.
scalar_variable = 2.0

# Create a one-dimensional array (i.e., a vector) from a list. All items of an
# array share the same type (in contrast to a Python list).
vector = np.array([1, 2, 3, 4])
print('type of array: ', type(vector))
print('data type of array: ', vector.dtype)

# The shape of an array is a tuple with the size of each dimension; the number
# of dimensions is the 'rank' of the array. Here the rank is one and the size
# of the only dimension is 4.
print('shape of an array: ', vector.shape)
print('second item of the array: ', vector[1])
print('-' * 30)

# Create a two-dimensional array (i.e., a matrix) from a list of lists.
matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print('number of dimensions: ', matrix.ndim)
print('shape: ', matrix.shape)
print(matrix[1])
print('-' * 30)

# Create a three-dimensional array from nested lists.
tensor = np.array([[[1, 2, 3, 4]], [[5, 6, 7, 8]]])
print('shape: ', tensor.shape)
print(tensor[1])
print('-' * 30)

# Construct an array with 10 items equally spaced between 1 and 5.
vector = np.linspace(1, 5, 10)
print(vector)
print('-' * 30)

# Repeat the array 5 times as rows (axis=0).
array = [[1, 2, 3, 4]]
repeated_array = np.repeat(array, repeats=5, axis=0)
print('shape', repeated_array.shape)
print(repeated_array)


import numpy as np

# The data type is inferred implicitly from the items.
array = np.array([1, 2, 3, 4])
print(array.dtype)
print('-' * 30)

# Inferred implicitly as well: a single float among the items changes the data
# type of the entire array to float.
array = np.array([1.0, 2, 3, 4])
print(array.dtype)
print('-' * 30)

# The data type can also be declared explicitly.
array = np.array([1, 2, 3, 4], dtype=np.int64)
print(array.dtype)
print('-' * 30)

# astype() returns the array converted to another dtype.
array = array.astype(np.float32)
print(array.dtype)
print('-' * 30)


import numpy as np

# A slice of an array is a view on the original data, not a separate copy.
array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
sub_array = array[:, 2:3]
print(sub_array)
sub_array[0, 0] = 10
print(sub_array)

# Changing a value in the sub-array changes the original array as well.
print(array)
print('-' * 30)

# copy() makes a separate copy, so the original array stays untouched.
array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
sub_array = array[:, 2:3].copy()
print(sub_array)
sub_array[0, 0] = 10
print(sub_array)
print(array)
print('-' * 30)


import numpy as np
# Print floats in fixed-point notation, so very small numbers show up as zero.
np.set_printoptions(suppress=True)

# 5 by 2 matrix of zeros.
zero_array = np.zeros(shape=(5, 2))
print('zero array ', zero_array)
print('-' * 30)

# 5 by 2 matrix of ones.
one_array = np.ones(shape=(5, 2))
print('one array ', one_array)
print('-' * 30)

# 2 by 2 matrix of 5s.
full_array = np.full(shape=(2, 2), fill_value=5)
print('full array ', full_array)
print('-' * 30)

# full_like takes the shape and data type of one_array, but fills with 5.
full_array = np.full_like(one_array, fill_value=5)
print('full array ', full_array)
print('-' * 30)

# 2 by 2 identity matrix.
identity_array = np.eye(N=2)
print('identity array ', identity_array)
print('-' * 30)

# 2 by 3 matrix initialized with decimal random numbers.
random_array = np.random.random((2, 3))
print('random array ', random_array)
print('-' * 30)

# 3 by 3 matrix initialized with random integers from 2 to 9 (the upper bound,
# 10, is exclusive). A random matrix can happen to be singular, in which case
# it has no inverse and np.linalg.inv raises LinAlgError, so draw again until
# the matrix has full rank.
random_array = np.random.randint(2, 10, (3, 3))
while np.linalg.matrix_rank(random_array) < random_array.shape[0]:
    random_array = np.random.randint(2, 10, (3, 3))
print('random array ', random_array)
print('-' * 30)

inverse_of_random_array = np.linalg.inv(random_array)
print('inverse array ', inverse_of_random_array)
print()

# An array multiplied with its inverse yields an identity matrix (more on
# matrix multiplication in the next block).
print(np.dot(random_array, inverse_of_random_array))


import numpy as np

# Use the dtype argument to explicitly use a particular data type.
vector = np.array([1, 2, 3, 4], dtype=np.float32)
matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float32)

print('-' * 20, ' vecotr ', '-' * 20)
print(vector)
print()

print('-' * 20, ' matrix ', '-' * 20)
print(matrix)
print()

# Element-wise addition, equivalent to matrix + matrix with the overloaded +.
print('-' * 20, ' matrix matrix addition ', '-' * 20)
print(np.add(matrix, matrix))
print()

# Element-wise addition expects two arrays of the same dimension; here NumPy
# uses broadcasting, meaning that it behaves as if the vector were stacked
# into a matrix of matching shape on the fly.
print('-' * 20, ' matrix vector addition ', '-' * 20)
print(np.add(matrix, vector))
print()

# Element-wise multiplication, equivalent to matrix * matrix with the overloaded *.
print('-' * 20, ' matrix matrix multiplication ', '-' * 20)
print(np.multiply(matrix, matrix))
print()

# Element-wise square root.
print('-' * 20, ' elementwise square root ', '-' * 20)
print(np.sqrt(matrix))
print()

# Max of an array: along rows (axis=1), along columns (axis=0), or globally
# (without axis).
print('-' * 20, ' matrix max ', '-' * 20)
print(np.max(matrix, axis=0))
print()


import numpy as np

# Use the dtype argument to explicitly use a particular data type.
vector = np.array([1, 2, 3, 4], dtype=np.float32)
matrix = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.float32)


# Inner product of two vectors.
print('-' * 20, ' inner product ', '-' * 20)
print(np.dot(vector, vector))
print()

# Matrix vector multiplication, yields a vector.
print('-' * 20, ' matrix vector multiplication ', '-' * 20)
print(np.dot(matrix, vector))
print()

# Matrix matrix multiplication, yields a matrix. The second matrix has to be
# transposed before the multiplication so that the shapes match. Other shape
# manipulations are possible with the reshape function.
print('-' * 20, ' matrix matrix multiplication ', '-' * 20)
print(np.dot(matrix, np.transpose(matrix)))
print()


print('matrix shape', matrix.shape)
matrix_reshaped = matrix.reshape((4, 2))
print('matrix new shape', matrix_reshaped.shape)
print()

# Another way to multiply two matrices.
print('-' * 20, ' matrix matrix multiplication ', '-' * 20)
print(np.matmul(matrix, np.transpose(matrix)))
print()

# Computing the determinant of an identity matrix.
print('-' * 20, ' identity matrix determinant ', '-' * 20)
print(np.linalg.det(np.eye(3)))
print()

# Stacking vectors or matrices vertically.
print('-' * 20, ' stacking vertically ', '-' * 20)
print(np.vstack([vector, vector, vector, vector]))
print()

# Stacking vectors or matrices horizontally.
print('-' * 20, ' stacking horizontally ', '-' * 20)
print(np.hstack([vector, vector, vector]))
print()


import numpy as np
rank_2_array_1 = np.array([[1, 2, 3, 4], [1, 2, 3, 4]])
print(rank_2_array_1.shape)

# Two indexes for two dimensions: the first two rows, the last two columns.
rank_2_array_2 = rank_2_array_1[:2, -2:]
print(rank_2_array_2.shape)
print(rank_2_array_2)


import numpy as np
rank_2_array_1 = np.array([[1.1, 2.2, 3.3, 4.4], [5.5, 6.6, 7.7, 8.8]])
print(rank_2_array_1.shape)

# Indexing with [[indexes of rows], [indexes of columns]] picks one item per
# (row, column) pair; e.g., 8.8 is at column index 3 of row index 1.
rank_2_array_2 = rank_2_array_1[[1, 0, 0, 1], [3, 1, 0, 2]]
print(rank_2_array_2.shape)
print(rank_2_array_2)


import numpy as np
rank_2_array_1 = np.array([[1.1, 2.2, 3.3, 4.4], [5.5, 6.6, 7.7, 8.8]])
print(rank_2_array_1.shape)

# All columns of the second row, so the shape is (4, ).
rank_1_array_1 = rank_2_array_1[1, :]
print(rank_1_array_1.shape)

# All rows of the second column, so the shape is (2, ).
rank_1_array_2 = rank_2_array_1[:, 1]
print(rank_1_array_2.shape)


import numpy as np

distribution_1 = np.array([0.1, 0.49, 0.87, 0.59, 0.52, 0.42, 0.34, 0.7])
print(distribution_1.shape)
print('-' * 30)

# The condition is checked for each item; the result is an array of the same
# size holding Boolean values.
distribution_2 = distribution_1 > 0.5
print(distribution_2.shape)
print(distribution_2)
print('-' * 30)

# A Boolean array used as an index returns the values at the positions that are True.
distribution_3 = distribution_1[distribution_1 > 0.5]
print(distribution_3.shape)
print(distribution_3)

print('-' * 30)
# Boolean conditions can be combined into more complex ones.
distribution_4 = distribution_1[(distribution_1 > 0.5) & (distribution_1 < 0.6)]
print(distribution_4.shape)
print(distribution_4)


from skimage import io                                                                            # import io module from skimage library
from matplotlib.pylab import plt                                                                  # import pyplot module 
# Call the imread function to get the image matrix. An RGB image is a
# 3-dimensional (height, width, colors) matrix with values between 0-255 for
# each color.
image_matrix = io.imread('https://drive.google.com/uc?id=1Bsmk_7b4dBJPr1y-bQfLttYQPiMs-q_o')
print(image_matrix.shape)

# Visualize the image.
plt.imshow(image_matrix)


# Slicing: rows 100 to 499 and columns 100 to 399.
plt.imshow(image_matrix[100:500, 100:400])


# All rows, all columns, but colors reversed.
plt.imshow(image_matrix[:, :, ::-1])


# All columns and colors, but rows reversed.
plt.imshow(image_matrix[::-1, :, :])


# All columns and colors, but only every other row.
plt.imshow(image_matrix[::2, :, :])


# Element-wise addition.
# NOTE(review): assumes image_matrix holds 8-bit integers (0-255) — confirm.
# Adding directly to 8-bit values wraps around (250 + 10 becomes 4), so widen
# the data type first and clip the result back into the valid 0-255 range.
brighter_image_matrix = np.clip(image_matrix.astype(np.int16) + 10, 0, 255)
plt.imshow(brighter_image_matrix)


# Use np.where to find the pixels whose values are greater than 200 and replace
# them with white (255); replace the rest with black (0).
plt.imshow(np.where(image_matrix > 200, 255, 0))


import pandas as pd

# Instantiate a new series from 5 equally spaced values between 0 and 10.
series_1 = pd.Series(np.linspace(0, 10, 5), name='random_series')
print('series_1 shape ', series_1.shape)
print()

# head(n) shows the first n items of a series.
print('series_1 head \n', series_1.head(3))
print()

# tail(n) shows the last n items of a series.
print('series_1 tail \n', series_1.tail(3))
print()

# Mathematical operations can be applied to a series.
print('series_1 max ', np.max(series_1))
print()

# Basic descriptive statistics on the values.
print('series_1 statistical description \n', series_1.describe())
print()

# Setting the index manually.
series_1.index = ['first', 'second', 'third', 'fourth', 'fifth']
print()

print('series_1 with index \n', series_1.head())
print()

# Items can now be accessed by their index label.
print(series_1['fifth'])


import pandas as pd
# pandas has many methods for reading data into dataframe, including read_csv, read_excel, read_pickle. 
# Here we use read csv. csv (comma separated values) are text files where each line is a row in which columns are (often) separated with a comma.
# if columns are separated with something else like tab ('\t'), argument delimiter='\t' should be added to read_csv function

df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1582781719&period2=1614404119&interval=1d&events=history&includeAdjustedClose=true')
print(df.shape)
print(df.head())

# Use 'iloc' to get only the first 10 rows.
new_df = df.iloc[:10]

# Two ways to list the columns.
print(list(new_df))
print(new_df.columns.values)
print('-' * 30)

# Get only one column. new_df.Open is another way to do so, as long as the
# column name does not contain a space.
print(new_df['Open'])
print(new_df.Open)
print('-' * 30)

# Particular columns in a particular order.
print(new_df[['Close', 'Open']])
print('-' * 30)


# Dataframe indexing
# iloc for integer indexing; only the first 4 rows and the first two columns.
print(new_df.iloc[:4, :2])
print('-' * 30)

# loc for indexing with labels; here the index labels of rows 1 to 4 and two
# column names.
print(new_df.loc[new_df.index[1:5], ['Open', 'Close']])
print('-' * 30)

# loc for Boolean search.
print(new_df.loc[(new_df.Open > 66) & (new_df.Open < 68)])
print('-' * 30)

# Select only the useful columns. Another way is to drop the undesired columns,
# which gives the same output:
#     new_df.drop(columns=['Date', 'Adj Close', 'Volume'])
# copy() gives new_df its own data, so that adding a column further down
# modifies an independent dataframe rather than something derived from a slice
# of df (which makes pandas issue a SettingWithCopyWarning).
new_df = new_df[['Open', 'High', 'Low', 'Close']].copy()
print(new_df)
print('-' * 30)

# Changing the column names.
new_df.columns = ['O', 'H', 'L', 'C']
print(new_df)
print('-' * 30)

# Add a new column.
new_df['O_C'] = new_df['O'] - new_df['C']
print(new_df)
print('-' * 30)

# Basic descriptive statistics for the dataframe.
print(new_df.describe())
print('-' * 30)

# Sort the dataframe rows based on the Open values.
new_df = new_df.sort_values('O')
print(new_df)
print('-' * 30)

# With all the changes made to the data, it is time to save them so that next
# time the cleaned data is available. Writing the index into the file is not
# required, hence index=False.
new_df.to_csv('updated_data.csv', index=False)


import pandas as pd
# We can continue by loading the file that we saved earlier

new_df = pd.read_csv('updated_data.csv')
print(new_df)
print('-' * 30)

# Resetting the index without creating a new dataframe (inplace=True).
new_df.reset_index(drop=True, inplace=True)
print(new_df)
print('-' * 30)

# Iterating over rows; each iteration yields the index and the row.
for index, row in new_df.iterrows():
    print(index, row['O'])
print('-' * 30)

# Visualizations can be made within pandas.
new_df.plot(kind='scatter', x='O', y='C')

A Gentle Introduction to Python¶

Contents¶

Introduction¶

Setting up¶

Step 1: Installing Python¶

Step 2: Installing a virtual environment¶

Step 3: Installing Integrated Development Environment¶

Step 4: Git and GitHub¶

Basics¶

Variables¶

Data types¶

Operators¶

Conditionals¶

Loops¶

Loop control¶

File Input/Output¶

Functions¶

Modules and Libraries¶

Conclusion¶

Useful libraries¶

Numpy¶

Data structure:¶

Data types:¶

Specific matrices:¶

Operations:¶

Slicing:¶

Example:¶

Pandas¶

Series:¶

Dataframe:¶